File size: 3,301 Bytes
e6eef41
 
1a55c15
2a690cd
37ed8ac
 
 
 
 
 
 
 
 
c4dc6bd
e6eef41
956bcba
1a55c15
 
 
37ed8ac
 
 
 
1a55c15
45f2d3a
1a55c15
df6ff7d
45f2d3a
9d68039
45f2d3a
 
9d68039
37ed8ac
 
 
 
 
 
 
 
 
 
 
9d68039
45f2d3a
1a55c15
45f2d3a
1a55c15
45f2d3a
1a55c15
45f2d3a
 
1a55c15
 
37ed8ac
45f2d3a
 
 
 
 
1a55c15
45f2d3a
 
37ed8ac
1a55c15
45f2d3a
1a55c15
 
 
37ed8ac
1a55c15
 
37ed8ac
1a55c15
37ed8ac
e6eef41
 
 
 
1a55c15
37ed8ac
1a55c15
 
37ed8ac
 
e6eef41
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import gradio as gr
import pandas as pd
import difflib

# Read the phrase table from disk. Any failure (missing file, parse error,
# or a problem while reporting the columns) is reported on stdout and the
# app continues with an empty table that has the expected column names.
try:
    df = pd.read_csv("dialect_data.csv")
    print("CSV loaded successfully!")
    print("Available columns:", df.columns.tolist())
except Exception as load_error:
    print(f"Error loading CSV: {load_error}")
    # Empty fallback so later lookups can detect "no data" via df.empty
    df = pd.DataFrame(
        columns=[
            'Dialect Bengali',
            'Translation in Ac English',
            'Meaning',
        ]
    )

# Column names the phrase table must provide for lookups to work
_PHRASE_COLUMN = 'Dialect Bengali'
_TRANSLATION_COLUMN = 'Translation in Ac English'
_MEANING_COLUMN = 'Meaning'


def _normalize_phrase(phrase):
    """Return *phrase* lower-cased, keeping only letters, with whitespace collapsed."""
    kept = ''.join(ch for ch in phrase.lower() if ch.isalpha() or ch.isspace())
    # Collapse runs of whitespace and trim the ends so that "Kita kobor ?"
    # and " kita  kobor" compare equal to "kita kobor".
    return ' '.join(kept.split())


# Function to translate using your CSV data
def translate_text(text):
    """Look up *text* in the module-level phrase table ``df``.

    The lookup runs in three stages and returns a ready-to-display string:

    1. exact match on the normalized phrase (first matching CSV row wins);
    2. up to three distinct close matches found with ``difflib``
       (similarity of at least 60%), each listed with every row that
       shares that normalized phrase;
    3. otherwise a hint listing the first five phrases of the table.

    Args:
        text: The phrase typed by the user. ``None``, empty or
            whitespace-only input yields a prompt message.

    Returns:
        A human-readable result or hint message; never raises for a
        missing table or missing columns.
    """
    # NOTE(review): the symbol prefixes in the result strings below look
    # mis-encoded (probably emoji decoded with the wrong codec). They are left
    # untouched here -- confirm the encoding of the file on disk.
    if text is None or not text.strip():
        return "Please enter a phrase or question"

    # Check if dataframe is empty
    if df.empty:
        return "Database not loaded. Please check your CSV file."

    # A table without the expected columns cannot yield any phrase; report it
    # once up front instead of swallowing a KeyError for every row.
    required_columns = (_PHRASE_COLUMN, _TRANSLATION_COLUMN, _MEANING_COLUMN)
    if any(column not in df.columns for column in required_columns):
        return "No phrases found in database. Check CSV format."

    user_clean = _normalize_phrase(text)

    rows = []    # (original, translation, meaning) in CSV order
    by_key = {}  # normalized phrase -> distinct rows sharing it, in CSV order
    for phrase, translation, meaning in zip(
        df[_PHRASE_COLUMN], df[_TRANSLATION_COLUMN], df[_MEANING_COLUMN]
    ):
        entry = (str(phrase), str(translation), str(meaning))
        rows.append(entry)
        bucket = by_key.setdefault(_normalize_phrase(entry[0]), [])
        if entry not in bucket:
            bucket.append(entry)

    # Input made only of punctuation/digits normalizes to "" and must not be
    # treated as a match for rows that also normalize to "".
    if user_clean:
        # 1. First try exact match
        if user_clean in by_key:
            _, translation, meaning = by_key[user_clean][0]
            return f"βœ… EXACT MATCH:\nπŸ’¬ {translation}\nπŸ“– {meaning}"

        # 2. Try close matches with high probability. The candidates are the
        # unique normalized phrases, so a phrase that occurs several times in
        # the CSV takes one of the three slots instead of all of them.
        close_matches = difflib.get_close_matches(
            user_clean,
            list(by_key),
            n=3,
            cutoff=0.6,
        )

        suggestions = []
        for match in close_matches:
            # SequenceMatcher.ratio() can depend on argument order, so the
            # displayed score is re-checked against the 60% threshold.
            similarity = int(difflib.SequenceMatcher(None, user_clean, match).ratio() * 100)
            if similarity < 60:
                continue
            for original, translation, meaning in by_key[match]:
                suggestions.append(
                    f"🎯 '{original}' ({similarity}% match)\nπŸ’¬ {translation}\nπŸ“– {meaning}"
                )

        if suggestions:
            return "πŸ” SIMILAR PHRASES FOUND:\n\n" + "\n\n".join(suggestions)

    # 3. Show sample available phrases
    sample_phrases = [original for original, _, _ in rows[:5]]
    return "❓ ASK DIFFERENT QUESTION\n\n" + \
           "πŸ“‹ Try these phrases:\n" + \
           "\n".join([f"β€’ '{phrase}'" for phrase in sample_phrases]) + \
           "\n\nπŸ’‘ Example: 'Kita kobor?', 'Goto kali', 'gesle ni'"

# Build the UI pieces first, then wire them to translate_text
phrase_input = gr.Textbox(
    label="Type phrase in our language",
    placeholder="Example: Kita kobor?, Goto kali, gesle ni...",
)
translation_output = gr.Textbox(label="Translation Result")

demo = gr.Interface(
    fn=translate_text,
    inputs=phrase_input,
    outputs=translation_output,
    title="🌍 Smart Dialect Translator",
    description="Translates with smart matching - finds similar phrases",
    # Clickable sample inputs shown with the form
    examples=[["gesle ni"], ["Kita kobor?"], ["Goto kali"]],
)

# Launch the app
demo.launch()