Spaces:

anisgtboi
/

my-dialect-translator-app

Sleeping

App Files Files Community

anisgtboi committed on Sep 2

Commit

ea5da31

verified ·

1 Parent(s): 84e57d5

Update app.py

Browse files

Files changed (1) hide show

app.py +166 -366

app.py CHANGED Viewed

@@ -1,386 +1,186 @@
 # app.py
-# Enhanced Gradio app: Fixed translation with professional UI
 import os
-import re
 import traceback
-import torch
 import gradio as gr
-from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
-# ---------- Translation (Using NLLB Model) ----------
-DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-# NLLB model configuration
-MODEL_NAME = "facebook/nllb-200-distilled-600M"
-LANGUAGE_CODES = {
-    "English": "eng_Latn",
-    "Bengali": "ben_Beng"
-}
-_translation_model = None
-_tokenizer = None
-def load_translation_model():
-    global _translation_model, _tokenizer
-    if _translation_model is None:
-        try:
-            _tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-            _translation_model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME).to(DEVICE)
-        except Exception as e:
-            print(f"Error loading NLLB model: {e}")
-            raise Exception(f"Failed to load NLLB model: {e}")
-    return _tokenizer, _translation_model
-# Enhanced Benglish to Bengali transliteration mapping
-BENGLISH_TO_BENGALI_MAP = {
-    # Vowels
-    'a': 'া', 'aa': 'া', 'i': 'ি', 'ee': 'ী', 'u': 'ু', 'oo': 'ূ', 'ri': 'ৃ', 'e': 'ে', 'oi': 'ৈ', 'o': 'ো', 'ou': 'ৌ',
-    'A': 'আ', 'Aa': 'আ', 'I': 'ই', 'Ee': 'ঈ', 'U': 'উ', 'Oo': 'ঊ', 'Ri': 'ঋ', 'E': 'এ', 'Oi': 'ঐ', 'O': 'ও', 'Ou': 'ঔ',
-    # Consonants
-    'k': 'ক', 'kh': 'খ', 'g': 'গ', 'gh': 'ঘ', 'ng': 'ং', 'ch': 'চ', 'chh': 'ছ', 'j': 'জ', 'jh': 'ঝ', 'yn': 'ঞ',
-    't': 'ট', 'th': 'ঠ', 'd': 'ড', 'dh': 'ঢ', 'n': 'ণ', 'p': 'প', 'ph': 'ফ', 'f': 'ফ', 'b': 'ব', 'bh': 'ভ', 'v': 'ভ',
-    'm': 'ম', 'y': 'য', 'r': 'র', 'l': 'ল', 'w': 'ও', 'sh': 'শ', 'ss': 'श', 's': 'স', 'h': 'হ', 'x': 'ক্ষ', 'z': 'জ',
-    'rh': 'ড়', 'rhh': 'ঢ়', 'y': 'য়', 'tt': 'ৎ', 'ng': 'ঁ',
-    # Numbers
-    '0': '০', '1': '১', '2': '২', '3': '৩', '4': '৪', '5': '৫', '6': '৬', '7': '৭', '8': '৮', '9': '৯',
-}
-def is_benglish(text: str):
-    """Check if text appears to be Benglish (Bengali in English script)"""
-    if not text.strip():
-        return False
-    # Check if text contains any Bengali characters
-    bengali_unicode_range = '\u0980-\u09FF'
-    if re.search(f'[{bengali_unicode_range}]', text):
-        return False
-    # Check if text contains English letters and common Benglish patterns
-    if re.search(r'[a-zA-Z]', text) and not re.search(r'[^a-zA-Z0-9\s\.\,\?\!]', text):
-        return True
-    return False
-def transliterate_benglish_to_bengali(text: str):
-    """Simple transliteration from Benglish to Bengali"""
-    # Common word mappings
-    common_words = {
-        'ami': 'আমি', 'tumi': 'তুমি', 'se': 'সে', 'amra': 'আমরা', 'tomra': 'তোমরা', 'tara': 'তারা',
-        'kothay': 'কোথায়', 'ki': 'কী', 'kemon': 'কেমন', 'kno': 'কেন', 'kobe': 'কবে', 'kor': 'কর',
-        'ache': 'আছে', 'nay': 'নয়', 'holo': 'হলো', 'hobe': 'হবে', 'chai': 'চাই', 'ne': 'নে',
-        'valo': 'ভালো', 'kharap': 'খারাপ', 'sundor': 'সুন্দর', 'bhalo': 'ভালো', 'odin': 'অদিন',
-        'ekhon': 'এখন', 'age': 'আগে', 'pore': 'পরে', 'sobar': 'সবার', 'jonno': 'জন্য',
-        'tui': 'তুই', 'tor': 'তোর', 'amar': 'আমার', 'tomar': 'তোমার', 'tar': 'তার',
-        'achi': 'আছি', 'achis': 'আছিস', 'achho': 'আচ্ছ', 'achhen': 'আছেন', 'achhe': 'আছে',
-        'kothao': 'কোথাও', 'kono': 'কোনো', 'keu': 'কেউ', 'kichu': 'কিছু', 'sob': 'সব',
-        'jodi': 'যদি', 'tahole': 'তাহলে', 'kintu': 'কিন্তু', 'je': 'যে', 'na': 'না',
-        'ha': 'হা', 're': 'রে', 'o': 'ও', 'aro': 'আরও', 'onek': 'অনেক', 'valo': 'ভালো'
-    }
-    # Replace common words first (case insensitive)
-    for eng, ben in common_words.items():
-        text = re.sub(r'\b' + eng + r'\b', ben, text, flags=re.IGNORECASE)
-    # Simple character mapping for remaining text
-    for eng, ben in BENGLISH_TO_BENGALI_MAP.items():
-        if eng:
-            text = text.replace(eng, ben)
-    return text
-def translate_text(text: str, src_lang: str, tgt_lang: str):
-    if not text or not text.strip():
-        return ""
-    # If source language is Bengali and text appears to be Benglish, transliterate it
-    original_text = text
-    if src_lang == "Bengali" and is_benglish(text):
-        text = transliterate_benglish_to_bengali(text)
-        print(f"Transliterated '{original_text}' to '{text}'")
     try:
-        tokenizer, model = load_translation_model()
     except Exception as e:
-        tb = traceback.format_exc()
-        return f"Error loading translation model: {e}\n{tb}"
     try:
-        # Get language codes
-        src_code = LANGUAGE_CODES[src_lang]
-        tgt_code = LANGUAGE_CODES[tgt_lang]
-        # Tokenize and translate
-        tokenizer.src_lang = src_code
-        inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
-        inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
-        # Generate translation
-        translated_tokens = model.generate(
-            **inputs,
-            forced_bos_token_id=tokenizer.lang_code_to_id[tgt_code],
-            max_length=512
-        )
-        result = tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
-        return result
     except Exception as e:
-        return f"[translate error: {e}]"
-# ---------- Gradio UI (Mobile-Friendly Dark Theme) ----------
-css = """
-/* General App Container */
-.app {
-  background-color: #1c1c1c;   /* dark background */
-  font-family: 'Poppins', sans-serif;
-  color: #f5f5f5;
-  padding: 20px;
-  max-width: 400px;  /* mobile size */
-  margin: auto;
-  border-radius: 20px;
-}
-/* Text Card Areas */
-.card {
-  background-color: #2c2c2c;
-  border-radius: 15px;
-  padding: 15px;
-  margin: 15px 0;
-  box-shadow: 0 4px 8px rgba(0,0,0,0.3);
-  min-height: 120px;
-  display: flex;
-  flex-direction: column;
-  justify-content: space-between;
-}
-/* Copy Icon Button (inside card) */
-.card .copy-btn {
-  align-self: flex-end;
-  background: none;
-  border: none;
-  color: #ffcc00;   /* yellow icon */
-  font-size: 20px;
-  cursor: pointer;
-}
-/* Language Selector Row */
-.lang-row {
-  display: flex;
-  align-items: center;
-  justify-content: center;
-  gap: 12px;
-  margin: 10px 0;
-}
-.lang-row select {
-  background: #3a3a3a;
-  color: #fff;
-  border: none;
-  border-radius: 12px;
-  padding: 8px 16px;
-  font-size: 14px;
-  cursor: pointer;
-}
-/* Swap Button */
-.swap-btn {
-  background: #ff0057;
-  color: white;
-  border: none;
-  border-radius: 50%;
-  padding: 10px;
-  cursor: pointer;
-  font-size: 16px;
-  box-shadow: 0 3px 6px rgba(0,0,0,0.4);
-}
-/* Translate Button */
-.translate-btn {
-  width: 100%;
-  background: #ff0057;
-  color: white;
-  font-weight: bold;
-  font-size: 16px;
-  border: none;
-  padding: 14px;
-  border-radius: 12px;
-  margin-top: 15px;
-  cursor: pointer;
-  box-shadow: 0 3px 8px rgba(0,0,0,0.5);
-}
-.translate-btn:hover {
-  background: #e6004d;
-}
-/* Text Areas */
-.text-input, .text-output {
-  background-color: #2c2c2c;
-  color: #f5f5f5;
-  border: 1px solid #444;
-  border-radius: 12px;
-  padding: 12px;
-  width: 100%;
-  min-height: 120px;
-  resize: vertical;
-  font-family: inherit;
-}
-.text-input:focus {
-  outline: none;
-  border-color: #ff0057;
-}
-/* Header */
-.header {
-  text-align: center;
-  margin-bottom: 20px;
-}
-.header h1 {
-  font-size: 24px;
-  margin-bottom: 5px;
-  color: #ffcc00;
-}
-.header p {
-  font-size: 14px;
-  color: #aaa;
-}
-/* Quick Phrases */
-.quick-phrases {
-  display: grid;
-  grid-template-columns: 1fr 1fr;
-  gap: 10px;
-  margin: 15px 0;
-}
-.quick-btn {
-  background: #3a3a3a;
-  color: #fff;
-  border: none;
-  border-radius: 8px;
-  padding: 8px;
-  font-size: 12px;
-  cursor: pointer;
-  transition: background 0.2s;
-}
-.quick-btn:hover {
-  background: #4a4a4a;
-}
-/* Footer */
-.footer {
-  text-align: center;
-  margin-top: 20px;
-  font-size: 12px;
-  color: #888;
-}
-"""
-with gr.Blocks(title="NLLB English ↔ Bengali Translator", css=css) as demo:
-    gr.HTML("""
-    <div class="header">
-        <h1>🌐 NLLB Translator</h1>
-        <p>English ↔ Bengali with Benglish Support</p>
-    </div>
-    """)
-    with gr.Column(elem_classes="app"):
-        # Language selection
-        with gr.Row(elem_classes="lang-row"):
-            src_lang = gr.Dropdown(
-                choices=["English", "Bengali"],
-                value="English",
-                label="From",
-                elem_classes="lang-select"
-            )
-            swap_btn = gr.Button("⇄", elem_classes="swap-btn")
-            tgt_lang = gr.Dropdown(
-                choices=["Bengali", "English"],
-                value="Bengali",
-                label="To",
-                elem_classes="lang-select"
-            )
-        # Input and output text areas
-        input_text = gr.Textbox(
-            lines=4,
-            label="Input Text",
-            placeholder="Type or paste text to translate here...",
-            elem_classes="text-input card"
-        )
-        output_text = gr.Textbox(
-            lines=4,
-            label="Translation",
-            interactive=False,
-            elem_classes="text-output card"
-        )
-        # Quick phrases
-        gr.Markdown("**Quick Phrases:**")
-        with gr.Row(elem_classes="quick-phrases"):
-            quick1 = gr.Button("Hello, how are you?", elem_classes="quick-btn")
-            quick2 = gr.Button("Thank you very much", elem_classes="quick-btn")
-            quick3 = gr.Button("What is your name?", elem_classes="quick-btn")
-            quick4 = gr.Button("Kemon achis?", elem_classes="quick-btn")
-        # Translate button
-        translate_btn = gr.Button("Translate", elem_classes="translate-btn")
-        gr.HTML("""
-        <div class="footer">
-            <p>Powered by Meta NLLB • Professional Translation</p>
-        </div>
-        """)
-    # Update target language when source changes
-    def update_target_lang(src_lang):
-        return "Bengali" if src_lang == "English" else "English"
-    src_lang.change(update_target_lang, inputs=src_lang, outputs=tgt_lang)
-    # Swap languages function
-    def swap_languages(src_lang, tgt_lang, input_text, output_text):
-        new_src = tgt_lang
-        new_tgt = src_lang
-        new_input = output_text
-        new_output = input_text
-        return new_src, new_tgt, new_input, new_output
-    swap_btn.click(
-        swap_languages,
-        inputs=[src_lang, tgt_lang, input_text, output_text],
-        outputs=[src_lang, tgt_lang, input_text, output_text]
-    )
-    # Quick phrase events
-    quick_phrases = {
-        quick1: "Hello, how are you?",
-        quick2: "Thank you very much.",
-        quick3: "What is your name?",
-        quick4: "Kemon achis?"
-    }
-    for btn, phrase in quick_phrases.items():
-        btn.click(lambda p=phrase: p, inputs=None, outputs=input_text)
-    # Translate function
-    translate_btn.click(
-        fn=translate_text,
-        inputs=[input_text, src_lang, tgt_lang],
-        outputs=output_text
-    )
 if __name__ == "__main__":
-    demo.launch(
-        server_name="0.0.0.0",
-        server_port=int(os.environ.get("PORT", 7860)),
-        share=True
-    )

 # app.py
 import os
 import traceback
 import gradio as gr
+from typing import Tuple
+# Try to import transformers; if not available, the app will error and tell you to add requirements.
+try:
+    from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
+except Exception as e:
+    pipeline = None
+# Optional: Hugging Face hosted-inference fallback
+try:
+    from huggingface_hub import InferenceApi
+except Exception:
+    InferenceApi = None
+# ---------- CONFIG ----------
+# Lightweight models that work well on CPU / Spaces:
+MODEL_EN_TO_BN = "shhossain/opus-mt-en-to-bn"   # small finetuned en -> bn (≈75M params)
+MODEL_BN_TO_EN = "Helsinki-NLP/opus-mt-bn-en"  # bn -> en
+# If you prefer other model ids, change the strings above.
+# Language labels for UI
+DIRECTION_CHOICES = ["English → Bengali", "Bengali → English"]
+# ---------- GLOBALS ----------
+local_pipeline = None
+local_model_name = None
+use_api_fallback = False
+inference_client = None
+# ---------- HELPERS ----------
def try_load_local(model_name: str) -> Tuple[bool, str]:
    """Try to build a local transformers translation pipeline for ``model_name``.

    On success the module globals ``local_pipeline`` and ``local_model_name``
    are replaced and ``use_api_fallback`` is cleared. On any failure the
    previously loaded pipeline (if any) is left untouched and
    ``use_api_fallback`` is set so callers know to try the hosted API instead.

    Args:
        model_name: Hugging Face model id to load.

    Returns:
        ``(success, message)`` where ``message`` is a human-readable status.
    """
    global local_pipeline, local_model_name, use_api_fallback
    if pipeline is None:
        # transformers could not be imported, so the hosted API is the only
        # option left; flag it here as well as in the load-failure path.
        use_api_fallback = True
        return False, "transformers not installed (add to requirements.txt)"
    try:
        # Build into a local name first so the shared globals only change
        # once the new pipeline is known to be usable.
        loaded = pipeline("translation", model=model_name, device=-1, max_length=512)
    except Exception as e:
        # Best-effort load: any failure (download, memory, bad model id) is
        # reported to the caller instead of crashing the app.
        use_api_fallback = True
        return False, f"Local load failed: {e}"
    local_pipeline = loaded
    local_model_name = model_name
    use_api_fallback = False
    return True, f"Loaded local model: {model_name}"
def try_init_inference_api(token_env="HF_API_TOKEN", model_name_fallback=None):
    """Create the hosted Inference API client if a token is configured.

    The client is stored in the module global ``inference_client`` and is
    bound to a single model id at construction time. ``use_api_fallback`` is
    set when the client is created successfully.

    Args:
        token_env: Name of the environment variable holding the API token.
        model_name_fallback: Model id the client should target; when omitted
            ``facebook/nllb-200-distilled-600M`` is used.

    Returns:
        ``(success, message)`` where ``message`` is a human-readable status.
    """
    global inference_client, use_api_fallback
    # Secrets pasted into a settings form often carry stray whitespace or a
    # trailing newline; treat a blank value the same as a missing one.
    token = (os.environ.get(token_env) or "").strip()
    if not token:
        return False, f"No {token_env} found in env (set Space secret {token_env})"
    if InferenceApi is None:
        return False, "huggingface_hub not installed (add to requirements.txt)"
    repo_id = model_name_fallback or "facebook/nllb-200-distilled-600M"
    try:
        client = InferenceApi(repo_id=repo_id, token=token)
    except Exception as e:
        return False, f"Inference API init failed: {e}"
    inference_client = client
    use_api_fallback = True
    return True, "Inference API client ready"
def translate_with_local(text: str):
    """Translate ``text`` with the currently loaded local pipeline.

    The pipeline result is expected to be a non-empty list whose first item
    is a dict. The translated string is taken from ``translation_text``,
    then ``generated_text``, then the first value in that dict. Anything
    that does not match this shape is returned as ``str(out)`` so the caller
    always receives a string.

    Raises:
        RuntimeError: if no local pipeline has been loaded.
    """
    if local_pipeline is None:
        raise RuntimeError("Local pipeline not loaded")
    out = local_pipeline(text, max_length=512)
    first = out[0] if isinstance(out, list) and out else None
    if not isinstance(first, dict) or not first:
        return str(out)
    for key in ("translation_text", "generated_text"):
        value = first.get(key)
        # Compare against None rather than truthiness: an empty translation
        # is a valid result and must not be replaced by the raw repr.
        if value is not None:
            return str(value)
    fallback = next(iter(first.values()))
    return str(out) if fallback is None else str(fallback)
def translate_with_api(text: str, model_name: str):
    """Translate ``text`` through the hosted Inference API client.

    Args:
        text: Text to translate.
        model_name: Kept for interface compatibility. The client in
            ``inference_client`` is already bound to a model when it is
            created, so this value does not select the model here.

    Returns:
        The translated string taken from ``translation_text`` or
        ``generated_text`` of the response; other response shapes are
        returned as ``str(...)`` of the response.

    Raises:
        RuntimeError: if the client is not initialised, or the API answered
            with an ``{"error": ...}`` payload.
    """
    if inference_client is None:
        raise RuntimeError("Inference client not ready")
    # Only the text is sent. The previous extra ``parameters={}`` keyword is
    # not part of the client's call signature and carried no options anyway.
    res = inference_client(inputs=text)
    payload = res[0] if isinstance(res, list) and res else res
    if not isinstance(payload, dict):
        return str(payload)
    if "error" in payload:
        # Surface API failures (model loading, auth, rate limit) as errors
        # instead of presenting the error dict as if it were a translation.
        raise RuntimeError(f"Inference API error: {payload['error']}")
    for key in ("translation_text", "generated_text"):
        value = payload.get(key)
        # An empty string is a legitimate result; only skip missing keys.
        if value is not None:
            return str(value)
    return str(payload)
# ---------- ON START: try local load (best-effort) ----------
# Eagerly load the default direction (EN->BN) so the first request is fast;
# translate_text loads the other direction on demand.
_success, _msg = try_load_local(MODEL_EN_TO_BN)
print("Model load attempt:", _success, _msg)
# Fall back to the hosted Inference API whenever the local load did not
# succeed. Keying on the load result (not on use_api_fallback) also covers
# the case where transformers is not installed at all.
if not _success:
    ok, msg = try_init_inference_api(model_name_fallback=MODEL_EN_TO_BN)
    print("Inference API init:", ok, msg)
+# ---------- TRANSLATION FUNCTION FOR UI ----------
# Model id that translate_text last initialised the hosted client for.
# None until translate_text has created (or re-created) the client itself.
_api_client_model = None


def translate_text(text: str, direction: str):
    """Translate ``text`` in the chosen direction for the Gradio UI.

    A local pipeline is preferred. When the pipeline for the requested
    direction cannot be loaded, the hosted Inference API is used instead.
    The hosted client is bound to one model when it is created, so it is
    re-created whenever the requested model differs from the one this
    function last initialised it for; otherwise a direction switch would
    silently keep calling the previous direction's model.

    Args:
        text: User input; blank input short-circuits with a hint.
        direction: One of ``DIRECTION_CHOICES``; the first entry selects
            ``MODEL_EN_TO_BN``, anything else selects ``MODEL_BN_TO_EN``.

    Returns:
        ``(translation, status, analysis)``. On failure ``translation`` is
        empty, ``status`` carries the error and ``analysis`` the traceback.
    """
    global _api_client_model
    if not text or not text.strip():
        return "", "Please type text to translate", ""
    try:
        model_name = MODEL_EN_TO_BN if direction == DIRECTION_CHOICES[0] else MODEL_BN_TO_EN
        # Load (or switch to) the local model needed for this direction.
        if local_pipeline is None or local_model_name != model_name:
            ok, msg = try_load_local(model_name)
            print("Reload attempt:", ok, msg)
        if local_pipeline is not None and local_model_name == model_name:
            translated = translate_with_local(text)
            status = f"Local model used: {local_model_name}"
        else:
            # Hosted fallback: make sure the client targets this model.
            if inference_client is None or _api_client_model != model_name:
                ok2, msg2 = try_init_inference_api(model_name_fallback=model_name)
                print("Fallback init:", ok2, msg2)
                if ok2:
                    _api_client_model = model_name
                elif inference_client is not None:
                    # A client exists but could not be pointed at this model;
                    # refuse rather than translate with the wrong one.
                    return "", f"Hosted Inference API unavailable for {model_name}: {msg2}", ""
            if inference_client is None:
                return "", "No model available locally and no HF_API_TOKEN set for API fallback. Set HF_API_TOKEN in Space secrets.", ""
            translated = translate_with_api(text, model_name)
            status = f"Hosted Inference API used: {model_name}"
        # Small "analysis" block: word counts of input and output.
        words = len(text.split())
        analysis = f"Input words: {words}. Output length: {len(translated.split())} words."
        return translated, status, analysis
    except Exception as e:
        # UI boundary: report the failure in the status/analysis boxes
        # instead of letting the exception escape into Gradio.
        tb = traceback.format_exc()
        return "", f"Error: {e}", tb
+# ---------- GRADIO APP UI ----------
with gr.Blocks(title="English ↔ Bengali — Fast Translator") as demo:
    gr.Markdown("# English ↔ Bengali — Fast Translator")
    gr.Markdown(
        "Small, fast models (OPUS-MT) used for speed. If local loading fails the app will use the Hugging Face Inference API (requires HF_API_TOKEN set in Space secrets)."
    )
    with gr.Row():
        direction = gr.Radio(label="Direction", choices=DIRECTION_CHOICES, value=DIRECTION_CHOICES[0])
        swap = gr.Button("Swap")
    input_text = gr.Textbox(label="Input text", lines=4, placeholder="Type in English or Bengali...")
    translate_btn = gr.Button("Translate", variant="primary")
    with gr.Row():
        out_translation = gr.Textbox(label="Translation", lines=4)
        out_status = gr.Textbox(label="Status / Tips", lines=2)
    out_analysis = gr.Textbox(label="Analysis / Notes", lines=3)
    # Example inputs. The Bengali example is written in Bengali script
    # because this version of the app performs no transliteration of
    # romanized Bengali before translating.
    with gr.Row():
        ex1 = gr.Button("Hello, how are you?")
        ex2 = gr.Button("আমি ভালো আছি")
        ex3 = gr.Button("Where is the market?")

    # wiring
    def do_swap(cur):
        """Return the direction choice opposite to ``cur``."""
        return DIRECTION_CHOICES[1] if cur == DIRECTION_CHOICES[0] else DIRECTION_CHOICES[0]

    swap.click(do_swap, inputs=direction, outputs=direction)
    translate_btn.click(translate_text, inputs=[input_text, direction], outputs=[out_translation, out_status, out_analysis])
    # Each example fills the input box AND selects the matching direction, so
    # pressing Translate right after an example uses the right model.
    ex1.click(lambda: ("Hello, how are you?", DIRECTION_CHOICES[0]), outputs=[input_text, direction])
    ex2.click(lambda: ("আমি ভালো আছি", DIRECTION_CHOICES[1]), outputs=[input_text, direction])
    ex3.click(lambda: ("Where is the market?", DIRECTION_CHOICES[0]), outputs=[input_text, direction])
    gr.Markdown("---")
    gr.Markdown("If the app shows `No model available` error: go to Space Settings → Secrets and add `HF_API_TOKEN` (your Hugging Face token).")

# Launch if run directly
if __name__ == "__main__":
    demo.launch(debug=True)