Spaces:

SagarVelamuri
/

TranslationSpace

Sleeping

App Files Files Community

SagarVelamuri committed on Sep 5

Commit

683b0a0

verified ·

1 Parent(s): a0b8bba

Update app.py

Browse files

Files changed (1) hide show

app.py +92 -193

app.py CHANGED Viewed

@@ -1,26 +1,25 @@
-import os, traceback, types, torch
-import gradio as gr
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
-# Robust import for IndicProcessor (fallback too)
 try:
     from IndicTransToolkit import IndicProcessor
 except Exception:
     from IndicTransToolkit.IndicTransToolkit import IndicProcessor
 # -------- Config --------
 TOKENIZER_ID = os.getenv("TOKENIZER_ID", "ai4bharat/indictrans2-en-indic-1B")
 MODEL_ID     = os.getenv("MODEL_ID",     "law-ai/InLegalTrans-En2Indic-1B")
-TOKENIZER_REV = os.getenv("TOKENIZER_REV", None)
-MODEL_REV     = os.getenv("MODEL_REV",     None)
 SRC_CODE = "eng_Latn"
 HI_CODE  = "hin_Deva"
 TE_CODE  = "tel_Telu"
-# ------- Load model -------
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-dtype  = torch.float16 if torch.cuda.is_available() else torch.float32
 tok_kwargs = dict(trust_remote_code=True, use_fast=True)
 if TOKENIZER_REV: tok_kwargs["revision"] = TOKENIZER_REV
@@ -29,53 +28,45 @@ tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_ID, **tok_kwargs)
 mdl_kwargs = dict(trust_remote_code=True, attn_implementation="eager",
                   low_cpu_mem_usage=True, dtype=dtype)
 if MODEL_REV: mdl_kwargs["revision"] = MODEL_REV
-model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID, **mdl_kwargs).to(device)
-model.eval()
 if getattr(model.generation_config, "pad_token_id", None) is None:
-    model.generation_config.pad_token_id = (
-        getattr(tokenizer, "pad_token_id", None) or getattr(tokenizer, "eos_token_id", 0)
-    )
-if getattr(model.generation_config, "eos_token_id", None) is None and getattr(tokenizer, "eos_token_id", None) is not None:
     model.generation_config.eos_token_id = tokenizer.eos_token_id
 def _ensure_vocab_consistency(md, tok):
     try:
         actual_vocab = md.get_output_embeddings().weight.shape[0]
-    except Exception:
-        actual_vocab = None
-    if actual_vocab is not None:
         md.config.vocab_size = actual_vocab
-        try: md.generation_config.vocab_size = actual_vocab
-        except Exception: pass
     else:
-        vs = getattr(tok, "vocab_size", None)
-        if vs is None:
-            try: vs = len(tok)
-            except Exception: vs = 64000
         md.config.vocab_size = vs
-        try: md.generation_config.vocab_size = vs
-        except Exception: pass
-    if not hasattr(md.config, "get_text_config") or not callable(getattr(md.config, "get_text_config", None)):
-        def _get_text_config(self): return self
-        md.config.get_text_config = types.MethodType(_get_text_config, md.config)
 _ensure_vocab_consistency(model, tokenizer)
 for obj in (model.config, model.generation_config):
     try: setattr(obj, "use_cache", False)
-    except Exception: pass
 ip = IndicProcessor(inference=True)
 # -------- Inference --------
 @torch.inference_mode()
 def _translate_to_lang(text, tgt_code, num_beams, max_new_tokens, temperature, top_p, top_k):
     batch = ip.preprocess_batch([text], src_lang=SRC_CODE, tgt_lang=tgt_code)
-    enc = tokenizer(
-        batch, max_length=256, truncation=True, padding="longest",
-        return_tensors="pt", return_attention_mask=True
-    ).to(device)
-    do_sample = (temperature is not None) and (float(temperature) > 0)
     out = model.generate(
         **enc,
         max_new_tokens=int(max_new_tokens),
@@ -84,195 +75,103 @@ def _translate_to_lang(text, tgt_code, num_beams, max_new_tokens, temperature, t
         temperature=float(temperature) if do_sample else None,
         top_p=float(top_p) if do_sample else None,
         top_k=int(top_k) if do_sample else None,
-        use_cache=False, early_stopping=False,
-        pad_token_id=model.generation_config.pad_token_id,
     )
-    decoded = tokenizer.batch_decode(out, skip_special_tokens=True, clean_up_tokenization_spaces=True)
     final = ip.postprocess_batch(decoded, lang=tgt_code)
     return final[0].strip()
 def translate_dual(text, num_beams, max_new_tokens, temperature, top_p, top_k):
-    text = (text or "").strip()
     if not text: return "", ""
     try:
         hi = _translate_to_lang(text, HI_CODE, num_beams, max_new_tokens, temperature, top_p, top_k)
     except Exception as e:
-        print("HI ERROR:\n", traceback.format_exc())
-        hi = f"⚠️ Hindi translation failed: {type(e).__name__}: {str(e).splitlines()[-1]}"
     try:
         te = _translate_to_lang(text, TE_CODE, num_beams, max_new_tokens, temperature, top_p, top_k)
     except Exception as e:
-        print("TE ERROR:\n", traceback.format_exc())
-        te = f"⚠️ Telugu translation failed: {type(e).__name__}: {str(e).splitlines()[-1]}"
     return hi, te
-# -------- Theme --------
-THEME = gr.themes.Soft(primary_hue="blue", neutral_hue="slate").set(
-    body_background_fill="#0b1220",
-    body_text_color="#f2f6ff",
-    body_text_color_subdued="#cbd5e1",
-    block_background_fill="#0f172a",
-    block_border_color="#223144",
-    block_title_text_color="#ffffff",
-    input_background_fill="#0b1220",
-    input_border_color="#3b516c",
-    button_primary_background_fill="#3b82f6",
-    button_primary_text_color="#ffffff",
 )
-# -------- CSS --------
 CUSTOM_CSS = """
-* { box-sizing: border-box; }
-html, body { height: 100%; background:#0b1220; margin:0; padding:0; }
-.gradio-container { height: 100vh !important; width: 100vw !important; max-width: 100vw !important; margin: 0; padding: 8px; }
-/* Header */
-#hdr { height: 60px; display:flex; flex-direction:column; align-items:center; justify-content:center; gap:4px;
-       background:#162434; border:1px solid #223144; border-radius:12px; margin-bottom:8px; }
-#title { color:#ffffff; font-weight:900; font-size:20px; margin:0; letter-spacing:.2px; }
-#subtitle { color:#b8cae1; font-size:12.5px; margin:0; }
-/* Main grid (use Group, not Row -> no split-handles) */
-#main {
-  height: calc(100vh - 60px - 16px);  /* header + outer padding */
-  display: grid;
-  grid-template-columns: 20% 40% 40%;
-  gap: 10px;
 }
-/* Panels */
-.panel { border:1px solid #223144; border-radius:12px; background:#0f172a;
-         display:flex; flex-direction:column; min-height:0; overflow:hidden; }
-.panel-h {
-  display:flex; align-items:center; justify-content:space-between;
-  padding:10px 12px; background:#081422; border-bottom:1px solid #243244;
-  color:#ffffff; font-weight:900; letter-spacing:.25px; font-size:15px;
 }
-.panel-b { flex:1 1 auto; min-height:0; padding:10px 12px; }
-/* Left column: internal scroll only */
-#left { height: 100%; }
-#adv-inner { height: 100%; overflow:auto; padding-right:6px; }
-/* Remove pill-like label chips; make labels crisp */
-.gradio-container label,
-.gradio-container .label,
-.gradio-container .label > span {
-  background: transparent !important;
-  box-shadow: none !important;
-  border: none !important;
-  color: #ffffff !important;
-  font-weight: 800 !important;
 }
-/* Middle split: 75% input / 25% buttons */
-#middle { display:grid; grid-template-rows: 75% 25%; height:100%; gap:10px; }
-/* Right split: 50% / 50% */
-#right { display:grid; grid-template-rows: 1fr 1fr; height:100%; gap:10px; }
-/* Text areas fill */
-.textwrap { height:100%; min-height:0; display:flex; }
-.textwrap > div { flex:1 1 auto; min-height:0; }
-.textwrap textarea { height:100% !important; }
-/* Inputs */
-textarea, textarea:focus {
-  background:#0b1220 !important; color:#f9fbff !important;
-  font-size:17px !important; line-height:1.55 !important;
-  padding:10px 12px !important; border:1.6px solid #3b516c !important; border-radius:10px !important;
 }
-textarea::placeholder { color:#a6bdd9 !important; }
-textarea:hover { border-color:#6b8db6 !important; }
-textarea:focus { border-color:#60a5fa !important; outline:none !important; }
-/* Buttons area */
-#btnrow { display:flex; align-items:center; justify-content:center; gap:16px; height:100%; }
-#btnrow > button { min-width:180px; height:46px; font-weight:800; border-radius:10px; }
-/* Maximize buttons */
-.max { font-weight:900; padding:4px 10px; border-radius:10px; border:1px solid #3c5a86;
-       background:#122037; color:#ffffff; }
-.max:hover { border-color:#60a5fa; }
-/* Modal */
-#modal { position: fixed; inset: 0; z-index: 9999; background: rgba(2,6,23,.88); display:none; align-items:center; justify-content:center; padding:12px; }
-#modal[style*="display: block"] { display:flex !important; }
-.modal-card { width:min(1280px,96vw); height:min(92vh,900px); background:#0f172a; border:1px solid #335070; border-radius:14px;
-              box-shadow:0 18px 40px rgba(2,6,23,.6); display:flex; flex-direction:column; gap:8px; padding:10px; }
-.modal-title { color:#ffffff; font-weight:800; font-size:18px; margin:0; }
-#fs_box textarea { height: calc(100% - 52px) !important; }
-.modal-actions { display:flex; gap:8px; justify-content:flex-end; }
 """
-# ------------- UI -------------
-with gr.Blocks(theme=THEME, css=CUSTOM_CSS, title="EN→HI / EN→TE Translator") as demo:
-    modal_state = gr.State(value="")  # 'hi' or 'te'
     with gr.Group(elem_id="hdr"):
-        gr.Markdown('<p id="title">English → Hindi & Telugu Translator</p>')
-        gr.Markdown('<p id="subtitle">IndicTrans2 pipeline · law-ai/InLegalTrans-En2Indic-1B</p>')
-    # Main grid built with Group (no split handles)
-    with gr.Group(elem_id="main"):
-        # LEFT (20%) — Advanced Settings
-        with gr.Group(elem_id="left", elem_classes=["panel"]):
-            gr.Markdown('<div class="panel-h">Advanced Settings</div>')
-            with gr.Group(elem_id="adv-inner", elem_classes=["panel-b"]):
-                num_beams   = gr.Slider(1, 8, value=4, step=1, label="Beam search: num_beams")
-                max_new     = gr.Slider(16, 512, value=128, step=8, label="Max new tokens")
-                temperature = gr.Slider(0.0, 1.5, value=0.0, step=0.05, label="Temperature (0 = deterministic)")
                 top_p       = gr.Slider(0.0, 1.0, value=1.0, step=0.01, label="Top-p")
                 top_k       = gr.Slider(0, 100, value=50, step=1, label="Top-k")
-        # MIDDLE (40%) — English (75% input / 25% buttons)
-        with gr.Group(elem_id="middle"):
-            with gr.Group(elem_classes=["panel"]):
-                gr.Markdown('<div class="panel-h">English Text</div>')
-                with gr.Group(elem_classes=["panel-b","textwrap"]):
-                    src = gr.Textbox(placeholder="Type English here…", show_label=False, lines=14)
-            with gr.Group(elem_classes=["panel"]):
-                gr.Markdown('<div class="panel-h">Actions</div>')
-                with gr.Group(elem_classes=["panel-b"], elem_id="btnrow"):
-                    translate_btn = gr.Button("Translate", variant="primary")
-                    clear_btn     = gr.Button("Clear", variant="secondary")
-        # RIGHT (40%) — Hindi (50%) / Telugu (50%)
-        with gr.Group(elem_id="right"):
-            with gr.Group(elem_classes=["panel"]):
-                gr.Markdown('<div class="panel-h">Hindi (hin_Deva)<span></span></div>')
-                with gr.Group(elem_classes=["panel-b","textwrap"]):
-                    hi_out = gr.Textbox(show_copy_button=True, show_label=False, lines=10)
-                with gr.Row():  # small row under box for maximize
-                    hi_max = gr.Button("⤢ Maximize", elem_classes=["max"])
-            with gr.Group(elem_classes=["panel"]):
-                gr.Markdown('<div class="panel-h">Telugu (tel_Telu)<span></span></div>')
-                with gr.Group(elem_classes=["panel-b","textwrap"]):
-                    te_out = gr.Textbox(show_copy_button=True, show_label=False, lines=10)
-                with gr.Row():
-                    te_max = gr.Button("⤢ Maximize", elem_classes=["max"])
-    # Modal
-    with gr.Group(visible=False, elem_id="modal") as modal:
-        modal_title = gr.Markdown('<div class="modal-title">Fullscreen</div>')
-        fs_text = gr.Textbox(lines=22, elem_id="fs_box")
-        with gr.Row(elem_classes=["modal-actions"]):
-            fs_close = gr.Button("Close", variant="secondary")
     # Wiring
-    translate_btn.click(
-        translate_dual,
-        inputs=[src, num_beams, max_new, temperature, top_p, top_k],
-        outputs=[hi_out, te_out],
-        api_name="translate"
-    )
     clear_btn.click(lambda: ("", "", ""), outputs=[src, hi_out, te_out])
-    def open_hi(h): return gr.update(visible=True), "hi", '<div class="modal-title">Hindi (Fullscreen)</div>', h
-    def open_te(t): return gr.update(visible=True), "te", '<div class="modal-title">Telugu (Fullscreen)</div>', t
-    hi_max.click(open_hi, inputs=[hi_out], outputs=[modal, modal_state, modal_title, fs_text])
-    te_max.click(open_te, inputs=[te_out], outputs=[modal, modal_state, modal_title, fs_text])
-    fs_close.click(lambda: (gr.update(visible=False), ""), outputs=[modal, modal_state])
 demo.queue(max_size=48).launch()

+import os, traceback, types, torch, gradio as gr
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+# Robust import for IndicProcessor
 try:
     from IndicTransToolkit import IndicProcessor
 except Exception:
     from IndicTransToolkit.IndicTransToolkit import IndicProcessor
 # -------- Config --------
 TOKENIZER_ID = os.getenv("TOKENIZER_ID", "ai4bharat/indictrans2-en-indic-1B")
 MODEL_ID     = os.getenv("MODEL_ID",     "law-ai/InLegalTrans-En2Indic-1B")
+TOKENIZER_REV, MODEL_REV = os.getenv("TOKENIZER_REV"), os.getenv("MODEL_REV")
 SRC_CODE = "eng_Latn"
 HI_CODE  = "hin_Deva"
 TE_CODE  = "tel_Telu"
+# -------- Model Load --------
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+dtype = torch.float16 if torch.cuda.is_available() else torch.float32
 tok_kwargs = dict(trust_remote_code=True, use_fast=True)
 if TOKENIZER_REV: tok_kwargs["revision"] = TOKENIZER_REV
 mdl_kwargs = dict(trust_remote_code=True, attn_implementation="eager",
                   low_cpu_mem_usage=True, dtype=dtype)
 if MODEL_REV: mdl_kwargs["revision"] = MODEL_REV
+model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID, **mdl_kwargs).to(device).eval()
+# Ensure generation config is correct
 if getattr(model.generation_config, "pad_token_id", None) is None:
+    model.generation_config.pad_token_id = tokenizer.pad_token_id or tokenizer.eos_token_id
+if getattr(model.generation_config, "eos_token_id", None) is None and tokenizer.eos_token_id is not None:
     model.generation_config.eos_token_id = tokenizer.eos_token_id
 def _ensure_vocab_consistency(md, tok):
     try:
         actual_vocab = md.get_output_embeddings().weight.shape[0]
+    except Exception: actual_vocab = None
+    if actual_vocab:
         md.config.vocab_size = actual_vocab
+        md.generation_config.vocab_size = actual_vocab
     else:
+        vs = getattr(tok, "vocab_size", len(tok) if hasattr(tok, "__len__") else 64000)
         md.config.vocab_size = vs
+        md.generation_config.vocab_size = vs
+    if not hasattr(md.config, "get_text_config"):
+        md.config.get_text_config = types.MethodType(lambda self: self, md.config)
 _ensure_vocab_consistency(model, tokenizer)
 for obj in (model.config, model.generation_config):
     try: setattr(obj, "use_cache", False)
+    except: pass
+# Processor
 ip = IndicProcessor(inference=True)
 # -------- Inference --------
 @torch.inference_mode()
 def _translate_to_lang(text, tgt_code, num_beams, max_new_tokens, temperature, top_p, top_k):
     batch = ip.preprocess_batch([text], src_lang=SRC_CODE, tgt_lang=tgt_code)
+    enc = tokenizer(batch, max_length=256, truncation=True, padding="longest",
+                    return_tensors="pt").to(device)
+    do_sample = (temperature and float(temperature) > 0)
     out = model.generate(
         **enc,
         max_new_tokens=int(max_new_tokens),
         temperature=float(temperature) if do_sample else None,
         top_p=float(top_p) if do_sample else None,
         top_k=int(top_k) if do_sample else None,
+        use_cache=False,
     )
+    decoded = tokenizer.batch_decode(out, skip_special_tokens=True)
     final = ip.postprocess_batch(decoded, lang=tgt_code)
     return final[0].strip()
 def translate_dual(text, num_beams, max_new_tokens, temperature, top_p, top_k):
+    text = text.strip()
     if not text: return "", ""
     try:
         hi = _translate_to_lang(text, HI_CODE, num_beams, max_new_tokens, temperature, top_p, top_k)
     except Exception as e:
+        hi = f"⚠️ Hindi error: {type(e).__name__}: {str(e).splitlines()[-1]}"
     try:
         te = _translate_to_lang(text, TE_CODE, num_beams, max_new_tokens, temperature, top_p, top_k)
     except Exception as e:
+        te = f"⚠️ Telugu error: {type(e).__name__}: {str(e).splitlines()[-1]}"
     return hi, te
+# -------- Theme & Styling --------
+THEME = gr.themes.Base(
+    primary_hue="blue", neutral_hue="slate"
+).set(
+    body_background_fill="#f9fafb",
+    body_text_color="#111827",
+    block_background_fill="#ffffff",
+    block_border_color="#e5e7eb",
+    block_title_text_color="#111827",
+    button_primary_background_fill="#2563eb",
+    button_primary_text_color="#ffffff"
 )
 CUSTOM_CSS = """
+#hdr {
+  text-align:center; padding:16px; margin-bottom:16px;
 }
+#hdr h1 { font-size:24px; font-weight:700; margin:0; color:#111827; }
+#hdr p { font-size:14px; color:#6b7280; margin:4px 0 0; }
+.panel {
+  border:1px solid #e5e7eb; border-radius:12px;
+  background:white; box-shadow:0 1px 3px rgba(0,0,0,0.08);
+  padding:12px; display:flex; flex-direction:column;
 }
+.panel h2 {
+  font-size:16px; font-weight:600; margin-bottom:8px; color:#374151;
 }
+textarea {
+  font-size:15px !important; line-height:1.55 !important;
+  padding:10px 12px !important;
+  border:1px solid #d1d5db !important; border-radius:8px !important;
 }
+button { font-weight:600 !important; border-radius:8px !important; }
+button:hover { opacity:0.9; transition:opacity 0.2s; }
 """
+# -------- UI --------
+with gr.Blocks(theme=THEME, css=CUSTOM_CSS, title="EN → Hindi / Telugu Translator") as demo:
     with gr.Group(elem_id="hdr"):
+        gr.Markdown("<h1>English → Hindi & Telugu Translator</h1>")
+        gr.Markdown("<p>Powered by IndicTrans2 · law-ai/InLegalTrans-En2Indic-1B</p>")
+    with gr.Row():
+        # Input Column
+        with gr.Column(scale=2):
+            with gr.Group(elem_classes="panel"):
+                gr.Markdown("<h2>English Input</h2>")
+                src = gr.Textbox(placeholder="Type English text...", lines=12, show_label=False)
+            with gr.Row():
+                translate_btn = gr.Button("👉 Translate", variant="primary")
+                clear_btn     = gr.Button("Clear", variant="secondary")
+        # Output Column
+        with gr.Column(scale=2):
+            with gr.Group(elem_classes="panel"):
+                gr.Markdown("<h2>Hindi Translation</h2>")
+                hi_out = gr.Textbox(lines=6, show_copy_button=True, show_label=False)
+            with gr.Group(elem_classes="panel"):
+                gr.Markdown("<h2>Telugu Translation</h2>")
+                te_out = gr.Textbox(lines=6, show_copy_button=True, show_label=False)
+        # Settings Column
+        with gr.Column(scale=1):
+            with gr.Group(elem_classes="panel"):
+                gr.Markdown("<h2>Advanced Settings</h2>")
+                num_beams   = gr.Slider(1, 8, value=4, step=1, label="Num Beams")
+                max_new     = gr.Slider(16, 512, value=128, step=8, label="Max Tokens")
+                temperature = gr.Slider(0.0, 1.5, value=0.0, step=0.05, label="Temperature")
                 top_p       = gr.Slider(0.0, 1.0, value=1.0, step=0.01, label="Top-p")
                 top_k       = gr.Slider(0, 100, value=50, step=1, label="Top-k")
     # Wiring
+    translate_btn.click(translate_dual, inputs=[src, num_beams, max_new, temperature, top_p, top_k],
+                        outputs=[hi_out, te_out])
     clear_btn.click(lambda: ("", "", ""), outputs=[src, hi_out, te_out])
 demo.queue(max_size=48).launch()