Spaces:

anisgtboi
/

my-dialect-translator-app

Sleeping

App Files Files Community

anisgtboi committed on Sep 1

Commit

b08e390

verified ·

1 Parent(s): d3a831e

Update app.py

Browse files

Files changed (1) hide show

app.py +75 -184

app.py CHANGED Viewed

@@ -1,206 +1,103 @@
 # app.py
-# English -> Bengali translation (facebook/nllb-200-distilled-600M) + FLUX.1 [schnell] image generation
 import os
 import re
-import traceback
 import random
 import torch
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
-# Diffusers + FluxPipeline
-try:
-    from diffusers import FluxPipeline
-    _FLUX_AVAILABLE = True
-except Exception:
-    FluxPipeline = None
-    _FLUX_AVAILABLE = False
-# -------- Configuration --------
-TRANSLATION_MODEL = os.environ.get("TRANSLATION_MODEL", "facebook/nllb-200-distilled-600M")
-SRC_LANG = os.environ.get("SRC_LANG", "eng_Latn")
-TGT_LANG = os.environ.get("TGT_LANG", "ben_Beng")
-MAX_LENGTH = int(os.environ.get("MAX_LENGTH", "512"))
-FLUX_MODEL_ID = os.environ.get("FLUX_MODEL_ID", "black-forest-labs/FLUX.1-schnell")
-DEFAULT_IMAGE_STEPS = int(os.environ.get("DEFAULT_IMAGE_STEPS", "2"))
 DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-# -------- Globals --------
 _translation_tokenizer = None
 _translation_model = None
-_flux_pipe = None
-# -------- Helpers: translation --------
-def split_into_sentences(text: str):
-    if not text:
-        return []
-    sentences = re.split(r'(?<=[.!?])\s+', text.strip())
-    return [s.strip() for s in sentences if s.strip()]
 def load_translation_model():
     global _translation_tokenizer, _translation_model
     if _translation_tokenizer is None or _translation_model is None:
-        try:
-            print(f"Loading translation model {TRANSLATION_MODEL} on {DEVICE}...")
-            _translation_tokenizer = AutoTokenizer.from_pretrained(TRANSLATION_MODEL, use_fast=False)
-            _translation_model = AutoModelForSeq2SeqLM.from_pretrained(TRANSLATION_MODEL).to(DEVICE)
-            print("Translation model loaded.")
-        except Exception as e:
-            _translation_tokenizer, _translation_model = None, None
-            raise RuntimeError(f"Failed to load translation model: {e}")
     return _translation_tokenizer, _translation_model
 def _get_forced_bos_token_id(tokenizer):
     try:
-        if hasattr(tokenizer, "lang_code_to_id") and isinstance(tokenizer.lang_code_to_id, dict):
-            if TGT_LANG in tokenizer.lang_code_to_id:
-                return tokenizer.lang_code_to_id[TGT_LANG]
-    except Exception:
-        pass
-    try:
-        tid = tokenizer.convert_tokens_to_ids(TGT_LANG)
-        if tid is not None and tid != tokenizer.unk_token_id:
-            return tid
-    except Exception:
-        pass
-    try:
-        candidate = f"<2{TGT_LANG}>"
-        tid = tokenizer.convert_tokens_to_ids(candidate)
-        if tid is not None and tid != tokenizer.unk_token_id:
-            return tid
-    except Exception:
-        pass
-    return None
-def translate_text(text: str, max_length: int = MAX_LENGTH):
-    if not text or not text.strip():
-        return ""
-    try:
-        tokenizer, model = load_translation_model()
-    except Exception as e:
-        tb = traceback.format_exc()
-        return f"Model load error: {e}\n{tb}"
     sentences = split_into_sentences(text)
     translations = []
-    forced_bos = _get_forced_bos_token_id(tokenizer)
     for s in sentences:
-        if not s:
-            continue
-        try:
-            src_prefixed = f"{SRC_LANG} {s}"
-            inputs = tokenizer(src_prefixed, return_tensors="pt", truncation=True, max_length=max_length)
-            inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
-            gen_kwargs = dict(max_length=max_length + 64, num_beams=5, early_stopping=True)
-            if forced_bos is not None:
-                gen_kwargs["forced_bos_token_id"] = int(forced_bos)
-            elif getattr(model.config, "forced_bos_token_id", None) is not None:
-                gen_kwargs["forced_bos_token_id"] = int(model.config.forced_bos_token_id)
-            generated_tokens = model.generate(**inputs, **gen_kwargs)
-            decoded = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
-            if decoded.startswith(TGT_LANG):
-                decoded = decoded[len(TGT_LANG):].strip()
-            translations.append(decoded)
-        except Exception as e:
-            translations.append(f"[Error translating sentence: {e}]")
     return " ".join(translations)
-# -------- FLUX.1 Schnell image generation --------
-def load_flux_model(model_id: str = FLUX_MODEL_ID):
-    global _flux_pipe
-    if not _FLUX_AVAILABLE:
-        raise RuntimeError("FluxPipeline (diffusers) not available. Install a diffusers version that provides FluxPipeline.")
-    if _flux_pipe is None:
-        try:
-            # prefer bfloat16 on supported hardware for memory efficiency
-            dtype = torch.bfloat16 if torch.cuda.is_available() and torch.cuda.is_bf16_supported() else (torch.float16 if torch.cuda.is_available() else torch.float32)
-            print(f"Loading FLUX model {model_id} (dtype={dtype}) on {DEVICE}...")
-            _flux_pipe = FluxPipeline.from_pretrained(model_id, torch_dtype=dtype)
-            # Try enabling model CPU offload if available (reduces VRAM peak)
-            try:
-                _flux_pipe.enable_model_cpu_offload()
-            except Exception:
-                pass
-            # Move pipeline to device when appropriate (some pipelines handle devices internally)
-            try:
-                _flux_pipe.to(DEVICE)
-            except Exception:
-                pass
-            print("Flux model loaded.")
-        except Exception as e:
-            _flux_pipe = None
-            raise RuntimeError(f"Failed to load FLUX model: {e}")
-    return _flux_pipe
-def generate_flux_image(prompt: str, num_inference_steps: int = DEFAULT_IMAGE_STEPS, guidance_scale: float = 0.0):
-    if not prompt or not prompt.strip():
-        return None, "Please enter an image prompt."
-    if not _FLUX_AVAILABLE:
-        return None, "FluxPipeline is not available: install appropriate diffusers package to enable FLUX."
-    try:
-        pipe = load_flux_model()
-        # Use CPU generator by default for reproducibility; Flux pipeline will handle device placement
-        seed = random.randint(0, 2**31 - 1)
-        generator = torch.Generator(device="cpu").manual_seed(seed)
-        out = pipe(
-            prompt=prompt,
-            num_inference_steps=int(num_inference_steps),
-            guidance_scale=float(guidance_scale),
-            generator=generator,
-        )
-        image = out.images[0]
-        return image, f"FLUX.1 Schnell generated (seed={seed}) steps={num_inference_steps} guidance={guidance_scale}"
-    except Exception as e:
-        tb = traceback.format_exc()
-        return None, f"Error generating image: {e}\n{tb}"
-# -------- Gradio UI (no microphone / no speech) --------
 css = """
 .gradio-container { max-width: 1100px !important; }
-.header { text-align: center; padding: 12px; border-radius: 8px; color: white; background: linear-gradient(90deg,#2563eb,#7c3aed); }
 """
-with gr.Blocks(title="NLLB → Bengali + FLUX.1 Schnell", css=css) as demo:
-    gr.Markdown("""<div class='header'><h2>Translation (NLLB) + Fast Image Generation (FLUX.1 Schnell)</h2></div>""")
     with gr.Tabs():
         with gr.TabItem("Translate"):
             with gr.Row():
                 with gr.Column(scale=6):
                     input_text = gr.Textbox(lines=6, label="English Text", placeholder="Type English text here...")
                     with gr.Row():
-                        quick_1 = gr.Button("Hello, how are you?")
-                        quick_2 = gr.Button("Thank you very much.")
-                        quick_3 = gr.Button("The weather is nice today.")
                     translate_btn = gr.Button("Translate")
                 with gr.Column(scale=6):
                     output_text = gr.Textbox(lines=6, label="Bengali Translation", interactive=False)
@@ -211,30 +108,24 @@ with gr.Blocks(title="NLLB → Bengali + FLUX.1 Schnell", css=css) as demo:
                 with gr.Column(scale=6):
                     image_prompt = gr.Textbox(lines=4, label="Image Prompt", placeholder="Describe the image to generate...")
                     with gr.Row():
-                        generate_btn = gr.Button("Generate Image (FLUX)")
                         clear_btn = gr.Button("Clear")
-                    steps_slider = gr.Slider(minimum=1, maximum=8, step=1, value=DEFAULT_IMAGE_STEPS, label="Inference Steps (1-4 recommended)")
-                    guidance_slider = gr.Slider(minimum=0.0, maximum=10.0, step=0.5, value=0.0, label="Guidance Scale (Schnell often uses low guidance)")
                 with gr.Column(scale=6):
                     output_image = gr.Image(label="Generated Image")
                     status = gr.Textbox(label="Status", interactive=False)
-    gr.Markdown("---")
-    gr.Markdown("*Notes: FLUX.1 [schnell] is designed for very low-step generation. A GPU with sufficient VRAM is strongly recommended. If you cannot run locally, consider a hosted API.*")
-    # Bind events
-    def _use_translation(t):
-        return t
-    quick_1.click(fn=lambda: "Hello, how are you?", inputs=None, outputs=input_text)
-    quick_2.click(fn=lambda: "Thank you very much.", inputs=None, outputs=input_text)
-    quick_3.click(fn=lambda: "The weather is nice today.", inputs=None, outputs=input_text)
     translate_btn.click(fn=translate_text, inputs=input_text, outputs=output_text)
-    use_for_image.click(fn=_use_translation, inputs=output_text, outputs=image_prompt)
-    generate_btn.click(fn=generate_flux_image, inputs=[image_prompt, steps_slider, guidance_slider], outputs=[output_image, status])
     clear_btn.click(fn=lambda: ["", None, ""], inputs=None, outputs=[image_prompt, output_image, status])
-if __name__ == '__main__':
-    demo.launch(server_name='0.0.0.0', server_port=int(os.environ.get('PORT', 7860)))

 # app.py
 import os
 import re
 import random
 import torch
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+# -------- Translation: Facebook NLLB --------
+# Checkpoint id handed to both from_pretrained() calls in load_translation_model().
+TRANSLATION_MODEL = "facebook/nllb-200-distilled-600M"
+# Source-side and target-side language codes used by translate_text() and
+# _get_forced_bos_token_id().
+SRC_LANG = "eng_Latn"
+TGT_LANG = "ben_Beng"
+# Token limit for tokenizer truncation; generation is allowed MAX_LENGTH + 64.
+MAX_LENGTH = 512
+# Use CUDA when torch reports it as available, otherwise fall back to CPU.
 DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# Lazily filled cache; both names are populated by load_translation_model().
 _translation_tokenizer = None
 _translation_model = None
 def load_translation_model():
+    """Return the cached ``(tokenizer, model)`` pair, loading it on first use.
+
+    Both objects are created from ``TRANSLATION_MODEL``. The tokenizer is
+    requested with ``use_fast=False`` and the model is moved to ``DEVICE``.
+    They are kept in module globals so later calls reuse them.
+    """
     global _translation_tokenizer, _translation_model
+    # Load both halves again if either one is still missing.
     if _translation_tokenizer is None or _translation_model is None:
+        _translation_tokenizer = AutoTokenizer.from_pretrained(TRANSLATION_MODEL, use_fast=False)
+        _translation_model = AutoModelForSeq2SeqLM.from_pretrained(TRANSLATION_MODEL).to(DEVICE)
     return _translation_tokenizer, _translation_model
 def _get_forced_bos_token_id(tokenizer):
+    """Return the token id for ``TGT_LANG``, or ``None`` if it cannot be resolved.
+
+    ``lang_code_to_id`` is consulted first when the tokenizer provides it.
+    Otherwise the language code is looked up as an ordinary token. A lookup
+    that raises, returns ``None``, or falls back to the unknown-token id is
+    treated as "not found", so the caller never forces generation to start
+    from the unknown token.
+    """
+    if hasattr(tokenizer, "lang_code_to_id") and TGT_LANG in tokenizer.lang_code_to_id:
+        return tokenizer.lang_code_to_id[TGT_LANG]
     try:
+        token_id = tokenizer.convert_tokens_to_ids(TGT_LANG)
+    except Exception:
+        # Best-effort lookup: any tokenizer failure means "no forced BOS".
+        return None
+    # Tokenizers map unknown tokens to unk_token_id rather than raising.
+    if token_id is None or token_id == getattr(tokenizer, "unk_token_id", None):
+        return None
+    return token_id
+def split_into_sentences(text: str):
+    """Split ``text`` into trimmed, non-empty sentences.
+
+    A boundary is any run of whitespace that directly follows ``.``, ``!`` or
+    ``?``. Falsy input (empty string or ``None``) yields an empty list.
+    """
+    if not text:
+        return []
+    pieces = re.split(r'(?<=[.!?])\s+', text.strip())
+    sentences = []
+    for piece in pieces:
+        trimmed = piece.strip()
+        if trimmed:
+            sentences.append(trimmed)
+    return sentences
+def translate_text(text: str):
+    """Translate ``text`` from ``SRC_LANG`` to ``TGT_LANG`` sentence by sentence.
+
+    Returns an empty string for empty or whitespace-only input. Otherwise the
+    text is split with ``split_into_sentences``, each sentence is translated
+    with beam search (5 beams), and the results are joined with single spaces.
+    Exceptions from model loading or generation propagate to the caller.
+    """
+    if not text or not text.strip():
+        return ""
+    tokenizer, model = load_translation_model()
+    # Select the source language on the tokenizer instead of pasting the code
+    # into the text, which would put the language tag into the input twice.
+    tokenizer.src_lang = SRC_LANG
+    forced_bos = _get_forced_bos_token_id(tokenizer)
     sentences = split_into_sentences(text)
     translations = []
     for s in sentences:
+        inputs = tokenizer(s, return_tensors="pt", truncation=True, max_length=MAX_LENGTH)
+        inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
+        gen_kwargs = dict(max_length=MAX_LENGTH + 64, num_beams=5, early_stopping=True)
+        if forced_bos is not None:
+            gen_kwargs["forced_bos_token_id"] = int(forced_bos)
+        generated_tokens = model.generate(**inputs, **gen_kwargs)
+        decoded = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
+        # Drop a leading language code if one survived decoding.
+        if decoded.startswith(TGT_LANG):
+            decoded = decoded[len(TGT_LANG):].strip()
+        translations.append(decoded)
     return " ".join(translations)
+# -------- Image Generation: SANA-Sprint 0.6B --------
+from diffusers import DiffusionPipeline
+# Checkpoint id passed to DiffusionPipeline.from_pretrained() in load_sana().
+SANA_MODEL = "Efficient-Large-Model/Sana_Sprint_0.6B_1024px_diffusers"
+# Defaults for generate_sana_image() and the initial values of the UI sliders.
+DEFAULT_STEPS = 1
+DEFAULT_GUIDANCE = 1.0
+# Pipeline cache, filled on the first load_sana() call.
+_sana_pipe = None
+def load_sana():
+    """Return the cached SANA pipeline, creating it on first use.
+
+    The dtype is bfloat16 on CUDA devices that support it, float16 on other
+    CUDA devices, and float32 on CPU. On CUDA, model CPU offload is attempted
+    first; the pipeline is moved to ``DEVICE`` explicitly only when offload is
+    not active, because moving an offloaded pipeline by hand defeats the
+    offload. The global cache is assigned only after setup has succeeded.
+    """
+    global _sana_pipe
+    if _sana_pipe is None:
+        use_cuda = torch.cuda.is_available()
+        if not use_cuda:
+            dtype = torch.float32
+        elif torch.cuda.is_bf16_supported():
+            dtype = torch.bfloat16
+        else:
+            dtype = torch.float16
+        pipe = DiffusionPipeline.from_pretrained(SANA_MODEL, torch_dtype=dtype)
+        offloaded = False
+        if use_cuda:
+            try:
+                pipe.enable_model_cpu_offload()
+                offloaded = True
+            except Exception as exc:
+                # Offload is optional; report why it was skipped and fall back.
+                print(f"Model CPU offload unavailable, using plain device placement: {exc}")
+        if not offloaded:
+            pipe = pipe.to(DEVICE)
+        _sana_pipe = pipe
+    return _sana_pipe
+def generate_sana_image(prompt: str, steps: int = DEFAULT_STEPS, guidance: float = DEFAULT_GUIDANCE, seed: int = None):
+    """Generate one image for ``prompt`` and return ``(image, status_message)``.
+
+    Returns ``(None, message)`` without loading the pipeline when the prompt is
+    ``None``, empty or whitespace-only. ``steps`` and ``guidance`` are coerced
+    to ``int`` and ``float`` before being passed to the pipeline. When ``seed``
+    is ``None`` a random 31-bit seed is drawn; the seed actually used is
+    reported in the status message.
+    """
+    # Check `not prompt` first so a None prompt gets the message, not an AttributeError.
+    if not prompt or not prompt.strip():
+        return None, "Please enter an image prompt."
+    pipe = load_sana()
+    seed = random.randint(0, 2**31 - 1) if seed is None else int(seed)
+    gen = torch.Generator(device=DEVICE).manual_seed(seed)
+    out = pipe(prompt, num_inference_steps=int(steps), guidance_scale=float(guidance), generator=gen)
+    return out.images[0], f"SANA-Sprint generated (seed={seed}) steps={steps} guidance={guidance}"
+# -------- Gradio App --------
 css = """
 .gradio-container { max-width: 1100px !important; }
+.header { text-align:center; padding:12px; border-radius:8px; color:white; background:linear-gradient(90deg,#2563eb,#7c3aed); }
 """
+with gr.Blocks(title="NLLB → Bengali + SANA-Sprint", css=css) as demo:
+    gr.Markdown("<div class='header'><h2>English → Bengali Translation + Fast Image Generation (SANA-Sprint)</h2></div>")
     with gr.Tabs():
         with gr.TabItem("Translate"):
             with gr.Row():
                 with gr.Column(scale=6):
                     input_text = gr.Textbox(lines=6, label="English Text", placeholder="Type English text here...")
                     with gr.Row():
+                        quick1 = gr.Button("Hello, how are you?")
+                        quick2 = gr.Button("Thank you very much.")
+                        quick3 = gr.Button("The weather is nice today.")
                     translate_btn = gr.Button("Translate")
                 with gr.Column(scale=6):
                     output_text = gr.Textbox(lines=6, label="Bengali Translation", interactive=False)
                 with gr.Column(scale=6):
                     image_prompt = gr.Textbox(lines=4, label="Image Prompt", placeholder="Describe the image to generate...")
                     with gr.Row():
+                        generate_btn = gr.Button("Generate Image (SANA)")
                         clear_btn = gr.Button("Clear")
+                    steps_slider = gr.Slider(minimum=1, maximum=4, step=1, value=DEFAULT_STEPS, label="Inference Steps (1-4 fastest)")
+                    guidance_slider = gr.Slider(minimum=0.0, maximum=5.0, step=0.5, value=DEFAULT_GUIDANCE, label="Guidance Scale")
                 with gr.Column(scale=6):
                     output_image = gr.Image(label="Generated Image")
                     status = gr.Textbox(label="Status", interactive=False)
+    # Quick phrase events
+    quick1.click(fn=lambda: "Hello, how are you?", inputs=None, outputs=input_text)
+    quick2.click(fn=lambda: "Thank you very much.", inputs=None, outputs=input_text)
+    quick3.click(fn=lambda: "The weather is nice today.", inputs=None, outputs=input_text)
     translate_btn.click(fn=translate_text, inputs=input_text, outputs=output_text)
+    use_for_image.click(fn=lambda x: x, inputs=output_text, outputs=image_prompt)
+    generate_btn.click(fn=generate_sana_image, inputs=[image_prompt, steps_slider, guidance_slider], outputs=[output_image, status])
     clear_btn.click(fn=lambda: ["", None, ""], inputs=None, outputs=[image_prompt, output_image, status])
+# Bind to 0.0.0.0 and take the port from $PORT (default 7860).
+if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))