# app.py
import os
import traceback
import gradio as gr
from typing import Tuple
# Try to import transformers; if it is unavailable, the app will report that it
# must be added to requirements.txt.
try:
    from transformers import pipeline
except Exception:
    pipeline = None
# Optional: Hugging Face hosted-inference fallback
try:
from huggingface_hub import InferenceApi
except Exception:
InferenceApi = None
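# NOTE (assumption, not exercised by this app): recent huggingface_hub releases
# deprecate InferenceApi in favor of InferenceClient. A rough migration sketch:
#
#   from huggingface_hub import InferenceClient
#   client = InferenceClient(model="Helsinki-NLP/opus-mt-bn-en", token=token)
#   result = client.translation("আমি ভালো আছি")  # returns a TranslationOutput
#
# The code below sticks with InferenceApi, since that is what this Space was
# written against.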
# ---------- CONFIG ----------
# Lightweight models that work well on CPU / Spaces:
MODEL_EN_TO_BN = "shhossain/opus-mt-en-to-bn" # small finetuned en -> bn (≈75M params)
MODEL_BN_TO_EN = "Helsinki-NLP/opus-mt-bn-en" # bn -> en
# If you prefer other model ids, change the strings above.
# Language labels for UI
DIRECTION_CHOICES = ["English → Bengali", "Bengali → English"]
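# A typical requirements.txt for this Space might look like the following
# (an assumption, versions unpinned; pin as needed). sentencepiece is required
# by the Marian/OPUS-MT tokenizers:
#
#   gradio
#   transformers
#   torch
#   sentencepiece
#   huggingface_hub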
# ---------- GLOBALS ----------
local_pipeline = None
local_model_name = None
use_api_fallback = False
inference_client = None
# ---------- HELPERS ----------
def try_load_local(model_name: str) -> Tuple[bool, str]:
"""Try to load a local transformers pipeline for translation.
Returns (success, message)."""
global local_pipeline, local_model_name, use_api_fallback
if pipeline is None:
return False, "transformers not installed (add to requirements.txt)"
try:
# Use the 'translation' pipeline (Marian / MarianMT based models)
local_pipeline = pipeline("translation", model=model_name, device=-1, max_length=512)
local_model_name = model_name
use_api_fallback = False
return True, f"Loaded local model: {model_name}"
except Exception as e:
use_api_fallback = True
return False, f"Local load failed: {str(e)}"
def try_init_inference_api(token_env="HF_API_TOKEN", model_name_fallback=None):
"""Initialize huggingface_hub Inference API client if token present."""
global inference_client, use_api_fallback
token = os.environ.get(token_env)
if not token:
return False, "No HF_API_TOKEN found in env (set Space secret HF_API_TOKEN)"
if InferenceApi is None:
return False, "huggingface_hub not installed (add to requirements.txt)"
try:
inference_client = InferenceApi(repo_id=model_name_fallback or "facebook/nllb-200-distilled-600M", token=token)
use_api_fallback = True
return True, "Inference API client ready"
except Exception as e:
return False, f"Inference API init failed: {str(e)}"
def translate_with_local(text: str):
global local_pipeline
if local_pipeline is None:
raise RuntimeError("Local pipeline not loaded")
out = local_pipeline(text, max_length=512)
if isinstance(out, list) and len(out) > 0:
        # Marian-style models usually return 'translation_text'; others may use 'generated_text'
        res = out[0].get("translation_text") if isinstance(out[0], dict) else None
        if not res:
            # fall back to the first value in the dict (covers 'generated_text' and similar keys)
            if isinstance(out[0], dict):
                res = list(out[0].values())[0]
return res or str(out)
return str(out)
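# For reference, a call like local_pipeline("Hello") on a Marian model typically
# returns [{'translation_text': '...'}]; the extraction logic above covers that
# shape plus dict outputs keyed differently (e.g. 'generated_text').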
def translate_with_api(text: str, model_name: str):
    global inference_client
    if inference_client is None:
        raise RuntimeError("Inference client not ready")
    # Note: model_name is informational only; the client was bound to a repo at
    # init time. The hosted Inference API runs the model on HF's side, and for
    # Marian models you simply pass the raw text.
    res = inference_client(inputs=text, parameters={})
# API returns either list or dict; try to extract text
if isinstance(res, list) and len(res) > 0:
first = res[0]
if isinstance(first, dict):
return first.get("translation_text") or first.get("generated_text") or str(first)
return str(first)
if isinstance(res, dict):
return res.get("translation_text") or res.get("generated_text") or str(res)
return str(res)
# ---------- ON START: try local load (best-effort) ----------
# Load EN->BN eagerly at startup; the other direction is loaded lazily on first use.
_success, _msg = try_load_local(MODEL_EN_TO_BN)
print("Model load attempt:", _success, _msg)
# If local load failed, but user supplied HF_API_TOKEN in Secrets, init inference client as fallback
if use_api_fallback:
ok, msg = try_init_inference_api(model_name_fallback=MODEL_EN_TO_BN)
print("Inference API init:", ok, msg)
# ---------- TRANSLATION FUNCTION FOR UI ----------
def translate_text(text: str, direction: str):
"""Main translate function: returns (translation, status, analysis)"""
if not text or not text.strip():
return "", "Please type text to translate", ""
try:
model_name = MODEL_EN_TO_BN if direction == DIRECTION_CHOICES[0] else MODEL_BN_TO_EN
        # If no local model is loaded, or a different one is needed, try loading it
        if local_pipeline is None or local_model_name != model_name:
ok, msg = try_load_local(model_name)
print("Reload attempt:", ok, msg)
# if local load failed, try to init API if token present
if not ok and inference_client is None:
ok2, msg2 = try_init_inference_api(model_name_fallback=model_name)
print("Fallback init:", ok2, msg2)
# If local available, use it
if local_pipeline is not None and local_model_name == model_name:
translated = translate_with_local(text)
status = f"Local model used: {local_model_name}"
else:
# fallback to hosted inference
if inference_client is None:
return "", "No model available locally and no HF_API_TOKEN set for API fallback. Set HF_API_TOKEN in Space secrets.", ""
translated = translate_with_api(text, model_name)
status = f"Hosted Inference API used: {model_name}"
        # small "analysis" block: input/output word counts
words = len(text.split())
analysis = f"Input words: {words}. Output length: {len(translated.split())} words."
return translated, status, analysis
except Exception as e:
tb = traceback.format_exc()
return "", f"Error: {str(e)}", tb
# ---------- GRADIO APP UI ----------
with gr.Blocks(title="English ↔ Bengali — Fast Translator") as demo:
    gr.Markdown("# English ↔ Bengali — Fast Translator")
    gr.Markdown(
        "Small, fast OPUS-MT models are used for speed. If local loading fails, the app falls back to the Hugging Face Inference API (requires an HF_API_TOKEN secret on the Space)."
    )
with gr.Row():
direction = gr.Radio(label="Direction", choices=DIRECTION_CHOICES, value=DIRECTION_CHOICES[0])
swap = gr.Button("Swap")
input_text = gr.Textbox(label="Input text", lines=4, placeholder="Type in English or Bengali...")
translate_btn = gr.Button("Translate", variant="primary")
with gr.Row():
out_translation = gr.Textbox(label="Translation", lines=4)
out_status = gr.Textbox(label="Status / Tips", lines=2)
out_analysis = gr.Textbox(label="Analysis / Notes", lines=3)
# examples
with gr.Row():
ex1 = gr.Button("Hello, how are you?")
        ex2 = gr.Button("আমি ভালো আছি")  # "I am fine" in Bengali script; the bn->en model expects Bengali script, not romanized text
ex3 = gr.Button("Where is the market?")
# wiring
def do_swap(cur):
return DIRECTION_CHOICES[1] if cur == DIRECTION_CHOICES[0] else DIRECTION_CHOICES[0]
swap.click(do_swap, inputs=direction, outputs=direction)
translate_btn.click(translate_text, inputs=[input_text, direction], outputs=[out_translation, out_status, out_analysis])
ex1.click(lambda: "Hello, how are you?", outputs=input_text)
    ex2.click(lambda: "আমি ভালো আছি", outputs=input_text)
ex3.click(lambda: "Where is the market?", outputs=input_text)
gr.Markdown("---")
gr.Markdown("If the app shows `No model available` error: go to Space Settings β†’ Secrets and add `HF_API_TOKEN` (your Hugging Face token).")
# Launch if run directly
if __name__ == "__main__":
demo.launch(debug=True)