import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import gradio as gr
import re

# ---------------------------------------------------------
# 1️⃣ Load Model
# ---------------------------------------------------------
torch.set_num_threads(1)

print("🤖 Starting model loading...")
try:
    # Use the small variant for faster CPU inference
    MODEL_NAME = "google/mt5-small"
    TOKENIZER = AutoTokenizer.from_pretrained(MODEL_NAME)
    CHAT_MODEL = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
    print("✅ Model loaded successfully.")
except Exception as e:
    print(f"❌ Model loading failed: {e}")
    TOKENIZER, CHAT_MODEL = None, None
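
# Optional addition (not in the original file): switch the model to eval mode so
# dropout layers are disabled during inference. generate() already runs without
# gradient tracking, so this is a small, safe housekeeping step.
if CHAT_MODEL is not None:
    CHAT_MODEL.eval()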

# ---------------------------------------------------------
# 2️⃣ Detect Script (Sindhi or Roman Sindhi)
# ---------------------------------------------------------
def detect_script(text):
    """Detect whether text is Sindhi (Arabic script) or Roman Sindhi (Latin script)."""
    if re.search(r'[\u0600-\u06FF]', text):
        return "sindhi"
    else:
        return "roman"

# ---------------------------------------------------------
# 3️⃣ Chat Function
# ---------------------------------------------------------
def generate_reply(user_text):
    """Generate a Sindhi or Roman Sindhi reply based on the input script."""
    if TOKENIZER is None or CHAT_MODEL is None:
        return "Model is not loaded. Please check the logs and restart the Space."
    if not user_text.strip():
        return "مهرباني ڪري ڪجھ لکو."  # "Please type something."

    script_type = detect_script(user_text)

    # Instruction-style prompts in the matching script
    if script_type == "sindhi":
        # "Your task is to answer the following sentence in Sindhi: Question: ... Answer:"
        prompt = f"توھان جو ڪم ھي آھي ته ھيٺين جملي جو جواب سنڌي ۾ ڏيو:\nسوال: {user_text}\nجواب:"
    else:
        # "Your task is to answer the question below in Roman Sindhi: Question: ... Answer:"
        prompt = f"Tuhanjo kaam aahe ta neeche likhe sawal jo jawab Roman Sindhi mein likho:\nSawaal: {user_text}\nJawab:"

    inputs = TOKENIZER(prompt, return_tensors="pt", truncation=True)
    outputs = CHAT_MODEL.generate(
        **inputs,
        max_new_tokens=100,
        do_sample=True,   # sample instead of greedy decoding for more varied replies
        top_p=0.95,
        top_k=40,
        temperature=0.8
    )
    reply = TOKENIZER.decode(outputs[0], skip_special_tokens=True)

    # Remove leftover sentinel tokens such as <extra_id_0>
    reply = re.sub(r"<.*?>", "", reply).strip()
    return reply
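
# Quick manual check (illustrative, not part of the original app): uncomment to
# exercise generate_reply() once from the command line before launching the UI.
# if CHAT_MODEL is not None:
#     print(generate_reply("Tuhanjo naalo chaa aahe?"))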

# ---------------------------------------------------------
# 4️⃣ Gradio Interface
# ---------------------------------------------------------
with gr.Blocks(title="Sindhi Text Chatbot") as app:
    gr.Markdown(
        """
        # 💬 سنڌي چيٽ بوٽ (Text Only)
        ✍️ رومن سنڌي يا سنڌي ۾ لکو — ۽ بوٽ اوهان کي ساڳئي ٻولي ۾ جواب ڏيندو.

        **Example:**
        - Sindhi: توھان جو نالو ڇا آهي؟
        - Roman Sindhi: Tuhanjo naalo chaa aahe?
        """
    )
    with gr.Row():
        with gr.Column(scale=1):
            user_input = gr.Textbox(
                label="✍️ پنهنجو سوال لکو | Type your message (Sindhi or Roman Sindhi)",
                lines=3,
                placeholder="مثال طور: توھان جو نالو ڇا آهي؟ يا Tuhanjo naalo chaa aahe?"
            )
            send_btn = gr.Button("Send", variant="primary")
        with gr.Column(scale=2):
            reply_output = gr.Textbox(
                label="💬 AI جو جواب | AI’s Reply",
                lines=5
            )
            clear_btn = gr.Button("Clear")

    # Main button action
    send_btn.click(
        fn=generate_reply,
        inputs=[user_input],
        outputs=[reply_output]
    )

    # Clear all fields
    def clear_all():
        return "", ""

    clear_btn.click(
        fn=clear_all,
        inputs=[],
        outputs=[user_input, reply_output]
    )
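
    # Optional addition (not in the original app): also submit on the Enter key
    # via Gradio's Textbox.submit event, mirroring the Send button's behaviour.
    user_input.submit(
        fn=generate_reply,
        inputs=[user_input],
        outputs=[reply_output]
    )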

# ---------------------------------------------------------
# 5️⃣ Launch App
# ---------------------------------------------------------
if __name__ == "__main__":
    app.launch()