# Hajano's picture
# Update app.py
# e0fbbfe verified
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import gradio as gr
import re
# ---------------------------------------------------------
# 1️⃣ Load Model
# ---------------------------------------------------------
# Restrict PyTorch to a single intra-op thread before any model work starts.
torch.set_num_threads(1)
print("🤖 Starting Model Loading...")

# Checkpoint identifier passed to both from_pretrained() calls below
# (the small mT5 variant, picked by the author for faster inference).
MODEL_NAME = "google/mt5-small"

# Both handles stay None unless loading completes; a failure is reported on
# stdout instead of aborting the import of this module.
TOKENIZER = None
CHAT_MODEL = None
try:
    TOKENIZER = AutoTokenizer.from_pretrained(MODEL_NAME)
    CHAT_MODEL = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
    print("✅ Model loaded successfully.")
except Exception as load_error:
    print(f"❌ Model loading failed: {load_error}")
    # Reset both: the tokenizer may have loaded before the model failed.
    TOKENIZER = CHAT_MODEL = None
# ---------------------------------------------------------
# 2️⃣ Detect Script (Sindhi or Roman Sindhi)
# ---------------------------------------------------------
# Code points treated as Arabic script: the base Arabic block plus the Arabic
# Supplement, Arabic Extended-A and both Presentation Forms blocks.
# The final range stops at U+FEFC on purpose so that U+FEFF (the byte-order
# mark / zero-width no-break space) never counts as a Sindhi letter.
_ARABIC_SCRIPT_RE = re.compile(
    r"[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFC]"
)


def detect_script(text):
    """Classify *text* as Arabic-script Sindhi or Roman (Latin-script) Sindhi.

    Args:
        text: The string to inspect. ``None`` and the empty string are
            accepted and classified as ``"roman"``.

    Returns:
        ``"sindhi"`` if at least one Arabic-script character occurs anywhere
        in *text*, otherwise ``"roman"``.
    """
    # A single Arabic-script character is enough: mixed input is answered in
    # Sindhi script.
    if text and _ARABIC_SCRIPT_RE.search(text):
        return "sindhi"
    return "roman"
# ---------------------------------------------------------
# 3️⃣ Chat Function
# ---------------------------------------------------------
# Upper bound on prompt length (in tokens) handed to the model.
_MAX_PROMPT_TOKENS = 512


def generate_reply(user_text):
    """Generate a reply in the same script (Sindhi or Roman Sindhi) as the input.

    Args:
        user_text: The message typed by the user. ``None`` or a
            whitespace-only string is treated as empty input.

    Returns:
        One of:
        * a Sindhi "please type something" prompt when the input is empty;
        * an English notice when the model or tokenizer is not loaded;
        * otherwise the decoded model output with every ``<...>`` span
          removed and surrounding whitespace stripped.
    """
    # Guard against None (e.g. a programmatic caller) before calling .strip().
    user_text = (user_text or "").strip()
    if not user_text:
        return "مهرباني ڪري ڪجھ لکو."  # Please type something.

    # Startup loading leaves both globals as None on failure; answer with a
    # message instead of crashing on a None call.
    if TOKENIZER is None or CHAT_MODEL is None:
        return "⚠️ Model is not available. Please try again later."

    script_type = detect_script(user_text)

    # Instruction-style prompt written in the same script as the question.
    if script_type == "sindhi":
        prompt = f"توھان جو ڪم ھي آھي ته ھيٺين جملي جو جواب سنڌي ۾ ڏيو:\nسوال: {user_text}\nجواب:"
    else:
        prompt = f"Tuhanjo kaam aahe ta neeche likhe sawal jo jawab Roman Sindhi mein likho:\nSawaal: {user_text}\nJawab:"

    # Explicit max_length so truncation does not depend on whatever
    # model_max_length the tokenizer happens to be configured with.
    inputs = TOKENIZER(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=_MAX_PROMPT_TOKENS,
    )

    # Sampling-based decoding: nucleus + top-k with a mild temperature.
    outputs = CHAT_MODEL.generate(
        **inputs,
        max_new_tokens=100,
        do_sample=True,
        top_p=0.95,
        top_k=40,
        temperature=0.8,
    )
    reply = TOKENIZER.decode(outputs[0], skip_special_tokens=True)

    # Remove unwanted marker text (like <extra_id_0>, etc.) left in the output.
    return re.sub(r"<.*?>", "", reply).strip()
# ---------------------------------------------------------
# 4️⃣ Gradio Interface
# ---------------------------------------------------------
with gr.Blocks(title="Sindhi Text Chatbot") as app:

    def clear_all():
        """Return empty strings for the input box and the reply box."""
        return "", ""

    # Header / usage instructions shown at the top of the page.
    gr.Markdown(
        """
# 💬 سنڌي چيٽ بوٽ (Text Only)
✍️ رومن سنڌي يا سنڌي ۾ لکو — ۽ بوٽ اوهان کي ساڳئي ٻولي ۾ جواب ڏيندو.
**Example:**
- Sindhi: توھان جو نالو ڇا آهي؟
- Roman Sindhi: Tuhanjo naalo chaa aahe?
"""
    )

    with gr.Row():
        # Left column: message entry plus the Send button.
        with gr.Column(scale=1):
            user_input = gr.Textbox(
                lines=3,
                label="✍️ پنهنجو سوال لکو | Type your message (Sindhi or Roman Sindhi)",
                placeholder="مثال طور: توھان جو نالو ڇا آهي؟ يا Tuhanjo naalo chaa aahe?",
            )
            send_btn = gr.Button("Send", variant="primary")

        # Right column: model reply plus the Clear button.
        with gr.Column(scale=2):
            reply_output = gr.Textbox(lines=5, label="💬 AI جو جواب | AI’s Reply")
            clear_btn = gr.Button("Clear")

    # Send: feed the typed message to generate_reply and show its result.
    send_btn.click(fn=generate_reply, inputs=[user_input], outputs=[reply_output])

    # Clear: blank out both text boxes.
    clear_btn.click(fn=clear_all, inputs=[], outputs=[user_input, reply_output])
# ---------------------------------------------------------
# 5️⃣ Launch App
# ---------------------------------------------------------
if __name__ == "__main__":
    # Start the Gradio app only when this file is executed as a script,
    # not when it is imported as a module.
    app.launch()