import re

import gradio as gr
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# ---------------------------------------------------------
# 1️⃣ Load Model
# ---------------------------------------------------------
torch.set_num_threads(1)

# Use small version for faster inference and better generalization
MODEL_NAME = "google/mt5-small"

# Upper bound on prompt length. `truncation=True` needs an explicit
# `max_length` to have a guaranteed effect, so very long messages are cut
# here instead of being fed to the model in full.
MAX_INPUT_TOKENS = 512

print("🤖 Starting Model Loading...")
try:
    TOKENIZER = AutoTokenizer.from_pretrained(MODEL_NAME)
    CHAT_MODEL = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
    print("✅ Model loaded successfully.")
except Exception as e:
    # Top-level boundary: keep the UI importable even without the model.
    # generate_reply() checks for the None placeholders before using them.
    print(f"❌ Model loading failed: {e}")
    TOKENIZER, CHAT_MODEL = None, None

# ---------------------------------------------------------
# 2️⃣ Detect Script (Sindhi or Roman Sindhi)
# ---------------------------------------------------------
# Any character from the Unicode "Arabic" block (U+0600-U+06FF).
_ARABIC_SCRIPT_RE = re.compile(r'[\u0600-\u06FF]')

# Matches anything enclosed in angle brackets, non-greedy.
_ANGLE_TAG_RE = re.compile(r"<.*?>")


def detect_script(text: str) -> str:
    """Detect whether text is Sindhi (Arabic script) or Roman Sindhi (Latin script).

    Args:
        text: The user's message.

    Returns:
        "sindhi" if the text contains at least one character in the
        U+0600-U+06FF range, otherwise "roman".
    """
    if _ARABIC_SCRIPT_RE.search(text):
        return "sindhi"
    return "roman"


# ---------------------------------------------------------
# 3️⃣ Chat Function
# ---------------------------------------------------------
def generate_reply(user_text: str) -> str:
    """Generate Sindhi or Roman Sindhi reply based on input language.

    Args:
        user_text: The message typed by the user. None, empty, or
            whitespace-only input is treated as "nothing typed".

    Returns:
        The decoded model output with angle-bracket tokens removed and
        surrounding whitespace stripped. A fixed Sindhi prompt is returned
        for empty input, and a fixed notice is returned when the model
        failed to load at startup.
    """
    if not user_text or not user_text.strip():
        return "مهرباني ڪري ڪجھ لکو."  # Please type something.

    # The loader sets both globals to None on failure; calling them would
    # raise "'NoneType' object is not callable", so report it instead.
    if TOKENIZER is None or CHAT_MODEL is None:
        return "⚠️ Model is not available. Please check the server logs and restart the app."

    script_type = detect_script(user_text)

    # Improved instruction-style prompts
    if script_type == "sindhi":
        prompt = f"توھان جو ڪم ھي آھي ته ھيٺين جملي جو جواب سنڌي ۾ ڏيو:\nسوال: {user_text}\nجواب:"
    else:
        prompt = f"Tuhanjo kaam aahe ta neeche likhe sawal jo jawab Roman Sindhi mein likho:\nSawaal: {user_text}\nJawab:"

    inputs = TOKENIZER(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=MAX_INPUT_TOKENS,
    )
    # Sampling is enabled, so the same input can produce different replies.
    outputs = CHAT_MODEL.generate(
        **inputs,
        max_new_tokens=100,
        do_sample=True,
        top_p=0.95,
        top_k=40,
        temperature=0.8,
    )

    reply = TOKENIZER.decode(outputs[0], skip_special_tokens=True)

    # Remove unwanted angle-bracket tokens that survive decoding
    # (presumably sentinel tokens such as <extra_id_0>).
    reply = _ANGLE_TAG_RE.sub("", reply).strip()
    return reply


# ---------------------------------------------------------
# 4️⃣ Gradio Interface
# ---------------------------------------------------------
with gr.Blocks(title="Sindhi Text Chatbot") as app:
    gr.Markdown(
        """
        # 💬 سنڌي چيٽ بوٽ (Text Only)
        ✍️ رومن سنڌي يا سنڌي ۾ لکو — ۽ بوٽ اوهان کي ساڳئي ٻولي ۾ جواب ڏيندو.

        **Example:**
        - Sindhi: توھان جو نالو ڇا آهي؟
        - Roman Sindhi: Tuhanjo naalo chaa aahe?
        """
    )

    with gr.Row():
        with gr.Column(scale=1):
            user_input = gr.Textbox(
                label="✍️ پنهنجو سوال لکو | Type your message (Sindhi or Roman Sindhi)",
                lines=3,
                placeholder="مثال طور: توھان جو نالو ڇا آهي؟ يا Tuhanjo naalo chaa aahe?"
            )
            send_btn = gr.Button("Send", variant="primary")

        with gr.Column(scale=2):
            reply_output = gr.Textbox(
                label="💬 AI جو جواب | AI’s Reply",
                lines=5
            )
            clear_btn = gr.Button("Clear")

    # Main button action
    send_btn.click(
        fn=generate_reply,
        inputs=[user_input],
        outputs=[reply_output]
    )

    # Clear all fields
    def clear_all():
        """Return empty strings for the input box and the reply box."""
        return "", ""

    clear_btn.click(
        fn=clear_all,
        inputs=[],
        outputs=[user_input, reply_output]
    )

# ---------------------------------------------------------
# 5️⃣ Launch App
# ---------------------------------------------------------
if __name__ == "__main__":
    app.launch()