import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import gradio as gr
import re

# ---------------------------------------------------------
# 1️⃣ Load Model
# ---------------------------------------------------------
torch.set_num_threads(1)

print("🤖 Starting model loading...")
try:
    # Use the small variant for faster CPU inference
    MODEL_NAME = "google/mt5-small"
    TOKENIZER = AutoTokenizer.from_pretrained(MODEL_NAME)
    CHAT_MODEL = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
    print("✅ Model loaded successfully.")
except Exception as e:
    print(f"❌ Model loading failed: {e}")
    TOKENIZER, CHAT_MODEL = None, None
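
# Optional addition (not in the original file): switch the model to eval mode so
# dropout layers are disabled during inference. generate() already runs without
# gradient tracking, so this is a small, safe housekeeping step.
if CHAT_MODEL is not None:
    CHAT_MODEL.eval()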

# ---------------------------------------------------------
# 2️⃣ Detect Script (Sindhi or Roman Sindhi)
# ---------------------------------------------------------
def detect_script(text):
    """Detect whether text is Sindhi (Arabic script) or Roman Sindhi (Latin script)."""
    if re.search(r'[\u0600-\u06FF]', text):
        return "sindhi"
    else:
        return "roman"

# ---------------------------------------------------------
# 3️⃣ Chat Function
# ---------------------------------------------------------
def generate_reply(user_text):
    """Generate a Sindhi or Roman Sindhi reply based on the input script."""
    if TOKENIZER is None or CHAT_MODEL is None:
        return "Model is not loaded. Please check the logs and restart the Space."
    if not user_text.strip():
        return "مهرباني ڪري ڪجھ لکو."  # "Please type something."

    script_type = detect_script(user_text)

    # Instruction-style prompts in the matching script
    if script_type == "sindhi":
        # "Your task is to answer the following sentence in Sindhi: Question: ... Answer:"
        prompt = f"توھان جو ڪم ھي آھي ته ھيٺين جملي جو جواب سنڌي ۾ ڏيو:\nسوال: {user_text}\nجواب:"
    else:
        # "Your task is to answer the question below in Roman Sindhi: Question: ... Answer:"
        prompt = f"Tuhanjo kaam aahe ta neeche likhe sawal jo jawab Roman Sindhi mein likho:\nSawaal: {user_text}\nJawab:"

    inputs = TOKENIZER(prompt, return_tensors="pt", truncation=True)
    outputs = CHAT_MODEL.generate(
        **inputs,
        max_new_tokens=100,
        do_sample=True,   # sample instead of greedy decoding for more varied replies
        top_p=0.95,
        top_k=40,
        temperature=0.8
    )
    reply = TOKENIZER.decode(outputs[0], skip_special_tokens=True)

    # Remove leftover sentinel tokens such as <extra_id_0>
    reply = re.sub(r"<.*?>", "", reply).strip()
    return reply
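
# Quick manual check (illustrative, not part of the original app): uncomment to
# exercise generate_reply() once from the command line before launching the UI.
# if CHAT_MODEL is not None:
#     print(generate_reply("Tuhanjo naalo chaa aahe?"))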

# ---------------------------------------------------------
# 4️⃣ Gradio Interface
# ---------------------------------------------------------
with gr.Blocks(title="Sindhi Text Chatbot") as app:
    gr.Markdown(
        """
        # 💬 سنڌي چيٽ بوٽ (Text Only)
        ✍️ رومن سنڌي يا سنڌي ۾ لکو — ۽ بوٽ اوهان کي ساڳئي ٻولي ۾ جواب ڏيندو.

        **Example:**
        - Sindhi: توھان جو نالو ڇا آهي؟
        - Roman Sindhi: Tuhanjo naalo chaa aahe?
        """
    )
    with gr.Row():
        with gr.Column(scale=1):
            user_input = gr.Textbox(
                label="✍️ پنهنجو سوال لکو | Type your message (Sindhi or Roman Sindhi)",
                lines=3,
                placeholder="مثال طور: توھان جو نالو ڇا آهي؟ يا Tuhanjo naalo chaa aahe?"
            )
            send_btn = gr.Button("Send", variant="primary")
        with gr.Column(scale=2):
            reply_output = gr.Textbox(
                label="💬 AI جو جواب | AI’s Reply",
                lines=5
            )
            clear_btn = gr.Button("Clear")

    # Main button action
    send_btn.click(
        fn=generate_reply,
        inputs=[user_input],
        outputs=[reply_output]
    )

    # Clear all fields
    def clear_all():
        return "", ""

    clear_btn.click(
        fn=clear_all,
        inputs=[],
        outputs=[user_input, reply_output]
    )
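
    # Optional addition (not in the original app): also submit on the Enter key
    # via Gradio's Textbox.submit event, mirroring the Send button's behaviour.
    user_input.submit(
        fn=generate_reply,
        inputs=[user_input],
        outputs=[reply_output]
    )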

# ---------------------------------------------------------
# 5️⃣ Launch App
# ---------------------------------------------------------
if __name__ == "__main__":
    app.launch()