"""Minimal Gradio chat UI backed by a HuggingFace text-generation pipeline.

The assistant persona ("Aria") is injected via a plain-text system prompt;
conversation context is rebuilt from the Gradio Chatbot history on each turn.
"""

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

MODEL = "prithivMLmods/Llama-SmolTalk-3.2-1B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForCausalLM.from_pretrained(MODEL, device_map="auto")

# NOTE: the model is already placed by `device_map="auto"` above, so we must
# not pass `device_map` to the pipeline again (it would be redundant and
# triggers a warning when a pre-loaded model is supplied).
chatbot = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

system_prompt = "Tu es Aria, une IA bienveillante et polie qui répond de façon concise et claire."


def chat(message, history=None):
    """Generate Aria's reply for *message* given the prior conversation.

    Args:
        message: The user's latest utterance.
        history: List of ``[user, assistant]`` turn pairs from the Chatbot
            component. ``None`` is treated as an empty conversation.
            (A ``None`` default replaces the original mutable ``[]`` default,
            which leaked state across calls.)

    Returns:
        A new history list — the input turns plus the ``[message, reply]``
        pair — suitable for the single Chatbot output component.
    """
    history = list(history) if history else []  # copy: never mutate Gradio state in place
    context = "\n".join(f"Utilisateur: {user}\nAria: {bot}" for user, bot in history)
    prompt = f"{system_prompt}\n{context}\nUtilisateur: {message}\nAria:"

    resp = chatbot(
        prompt,
        max_new_tokens=150,
        do_sample=True,
        temperature=0.7,
    )[0]["generated_text"]
    # The pipeline echoes the prompt; keep only the text after the last
    # "Aria:" marker, which is the newly generated reply.
    reply = resp.split("Aria:")[-1].strip()

    return history + [[message, reply]]


with gr.Blocks() as demo:
    chat_ui = gr.Chatbot()
    msg = gr.Textbox(placeholder="Écris un message...")
    # One return value -> one output component (the original returned a
    # 2-tuple into a single-output wiring, which is a mismatch).
    msg.submit(chat, [msg, chat_ui], [chat_ui])

if __name__ == "__main__":
    # Guarded so importing this module doesn't start a web server.
    demo.launch()