Spaces:

uoda5t
/

test2

Sleeping

App Files Community

John Smith committed on Jul 3, 2024

Commit

4dbb3f0

verified ·

1 Parent(s): 5b39830

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -19

app.py CHANGED Viewed

@@ -2,35 +2,49 @@ import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
-# Load model and tokenizer
 model_name = "cognitivecomputations/TinyDolphin-2.8-1.1b"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
-def generate_response(message, history):
-    # Format the input with chat history
-    prompt = "".join([f"Human: {h[0]}\nAssistant: {h[1]}\n" for h in history])
-    prompt += f"Human: {message}\nAssistant:"
     # Tokenize and generate
-    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-    outputs = model.generate(**inputs, max_new_tokens=1000, temperature=0.7, do_sample=True)
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    # Extract only the assistant's response
-    assistant_response = response.split("Assistant:")[-1].strip()
-    return assistant_response
 # Create the Gradio interface
 iface = gr.ChatInterface(
     generate_response,
-    title="TinyDolphin-2.8-1.1b Chat Interface",
-    description="Chat with the TinyDolphin-2.8-1.1b model. Type your message and press Enter.",
-    examples=[
-        "What is the capital of France?",
-        "Explain quantum computing in simple terms.",
-        "Write a short poem about artificial intelligence."
-    ],
     cache_examples=False,
 )

 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
+# Load the model and tokenizer
 model_name = "cognitivecomputations/TinyDolphin-2.8-1.1b"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name)
+# Move model to GPU if available
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
+def generate_response(message, chat_history):
+    # Prepare the input
+    chat_history_text = ""
+    for turn in chat_history:
+        chat_history_text += f"Human: {turn[0]}\nAI: {turn[1]}\n"
+    prompt = f"{chat_history_text}Human: {message}\nAI:"
     # Tokenize and generate
+    inputs = tokenizer(prompt, return_tensors="pt").to(device)
+    outputs = model.generate(
+        **inputs,
+        max_new_tokens=100,
+        temperature=0.7,
+        top_p=0.9,
+        do_sample=True
+    )
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    # Extract only the AI's response
+    ai_response = response.split("AI:")[-1].strip()
+    return ai_response
 # Create the Gradio interface
 iface = gr.ChatInterface(
     generate_response,
+    chatbot=gr.Chatbot(height=300),
+    textbox=gr.Textbox(placeholder="Type your message here...", container=False, scale=7),
+    title="TinyDolphin-2.8-1.1b Chatbot",
+    description="Chat with the TinyDolphin-2.8-1.1b model.",
+    theme="soft",
+    examples=["Tell me a short story", "What's the capital of France?", "Explain quantum computing"],
     cache_examples=False,
 )