Update app.py
Browse files
app.py
CHANGED
|
@@ -78,7 +78,7 @@ def chat_llama3_8b(message: str,
|
|
| 78 |
print(tokenizer.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False))
|
| 79 |
print(input_ids)
|
| 80 |
|
| 81 |
-
streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=
|
| 82 |
|
| 83 |
generate_kwargs = dict(
|
| 84 |
input_ids= input_ids,
|
|
@@ -97,10 +97,11 @@ def chat_llama3_8b(message: str,
|
|
| 97 |
|
| 98 |
outputs = []
|
| 99 |
for text in streamer:
|
| 100 |
-
#
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
|
|
|
| 104 |
outputs.append(text)
|
| 105 |
print("".join(outputs))
|
| 106 |
yield "".join(outputs)
|
|
|
|
| 78 |
print(tokenizer.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False))
|
| 79 |
print(input_ids)
|
| 80 |
|
| 81 |
+
streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
|
| 82 |
|
| 83 |
generate_kwargs = dict(
|
| 84 |
input_ids= input_ids,
|
|
|
|
| 97 |
|
| 98 |
outputs = []
|
| 99 |
for text in streamer:
|
| 100 |
+
# Remove thinking tags to prevent Gradio display issues
|
| 101 |
+
if "<think>" in text:
|
| 102 |
+
text = text.replace("[think]", "").strip()
|
| 103 |
+
if "</think>" in text:
|
| 104 |
+
text = text.replace("[/think]", "").strip()
|
| 105 |
outputs.append(text)
|
| 106 |
print("".join(outputs))
|
| 107 |
yield "".join(outputs)
|