Tobias Bergmann
committed on
Commit
·
668ee0d
1
Parent(s):
3a8892f
streaming
Browse files
app.py
CHANGED
|
@@ -29,34 +29,28 @@ pipe = Llama(
|
|
| 29 |
def predict(message: str, history: List[List[str]], max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS):
|
| 30 |
if not message:
|
| 31 |
return "", history
|
|
|
|
| 32 |
prompt = message
|
| 33 |
-
|
| 34 |
-
# Initialize reply
|
| 35 |
-
reply = ""
|
| 36 |
-
|
| 37 |
history.append([message, ""])
|
| 38 |
|
| 39 |
-
#
|
|
|
|
|
|
|
|
|
|
| 40 |
stream = pipe(
|
| 41 |
prompt,
|
| 42 |
-
max_tokens=max_new_tokens,
|
| 43 |
stop=["</s>"],
|
| 44 |
stream=True
|
| 45 |
)
|
| 46 |
-
|
| 47 |
for output in stream:
|
| 48 |
-
# This loop will receive partial output (one token at a time)
|
| 49 |
new_text = output['choices'][0]['text']
|
| 50 |
-
|
| 51 |
-
#
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
history[-1][1] = reply
|
| 56 |
-
|
| 57 |
-
# Yield for incremental display on chat
|
| 58 |
-
yield "", history
|
| 59 |
-
|
| 60 |
with gr.Blocks() as demo:
|
| 61 |
gr.Markdown(DESCRIPTION)
|
| 62 |
chatbot = gr.Chatbot()
|
|
@@ -69,5 +63,5 @@ with gr.Blocks() as demo:
|
|
| 69 |
label="Max New Tokens",
|
| 70 |
)
|
| 71 |
textbox.submit(predict, [textbox, chatbot, max_new_tokens_slider], [textbox, chatbot])
|
| 72 |
-
|
| 73 |
demo.queue().launch()
|
|
|
|
| 29 |
def predict(message: str, history: List[List[str]], max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS):
    """Stream the model's reply to ``message`` into the chat history.

    This function is a generator (it contains ``yield``). Each yielded value
    is a ``("", history)`` pair: the empty string is the new value for the
    first output (the textbox, per the ``textbox.submit`` wiring) and
    ``history`` is the updated list of ``[user_message, reply]`` pairs for
    the chatbot.

    Args:
        message: The user's input text; used verbatim as the prompt.
        history: Chat history as ``[user_message, reply]`` pairs. It is
            mutated in place: a new pair is appended and its reply slot is
            rewritten as text streams in. ``None`` is treated as empty.
        max_new_tokens: Passed to ``pipe`` as ``max_tokens``.

    Yields:
        ``("", history)`` after every streamed chunk, or exactly once with
        the unchanged history when ``message`` is empty.
    """
    if history is None:
        history = []

    if not message:
        # In a generator, ``return value`` is only attached to StopIteration
        # and is never seen by a consumer that simply iterates, so the
        # unchanged state has to be yielded explicitly.
        yield "", history
        return "", history

    prompt = message
    # Reserve the slot for this round; the reply is filled in incrementally.
    history.append([message, ""])

    # Accumulates the reply for this round.
    reply = ""

    # With stream=True, ``pipe`` produces an iterable of partial outputs.
    stream = pipe(
        prompt,
        max_tokens=max_new_tokens,
        stop=["</s>"],
        stream=True,
    )

    for output in stream:
        new_text = output['choices'][0]['text']
        reply += new_text
        history[-1][1] = reply  # Update the current reply in history
        yield "", history

    # The generator would terminate on its own here; the value is kept only
    # for callers that read it via ``yield from`` / StopIteration.value.
    return "", history
|
| 53 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
with gr.Blocks() as demo:
|
| 55 |
gr.Markdown(DESCRIPTION)
|
| 56 |
chatbot = gr.Chatbot()
|
|
|
|
| 63 |
label="Max New Tokens",
|
| 64 |
)
|
| 65 |
textbox.submit(predict, [textbox, chatbot, max_new_tokens_slider], [textbox, chatbot])
|
| 66 |
+
|
| 67 |
demo.queue().launch()
|