Spaces: Running on Zero
Commit
·
2013bf3
1
Parent(s):
9f4e146
Update user agent and enhance predict function with additional parameters for improved flexibility
Browse files
app.py
CHANGED
|
@@ -37,7 +37,7 @@ torch.set_default_device('cuda')
|
|
| 37 |
|
| 38 |
logger = build_logger("gradio_web_server", "gradio_web_server.log")
|
| 39 |
|
| 40 |
-
headers = {"User-Agent": "Vintern-
|
| 41 |
|
| 42 |
no_change_btn = gr.Button()
|
| 43 |
enable_btn = gr.Button(interactive=True)
|
|
@@ -181,21 +181,21 @@ model = AutoModel.from_pretrained(
|
|
| 181 |
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, use_fast=False)
|
| 182 |
|
| 183 |
@spaces.GPU
|
| 184 |
-
def predict(message,
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
if pixel_values is not None:
|
| 191 |
question = '<image>\n'+message
|
| 192 |
else:
|
| 193 |
question = message
|
| 194 |
-
|
| 195 |
-
print("Tokenizer: ", tokenizer)
|
| 196 |
-
print("Question: ", question)
|
| 197 |
-
response, conv_history = model.chat(tokenizer, pixel_values, question, generation_config, history=None, return_history=True)
|
| 198 |
-
print(f"AI response: {response}")
|
| 199 |
return response, conv_history
|
| 200 |
|
| 201 |
def http_bot(
|
|
@@ -220,7 +220,6 @@ def http_bot(
|
|
| 220 |
return
|
| 221 |
|
| 222 |
if model is None:
|
| 223 |
-
# state.messages[-1][-1] = server_error_msg
|
| 224 |
state.update_message(Conversation.ASSISTANT, server_error_msg)
|
| 225 |
yield (
|
| 226 |
state,
|
|
@@ -246,26 +245,15 @@ def http_bot(
|
|
| 246 |
|
| 247 |
try:
|
| 248 |
# Stream output
|
| 249 |
-
# response = requests.post(worker_addr, json=pload, headers=headers, stream=True, timeout=300)
|
| 250 |
-
print(f"all_image_paths: {all_image_paths}")
|
| 251 |
message = state.get_user_message(source=state.USER)
|
| 252 |
-
|
| 253 |
-
|
|
|
|
| 254 |
|
| 255 |
-
# streamer = TextIteratorStreamer(
|
| 256 |
-
# tokenizer, skip_prompt=True, skip_special_tokens=True
|
| 257 |
-
# )
|
| 258 |
-
# generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024)
|
| 259 |
-
|
| 260 |
-
# thread = Thread(target=model.generate, kwargs=generation_kwargs)
|
| 261 |
-
# thread.start()
|
| 262 |
-
|
| 263 |
# response = "This is a test response"
|
| 264 |
buffer = ""
|
| 265 |
for new_text in response:
|
| 266 |
buffer += new_text
|
| 267 |
-
# Remove <|im_end|> or similar tokens from the output
|
| 268 |
-
buffer = buffer.replace("<|im_end|>", "")
|
| 269 |
|
| 270 |
state.update_message(Conversation.ASSISTANT, buffer + state.streaming_placeholder, None)
|
| 271 |
yield (
|
|
@@ -275,8 +263,7 @@ def http_bot(
|
|
| 275 |
) + (disable_btn,) * 5
|
| 276 |
|
| 277 |
except Exception as e:
|
| 278 |
-
logger.error(f"Error in http_bot: {e}")
|
| 279 |
-
traceback.print_exc()
|
| 280 |
state.update_message(Conversation.ASSISTANT, server_error_msg, None)
|
| 281 |
yield (
|
| 282 |
state,
|
|
@@ -293,7 +280,7 @@ def http_bot(
|
|
| 293 |
|
| 294 |
ai_response = state.return_last_message()
|
| 295 |
|
| 296 |
-
logger.info(f"==== response ====\n{ai_response}")
|
| 297 |
|
| 298 |
state.end_of_current_turn()
|
| 299 |
|
|
@@ -321,9 +308,10 @@ def http_bot(
|
|
| 321 |
title_html = """
|
| 322 |
<div style="text-align: center;">
|
| 323 |
<img src="https://lh3.googleusercontent.com/pw/AP1GczMmW-aFQ4dNaR_LCAllh4UZLLx9fTZ1ITHeGVMWx-1bwlIWz4VsWJSGb3_9C7CQfvboqJH41y2Sbc5ToC9ZmKeV4-buf_DEevIMU0HtaLWgHAPOqBiIbG6LaE8CvDqniLZzvB9UX8TR_-YgvYzPFt2z=w1472-h832-s-no-gm?authuser=0" style="height: 100; width: 100%;">
|
|
|
|
| 324 |
<p>Vintern-1B: An Efficient Multimodal Large Language Model for Vietnamese</p>
|
| 325 |
<a href="https://huggingface.co/papers/2408.12480">[📖 Vintern Paper]</a>
|
| 326 |
-
<a href="https://huggingface.co/5CD-AI">[🤗
|
| 327 |
</div>
|
| 328 |
"""
|
| 329 |
|
|
@@ -411,7 +399,7 @@ def build_demo():
|
|
| 411 |
)
|
| 412 |
|
| 413 |
with gr.Blocks(
|
| 414 |
-
title="Vintern-
|
| 415 |
theme=gr.themes.Default(),
|
| 416 |
css=block_css,
|
| 417 |
) as demo:
|
|
@@ -424,7 +412,7 @@ def build_demo():
|
|
| 424 |
|
| 425 |
with gr.Accordion("Settings", open=False) as setting_row:
|
| 426 |
system_prompt = gr.Textbox(
|
| 427 |
-
value="
|
| 428 |
label="System Prompt",
|
| 429 |
interactive=True,
|
| 430 |
)
|
|
@@ -501,7 +489,7 @@ def build_demo():
|
|
| 501 |
with gr.Column(scale=8):
|
| 502 |
chatbot = gr.Chatbot(
|
| 503 |
elem_id="chatbot",
|
| 504 |
-
label="Vintern",
|
| 505 |
height=580,
|
| 506 |
show_copy_button=True,
|
| 507 |
show_share_button=True,
|
|
|
|
| 37 |
|
| 38 |
logger = build_logger("gradio_web_server", "gradio_web_server.log")
|
| 39 |
|
| 40 |
+
headers = {"User-Agent": "Vintern-1B-3.5-Demo Client"}
|
| 41 |
|
| 42 |
no_change_btn = gr.Button()
|
| 43 |
enable_btn = gr.Button(interactive=True)
|
|
|
|
| 181 |
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, use_fast=False)
|
| 182 |
|
| 183 |
@spaces.GPU
|
| 184 |
+
def predict(message,
            image_path,
            history,
            max_input_tiles=6,
            temperature=1.0,
            max_output_tokens=700,
            top_p=0.7,
            repetition_penalty=2.5):
    """Run a single chat turn against the module-level ``model``.

    Args:
        message: User text for this turn.
        image_path: Path handed to ``load_image``. When falsy (``None`` or
            an empty string) no image is loaded and the turn is sent as
            text only.
        history: Conversation history forwarded unchanged to ``model.chat``.
            This is the THIRD positional parameter: callers that pass the
            tuning parameters positionally must supply it (or switch to
            keyword arguments), otherwise every later argument shifts by one.
        max_input_tiles: Forwarded to ``load_image`` as ``max_num``.
        temperature: Forwarded in the generation config.
        max_output_tokens: Forwarded as ``max_new_tokens``.
        top_p: Forwarded in the generation config.
        repetition_penalty: Forwarded in the generation config.

    Returns:
        The ``(response, conv_history)`` pair returned by ``model.chat``.
    """
    if image_path:
        # Only touch the image pipeline when there is an image; previously a
        # missing path failed inside load_image before the text-only branch
        # could ever be reached.
        pixel_values = (
            load_image(image_path, max_num=max_input_tiles)
            .to(torch.bfloat16)
            .cuda()
        )
        question = '<image>\n' + message
    else:
        # NOTE(review): assumes model.chat accepts pixel_values=None for
        # text-only turns, as the original else-branch implied - confirm.
        pixel_values = None
        question = message

    # NOTE(review): do_sample=False together with num_beams=3 selects beam
    # search; temperature/top_p are presumably ignored in that mode - confirm
    # whether sampling was intended before changing these flags.
    generation_config = dict(
        temperature=temperature,
        max_new_tokens=max_output_tokens,
        top_p=top_p,
        do_sample=False,
        num_beams=3,
        repetition_penalty=repetition_penalty,
    )

    response, conv_history = model.chat(
        tokenizer,
        pixel_values,
        question,
        generation_config,
        history=history,
        return_history=True,
    )
    return response, conv_history
|
| 200 |
|
| 201 |
def http_bot(
|
|
|
|
| 220 |
return
|
| 221 |
|
| 222 |
if model is None:
|
|
|
|
| 223 |
state.update_message(Conversation.ASSISTANT, server_error_msg)
|
| 224 |
yield (
|
| 225 |
state,
|
|
|
|
| 245 |
|
| 246 |
try:
|
| 247 |
# Stream output
|
|
|
|
|
|
|
| 248 |
message = state.get_user_message(source=state.USER)
|
| 249 |
+
logger.info(f"==== User message ====\n{message}")
|
| 250 |
+
logger.info(f"==== Image paths ====\n{all_image_paths}")
|
| 251 |
+
response, conv_history = predict(message, all_image_paths[0], max_input_tiles, temperature, max_new_tokens, top_p, repetition_penalty)
|
| 252 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 253 |
# response = "This is a test response"
|
| 254 |
buffer = ""
|
| 255 |
for new_text in response:
|
| 256 |
buffer += new_text
|
|
|
|
|
|
|
| 257 |
|
| 258 |
state.update_message(Conversation.ASSISTANT, buffer + state.streaming_placeholder, None)
|
| 259 |
yield (
|
|
|
|
| 263 |
) + (disable_btn,) * 5
|
| 264 |
|
| 265 |
except Exception as e:
|
| 266 |
+
logger.error(f"Error in http_bot: {e} \n{traceback.format_exc()}")
|
|
|
|
| 267 |
state.update_message(Conversation.ASSISTANT, server_error_msg, None)
|
| 268 |
yield (
|
| 269 |
state,
|
|
|
|
| 280 |
|
| 281 |
ai_response = state.return_last_message()
|
| 282 |
|
| 283 |
+
logger.info(f"==== AI response ====\n{ai_response}")
|
| 284 |
|
| 285 |
state.end_of_current_turn()
|
| 286 |
|
|
|
|
| 308 |
title_html = """
|
| 309 |
<div style="text-align: center;">
|
| 310 |
<img src="https://lh3.googleusercontent.com/pw/AP1GczMmW-aFQ4dNaR_LCAllh4UZLLx9fTZ1ITHeGVMWx-1bwlIWz4VsWJSGb3_9C7CQfvboqJH41y2Sbc5ToC9ZmKeV4-buf_DEevIMU0HtaLWgHAPOqBiIbG6LaE8CvDqniLZzvB9UX8TR_-YgvYzPFt2z=w1472-h832-s-no-gm?authuser=0" style="height: 100; width: 100%;">
|
| 311 |
+
<p>🔥Vintern-1B-v3_5🔥</p>
|
| 312 |
<p>Vintern-1B: An Efficient Multimodal Large Language Model for Vietnamese</p>
|
| 313 |
<a href="https://huggingface.co/papers/2408.12480">[📖 Vintern Paper]</a>
|
| 314 |
+
<a href="https://huggingface.co/5CD-AI">[🤗 Huggingface]</a>
|
| 315 |
</div>
|
| 316 |
"""
|
| 317 |
|
|
|
|
| 399 |
)
|
| 400 |
|
| 401 |
with gr.Blocks(
|
| 402 |
+
title="Vintern-1B-v3_5-Demo",
|
| 403 |
theme=gr.themes.Default(),
|
| 404 |
css=block_css,
|
| 405 |
) as demo:
|
|
|
|
| 412 |
|
| 413 |
with gr.Accordion("Settings", open=False) as setting_row:
|
| 414 |
system_prompt = gr.Textbox(
|
| 415 |
+
value="Bạn là một trợ lý AI đa phương thức hữu ích, hãy trả lời câu hỏi người dùng một cách chi tiết.",
|
| 416 |
label="System Prompt",
|
| 417 |
interactive=True,
|
| 418 |
)
|
|
|
|
| 489 |
with gr.Column(scale=8):
|
| 490 |
chatbot = gr.Chatbot(
|
| 491 |
elem_id="chatbot",
|
| 492 |
+
label="Vintern-1B-v3_5-Demo",
|
| 493 |
height=580,
|
| 494 |
show_copy_button=True,
|
| 495 |
show_share_button=True,
|