yu-rp committed
Commit 3a08b56 · 1 parent: bf90ae0

update

Files changed:
- app.py +31 -5
- gallery/1_resize.jpg +3 -0
- gallery/2_resize.jpg +3 -0
- gallery/3_resize.jpg +3 -0
- model.py +20 -0
app.py CHANGED
@@ -31,6 +31,19 @@ from model import (
     get_model,
 )
 
+# seed for reproducibility
+import random
+import numpy as np
+import torch
+seed = 42
+random.seed(seed)
+np.random.seed(seed)
+torch.manual_seed(seed)
+if torch.cuda.is_available():
+    torch.cuda.manual_seed_all(seed)
+    torch.backends.cudnn.deterministic = True
+    torch.backends.cudnn.benchmark = False
+
 logger = build_logger("dimple", "dimple.log")
 
 no_change_btn = gr.Button()
@@ -233,8 +246,6 @@ def http_bot(
     )
 
     thread = threading.Thread(target=run_generate)
-    thread.start()
-
     logger.info(f"==== wait for first token ====\n")
     state.append_message(Conversation.ASSISTANT, state.streaming_placeholder)
     yield (
@@ -243,6 +254,10 @@ def http_bot(
         gr.MultimodalTextbox(interactive=False),
     ) + (disable_btn,) * 5
 
+    num_steps = 0
+    start_tstamp = time.time()
+    thread.start()
+
     try:
         # Stream output
         for ans in streamer:
@@ -252,11 +267,22 @@ def http_bot(
                 ans = ans[0]
 
             state.update_message(Conversation.ASSISTANT, ans, None)
+            num_steps += 1
             yield (
                 state,
                 state.to_gradio_chatbot(),
                 gr.MultimodalTextbox(interactive=False),
             ) + (disable_btn,) * 5
+        end_tstamp = time.time()
+        total_time = end_tstamp - start_tstamp
+        tps = int(max_new_tokens) / total_time
+        stat_info = f"\n\n[#Tokens: {int(max_new_tokens)}, #Steps: {int(num_steps)}, TPS: {tps:.2f} tokens/s, Latency: {total_time:.2f}s]"
+        state.update_message(Conversation.ASSISTANT, ans + stat_info, None)
+        yield (
+            state,
+            state.to_gradio_chatbot(),
+            gr.MultimodalTextbox(interactive=False),
+        ) + (disable_btn,) * 5
     except Exception as e:
         state.update_message(Conversation.ASSISTANT, server_error_msg, None)
         yield (
@@ -422,7 +448,7 @@ def build_demo():
         [
             {
                 "files": [
-                    "gallery/
+                    "gallery/1_resize.jpg",
                 ],
                 "text": "Please help me describe the image.",
             }
@@ -430,7 +456,7 @@ def build_demo():
         [
             {
                 "files": [
-                    "gallery/
+                    "gallery/2_resize.jpg",
                 ],
                 "text": "First please perform reasoning, and think step by step to provide best answer to the following question: Is this airplane taking off or landing?",
             }
@@ -438,7 +464,7 @@ def build_demo():
         [
             {
                 "files": [
-                    "gallery/
+                    "gallery/3_resize.jpg",
                 ],
                 "text": "First please perform reasoning, and think step by step to provide best answer to the following question: What is the lamp on, a side table or a nightstand?",
             }
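The reordering in http_bot defers thread.start() until after the placeholder message is posted, and the new counters report tokens, steps, TPS, and latency at the end of the stream. Below is a minimal sketch of the same producer/consumer pattern; the model name and prompt are stand-ins for illustration, not the demo's Dimple model.

import threading
import time

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # stand-in model for illustration
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tokenizer("Hello, world", return_tensors="pt")
max_new_tokens = 32
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)

# Producer: generation runs on a background thread and feeds the streamer.
thread = threading.Thread(
    target=model.generate,
    kwargs=dict(**inputs, max_new_tokens=max_new_tokens, streamer=streamer),
)

num_steps = 0
start_tstamp = time.time()
thread.start()  # started only once the placeholder is in place, as in the commit

# Consumer: drain the streamer and count decoding steps, as the demo loop does.
for chunk in streamer:
    num_steps += 1

total_time = time.time() - start_tstamp
tps = max_new_tokens / total_time
print(f"[#Tokens: {max_new_tokens}, #Steps: {num_steps}, TPS: {tps:.2f} tokens/s, Latency: {total_time:.2f}s]")

Note that the stats derive TPS from max_new_tokens rather than from a count of actually generated tokens; that is exact only if decoding always spends the full token budget, and an overestimate otherwise.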
gallery/1_resize.jpg ADDED (Git LFS)
gallery/2_resize.jpg ADDED (Git LFS)
gallery/3_resize.jpg ADDED (Git LFS)
model.py CHANGED
@@ -5,8 +5,14 @@ from transformers import AutoProcessor, AutoModel, TextIteratorStreamer
 class FullSequenceStreamer(TextIteratorStreamer):
     def __init__(self, tokenizer, **kwargs):
         super().__init__(tokenizer, **kwargs)
+        self.mask_token = tokenizer.mask_token_id
+        self.placeholder_token = tokenizer.convert_tokens_to_ids("_")
+        self.placeholder_token = tokenizer.encode("␣")[0]
 
     def put(self, value, stream_end=False):
+        # change mask tokens to space token
+        value = value.clone()
+        value[value == self.mask_token] = self.placeholder_token
         # Assume full token_ids are passed in every time
         decoded = self.tokenizer.batch_decode(value, **self.decode_kwargs)
         self.text_queue.put(decoded)
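The put() override substitutes a visible placeholder for positions that are still masked, so intermediate denoising states render as "␣" while streaming. Note that the second assignment to self.placeholder_token overrides the first, so the id of "␣" is the one actually used. A self-contained sketch of the substitution, with made-up token ids standing in for tokenizer.mask_token_id and tokenizer.encode("␣")[0]:

import torch

mask_token_id = 9999    # hypothetical id standing in for tokenizer.mask_token_id
placeholder_id = 7777   # hypothetical id standing in for tokenizer.encode("␣")[0]

token_ids = torch.tensor([[101, 9999, 2023, 9999, 102]])
token_ids = token_ids.clone()  # avoid mutating the tensor the generator still owns
token_ids[token_ids == mask_token_id] = placeholder_id
print(token_ids)  # tensor([[ 101, 7777, 2023, 7777,  102]])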
@@ -33,3 +39,17 @@ def get_model(device):
 
     return model, processor
 
+from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor
+
+def get_qwen(device):
+
+    model_name = "Qwen/Qwen2-VL-7B-Instruct"
+    processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
+    model = Qwen2VLForConditionalGeneration.from_pretrained(
+        model_name,
+        torch_dtype=torch.bfloat16,
+    )
+    model = model.eval()
+    model = model.to(device)
+
+    return model, processor
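A hypothetical call site for the new get_qwen() helper, mirroring how get_model(device) is presumably used; it assumes torch is imported at module scope in model.py, and the 7B checkpoint in bfloat16 makes a CUDA device the practical target. As added, the AutoTokenizer import appears unused.

import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
model, processor = get_qwen(device)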