Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -25,9 +25,20 @@ from longvu.mm_datautils import (
|
|
| 25 |
from decord import cpu, VideoReader
|
| 26 |
|
| 27 |
|
| 28 |
-
title_markdown =
|
| 29 |
-
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
block_css = """
|
| 33 |
#buttons button {
|
|
@@ -123,6 +134,15 @@ class Chat:
|
|
| 123 |
|
| 124 |
@spaces.GPU(duration=120)
|
| 125 |
def generate(image, video, message, chatbot, textbox_in, temperature, top_p, max_output_tokens, dtype=torch.float16):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
data = []
|
| 127 |
|
| 128 |
processor = handler.processor
|
|
@@ -157,7 +177,7 @@ def generate(image, video, message, chatbot, textbox_in, temperature, top_p, max
|
|
| 157 |
raise NotImplementedError("Not support image and video at the same time")
|
| 158 |
except Exception as e:
|
| 159 |
traceback.print_exc()
|
| 160 |
-
return gr.update(value=None, interactive=True), gr.update(value=None, interactive=True), message, chatbot
|
| 161 |
|
| 162 |
assert len(message) % 2 == 0, "The message should be a pair of user and system message."
|
| 163 |
|
|
@@ -202,7 +222,7 @@ def generate(image, video, message, chatbot, textbox_in, temperature, top_p, max
|
|
| 202 |
one_turn_chat[1] = text_en_out
|
| 203 |
chatbot.append(one_turn_chat)
|
| 204 |
|
| 205 |
-
return gr.update(value=image, interactive=True), gr.update(value=video, interactive=True), message, chatbot
|
| 206 |
|
| 207 |
|
| 208 |
def regenerate(message, chatbot):
|
|
@@ -284,14 +304,18 @@ with gr.Blocks(title='LongVU', theme=theme, css=block_css) as demo:
|
|
| 284 |
with gr.Column():
|
| 285 |
gr.Examples(
|
| 286 |
examples=[
|
|
|
|
|
|
|
|
|
|
|
|
|
| 287 |
[
|
| 288 |
f"./examples/video1.mp4",
|
| 289 |
"Describe this video in detail.",
|
| 290 |
],
|
| 291 |
[
|
| 292 |
f"./examples/video2.mp4",
|
| 293 |
-
"
|
| 294 |
-
]
|
| 295 |
],
|
| 296 |
inputs=[video, textbox],
|
| 297 |
)
|
|
@@ -301,6 +325,21 @@ with gr.Blocks(title='LongVU', theme=theme, css=block_css) as demo:
|
|
| 301 |
[image, video, message, chatbot, textbox, temperature, top_p, max_output_tokens],
|
| 302 |
[image, video, message, chatbot])
|
| 303 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 304 |
regenerate_btn.click(
|
| 305 |
regenerate,
|
| 306 |
[message, chatbot],
|
|
|
|
| 25 |
from decord import cpu, VideoReader
|
| 26 |
|
| 27 |
|
| 28 |
+
title_markdown = """
|
| 29 |
+
<div style="display: flex; justify-content: center; align-items: center; text-align: center;">
|
| 30 |
+
<div>
|
| 31 |
+
<h1 >LongVU: Spatiotemporal Adaptive Compression for Long Video-Language Understanding</h1>
|
| 32 |
+
</div>
|
| 33 |
+
</div>
|
| 34 |
+
<div align="center">
|
| 35 |
+
<div style="display:flex; gap: 0.25rem; margin-top: 10px;" align="center">
|
| 36 |
+
<a href=''><img src='https://img.shields.io/badge/arXiv-paper-red'></a>
|
| 37 |
+
<a href='https://vision-cair.github.io/LongVU/'><img src='https://img.shields.io/badge/Project-LongVU-blue'></a>
|
| 38 |
+
<a href=''><img src='https://img.shields.io/badge/model-checkpoints-green'></a>
|
| 39 |
+
</div>
|
| 40 |
+
</div>
|
| 41 |
+
"""
|
| 42 |
|
| 43 |
block_css = """
|
| 44 |
#buttons button {
|
|
|
|
| 134 |
|
| 135 |
@spaces.GPU(duration=120)
|
| 136 |
def generate(image, video, message, chatbot, textbox_in, temperature, top_p, max_output_tokens, dtype=torch.float16):
|
| 137 |
+
if textbox_in is None:
|
| 138 |
+
raise gr.Error("Chat messages cannot be empty")
|
| 139 |
+
return (
|
| 140 |
+
gr.update(value=image, interactive=True),
|
| 141 |
+
gr.update(value=video, interactive=True),
|
| 142 |
+
message,
|
| 143 |
+
chatbot,
|
| 144 |
+
None,
|
| 145 |
+
)
|
| 146 |
data = []
|
| 147 |
|
| 148 |
processor = handler.processor
|
|
|
|
| 177 |
raise NotImplementedError("Not support image and video at the same time")
|
| 178 |
except Exception as e:
|
| 179 |
traceback.print_exc()
|
| 180 |
+
return gr.update(value=None, interactive=True), gr.update(value=None, interactive=True), message, chatbot, None
|
| 181 |
|
| 182 |
assert len(message) % 2 == 0, "The message should be a pair of user and system message."
|
| 183 |
|
|
|
|
| 222 |
one_turn_chat[1] = text_en_out
|
| 223 |
chatbot.append(one_turn_chat)
|
| 224 |
|
| 225 |
+
return gr.update(value=image, interactive=True), gr.update(value=video, interactive=True), message, chatbot, None
|
| 226 |
|
| 227 |
|
| 228 |
def regenerate(message, chatbot):
|
|
|
|
| 304 |
with gr.Column():
|
| 305 |
gr.Examples(
|
| 306 |
examples=[
|
| 307 |
+
[
|
| 308 |
+
f"./examples/video3.mp4",
|
| 309 |
+
"What is the moving direction of the yellow ball?",
|
| 310 |
+
],
|
| 311 |
[
|
| 312 |
f"./examples/video1.mp4",
|
| 313 |
"Describe this video in detail.",
|
| 314 |
],
|
| 315 |
[
|
| 316 |
f"./examples/video2.mp4",
|
| 317 |
+
"What is the name of the store?",
|
| 318 |
+
],
|
| 319 |
],
|
| 320 |
inputs=[video, textbox],
|
| 321 |
)
|
|
|
|
| 325 |
[image, video, message, chatbot, textbox, temperature, top_p, max_output_tokens],
|
| 326 |
[image, video, message, chatbot])
|
| 327 |
|
| 328 |
+
textbox.submit(
|
| 329 |
+
generate,
|
| 330 |
+
[
|
| 331 |
+
image,
|
| 332 |
+
video,
|
| 333 |
+
message,
|
| 334 |
+
chatbot,
|
| 335 |
+
textbox,
|
| 336 |
+
temperature,
|
| 337 |
+
top_p,
|
| 338 |
+
max_output_tokens,
|
| 339 |
+
],
|
| 340 |
+
[image, video, message, chatbot, textbox],
|
| 341 |
+
)
|
| 342 |
+
|
| 343 |
regenerate_btn.click(
|
| 344 |
regenerate,
|
| 345 |
[message, chatbot],
|