Spaces:

baohuynhbk14
/

Qwen3-VL-Demo

Running on Zero

App Files Files Community

baohuynhbk14 committed 13 days ago

Commit

127b4cb

verified ·

1 Parent(s): cdfbdf6

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -71

app.py CHANGED Viewed

@@ -149,21 +149,30 @@ def navigate_pdf_page(direction: str, state: Dict[str, Any]):
     page_info_html = f'<div style="text-align:center;">Page {new_index + 1} / {total_pages}</div>'
     return image_preview, state, page_info_html
-def downsample_video(video_path):
     # Sample up to 10 evenly spaced frames from the video at `video_path`
     # and return them as a list of RGB PIL images.
     vidcap = cv2.VideoCapture(video_path)
     total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
     frames = []
     # Indices run from the first to the last reported frame; videos with
     # fewer than 10 frames contribute one index per frame.
     # NOTE(review): a reported frame count of 0 yields no indices (empty
     # result); a negative count would make np.linspace raise because the
     # sample count must be non-negative -- confirm whether the capture
     # backend can report such values.
     frame_indices = np.linspace(0, total_frames - 1, min(total_frames, 10), dtype=int)
     for i in frame_indices:
         # Seek to the selected frame, then decode it.
         vidcap.set(cv2.CAP_PROP_POS_FRAMES, i)
         success, image = vidcap.read()
         # Frames that fail to read are skipped, so fewer frames than
         # indices may be returned.
         if success:
             # Convert from BGR to RGB channel order before wrapping in PIL.
             image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
             pil_image = Image.fromarray(image)
             frames.append(pil_image)
     # NOTE(review): release() is not reached if anything above raises;
     # consider try/finally.
     vidcap.release()
     return frames
 @spaces.GPU
 def generate_image(model_name: str, text: str, image: Image.Image,
                    max_new_tokens: int = 1024,
@@ -210,7 +219,7 @@ def generate_image(model_name: str, text: str, image: Image.Image,
         time.sleep(0.01)
         yield buffer, buffer
-@spaces.GPU(duration=120)
 def generate_video(model_name: str, text: str, video_path: str,
                    max_new_tokens: int = 1024,
                    temperature: float = 0.6,
@@ -269,75 +278,7 @@ def generate_video(model_name: str, text: str, video_path: str,
         time.sleep(0.01)
         yield buffer, buffer
-# @spaces.GPU(duration=120)
-# def generate_pdf(model_name: str, text: str, state: Dict[str, Any],
-#                  max_new_tokens: int = 2048,
-#                  temperature: float = 0.6,
-#                  top_p: float = 0.9,
-#                  top_k: int = 50,
-#                  repetition_penalty: float = 1.2):
-#     # if model_name == "Qwen2.5-VL-7B-Instruct":
-#     #     processor, model = processor_m, model_m
-#     # elif model_name == "Qwen2.5-VL-3B-Instruct":
-#     #     processor, model = processor_x, model_x
-#     if model_name == "Qwen3-VL-4B-Instruct":
-#         processor, model = processor_q, model_q
-#     elif model_name == "Qwen3-VL-8B-Instruct":
-#         processor, model = processor_y, model_y
-#     # elif model_name == "Qwen3-VL-8B-Thinking":
-#     #     processor, model = processor_z, model_z
-#     elif model_name == "Qwen3-VL-4B-Thinking":
-#         processor, model = processor_t, model_t
-#     elif model_name == "Qwen3-VL-2B-Instruct":
-#         processor, model = processor_l, model_l
-#     elif model_name == "Qwen3-VL-2B-Thinking":
-#         processor, model = processor_j, model_j
-#     else:
-#         yield "Invalid model selected.", "Invalid model selected."
-#         return
-#     if not state or not state["pages"]:
-#         yield "Please upload a PDF file first.", "Please upload a PDF file first."
-#         return
-#     page_images = state["pages"]
-#     full_response = ""
-#     for i, image in enumerate(page_images):
-#         page_header = f"--- Page {i+1}/{len(page_images)} ---\n"
-#         yield full_response + page_header, full_response + page_header
-#         messages = [{"role": "user", "content": [{"type": "image"}, {"type": "text", "text": text}]}]
-#         # Use the selected processor
-#         prompt_full = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-#         inputs = processor(text=[prompt_full], images=[image], return_tensors="pt", padding=True).to(device)
-#         streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
-#         generation_kwargs = {
-#             **inputs,
-#             "streamer": streamer,
-#             "max_new_tokens": max_new_tokens,
-#             "do_sample": True,
-#             "temperature": temperature,
-#             "top_p": top_p,
-#             "top_k": top_k,
-#             "repetition_penalty": repetition_penalty
-#         }
-#         # Use the selected model
-#         thread = Thread(target=model.generate, kwargs=generation_kwargs)
-#         thread.start()
-#         page_buffer = ""
-#         for new_text in streamer:
-#             page_buffer += new_text
-#             yield full_response + page_header + page_buffer, full_response + page_header + page_buffer
-#             time.sleep(0.01)
-#         full_response += page_header + page_buffer + "\n\n"
-@spaces.GPU(duration=120)
 def generate_pdf(model_name: str, text: str, state: Dict[str, Any],
                  max_new_tokens: int = 2048,
                  temperature: float = 0.6,

     page_info_html = f'<div style="text-align:center;">Page {new_index + 1} / {total_pages}</div>'
     return image_preview, state, page_info_html
+def downsample_video(video_path, max_dim=720):
     # Sample up to 10 evenly spaced frames from the video at `video_path`
     # and return them as a list of RGB PIL images. Frames whose longer
     # side exceeds `max_dim` pixels are shrunk so that side is about
     # `max_dim`, keeping the aspect ratio; smaller frames are left as-is.
     vidcap = cv2.VideoCapture(video_path)
     total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
     frames = []
     # Indices run from the first to the last reported frame; videos with
     # fewer than 10 frames contribute one index per frame.
     # NOTE(review): a reported frame count of 0 yields no indices (empty
     # result); a negative count would make np.linspace raise because the
     # sample count must be non-negative -- confirm whether the capture
     # backend can report such values.
     frame_indices = np.linspace(0, total_frames - 1, min(total_frames, 10), dtype=int)
     for i in frame_indices:
         # Seek to the selected frame, then decode it.
         vidcap.set(cv2.CAP_PROP_POS_FRAMES, i)
         success, image = vidcap.read()
         # Frames that fail to read are skipped, so fewer frames than
         # indices may be returned.
         if success:
             # Convert from BGR to RGB channel order before wrapping in PIL.
             image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+            h, w = image.shape[:2]
             # scale < 1 means the longer side is larger than max_dim.
+            scale = max_dim / max(h, w)
+            if scale < 1:
                 # cv2.resize takes the target size as (width, height).
                 # NOTE(review): int() truncates, so a very thin frame could
                 # end up with a 0-pixel side -- consider clamping to >= 1.
+                image = cv2.resize(image, (int(w*scale), int(h*scale)), interpolation=cv2.INTER_AREA)
             pil_image = Image.fromarray(image)
             frames.append(pil_image)
     # NOTE(review): release() is not reached if anything above raises;
     # consider try/finally.
     vidcap.release()
     return frames
 @spaces.GPU
 def generate_image(model_name: str, text: str, image: Image.Image,
                    max_new_tokens: int = 1024,
         time.sleep(0.01)
         yield buffer, buffer
+@spaces.GPU(duration=180)
 def generate_video(model_name: str, text: str, video_path: str,
                    max_new_tokens: int = 1024,
                    temperature: float = 0.6,
         time.sleep(0.01)
         yield buffer, buffer
+@spaces.GPU(duration=180)
 def generate_pdf(model_name: str, text: str, state: Dict[str, Any],
                  max_new_tokens: int = 2048,
                  temperature: float = 0.6,