baohuynhbk14 committed on
Commit
127b4cb
·
verified ·
1 Parent(s): cdfbdf6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -71
app.py CHANGED
@@ -149,21 +149,30 @@ def navigate_pdf_page(direction: str, state: Dict[str, Any]):
149
  page_info_html = f'<div style="text-align:center;">Page {new_index + 1} / {total_pages}</div>'
150
  return image_preview, state, page_info_html
151
 
152
- def downsample_video(video_path):
153
  vidcap = cv2.VideoCapture(video_path)
154
  total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
155
  frames = []
156
  frame_indices = np.linspace(0, total_frames - 1, min(total_frames, 10), dtype=int)
 
157
  for i in frame_indices:
158
  vidcap.set(cv2.CAP_PROP_POS_FRAMES, i)
159
  success, image = vidcap.read()
160
  if success:
161
  image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
 
 
 
 
 
 
162
  pil_image = Image.fromarray(image)
163
  frames.append(pil_image)
 
164
  vidcap.release()
165
  return frames
166
 
 
167
  @spaces.GPU
168
  def generate_image(model_name: str, text: str, image: Image.Image,
169
  max_new_tokens: int = 1024,
@@ -210,7 +219,7 @@ def generate_image(model_name: str, text: str, image: Image.Image,
210
  time.sleep(0.01)
211
  yield buffer, buffer
212
 
213
- @spaces.GPU(duration=120)
214
  def generate_video(model_name: str, text: str, video_path: str,
215
  max_new_tokens: int = 1024,
216
  temperature: float = 0.6,
@@ -269,75 +278,7 @@ def generate_video(model_name: str, text: str, video_path: str,
269
  time.sleep(0.01)
270
  yield buffer, buffer
271
 
272
-
273
- # @spaces.GPU(duration=120)
274
- # def generate_pdf(model_name: str, text: str, state: Dict[str, Any],
275
- # max_new_tokens: int = 2048,
276
- # temperature: float = 0.6,
277
- # top_p: float = 0.9,
278
- # top_k: int = 50,
279
- # repetition_penalty: float = 1.2):
280
-
281
- # # if model_name == "Qwen2.5-VL-7B-Instruct":
282
- # # processor, model = processor_m, model_m
283
- # # elif model_name == "Qwen2.5-VL-3B-Instruct":
284
- # # processor, model = processor_x, model_x
285
- # if model_name == "Qwen3-VL-4B-Instruct":
286
- # processor, model = processor_q, model_q
287
- # elif model_name == "Qwen3-VL-8B-Instruct":
288
- # processor, model = processor_y, model_y
289
- # # elif model_name == "Qwen3-VL-8B-Thinking":
290
- # # processor, model = processor_z, model_z
291
- # elif model_name == "Qwen3-VL-4B-Thinking":
292
- # processor, model = processor_t, model_t
293
- # elif model_name == "Qwen3-VL-2B-Instruct":
294
- # processor, model = processor_l, model_l
295
- # elif model_name == "Qwen3-VL-2B-Thinking":
296
- # processor, model = processor_j, model_j
297
- # else:
298
- # yield "Invalid model selected.", "Invalid model selected."
299
- # return
300
-
301
- # if not state or not state["pages"]:
302
- # yield "Please upload a PDF file first.", "Please upload a PDF file first."
303
- # return
304
-
305
- # page_images = state["pages"]
306
- # full_response = ""
307
- # for i, image in enumerate(page_images):
308
- # page_header = f"--- Page {i+1}/{len(page_images)} ---\n"
309
- # yield full_response + page_header, full_response + page_header
310
-
311
- # messages = [{"role": "user", "content": [{"type": "image"}, {"type": "text", "text": text}]}]
312
- # # Sử dụng processor đã chọn
313
- # prompt_full = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
314
- # inputs = processor(text=[prompt_full], images=[image], return_tensors="pt", padding=True).to(device)
315
- # streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
316
-
317
- # generation_kwargs = {
318
- # **inputs,
319
- # "streamer": streamer,
320
- # "max_new_tokens": max_new_tokens,
321
- # "do_sample": True,
322
- # "temperature": temperature,
323
- # "top_p": top_p,
324
- # "top_k": top_k,
325
- # "repetition_penalty": repetition_penalty
326
- # }
327
-
328
- # # Sử dụng model đã chọn
329
- # thread = Thread(target=model.generate, kwargs=generation_kwargs)
330
- # thread.start()
331
-
332
- # page_buffer = ""
333
- # for new_text in streamer:
334
- # page_buffer += new_text
335
- # yield full_response + page_header + page_buffer, full_response + page_header + page_buffer
336
- # time.sleep(0.01)
337
-
338
- # full_response += page_header + page_buffer + "\n\n"
339
-
340
- @spaces.GPU(duration=120)
341
  def generate_pdf(model_name: str, text: str, state: Dict[str, Any],
342
  max_new_tokens: int = 2048,
343
  temperature: float = 0.6,
 
149
  page_info_html = f'<div style="text-align:center;">Page {new_index + 1} / {total_pages}</div>'
150
  return image_preview, state, page_info_html
151
 
152
+ def downsample_video(video_path, max_dim=720):
153
  vidcap = cv2.VideoCapture(video_path)
154
  total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
155
  frames = []
156
  frame_indices = np.linspace(0, total_frames - 1, min(total_frames, 10), dtype=int)
157
+
158
  for i in frame_indices:
159
  vidcap.set(cv2.CAP_PROP_POS_FRAMES, i)
160
  success, image = vidcap.read()
161
  if success:
162
  image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
163
+
164
+ h, w = image.shape[:2]
165
+ scale = max_dim / max(h, w)
166
+ if scale < 1:
167
+ image = cv2.resize(image, (int(w*scale), int(h*scale)), interpolation=cv2.INTER_AREA)
168
+
169
  pil_image = Image.fromarray(image)
170
  frames.append(pil_image)
171
+
172
  vidcap.release()
173
  return frames
174
 
175
+
176
  @spaces.GPU
177
  def generate_image(model_name: str, text: str, image: Image.Image,
178
  max_new_tokens: int = 1024,
 
219
  time.sleep(0.01)
220
  yield buffer, buffer
221
 
222
+ @spaces.GPU(duration=180)
223
  def generate_video(model_name: str, text: str, video_path: str,
224
  max_new_tokens: int = 1024,
225
  temperature: float = 0.6,
 
278
  time.sleep(0.01)
279
  yield buffer, buffer
280
 
281
+ @spaces.GPU(duration=180)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
282
  def generate_pdf(model_name: str, text: str, state: Dict[str, Any],
283
  max_new_tokens: int = 2048,
284
  temperature: float = 0.6,