baohuynhbk14 commited on
Commit
54c12ba
·
verified ·
1 Parent(s): 41daf64

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +105 -39
app.py CHANGED
@@ -264,6 +264,73 @@ def generate_video(model_name: str, text: str, video_path: str,
264
  yield buffer, buffer
265
 
266
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
  @spaces.GPU
268
  def generate_pdf(model_name: str, text: str, state: Dict[str, Any],
269
  max_new_tokens: int = 2048,
@@ -272,16 +339,10 @@ def generate_pdf(model_name: str, text: str, state: Dict[str, Any],
272
  top_k: int = 50,
273
  repetition_penalty: float = 1.2):
274
 
275
- # if model_name == "Qwen2.5-VL-7B-Instruct":
276
- # processor, model = processor_m, model_m
277
- # elif model_name == "Qwen2.5-VL-3B-Instruct":
278
- # processor, model = processor_x, model_x
279
  if model_name == "Qwen3-VL-4B-Instruct":
280
  processor, model = processor_q, model_q
281
  elif model_name == "Qwen3-VL-8B-Instruct":
282
  processor, model = processor_y, model_y
283
- # elif model_name == "Qwen3-VL-8B-Thinking":
284
- # processor, model = processor_z, model_z
285
  elif model_name == "Qwen3-VL-4B-Thinking":
286
  processor, model = processor_t, model_t
287
  elif model_name == "Qwen3-VL-2B-Instruct":
@@ -297,39 +358,44 @@ def generate_pdf(model_name: str, text: str, state: Dict[str, Any],
297
  return
298
 
299
  page_images = state["pages"]
300
- full_response = ""
301
- for i, image in enumerate(page_images):
302
- page_header = f"--- Page {i+1}/{len(page_images)} ---\n"
303
- yield full_response + page_header, full_response + page_header
304
-
305
- messages = [{"role": "user", "content": [{"type": "image"}, {"type": "text", "text": text}]}]
306
 - # Use the selected processor
307
- prompt_full = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
308
- inputs = processor(text=[prompt_full], images=[image], return_tensors="pt", padding=True).to(device)
309
- streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
310
-
311
- generation_kwargs = {
312
- **inputs,
313
- "streamer": streamer,
314
- "max_new_tokens": max_new_tokens,
315
- # "do_sample": True,
316
- # "temperature": temperature,
317
- # "top_p": top_p,
318
- # "top_k": top_k,
319
- # "repetition_penalty": repetition_penalty
320
- }
321
-
322
 - # Use the selected model
323
- thread = Thread(target=model.generate, kwargs=generation_kwargs)
324
- thread.start()
325
-
326
- page_buffer = ""
327
- for new_text in streamer:
328
- page_buffer += new_text
329
- yield full_response + page_header + page_buffer, full_response + page_header + page_buffer
330
- time.sleep(0.01)
331
-
332
- full_response += page_header + page_buffer + "\n\n"
 
 
 
 
 
333
 
334
  image_examples = [
335
  ["Explain the content in detail.", "images/force.jpg"],
 
264
  yield buffer, buffer
265
 
266
 
267
+ @spaces.GPU
268
+ # def generate_pdf(model_name: str, text: str, state: Dict[str, Any],
269
+ # max_new_tokens: int = 2048,
270
+ # temperature: float = 0.6,
271
+ # top_p: float = 0.9,
272
+ # top_k: int = 50,
273
+ # repetition_penalty: float = 1.2):
274
+
275
+ # # if model_name == "Qwen2.5-VL-7B-Instruct":
276
+ # # processor, model = processor_m, model_m
277
+ # # elif model_name == "Qwen2.5-VL-3B-Instruct":
278
+ # # processor, model = processor_x, model_x
279
+ # if model_name == "Qwen3-VL-4B-Instruct":
280
+ # processor, model = processor_q, model_q
281
+ # elif model_name == "Qwen3-VL-8B-Instruct":
282
+ # processor, model = processor_y, model_y
283
+ # # elif model_name == "Qwen3-VL-8B-Thinking":
284
+ # # processor, model = processor_z, model_z
285
+ # elif model_name == "Qwen3-VL-4B-Thinking":
286
+ # processor, model = processor_t, model_t
287
+ # elif model_name == "Qwen3-VL-2B-Instruct":
288
+ # processor, model = processor_l, model_l
289
+ # elif model_name == "Qwen3-VL-2B-Thinking":
290
+ # processor, model = processor_j, model_j
291
+ # else:
292
+ # yield "Invalid model selected.", "Invalid model selected."
293
+ # return
294
+
295
+ # if not state or not state["pages"]:
296
+ # yield "Please upload a PDF file first.", "Please upload a PDF file first."
297
+ # return
298
+
299
+ # page_images = state["pages"]
300
+ # full_response = ""
301
+ # for i, image in enumerate(page_images):
302
+ # page_header = f"--- Page {i+1}/{len(page_images)} ---\n"
303
+ # yield full_response + page_header, full_response + page_header
304
+
305
+ # messages = [{"role": "user", "content": [{"type": "image"}, {"type": "text", "text": text}]}]
306
 + # # Use the selected processor
307
+ # prompt_full = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
308
+ # inputs = processor(text=[prompt_full], images=[image], return_tensors="pt", padding=True).to(device)
309
+ # streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
310
+
311
+ # generation_kwargs = {
312
+ # **inputs,
313
+ # "streamer": streamer,
314
+ # "max_new_tokens": max_new_tokens,
315
+ # # "do_sample": True,
316
+ # # "temperature": temperature,
317
+ # # "top_p": top_p,
318
+ # # "top_k": top_k,
319
+ # # "repetition_penalty": repetition_penalty
320
+ # }
321
+
322
 + # # Use the selected model
323
+ # thread = Thread(target=model.generate, kwargs=generation_kwargs)
324
+ # thread.start()
325
+
326
+ # page_buffer = ""
327
+ # for new_text in streamer:
328
+ # page_buffer += new_text
329
+ # yield full_response + page_header + page_buffer, full_response + page_header + page_buffer
330
+ # time.sleep(0.01)
331
+
332
+ # full_response += page_header + page_buffer + "\n\n"
333
+
334
  @spaces.GPU
335
  def generate_pdf(model_name: str, text: str, state: Dict[str, Any],
336
  max_new_tokens: int = 2048,
 
339
  top_k: int = 50,
340
  repetition_penalty: float = 1.2):
341
 
 
 
 
 
342
  if model_name == "Qwen3-VL-4B-Instruct":
343
  processor, model = processor_q, model_q
344
  elif model_name == "Qwen3-VL-8B-Instruct":
345
  processor, model = processor_y, model_y
 
 
346
  elif model_name == "Qwen3-VL-4B-Thinking":
347
  processor, model = processor_t, model_t
348
  elif model_name == "Qwen3-VL-2B-Instruct":
 
358
  return
359
 
360
  page_images = state["pages"]
361
+
362
+ messages = [{"role": "user", "content": [{"type": "text", "text": text}]}]
363
+ images_for_processor = []
364
+ for frame in page_images:
365
+ messages[0]["content"].append({"type": "image"})
366
+ images_for_processor.append(frame)
367
+
368
+ prompt_full = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
369
+
370
+ inputs = processor(
371
+ text=[prompt_full],
372
 + images=images_for_processor, # pass the whole list of page images
373
+ return_tensors="pt",
374
+ padding=True
375
+ ).to(device)
376
+
377
+ streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
378
+
379
+ generation_kwargs = {
380
+ **inputs,
381
+ "streamer": streamer,
382
+ "max_new_tokens": max_new_tokens,
383
+ "do_sample": True,
384
+ "temperature": temperature,
385
+ "top_p": top_p,
386
+ "top_k": top_k,
387
+ "repetition_penalty": repetition_penalty
388
+ }
389
+
390
+ thread = Thread(target=model.generate, kwargs=generation_kwargs)
391
+ thread.start()
392
+
393
+ buffer = ""
394
+ for new_text in streamer:
395
+ buffer += new_text
396
 + buffer = buffer.replace("<|im_end|>", "") # added this line, same as the video path
397
+ yield buffer, buffer
398
+ time.sleep(0.01)
399
 
400
  image_examples = [
401
  ["Explain the content in detail.", "images/force.jpg"],