Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -30,21 +30,21 @@ DEFAULT_MAX_NEW_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
-# Load Qwen2.5-VL-7B-Instruct
-MODEL_ID_M = "Qwen/Qwen2.5-VL-7B-Instruct"
-processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
-model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-    MODEL_ID_M,
-    trust_remote_code=True,
-    torch_dtype=torch.float16).to(device).eval()
+# # Load Qwen2.5-VL-7B-Instruct
+# MODEL_ID_M = "Qwen/Qwen2.5-VL-7B-Instruct"
+# processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
+# model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+#     MODEL_ID_M,
+#     trust_remote_code=True,
+#     torch_dtype=torch.float16).to(device).eval()
 
-# Load Qwen2.5-VL-3B-Instruct
-MODEL_ID_X = "Qwen/Qwen2.5-VL-3B-Instruct"
-processor_x = AutoProcessor.from_pretrained(MODEL_ID_X, trust_remote_code=True)
-model_x = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-    MODEL_ID_X,
-    trust_remote_code=True,
-    torch_dtype=torch.float16).to(device).eval()
+# # Load Qwen2.5-VL-3B-Instruct
+# MODEL_ID_X = "Qwen/Qwen2.5-VL-3B-Instruct"
+# processor_x = AutoProcessor.from_pretrained(MODEL_ID_X, trust_remote_code=True)
+# model_x = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+#     MODEL_ID_X,
+#     trust_remote_code=True,
+#     torch_dtype=torch.float16).to(device).eval()
 
 # Load Qwen3-VL-4B-Instruct
 MODEL_ID_Q = "Qwen/Qwen3-VL-4B-Instruct"
@@ -168,11 +168,11 @@ def generate_image(model_name: str, text: str, image: Image.Image,
     """
     Generates responses using the selected model for image input.
     """
-    if model_name == "Qwen2.5-VL-7B-Instruct":
-        processor, model = processor_m, model_m
-    elif model_name == "Qwen2.5-VL-3B-Instruct":
-        processor, model = processor_x, model_x
-    elif model_name == "Qwen3-VL-4B-Instruct":
+    # if model_name == "Qwen2.5-VL-7B-Instruct":
+    #     processor, model = processor_m, model_m
+    # elif model_name == "Qwen2.5-VL-3B-Instruct":
+    #     processor, model = processor_x, model_x
+    if model_name == "Qwen3-VL-4B-Instruct":
         processor, model = processor_q, model_q
     elif model_name == "Qwen3-VL-8B-Instruct":
         processor, model = processor_y, model_y
@@ -214,11 +214,11 @@ def generate_video(model_name: str, text: str, video_path: str,
     """
     Generates responses using the selected model for video input.
     """
-    if model_name == "Qwen2.5-VL-7B-Instruct":
-        processor, model = processor_m, model_m
-    elif model_name == "Qwen2.5-VL-3B-Instruct":
-        processor, model = processor_x, model_x
-    elif model_name == "Qwen3-VL-4B-Instruct":
+    # if model_name == "Qwen2.5-VL-7B-Instruct":
+    #     processor, model = processor_m, model_m
+    # elif model_name == "Qwen2.5-VL-3B-Instruct":
+    #     processor, model = processor_x, model_x
+    if model_name == "Qwen3-VL-4B-Instruct":
         processor, model = processor_q, model_q
     elif model_name == "Qwen3-VL-8B-Instruct":
         processor, model = processor_y, model_y
@@ -272,12 +272,11 @@ def generate_pdf(model_name: str, text: str, state: Dict[str, Any],
                  top_k: int = 50,
                  repetition_penalty: float = 1.2):
 
-    #
-    if model_name == "Qwen2.5-VL-7B-Instruct":
-        processor, model = processor_m, model_m
-    elif model_name == "Qwen2.5-VL-3B-Instruct":
-        processor, model = processor_x, model_x
-    elif model_name == "Qwen3-VL-4B-Instruct":
+    # if model_name == "Qwen2.5-VL-7B-Instruct":
+    #     processor, model = processor_m, model_m
+    # elif model_name == "Qwen2.5-VL-3B-Instruct":
+    #     processor, model = processor_x, model_x
+    if model_name == "Qwen3-VL-4B-Instruct":
         processor, model = processor_q, model_q
     elif model_name == "Qwen3-VL-8B-Instruct":
         processor, model = processor_y, model_y
@@ -313,11 +312,11 @@ def generate_pdf(model_name: str, text: str, state: Dict[str, Any],
         **inputs,
         "streamer": streamer,
         "max_new_tokens": max_new_tokens,
-        "do_sample": True,
-        "temperature": temperature,
-        "top_p": top_p,
-        "top_k": top_k,
-        "repetition_penalty": repetition_penalty
+        # "do_sample": True,
+        # "temperature": temperature,
+        # "top_p": top_p,
+        # "top_k": top_k,
+        # "repetition_penalty": repetition_penalty
     }
 
     # Use the selected model
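Review notes

This commit disables the two Qwen2.5-VL checkpoints by commenting out their load calls and dispatch branches, leaving only Qwen3-VL-4B-Instruct and Qwen3-VL-8B-Instruct live; on a ZeroGPU Space that plausibly trims startup time and memory, though the commit message does not say. The loading code for the surviving Qwen3-VL checkpoints sits outside the diff context. The sketch below assumes it mirrors the commented-out Qwen2.5-VL pattern; the AutoModelForImageTextToText class is an assumption, since the class the Space actually uses is not visible in this diff.

# Minimal sketch, assuming the Qwen3-VL checkpoints are loaded the same way
# as the commented-out Qwen2.5-VL ones above.
# ASSUMPTION: AutoModelForImageTextToText stands in for whatever model class
# the Space really uses outside this diff.
import torch
from transformers import AutoModelForImageTextToText, AutoProcessor

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

MODEL_ID_Q = "Qwen/Qwen3-VL-4B-Instruct"
processor_q = AutoProcessor.from_pretrained(MODEL_ID_Q, trust_remote_code=True)
model_q = AutoModelForImageTextToText.from_pretrained(
    MODEL_ID_Q,
    trust_remote_code=True,
    torch_dtype=torch.float16).to(device).eval()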
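In all three generate_* functions the Qwen2.5 branches are commented out rather than removed, and the first live branch becomes `if`. If the Gradio model selector still offers the Qwen2.5 names, a request for them now falls through every branch, leaving `processor` and `model` unbound and raising UnboundLocalError, unless an else branch outside these hunks catches it. A mapping-based dispatch fails more loudly; the helper below is hypothetical, not part of the Space.

# Hypothetical dict-based dispatch. processor_q/model_q and processor_y/model_y
# are the objects the diff's surviving branches already reference.
MODELS = {
    "Qwen3-VL-4B-Instruct": (processor_q, model_q),
    "Qwen3-VL-8B-Instruct": (processor_y, model_y),
}

def resolve_model(model_name: str):
    # Raise a clear error for names whose branches were commented out.
    try:
        return MODELS[model_name]
    except KeyError:
        raise ValueError(f"Unknown or disabled model: {model_name}")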
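The last hunk comments out every sampling key, so generate() now falls back to the checkpoint's generation_config defaults (greedy decoding only if that config leaves do_sample off; Qwen checkpoints often ship sampling defaults). Either way, the temperature/top_p/top_k/repetition_penalty controls still present in the function signatures no longer affect decoding. A sketch of the resulting call follows; the Thread/streamer wiring is assumed from the "streamer" key, as the diff only shows the kwargs dict.

# Sketch of the pared-down generate call after this commit. `inputs`,
# `max_new_tokens`, `processor`, and `model` come from the surrounding
# generate_* function; the Thread wiring is an assumption.
from threading import Thread
from transformers import TextIteratorStreamer

streamer = TextIteratorStreamer(
    processor.tokenizer, skip_prompt=True, skip_special_tokens=True)
generation_kwargs = {
    **inputs,
    "streamer": streamer,
    "max_new_tokens": max_new_tokens,
    # Sampling keys removed: decoding now follows the model's
    # generation_config defaults.
}
Thread(target=model.generate, kwargs=generation_kwargs).start()
output = "".join(chunk for chunk in streamer)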