Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -30,21 +30,21 @@ DEFAULT_MAX_NEW_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
-# Load Qwen2.5-VL-7B-Instruct
-MODEL_ID_M = "Qwen/Qwen2.5-VL-7B-Instruct"
-processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
-model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-    MODEL_ID_M,
-    trust_remote_code=True,
-    torch_dtype=torch.float16).to(device).eval()
+# # Load Qwen2.5-VL-7B-Instruct
+# MODEL_ID_M = "Qwen/Qwen2.5-VL-7B-Instruct"
+# processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
+# model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+#     MODEL_ID_M,
+#     trust_remote_code=True,
+#     torch_dtype=torch.float16).to(device).eval()
 
-# Load Qwen2.5-VL-3B-Instruct
-MODEL_ID_X = "Qwen/Qwen2.5-VL-3B-Instruct"
-processor_x = AutoProcessor.from_pretrained(MODEL_ID_X, trust_remote_code=True)
-model_x = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-    MODEL_ID_X,
-    trust_remote_code=True,
-    torch_dtype=torch.float16).to(device).eval()
+# # Load Qwen2.5-VL-3B-Instruct
+# MODEL_ID_X = "Qwen/Qwen2.5-VL-3B-Instruct"
+# processor_x = AutoProcessor.from_pretrained(MODEL_ID_X, trust_remote_code=True)
+# model_x = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+#     MODEL_ID_X,
+#     trust_remote_code=True,
+#     torch_dtype=torch.float16).to(device).eval()
 
 # Load Qwen3-VL-4B-Instruct
 MODEL_ID_Q = "Qwen/Qwen3-VL-4B-Instruct"
@@ -168,11 +168,11 @@ def generate_image(model_name: str, text: str, image: Image.Image,
     """
     Generates responses using the selected model for image input.
     """
-    if model_name == "Qwen2.5-VL-7B-Instruct":
-        processor, model = processor_m, model_m
-    elif model_name == "Qwen2.5-VL-3B-Instruct":
-        processor, model = processor_x, model_x
-    elif model_name == "Qwen3-VL-4B-Instruct":
+    # if model_name == "Qwen2.5-VL-7B-Instruct":
+    #     processor, model = processor_m, model_m
+    # elif model_name == "Qwen2.5-VL-3B-Instruct":
+    #     processor, model = processor_x, model_x
+    if model_name == "Qwen3-VL-4B-Instruct":
         processor, model = processor_q, model_q
     elif model_name == "Qwen3-VL-8B-Instruct":
         processor, model = processor_y, model_y
@@ -214,11 +214,11 @@ def generate_video(model_name: str, text: str, video_path: str,
     """
     Generates responses using the selected model for video input.
     """
-    if model_name == "Qwen2.5-VL-7B-Instruct":
-        processor, model = processor_m, model_m
-    elif model_name == "Qwen2.5-VL-3B-Instruct":
-        processor, model = processor_x, model_x
-    elif model_name == "Qwen3-VL-4B-Instruct":
+    # if model_name == "Qwen2.5-VL-7B-Instruct":
+    #     processor, model = processor_m, model_m
+    # elif model_name == "Qwen2.5-VL-3B-Instruct":
+    #     processor, model = processor_x, model_x
+    if model_name == "Qwen3-VL-4B-Instruct":
         processor, model = processor_q, model_q
     elif model_name == "Qwen3-VL-8B-Instruct":
         processor, model = processor_y, model_y
@@ -272,12 +272,11 @@ def generate_pdf(model_name: str, text: str, state: Dict[str, Any],
                  top_k: int = 50,
                  repetition_penalty: float = 1.2):
 
-    #
-    if model_name == "Qwen2.5-VL-7B-Instruct":
-        processor, model = processor_m, model_m
-    elif model_name == "Qwen2.5-VL-3B-Instruct":
-        processor, model = processor_x, model_x
-    elif model_name == "Qwen3-VL-4B-Instruct":
+    # if model_name == "Qwen2.5-VL-7B-Instruct":
+    #     processor, model = processor_m, model_m
+    # elif model_name == "Qwen2.5-VL-3B-Instruct":
+    #     processor, model = processor_x, model_x
+    if model_name == "Qwen3-VL-4B-Instruct":
         processor, model = processor_q, model_q
     elif model_name == "Qwen3-VL-8B-Instruct":
         processor, model = processor_y, model_y
@@ -313,11 +312,11 @@ def generate_pdf(model_name: str, text: str, state: Dict[str, Any],
         **inputs,
         "streamer": streamer,
         "max_new_tokens": max_new_tokens,
-        "do_sample": True,
-        "temperature": temperature,
-        "top_p": top_p,
-        "top_k": top_k,
-        "repetition_penalty": repetition_penalty
+        # "do_sample": True,
+        # "temperature": temperature,
+        # "top_p": top_p,
+        # "top_k": top_k,
+        # "repetition_penalty": repetition_penalty
     }
 
     # Use the selected model
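Review notes

This commit disables the two Qwen2.5-VL checkpoints by commenting out their load calls and dispatch branches, leaving only Qwen3-VL-4B-Instruct and Qwen3-VL-8B-Instruct live; on a ZeroGPU Space that plausibly trims startup time and memory, though the commit message does not say. The loading code for the surviving Qwen3-VL checkpoints sits outside the diff context. The sketch below assumes it mirrors the commented-out Qwen2.5-VL pattern; the AutoModelForImageTextToText class is an assumption, since the class the Space actually uses is not visible in this diff.

# Minimal sketch, assuming the Qwen3-VL checkpoints are loaded the same way
# as the commented-out Qwen2.5-VL ones above.
# ASSUMPTION: AutoModelForImageTextToText stands in for whatever model class
# the Space really uses outside this diff.
import torch
from transformers import AutoModelForImageTextToText, AutoProcessor

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

MODEL_ID_Q = "Qwen/Qwen3-VL-4B-Instruct"
processor_q = AutoProcessor.from_pretrained(MODEL_ID_Q, trust_remote_code=True)
model_q = AutoModelForImageTextToText.from_pretrained(
    MODEL_ID_Q,
    trust_remote_code=True,
    torch_dtype=torch.float16).to(device).eval()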
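In all three generate_* functions the Qwen2.5 branches are commented out rather than removed, and the first live branch becomes `if`. If the Gradio model selector still offers the Qwen2.5 names, a request for them now falls through every branch, leaving `processor` and `model` unbound and raising UnboundLocalError, unless an else branch outside these hunks catches it. A mapping-based dispatch fails more loudly; the helper below is hypothetical, not part of the Space.

# Hypothetical dict-based dispatch. processor_q/model_q and processor_y/model_y
# are the objects the diff's surviving branches already reference.
MODELS = {
    "Qwen3-VL-4B-Instruct": (processor_q, model_q),
    "Qwen3-VL-8B-Instruct": (processor_y, model_y),
}

def resolve_model(model_name: str):
    # Raise a clear error for names whose branches were commented out.
    try:
        return MODELS[model_name]
    except KeyError:
        raise ValueError(f"Unknown or disabled model: {model_name}")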
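The last hunk comments out every sampling key, so generate() now falls back to the checkpoint's generation_config defaults (greedy decoding only if that config leaves do_sample off; Qwen checkpoints often ship sampling defaults). Either way, the temperature/top_p/top_k/repetition_penalty controls still present in the function signatures no longer affect decoding. A sketch of the resulting call follows; the Thread/streamer wiring is assumed from the "streamer" key, as the diff only shows the kwargs dict.

# Sketch of the pared-down generate call after this commit. `inputs`,
# `max_new_tokens`, `processor`, and `model` come from the surrounding
# generate_* function; the Thread wiring is an assumption.
from threading import Thread
from transformers import TextIteratorStreamer

streamer = TextIteratorStreamer(
    processor.tokenizer, skip_prompt=True, skip_special_tokens=True)
generation_kwargs = {
    **inputs,
    "streamer": streamer,
    "max_new_tokens": max_new_tokens,
    # Sampling keys removed: decoding now follows the model's
    # generation_config defaults.
}
Thread(target=model.generate, kwargs=generation_kwargs).start()
output = "".join(chunk for chunk in streamer)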