fix: upgrade to Qwen2.5-VL-3B with 8bit quantization
Browse files- Replace Qwen2-VL-2B with Qwen2.5-VL-3B for better OCR quality
- Apply 8bit quantization to both models for faster inference
- Add bitsandbytes dependency for quantization support
- Better accuracy with optimized speed
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <[email protected]>
- app.py +13 -11
- requirements.txt +1 -0
app.py
CHANGED
|
@@ -8,7 +8,7 @@ import gradio as gr
|
|
| 8 |
import spaces
|
| 9 |
import torch
|
| 10 |
from PIL import Image
|
| 11 |
-
from transformers import
|
| 12 |
from qwen_vl_utils import process_vision_info
|
| 13 |
from huggingface_hub import login
|
| 14 |
|
|
@@ -17,8 +17,8 @@ HF_TOKEN = os.getenv("HF_TOKEN")
|
|
| 17 |
if HF_TOKEN:
|
| 18 |
login(token=HF_TOKEN.strip())
|
| 19 |
|
| 20 |
-
# OCR 모델 ID (
|
| 21 |
-
OCR_MODEL_ID = "Qwen/Qwen2-VL-
|
| 22 |
|
| 23 |
# 약 정보 분석 모델 ID (의료 전문)
|
| 24 |
MED_MODEL_ID = "google/medgemma-4b-it"
|
|
@@ -34,21 +34,23 @@ def load_models():
|
|
| 34 |
global OCR_MODEL, OCR_PROCESSOR, MED_MODEL, MED_TOKENIZER
|
| 35 |
|
| 36 |
if OCR_MODEL is None:
|
| 37 |
-
print("🔄 Loading Qwen2-VL-
|
| 38 |
-
OCR_MODEL =
|
| 39 |
OCR_MODEL_ID,
|
| 40 |
-
torch_dtype=
|
| 41 |
-
device_map="auto"
|
|
|
|
| 42 |
)
|
| 43 |
OCR_PROCESSOR = AutoProcessor.from_pretrained(OCR_MODEL_ID)
|
| 44 |
print("✅ OCR model loaded!")
|
| 45 |
|
| 46 |
if MED_MODEL is None:
|
| 47 |
-
print("🔄 Loading MedGemma-4B for medical analysis...")
|
| 48 |
MED_MODEL = AutoModelForCausalLM.from_pretrained(
|
| 49 |
MED_MODEL_ID,
|
| 50 |
torch_dtype=torch.bfloat16,
|
| 51 |
-
device_map="auto"
|
|
|
|
| 52 |
)
|
| 53 |
MED_TOKENIZER = AutoTokenizer.from_pretrained(MED_MODEL_ID)
|
| 54 |
print("✅ Medical model loaded!")
|
|
@@ -396,8 +398,8 @@ with gr.Blocks(theme=gr.themes.Soft(), css=CUSTOM_CSS) as demo:
|
|
| 396 |
- AI가 생성한 정보이므로 정확하지 않을 수 있습니다
|
| 397 |
|
| 398 |
**🤖 기술 스택**
|
| 399 |
-
- Qwen2-VL-
|
| 400 |
-
- Google MedGemma-4B-IT (의료 전문 모델
|
| 401 |
|
| 402 |
**🔑 설정 방법**
|
| 403 |
- Hugging Face Spaces의 Settings → Repository secrets에서 `HF_TOKEN` 추가 필요
|
|
|
|
| 8 |
import spaces
|
| 9 |
import torch
|
| 10 |
from PIL import Image
|
| 11 |
+
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor, AutoTokenizer, AutoModelForCausalLM
|
| 12 |
from qwen_vl_utils import process_vision_info
|
| 13 |
from huggingface_hub import login
|
| 14 |
|
|
|
|
| 17 |
if HF_TOKEN:
|
| 18 |
login(token=HF_TOKEN.strip())
|
| 19 |
|
| 20 |
+
# OCR 모델 ID (품질 우선)
|
| 21 |
+
OCR_MODEL_ID = "Qwen/Qwen2.5-VL-3B-Instruct"
|
| 22 |
|
| 23 |
# 약 정보 분석 모델 ID (의료 전문)
|
| 24 |
MED_MODEL_ID = "google/medgemma-4b-it"
|
|
|
|
| 34 |
global OCR_MODEL, OCR_PROCESSOR, MED_MODEL, MED_TOKENIZER
|
| 35 |
|
| 36 |
if OCR_MODEL is None:
|
| 37 |
+
print("🔄 Loading Qwen2.5-VL-3B for OCR (8bit quantization)...")
|
| 38 |
+
OCR_MODEL = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
| 39 |
OCR_MODEL_ID,
|
| 40 |
+
torch_dtype="auto",
|
| 41 |
+
device_map="auto",
|
| 42 |
+
load_in_8bit=True
|
| 43 |
)
|
| 44 |
OCR_PROCESSOR = AutoProcessor.from_pretrained(OCR_MODEL_ID)
|
| 45 |
print("✅ OCR model loaded!")
|
| 46 |
|
| 47 |
if MED_MODEL is None:
|
| 48 |
+
print("🔄 Loading MedGemma-4B for medical analysis (8bit quantization)...")
|
| 49 |
MED_MODEL = AutoModelForCausalLM.from_pretrained(
|
| 50 |
MED_MODEL_ID,
|
| 51 |
torch_dtype=torch.bfloat16,
|
| 52 |
+
device_map="auto",
|
| 53 |
+
load_in_8bit=True
|
| 54 |
)
|
| 55 |
MED_TOKENIZER = AutoTokenizer.from_pretrained(MED_MODEL_ID)
|
| 56 |
print("✅ Medical model loaded!")
|
|
|
|
| 398 |
- AI가 생성한 정보이므로 정확하지 않을 수 있습니다
|
| 399 |
|
| 400 |
**🤖 기술 스택**
|
| 401 |
+
- Qwen2.5-VL-3B-Instruct (8bit 양자화, 고품질 OCR)
|
| 402 |
+
- Google MedGemma-4B-IT (8bit 양자화, 의료 전문 모델)
|
| 403 |
|
| 404 |
**🔑 설정 방법**
|
| 405 |
- Hugging Face Spaces의 Settings → Repository secrets에서 `HF_TOKEN` 추가 필요
|
requirements.txt
CHANGED
|
@@ -7,3 +7,4 @@ numpy
|
|
| 7 |
qwen-vl-utils
|
| 8 |
accelerate
|
| 9 |
huggingface_hub
|
|
|
|
|
|
| 7 |
qwen-vl-utils
|
| 8 |
accelerate
|
| 9 |
huggingface_hub
|
| 10 |
+
bitsandbytes
|