LLDDWW Claude committed on
Commit
7fabc42
·
1 Parent(s): e96841e

fix: upgrade to Qwen2.5-VL-3B with 8bit quantization

Browse files

- Replace Qwen2-VL-2B with Qwen2.5-VL-3B for better OCR quality
- Apply 8bit quantization to both models for faster inference
- Add bitsandbytes dependency for quantization support
- Better accuracy with optimized speed

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <[email protected]>

Files changed (2) hide show
  1. app.py +13 -11
  2. requirements.txt +1 -0
app.py CHANGED
@@ -8,7 +8,7 @@ import gradio as gr
8
  import spaces
9
  import torch
10
  from PIL import Image
11
- from transformers import Qwen2VLForConditionalGeneration, AutoProcessor, AutoTokenizer, AutoModelForCausalLM
12
  from qwen_vl_utils import process_vision_info
13
  from huggingface_hub import login
14
 
@@ -17,8 +17,8 @@ HF_TOKEN = os.getenv("HF_TOKEN")
17
  if HF_TOKEN:
18
  login(token=HF_TOKEN.strip())
19
 
20
- # OCR ๋ชจ๋ธ ID (๋” ๋น ๋ฅธ ์ถ”๋ก ์„ ์œ„ํ•ด 2B ๋ชจ๋ธ ์‚ฌ์šฉ)
21
- OCR_MODEL_ID = "Qwen/Qwen2-VL-2B-Instruct"
22
 
23
  # 약 정보 분석 모델 ID (의료 전문)
24
  MED_MODEL_ID = "google/medgemma-4b-it"
@@ -34,21 +34,23 @@ def load_models():
34
  global OCR_MODEL, OCR_PROCESSOR, MED_MODEL, MED_TOKENIZER
35
 
36
  if OCR_MODEL is None:
37
- print("🔄 Loading Qwen2-VL-2B for OCR...")
38
- OCR_MODEL = Qwen2VLForConditionalGeneration.from_pretrained(
39
  OCR_MODEL_ID,
40
- torch_dtype=torch.bfloat16,
41
- device_map="auto"
 
42
  )
43
  OCR_PROCESSOR = AutoProcessor.from_pretrained(OCR_MODEL_ID)
44
  print("✅ OCR model loaded!")
45
 
46
  if MED_MODEL is None:
47
- print("🔄 Loading MedGemma-4B for medical analysis...")
48
  MED_MODEL = AutoModelForCausalLM.from_pretrained(
49
  MED_MODEL_ID,
50
  torch_dtype=torch.bfloat16,
51
- device_map="auto"
 
52
  )
53
  MED_TOKENIZER = AutoTokenizer.from_pretrained(MED_MODEL_ID)
54
  print("✅ Medical model loaded!")
@@ -396,8 +398,8 @@ with gr.Blocks(theme=gr.themes.Soft(), css=CUSTOM_CSS) as demo:
396
  - AI가 생성한 정보이므로 정확하지 않을 수 있습니다
397
 
398
  **🤖 기술 스택**
399
- - Qwen2-VL-2B-Instruct (빠른 OCR 텍스트 추출)
400
- - Google MedGemma-4B-IT (의료 전문 모델 - 약 정보 분석 및 설명)
401
 
402
  **🔑 설정 방법**
403
  - Hugging Face Spaces์˜ Settings โ†’ Repository secrets์—์„œ `HF_TOKEN` ์ถ”๊ฐ€ ํ•„์š”
 
8
  import spaces
9
  import torch
10
  from PIL import Image
11
+ from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor, AutoTokenizer, AutoModelForCausalLM
12
  from qwen_vl_utils import process_vision_info
13
  from huggingface_hub import login
14
 
 
17
  if HF_TOKEN:
18
  login(token=HF_TOKEN.strip())
19
 
20
+ # OCR ๋ชจ๋ธ ID (ํ’ˆ์งˆ ์šฐ์„ )
21
+ OCR_MODEL_ID = "Qwen/Qwen2.5-VL-3B-Instruct"
22
 
23
  # 약 정보 분석 모델 ID (의료 전문)
24
  MED_MODEL_ID = "google/medgemma-4b-it"
 
34
  global OCR_MODEL, OCR_PROCESSOR, MED_MODEL, MED_TOKENIZER
35
 
36
  if OCR_MODEL is None:
37
+ print("🔄 Loading Qwen2.5-VL-3B for OCR (8bit quantization)...")
38
+ OCR_MODEL = Qwen2_5_VLForConditionalGeneration.from_pretrained(
39
  OCR_MODEL_ID,
40
+ torch_dtype="auto",
41
+ device_map="auto",
42
+ load_in_8bit=True
43
  )
44
  OCR_PROCESSOR = AutoProcessor.from_pretrained(OCR_MODEL_ID)
45
  print("✅ OCR model loaded!")
46
 
47
  if MED_MODEL is None:
48
+ print("🔄 Loading MedGemma-4B for medical analysis (8bit quantization)...")
49
  MED_MODEL = AutoModelForCausalLM.from_pretrained(
50
  MED_MODEL_ID,
51
  torch_dtype=torch.bfloat16,
52
+ device_map="auto",
53
+ load_in_8bit=True
54
  )
55
  MED_TOKENIZER = AutoTokenizer.from_pretrained(MED_MODEL_ID)
56
  print("✅ Medical model loaded!")
 
398
  - AI가 생성한 정보이므로 정확하지 않을 수 있습니다
399
 
400
  **🤖 기술 스택**
401
+ - Qwen2.5-VL-3B-Instruct (8bit 양자화, 고품질 OCR)
402
+ - Google MedGemma-4B-IT (8bit 양자화, 의료 전문 모델)
403
 
404
  **🔑 설정 방법**
405
  - Hugging Face Spaces์˜ Settings โ†’ Repository secrets์—์„œ `HF_TOKEN` ์ถ”๊ฐ€ ํ•„์š”
requirements.txt CHANGED
@@ -7,3 +7,4 @@ numpy
7
  qwen-vl-utils
8
  accelerate
9
  huggingface_hub
 
 
7
  qwen-vl-utils
8
  accelerate
9
  huggingface_hub
10
+ bitsandbytes