om440 committed on
Commit
ecadc68
·
verified ·
1 Parent(s): e27bd4a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -0
app.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import MllamaForConditionalGeneration, AutoProcessor
2
+ from PIL import Image
3
+ import torch
4
+ import gradio as gr
5
+ import spaces
6
+ import bitsandbytes as bnb # Assure-toi que c'est installé
7
+
8
# Initialize model and processor.
#
# The 4-bit quantization settings have to be expressed through transformers'
# BitsAndBytesConfig: the bitsandbytes package itself provides the kernels but
# exposes no QuantizationConfig class. `load_in_4bit` must also NOT be passed
# as a separate keyword next to `quantization_config`; from_pretrained rejects
# that combination.
from transformers import BitsAndBytesConfig

ckpt = "unsloth/Llama-3.2-11B-Vision-Instruct"
model = MllamaForConditionalGeneration.from_pretrained(
    ckpt,
    device_map="auto",  # let accelerate place the weights on the available GPU(s)
    torch_dtype=torch.float16,  # dtype for the non-quantized modules
    quantization_config=BitsAndBytesConfig(
        load_in_4bit=True,  # enable 4-bit weight quantization
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",  # alternative: "fp4"
        bnb_4bit_compute_dtype=torch.float16,
    ),
)
processor = AutoProcessor.from_pretrained(ckpt)
23
+
24
@spaces.GPU
def extract_text(image):
    """Extract handwritten text from an image with the vision-language model.

    Args:
        image: Filesystem path of the image to read (the Gradio input
            component in this file is declared with ``type="filepath"``),
            or ``None`` when nothing was uploaded.

    Returns:
        The text generated by the model, with surrounding whitespace removed.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        raise gr.Error("Please upload an image first.")

    # Image.open is lazy and keeps the file handle open; convert() loads the
    # pixel data into a new image, so the handle can be released immediately.
    with Image.open(image) as source:
        rgb_image = source.convert("RGB")

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Extract handwritten text from the image and output only the extracted text without any additional description or commentary in output"},
                {"type": "image"},
            ],
        }
    ]

    chat_text = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(
        text=chat_text,
        images=[rgb_image],
        # The chat template already emits the BOS token; do not add it twice.
        add_special_tokens=False,
        return_tensors="pt",
    ).to(model.device)  # follow the model's placement instead of assuming "cuda"

    outputs = model.generate(**inputs, max_new_tokens=250)

    # generate() returns prompt + completion. Slice the prompt off by token
    # count rather than stripping "user"/"assistant" substrings from the
    # decoded string, which would mangle handwriting containing those words.
    prompt_length = inputs["input_ids"].shape[-1]
    generated_tokens = outputs[0][prompt_length:]
    result = processor.decode(generated_tokens, skip_special_tokens=True).strip()

    print(result)  # kept for debugging in the Space logs

    return result
54
+
55
# Build the web UI: a single image upload (delivered to the callback as a
# file path) mapped to a single text box holding the extracted text.
demo = gr.Interface(
    extract_text,
    gr.Image(type="filepath", label="Upload Image"),
    gr.Textbox(label="Extracted Text"),
    title="Handwritten Text Extractor",
    description="Upload an image containing handwritten text to extract its content.",
)

# Start the Gradio server with debug output enabled.
demo.launch(debug=True)