Spaces:

om440
/

handwritten-to-text-fine-tunned

Runtime error

App Files Files Community

om440 committed on Aug 4

Commit

ecadc68

verified ·

1 Parent(s): e27bd4a

Create app.py

Browse files

Files changed (1) hide show

app.py +63 -0

app.py ADDED Viewed

	@@ -0,0 +1,63 @@

+from transformers import MllamaForConditionalGeneration, AutoProcessor, BitsAndBytesConfig
+from PIL import Image
+import torch
+import gradio as gr
+import spaces
+import bitsandbytes as bnb  # Make sure this is installed (backend for 4-bit loading)
+# Initialize model and processor
+ckpt = "unsloth/Llama-3.2-11B-Vision-Instruct"
+model = MllamaForConditionalGeneration.from_pretrained(
+    ckpt,
+    load_in_4bit=True,                   # <-- activation quantization 4-bit
+    device_map="auto",                   # déploie automatiquement sur GPU
+    torch_dtype=torch.float16,           # dtype compatible avec 4-bit
+    quantization_config=bnb.QuantizationConfig(
+        load_in_4bit=True,
+        bnb_4bit_use_double_quant=True,
+        bnb_4bit_quant_type="nf4",      # ou "fp4"
+        bnb_4bit_compute_dtype=torch.float16
+    )
+)
+processor = AutoProcessor.from_pretrained(ckpt)
+@spaces.GPU
+def extract_text(image):
+    image = Image.open(image).convert("RGB")
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": "Extract handwritten text from the image and output only the extracted text without any additional description or commentary in output"},
+                {"type": "image"}
+            ]
+        }
+    ]
+    texts = processor.apply_chat_template(messages, add_generation_prompt=True)
+    inputs = processor(text=texts, images=[image], return_tensors="pt").to("cuda")
+    outputs = model.generate(**inputs, max_new_tokens=250)
+    result = processor.decode(outputs[0], skip_special_tokens=True)
+    print(result)
+    if "assistant" in result.lower():
+        result = result[result.lower().find("assistant") + len("assistant"):].strip()
+    result = result.replace("user", "").replace("Extract handwritten text from the image and output only the extracted text without any additional description or commentary in output", "").strip()
+    print(result)
+    return result
+demo = gr.Interface(
+    fn=extract_text,
+    inputs=gr.Image(type="filepath", label="Upload Image"),
+    outputs=gr.Textbox(label="Extracted Text"),
+    title="Handwritten Text Extractor",
+    description="Upload an image containing handwritten text to extract its content.",
+)
+demo.launch(debug=True)