Sebukpor committed on
Commit 94d8d44 · verified · 1 Parent(s): f096b1c

Create app.py

Files changed (1)
app.py +81 -0
app.py ADDED
@@ -0,0 +1,81 @@
+ import gradio as gr
+ import torch
+ from PIL import Image
+ from unsloth import FastVisionModel
+ from peft import PeftModel
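+ # Note: unsloth is imported before peft on purpose; Unsloth applies runtime
+ # patches when it is imported, and recommends importing it ahead of PEFT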
+
+ # -----------------------------------------------------------------------------
+ # 1. Load the Base Model (No Double Patching)
+ # -----------------------------------------------------------------------------
+
+ # Load the base model with 4-bit quantization so it fits on a single GPU
+ model, tokenizer = FastVisionModel.from_pretrained(
+     "unsloth/Llama-3.2-11B-Vision-Instruct",
+     load_in_4bit = True,
+     device_map = "auto",
+ )
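+ # The "tokenizer" Unsloth returns for vision models also acts as the image
+ # processor, which is why it can take a PIL image alongside text below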
+
+ # -----------------------------------------------------------------------------
+ # 2. Load the Fine-Tuned Adapter
+ # -----------------------------------------------------------------------------
+
+ # Load the adapter directly with PeftModel.from_pretrained(); wrapping the
+ # model in get_peft_model() first would create fresh, untrained adapter weights
+ model = PeftModel.from_pretrained(model, "/content/fine_tuned_model")
+ model = model.merge_and_unload()  # Merge the adapter into the base model
+ model.eval()  # device_map="auto" already placed the weights, so no .to("cuda")
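+ # merge_and_unload() folds the LoRA deltas into the base weights and returns a
+ # plain transformers model, so generation runs with no per-layer adapter overhead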
+
+ # -----------------------------------------------------------------------------
+ # 3. Preprocess the Image and Run Inference
+ # -----------------------------------------------------------------------------
+
+ def analyze(image, user_prompt):
+     if image is None:
+         return "Please upload an image first."
+     # The vision encoder expects 3-channel RGB input
+     if image.mode != "RGB":
+         image = image.convert("RGB")
+
+     # The chat template only needs an image placeholder; the actual pixels
+     # are passed to the tokenizer below together with the rendered prompt
+     messages = [
+         {"role": "user", "content": [
+             {"type": "image"},
+             {"type": "text", "text": user_prompt}
+         ]}
+     ]
+
+     input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
+
+     inputs = tokenizer(
+         image,
+         input_text,
+         return_tensors = "pt",
+         add_special_tokens = False,
+     ).to("cuda")
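+     # `inputs` now holds the token ids plus the pixel values and attention
+     # masks the vision model expects, all moved to the GPU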
+
+     with torch.no_grad():
+         outputs = model.generate(
+             **inputs,
+             max_new_tokens = 512,
+             use_cache = True,
+             do_sample = True,  # required for temperature/min_p to take effect
+             temperature = 1.0,
+             min_p = 0.1,  # keep only tokens at least 10% as likely as the top token
+         )
+
+     # Slice off the prompt tokens so only the model's reply is returned
+     new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
+     return tokenizer.decode(new_tokens, skip_special_tokens=True)
+
+ # -----------------------------------------------------------------------------
+ # 4. Launch the Interface
+ # -----------------------------------------------------------------------------
+
+ gr.Interface(
+     fn=analyze,
+     inputs=[
+         gr.Image(type="pil", label="Upload Medical Scan"),
+         gr.Textbox(
+             placeholder="Example: 'Describe any abnormalities in this chest X-ray'",
+             label="Your Question",
+             lines=2
+         )
+     ],
+     outputs=gr.Textbox(label="Expert Analysis"),
+     title="DAS medhub Radiology AI Assistant (Fine-Tuned)",
+     description="Upload a medical image and ask a question about it."
+ ).launch(server_port=7860, debug=False)
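+ # Port 7860 is the default port a Hugging Face Space expects a Gradio app to bind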