abdurrahimyilmaz committed
Commit 7717955 · verified · 1 Parent(s): 256cbad

Update README.md

Files changed (1)
  1. README.md +24 -9
README.md CHANGED
@@ -27,25 +27,32 @@ GPU: 1xRTX4090 <br>
  ## Usage
  
  ```python
+ # %%
  from transformers import MllamaForConditionalGeneration, AutoProcessor
  from peft import PeftModel
+ import torch
  from PIL import Image
  
  # Load base model
- base_model_name = "meta-llama/Llama-3.2-11B-Vision-Instruct"
- model = MllamaForConditionalGeneration.from_pretrained(base_model_name)
+ base_model_name = "meta-llama/Llama-3.2-11B-Vision-Instruct"
+ model = MllamaForConditionalGeneration.from_pretrained(
+     base_model_name, torch_dtype=torch.bfloat16, device_map="auto"
+ )
  processor = AutoProcessor.from_pretrained(base_model_name)
  
  # Load LoRA adapter
- adapter_path = "DermaVLM/DermatoLLama-200k"
+ adapter_path = "DermaVLM/DermatoLLama-full"
  model = PeftModel.from_pretrained(model, adapter_path)
+ # %%
+ # Load image using Pillow
+ image_path = rf"IMAGE_LOCATION" # Replace with your image path
+ image = Image.open(image_path)
  
- # Inference
- image_path = "DERM12345.jpg"
- image = Image.open(image_path).convert("RGB")
- prompt_text = "Describe the image in detail."
+ prompt_text = "Analyze the dermatological condition shown in the image and provide a detailed report including body location."
  messages = []
  content_list = []
+
+ # Add the image to the content
  if image:
      content_list.append({"type": "image"})
  
@@ -59,7 +66,7 @@ input_text = processor.apply_chat_template(
      tokenize=False,
  )
  
- # Prepare final inputs
+ # Prepare final inputs with the loaded image
  inputs = processor(
      images=image,
      text=input_text,
@@ -68,7 +75,7 @@ inputs = processor(
  ).to(model.device)
  
  generation_config = {
-     "max_new_tokens": 256,
+     "max_new_tokens": 512, # be careful with this, it can cause very long inference times
      "do_sample": True,
      "temperature": 0.4,
      "top_p": 0.95,
@@ -76,6 +83,10 @@ generation_config = {
  
  input_length = inputs.input_ids.shape[1]
  
+ print(f"Processing image: {image_path}")
+ print(f"Image size: {image.size}")
+ print("Generating response...")
+
  with torch.no_grad():
      outputs = model.generate(
          **inputs,
@@ -89,7 +100,11 @@ with torch.no_grad():
  generated_tokens = outputs[0][input_length:]
  raw_output = processor.decode(generated_tokens, skip_special_tokens=True)
  
+ print("\n" + "="*50)
+ print("DERMATOLOGY ANALYSIS:")
+ print("="*50)
  print(raw_output)
+ print("="*50)
  ```
  
  ## Citation
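
Note on the updated usage snippet: the hunks above do not show how `content_list` and `prompt_text` are assembled into `messages` before `processor.apply_chat_template` is called. A minimal sketch of that step, assuming the standard Llama 3.2 Vision chat format and reusing the variable names from the snippet (an illustration, not the exact README code):

```python
# Assumed continuation of the snippet above: `processor` and `prompt_text`
# are already defined. The PIL image itself is passed to the processor
# separately; the message only carries an image placeholder.
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image"},                      # placeholder for the loaded image
            {"type": "text", "text": prompt_text},  # the dermatology instruction
        ],
    }
]

input_text = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,  # assumed flag; opens the assistant turn for generation
    tokenize=False,
)
```

Two smaller points on the new code: `device_map="auto"` requires the `accelerate` package to be installed, and if the adapter weights are final, `model = model.merge_and_unload()` (a PEFT `PeftModel` method) can fold the LoRA weights into the base model to avoid adapter overhead at inference time.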