fredaddy
/

MiniCPM-v-2_6

Image-Text-to-Text

feature-extraction

Model card Files Files and versions

fredaddy committed on Sep 16, 2024

Commit

b1cc8b6

·

verified ·

1 Parent(s): acc0847

Update handler.py

Files changed (1) hide show

handler.py +45 -1

handler.py CHANGED Viewed

	@@ -1 +1,45 @@
1	- #Handler.py file needed

+#Handler.py file needed
+from PIL import Image
+import torch
+from transformers import AutoProcessor, AutoModelForVision2Seq
+class ModelHandler:
+    def __init__(self):
+        self.model = None
+        self.processor = None
+    def initialize(self, model_dir):
+        # Load the processor and model
+        self.processor = AutoProcessor.from_pretrained(model_dir)
+        self.model = AutoModelForVision2Seq.from_pretrained(model_dir)
+    def preprocess(self, inputs):
+        # Process the input image
+        image = Image.open(inputs["image"].file)
+        pixel_values = self.processor(images=image, return_tensors="pt").pixel_values
+        # Process the text context (if provided)
+        text_context = inputs.get("text_context", "")
+        if text_context:
+            context_inputs = self.processor(text=text_context, return_tensors="pt").input_ids
+        else:
+            context_inputs = None
+        return pixel_values, context_inputs
+    def inference(self, pixel_values, context_inputs=None):
+        # Run inference on the image with or without text context
+        with torch.no_grad():
+            if context_inputs is not None:
+                outputs = self.model.generate(pixel_values, input_ids=context_inputs)
+            else:
+                outputs = self.model.generate(pixel_values)
+        return outputs
+    def postprocess(self, outputs):
+        # Decode the output to text
+        decoded_text = self.processor.batch_decode(outputs, skip_special_tokens=True)
+        return {"digitized_text": decoded_text[0]}
+# Module-level handler instance. Constructing it loads nothing: the model and
+# processor stay None until service.initialize(model_dir) is called.
+service = ModelHandler()