# Source: Hugging Face Space by guychuk — "Update app.py" (commit a6de9cf, verified)
import gradio as gr
from transformers import AutoImageProcessor, AutoTokenizer
from optimum.onnxruntime import ORTModelForVision2Seq
from PIL import Image
# Load the image processor, tokenizer, and ONNX model.
# NOTE(review): "WinKawaks/vit-small-patch16-224" is a ViT image-*classification*
# checkpoint; it ships no tokenizer and no text decoder, so AutoTokenizer and
# ORTModelForVision2Seq are unlikely to load it successfully — confirm the
# intended checkpoint (e.g. an image-captioning model) before deploying.
processor = AutoImageProcessor.from_pretrained("WinKawaks/vit-small-patch16-224")
tokenizer = AutoTokenizer.from_pretrained("WinKawaks/vit-small-patch16-224")
# `export=True` converts the PyTorch weights to ONNX on the fly. The legacy
# `from_transformers` flag is the deprecated alias for the same switch, and
# passing both with conflicting values is contradictory — it was removed here.
model = ORTModelForVision2Seq.from_pretrained(
    "WinKawaks/vit-small-patch16-224",
    export=True,
)
def run(image):
    """Generate text for *image* with the loaded ONNX vision2seq model.

    Returns the decoded string, or a fixed message when *image* is None.
    """
    if image is None:
        return "No image provided."
    # Image → pixel tensors → generated token ids → decoded text.
    pixel_inputs = processor(images=image, return_tensors="pt")
    generated_ids = model.generate(**pixel_inputs, max_length=64)
    return tokenizer.decode(generated_ids[0], skip_special_tokens=True)
# --- Gradio UI ---
# PIL input so `run` receives a ready-to-process Image object.
image_input = gr.Image(type="pil")

demo = gr.Interface(
    fn=run,
    inputs=image_input,
    outputs="text",
    title="ViT Vision2Seq ONNX Demo",
    description="Upload an image → get generated text from WinKawaks/vit-small-patch16-224 (ONNX).",
)

demo.launch()