File size: 1,070 Bytes
710b591
9377eb2
 
 
710b591
9377eb2
 
 
 
 
a6de9cf
 
9377eb2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import gradio as gr
from transformers import AutoImageProcessor, AutoTokenizer
from optimum.onnxruntime import ORTModelForVision2Seq
from PIL import Image

# Load model, tokenizer, and processor.
# All three artifacts come from the same Hub repository, so the id is kept in
# one place to stop the three calls from drifting apart.
MODEL_ID = "WinKawaks/vit-small-patch16-224"

processor = AutoImageProcessor.from_pretrained(MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# `export=True` asks Optimum to convert the checkpoint to ONNX at load time.
# The deprecated `from_transformers` alias is deliberately NOT passed: it is
# the old name for `export`, and `from_transformers=False` contradicts (and,
# in Optimum releases that still accept it, overrides) `export=True`.
# NOTE(review): this repo id looks like an image-classification ViT; confirm
# it really provides a tokenizer and a decoder for Vision2Seq generation.
model = ORTModelForVision2Seq.from_pretrained(MODEL_ID, export=True)

def run(image):
    """Generate text for a single input image.

    Args:
        image: The image to describe, or ``None`` when nothing was supplied.

    Returns:
        The decoded text of the first generated sequence with special tokens
        removed, or the string ``"No image provided."`` when ``image`` is
        ``None``.
    """
    # Guard clause: nothing to preprocess without an image.
    if image is None:
        return "No image provided."

    # Turn the image into model-ready tensors.
    model_inputs = processor(images=image, return_tensors="pt")

    # Generation is capped at 64 tokens.
    generated_ids = model.generate(**model_inputs, max_length=64)

    # Only the first sequence of the batch is decoded and returned.
    return tokenizer.decode(generated_ids[0], skip_special_tokens=True)


# --- Gradio UI ---
# One image input (delivered to `run` as a PIL image) and one text output.
_image_input = gr.Image(type="pil")

demo = gr.Interface(
    title="ViT Vision2Seq ONNX Demo",
    description="Upload an image → get generated text from WinKawaks/vit-small-patch16-224 (ONNX).",
    fn=run,
    inputs=_image_input,
    outputs="text",
)

# Start serving the interface.
demo.launch()