guychuk committed · verified
Commit cde4f1b · Parent: 1d32d95

Update app.py

Files changed (1): app.py (+54 -16)
app.py CHANGED
@@ -7,18 +7,39 @@ from transformers import AutoTokenizer, AutoConfig
 
 
 # ---------------------------------------------------------
-# Helper
+# Helper functions
 # ---------------------------------------------------------
 def softmax(x):
     e = np.exp(x - np.max(x))
     return e / e.sum()
 
 
+def greedy_decode_onnx(session, tokenizer, prompt, max_new_tokens=64):
+    """
+    Minimal greedy decoding loop for decoder-only ONNX models that:
+      - take input_ids only
+      - return logits of shape [batch, seq, vocab]
+    """
+    # Encode prompt
+    ids = tokenizer(prompt, return_tensors="np")["input_ids"].astype(np.int64)
+
+    for _ in range(max_new_tokens):
+        ort_inputs = {"input_ids": ids}
+        logits = session.run(None, ort_inputs)[0]  # shape: [batch, seq, vocab]
+        next_token_logits = logits[:, -1, :]       # last position
+        next_token = int(np.argmax(next_token_logits, axis=-1)[0])
+        ids = np.concatenate([ids, [[next_token]]], axis=1)
+        if next_token == tokenizer.eos_token_id:
+            break
+
+    return tokenizer.decode(ids[0], skip_special_tokens=True)
+
+
 # ---------------------------------------------------------
-# Load ONNX models + tokenizers + configs (for labels)
+# Load ONNX models + tokenizers + configs
 # ---------------------------------------------------------
 
-# --- Model 1: Multilingual DistilBERT Sentiment ---
+# --- Model 1: Multilingual DistilBERT ---
 multilingual_onnx_path = hf_hub_download(
     repo_id="lxyuan/distilbert-base-multilingual-cased-sentiments-student",
     filename="onnx/model.onnx"
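The new decoding loop recomputes logits for the entire prefix on every step (there is no KV cache), so generation cost grows quadratically with output length, and it assumes the exported graph declares `input_ids` as its only input. Many decoder exports also expect `attention_mask` or `position_ids`, so it is worth checking before relying on `greedy_decode_onnx`. A small sketch of such a check (`describe_inputs` is a hypothetical helper, not part of app.py):

```python
import onnxruntime as ort

def describe_inputs(session: ort.InferenceSession) -> None:
    """Print each declared graph input; confirms whether the model
    really takes only input_ids before greedy_decode_onnx is used."""
    for inp in session.get_inputs():
        print(inp.name, inp.shape, inp.type)

# describe_inputs(session_ds)
# greedy_decode_onnx(session_ds, tokenizer_ds, "def fib(n):", max_new_tokens=32)
```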
@@ -44,7 +65,7 @@ labels_sdg = config_sdg.id2label
 session_sdg = ort.InferenceSession(sdg_onnx_path, providers=["CPUExecutionProvider"])
 
 
-# --- Model 3: German Sentiment BERT ---
+# --- Model 3: German Sentiment ---
 german_onnx_path = hf_hub_download(
     repo_id="oliverguhr/german-sentiment-bert",
     filename="onnx/model.onnx"
@@ -67,10 +88,22 @@ MEAN = [0.485, 0.456, 0.406]
 STD = [0.229, 0.224, 0.225]
 
 
+# --- Model 5: DeepSeek Coder (PR #8) ---
+ds_onnx_path = hf_hub_download(
+    repo_id="deepseek-ai/deepseek-coder-1.3b-base",
+    filename="model.onnx",  # you said this exists ― so we trust you :)
+    revision="refs/pr/8"
+)
+tokenizer_ds = AutoTokenizer.from_pretrained(
+    "deepseek-ai/deepseek-coder-1.3b-base",
+    revision="refs/pr/8"
+)
+session_ds = ort.InferenceSession(ds_onnx_path, providers=["CPUExecutionProvider"])
+
+
 # ---------------------------------------------------------
-# Inference functions
+# Inference functions for classification models
 # ---------------------------------------------------------
-
 def run_multilingual(text):
     inputs = tokenizer_multilingual(text, return_tensors="np", truncation=True, padding=True)
     inputs = {k: v.astype(np.int64) for k, v in inputs.items()}
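The `filename="model.onnx"` choice is taken on faith here, as the inline comment admits: PR revisions on the Hub do not always contain the file the code expects, and `hf_hub_download` only fails at download time. A hypothetical pre-flight check using the real `huggingface_hub.list_repo_files` API (`onnx_available` is illustrative, not part of app.py):

```python
from huggingface_hub import list_repo_files

def onnx_available(repo_id: str, revision: str, filename: str = "model.onnx") -> bool:
    """Return True if `filename` exists in the repo at the given revision."""
    return filename in list_repo_files(repo_id, revision=revision)

# onnx_available("deepseek-ai/deepseek-coder-1.3b-base", "refs/pr/8")
```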
@@ -112,10 +145,13 @@ def run_vit(image):
     return {f"class_{i}": float(probs[i]) for i in top5}
 
 
+def run_deepseek(prompt):
+    return greedy_decode_onnx(session_ds, tokenizer_ds, prompt, max_new_tokens=64)
+
+
 # ---------------------------------------------------------
-# Unified inference router
+# Unified model router
 # ---------------------------------------------------------
-
 def inference(model_name, text, image):
     if model_name == "Multilingual Sentiment":
         return run_multilingual(text)
@@ -127,6 +163,8 @@ def inference(model_name, text, image):
         if image is None:
             return {"error": "Please upload an image."}
         return run_vit(image)
+    elif model_name == "DeepSeek Coder":
+        return {"generated_text": run_deepseek(text)}
     else:
         return {"error": "Invalid model selected."}
 
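Every branch of the router returns a dict, including the new generation branch, so the JSON output component can render all five models uniformly. Illustrative calls (outputs depend on the downloaded models):

```python
# Illustrative router calls; every branch returns a dict.
inference("German Sentiment", "Das Essen war ausgezeichnet!", None)
inference("DeepSeek Coder", "def quicksort(arr):", None)   # {"generated_text": "..."}
inference("ViT Image Classification", "", None)            # {"error": "Please upload an image."}
```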
@@ -134,24 +172,24 @@ def inference(model_name, text, image):
 # ---------------------------------------------------------
 # Gradio UI
 # ---------------------------------------------------------
-
 with gr.Blocks() as demo:
-    gr.Markdown("# 🔍 Multi-Model ONNX Demo (Loaded from Hugging Face Hub)")
-    gr.Markdown("Text + Image models, running entirely with **ONNX Runtime CPU**")
+    gr.Markdown("# 🔍 Multi-Model ONNX Inference Demo")
+    gr.Markdown("All models downloaded directly from the Hugging Face Hub via `hf_hub_download`.")
 
     model_selector = gr.Dropdown(
         [
             "Multilingual Sentiment",
             "SDG Classification",
             "German Sentiment",
-            "ViT Image Classification"
+            "ViT Image Classification",
+            "DeepSeek Coder"
         ],
-        label="Choose a model"
+        label="Choose a Model"
     )
 
-    text_input = gr.Textbox(lines=3, label="Text Input")
-    image_input = gr.Image(type="pil", label="Image Input")
-    output_box = gr.JSON(label="Model Output")
+    text_input = gr.Textbox(lines=3, label="Text Prompt / Input")
+    image_input = gr.Image(type="pil", label="Image Input (for ViT)", visible=True)
+    output_box = gr.JSON(label="Output")
 
     run_button = gr.Button("Run")
 
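The hunk ends at the Run button, so the event wiring is outside the visible diff. With these components it presumably looks like the sketch below (an assumed continuation of app.py, not the committed code):

```python
# Assumed continuation (the click wiring is outside the hunk): feed the
# selector, text box, and image into the router and render the returned dict.
run_button.click(
    fn=inference,
    inputs=[model_selector, text_input, image_input],
    outputs=output_box,
)

demo.launch()
```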