guychuk committed · verified
Commit cde4f1b · Parent: 1d32d95

Update app.py

Files changed (1): app.py (+54 -16)
app.py CHANGED
@@ -7,18 +7,39 @@ from transformers import AutoTokenizer, AutoConfig
 
 
 # ---------------------------------------------------------
-# Helper
+# Helper functions
 # ---------------------------------------------------------
 def softmax(x):
     e = np.exp(x - np.max(x))
     return e / e.sum()
 
 
+def greedy_decode_onnx(session, tokenizer, prompt, max_new_tokens=64):
+    """
+    Minimal greedy decoding loop for decoder-only ONNX models that:
+      - take input_ids only
+      - return logits of shape [batch, seq, vocab]
+    """
+    # Encode prompt
+    ids = tokenizer(prompt, return_tensors="np")["input_ids"].astype(np.int64)
+
+    for _ in range(max_new_tokens):
+        ort_inputs = {"input_ids": ids}
+        logits = session.run(None, ort_inputs)[0]  # shape: [batch, seq, vocab]
+        next_token_logits = logits[:, -1, :]       # last position
+        next_token = int(np.argmax(next_token_logits, axis=-1)[0])
+        ids = np.concatenate([ids, [[next_token]]], axis=1)
+        if next_token == tokenizer.eos_token_id:
+            break
+
+    return tokenizer.decode(ids[0], skip_special_tokens=True)
+
+
 # ---------------------------------------------------------
-# Load ONNX models + tokenizers + configs (for labels)
+# Load ONNX models + tokenizers + configs
 # ---------------------------------------------------------
 
-# --- Model 1: Multilingual DistilBERT Sentiment ---
+# --- Model 1: Multilingual DistilBERT ---
 multilingual_onnx_path = hf_hub_download(
     repo_id="lxyuan/distilbert-base-multilingual-cased-sentiments-student",
     filename="onnx/model.onnx"
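The new decoding loop recomputes logits for the entire prefix on every step (there is no KV cache), so generation cost grows quadratically with output length, and it assumes the exported graph declares `input_ids` as its only input. Many decoder exports also expect `attention_mask` or `position_ids`, so it is worth checking before relying on `greedy_decode_onnx`. A small sketch of such a check (`describe_inputs` is a hypothetical helper, not part of app.py):

```python
import onnxruntime as ort

def describe_inputs(session: ort.InferenceSession) -> None:
    """Print each declared graph input; confirms whether the model
    really takes only input_ids before greedy_decode_onnx is used."""
    for inp in session.get_inputs():
        print(inp.name, inp.shape, inp.type)

# describe_inputs(session_ds)
# greedy_decode_onnx(session_ds, tokenizer_ds, "def fib(n):", max_new_tokens=32)
```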
@@ -44,7 +65,7 @@ labels_sdg = config_sdg.id2label
 session_sdg = ort.InferenceSession(sdg_onnx_path, providers=["CPUExecutionProvider"])
 
 
-# --- Model 3: German Sentiment BERT ---
+# --- Model 3: German Sentiment ---
 german_onnx_path = hf_hub_download(
     repo_id="oliverguhr/german-sentiment-bert",
     filename="onnx/model.onnx"
@@ -67,10 +88,22 @@ MEAN = [0.485, 0.456, 0.406]
 STD = [0.229, 0.224, 0.225]
 
 
+# --- Model 5: DeepSeek Coder (PR #8) ---
+ds_onnx_path = hf_hub_download(
+    repo_id="deepseek-ai/deepseek-coder-1.3b-base",
+    filename="model.onnx",  # you said this exists ― so we trust you :)
+    revision="refs/pr/8"
+)
+tokenizer_ds = AutoTokenizer.from_pretrained(
+    "deepseek-ai/deepseek-coder-1.3b-base",
+    revision="refs/pr/8"
+)
+session_ds = ort.InferenceSession(ds_onnx_path, providers=["CPUExecutionProvider"])
+
+
 # ---------------------------------------------------------
-# Inference functions
+# Inference functions for classification models
 # ---------------------------------------------------------
-
 def run_multilingual(text):
     inputs = tokenizer_multilingual(text, return_tensors="np", truncation=True, padding=True)
     inputs = {k: v.astype(np.int64) for k, v in inputs.items()}
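The `filename="model.onnx"` choice is taken on faith here, as the inline comment admits: PR revisions on the Hub do not always contain the file the code expects, and `hf_hub_download` only fails at download time. A hypothetical pre-flight check using the real `huggingface_hub.list_repo_files` API (`onnx_available` is illustrative, not part of app.py):

```python
from huggingface_hub import list_repo_files

def onnx_available(repo_id: str, revision: str, filename: str = "model.onnx") -> bool:
    """Return True if `filename` exists in the repo at the given revision."""
    return filename in list_repo_files(repo_id, revision=revision)

# onnx_available("deepseek-ai/deepseek-coder-1.3b-base", "refs/pr/8")
```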
@@ -112,10 +145,13 @@ def run_vit(image):
     return {f"class_{i}": float(probs[i]) for i in top5}
 
 
+def run_deepseek(prompt):
+    return greedy_decode_onnx(session_ds, tokenizer_ds, prompt, max_new_tokens=64)
+
+
 # ---------------------------------------------------------
-# Unified inference router
+# Unified model router
 # ---------------------------------------------------------
-
 def inference(model_name, text, image):
     if model_name == "Multilingual Sentiment":
         return run_multilingual(text)
@@ -127,6 +163,8 @@ def inference(model_name, text, image):
         if image is None:
             return {"error": "Please upload an image."}
         return run_vit(image)
+    elif model_name == "DeepSeek Coder":
+        return {"generated_text": run_deepseek(text)}
     else:
         return {"error": "Invalid model selected."}
 
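Every branch of the router returns a dict, including the new generation branch, so the JSON output component can render all five models uniformly. Illustrative calls (outputs depend on the downloaded models):

```python
# Illustrative router calls; every branch returns a dict.
inference("German Sentiment", "Das Essen war ausgezeichnet!", None)
inference("DeepSeek Coder", "def quicksort(arr):", None)   # {"generated_text": "..."}
inference("ViT Image Classification", "", None)            # {"error": "Please upload an image."}
```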
@@ -134,24 +172,24 @@ def inference(model_name, text, image):
 # ---------------------------------------------------------
 # Gradio UI
 # ---------------------------------------------------------
-
 with gr.Blocks() as demo:
-    gr.Markdown("# 🔍 Multi-Model ONNX Demo (Loaded from Hugging Face Hub)")
-    gr.Markdown("Text + Image models, running entirely with **ONNX Runtime CPU**")
+    gr.Markdown("# 🔍 Multi-Model ONNX Inference Demo")
+    gr.Markdown("All models downloaded directly from the Hugging Face Hub via `hf_hub_download`.")
 
     model_selector = gr.Dropdown(
         [
             "Multilingual Sentiment",
             "SDG Classification",
             "German Sentiment",
-            "ViT Image Classification"
+            "ViT Image Classification",
+            "DeepSeek Coder"
         ],
-        label="Choose a model"
+        label="Choose a Model"
     )
 
-    text_input = gr.Textbox(lines=3, label="Text Input")
-    image_input = gr.Image(type="pil", label="Image Input")
-    output_box = gr.JSON(label="Model Output")
+    text_input = gr.Textbox(lines=3, label="Text Prompt / Input")
+    image_input = gr.Image(type="pil", label="Image Input (for ViT)", visible=True)
+    output_box = gr.JSON(label="Output")
 
     run_button = gr.Button("Run")
 
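The hunk ends at the Run button, so the event wiring is outside the visible diff. With these components it presumably looks like the sketch below (an assumed continuation of app.py, not the committed code):

```python
# Assumed continuation (the click wiring is outside the hunk): feed the
# selector, text box, and image into the router and render the returned dict.
run_button.click(
    fn=inference,
    inputs=[model_selector, text_input, image_input],
    outputs=output_box,
)

demo.launch()
```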