YasirAhmad0810 committed on
Commit
f14a7b0
·
verified ·
1 Parent(s): 3435088

Update index.html

Browse files
Files changed (1) hide show
  1. index.html +330 -19
index.html CHANGED
@@ -1,19 +1,330 @@
1
- <!doctype html>
2
- <html>
3
- <head>
4
- <meta charset="utf-8" />
5
- <meta name="viewport" content="width=device-width" />
6
- <title>My static Space</title>
7
- <link rel="stylesheet" href="style.css" />
8
- </head>
9
- <body>
10
- <div class="card">
11
- <h1>Welcome to your static Space!</h1>
12
- <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
13
- <p>
14
- Also don't forget to check the
15
- <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
16
- </p>
17
- </div>
18
- </body>
19
- </html>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
+ <title>Camera Interaction App</title>
7
+ <style>
8
+ body {
9
+ font-family: sans-serif;
10
+ display: flex;
11
+ flex-direction: column;
12
+ align-items: center;
13
+ gap: 20px;
14
+ padding: 20px;
15
+ background-color: #f0f0f0;
16
+ }
17
+ .controls,
18
+ .io-areas {
19
+ display: flex;
20
+ gap: 10px;
21
+ align-items: center;
22
+ background-color: #fff;
23
+ padding: 15px;
24
+ border-radius: 8px;
25
+ box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1);
26
+ }
27
+ .io-areas {
28
+ flex-direction: column;
29
+ align-items: stretch;
30
+ }
31
+ textarea {
32
+ width: 300px;
33
+ height: 80px;
34
+ padding: 8px;
35
+ border: 1px solid #ccc;
36
+ border-radius: 4px;
37
+ font-size: 14px;
38
+ }
39
+ #videoFeed {
40
+ display: block;
41
+ width: 100%;
42
+ height: 100%;
43
+ border-radius: 6px;
44
+ object-fit: cover;
45
+ }
46
+ #videoContainer {
47
+ position: relative;
48
+ width: 480px;
49
+ height: 360px;
50
+ border: 2px solid #333;
51
+ background-color: #000;
52
+ border-radius: 8px;
53
+ margin: 0 auto;
54
+ }
55
+ #loadingOverlay {
56
+ position: absolute;
57
+ top: 0;
58
+ left: 0;
59
+ width: 100%;
60
+ height: 100%;
61
+ display: none;
62
+ justify-content: center;
63
+ align-items: center;
64
+ background-color: rgba(0, 0, 0, 0.7);
65
+ z-index: 10;
66
+ border-radius: 6px;
67
+ color: #ffffff;
68
+ font-size: 1.5em;
69
+ font-weight: bold;
70
+ }
71
+ #startButton {
72
+ padding: 10px 20px;
73
+ font-size: 16px;
74
+ cursor: pointer;
75
+ border: none;
76
+ border-radius: 4px;
77
+ color: white;
78
+ }
79
+ #startButton.start {
80
+ background-color: #28a745; /* Green */
81
+ }
82
+ #startButton.stop {
83
+ background-color: #dc3545; /* Red */
84
+ }
85
+ label {
86
+ font-weight: bold;
87
+ }
88
+ select {
89
+ padding: 8px;
90
+ border-radius: 4px;
91
+ border: 1px solid #ccc;
92
+ }
93
+ .hidden {
94
+ display: none;
95
+ }
96
+ </style>
97
+ </head>
98
+ <body>
99
+ <h1>Camera Interaction App</h1>
100
+
101
+ <div id="videoContainer">
102
+ <video id="videoFeed" autoplay playsinline></video>
103
+ <div id="loadingOverlay">Loading...</div>
104
+ </div>
105
+ <canvas id="canvas" class="hidden"></canvas>
106
+ <!-- For capturing frames -->
107
+
108
+ <div class="io-areas">
109
+ <div>
110
+ <label for="instructionText">Instruction:</label><br />
111
+ <textarea
112
+ id="instructionText"
113
+ style="height: 2em; width: 40em"
114
+ name="Instruction"
115
+ ></textarea>
116
+ </div>
117
+ <div>
118
+ <label for="responseText">Response:</label><br />
119
+ <textarea
120
+ id="responseText"
121
+ style="height: 2em; width: 40em"
122
+ name="Response"
123
+ readonly
124
+ placeholder="Server response will appear here..."
125
+ ></textarea>
126
+ </div>
127
+ </div>
128
+
129
+ <div class="controls">
130
+ <label for="intervalSelect">Interval between 2 requests:</label>
131
+ <select id="intervalSelect" name="Interval between 2 requests">
132
+ <option value="0" selected>0ms</option>
133
+ <option value="100">100ms</option>
134
+ <option value="250">250ms</option>
135
+ <option value="500">500ms</option>
136
+ <option value="1000">1s</option>
137
+ <option value="2000">2s</option>
138
+ </select>
139
+ <button id="startButton" class="start">Start</button>
140
+ </div>
141
+
142
+ <script type="module">
143
+ import {
144
+ AutoProcessor,
145
+ AutoModelForVision2Seq,
146
+ RawImage,
147
+ } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers/dist/transformers.min.js";
148
+
149
// Small lookup helper so each element reference below reads as one short line.
const byId = (id) => document.getElementById(id);

// Page elements the script reads from and writes to.
const video = byId("videoFeed");
const canvas = byId("canvas");
const instructionText = byId("instructionText");
const responseText = byId("responseText");
const intervalSelect = byId("intervalSelect");
const startButton = byId("startButton");
const loadingOverlay = byId("loadingOverlay");

// Pre-fill the instruction box with a default prompt.
instructionText.value = "What do you see?";

// Mutable module state, assigned later by the init/start/stop functions.
let stream; // MediaStream returned by getUserMedia
let isProcessing = false; // true while the capture/inference loop should keep running
let processor; // set by initModel()
let model; // set by initModel()
163
+
164
/**
 * Loads the processor and the vision-to-sequence model, storing them in the
 * module-level `processor` and `model` variables.
 *
 * The loading overlay is shown for the duration of the load and is always
 * hidden again, even when loading fails, so a failed load does not leave the
 * overlay permanently covering the video. Progress and failure messages are
 * written to the response text area.
 *
 * @returns {Promise<void>} Resolves once both processor and model are loaded.
 * @throws Rethrows whatever error the processor/model loading raised, after
 *   reporting it in the UI, so the caller can skip later setup steps.
 */
async function initModel() {
  const modelId = "HuggingFaceTB/SmolVLM-500M-Instruct"; // or "HuggingFaceTB/SmolVLM-Instruct";
  loadingOverlay.style.display = "flex";
  try {
    responseText.value = "Loading processor...";
    processor = await AutoProcessor.from_pretrained(modelId);
    responseText.value = "Processor loaded. Loading model...";
    model = await AutoModelForVision2Seq.from_pretrained(modelId, {
      dtype: {
        embed_tokens: "fp16",
        vision_encoder: "q4",
        decoder_model_merged: "q4",
      },
      device: "webgpu",
    });
    responseText.value = "Model loaded. Initializing camera...";
  } catch (err) {
    console.error("Error loading model:", err);
    responseText.value = `Error loading model: ${err.message}`;
    throw err;
  } finally {
    // Runs on both success and failure so the overlay never gets stuck.
    loadingOverlay.style.display = "none";
  }
}
181
+
182
/**
 * Requests a video-only media stream and attaches it to the <video> element.
 *
 * On success the stream is kept in the module-level `stream` variable. On
 * failure the error is logged, shown in the response text area and raised in
 * an alert; the function itself does not throw.
 *
 * @returns {Promise<void>}
 */
async function initCamera() {
  const constraints = { video: true, audio: false };
  try {
    stream = await navigator.mediaDevices.getUserMedia(constraints);
    video.srcObject = stream;
    responseText.value = "Camera access granted. Ready to start.";
  } catch (err) {
    console.error("Error accessing camera:", err);
    const detailedMessage = `Error accessing camera: ${err.name} - ${err.message}. Please ensure permissions are granted and you are on HTTPS or localhost.`;
    responseText.value = detailedMessage;
    const alertMessage = `Error accessing camera: ${err.name}. Make sure you've granted permission and are on HTTPS or localhost.`;
    alert(alertMessage);
  }
}
198
+
199
/**
 * Copies the current video frame onto the hidden canvas and wraps its pixels
 * in a RawImage.
 *
 * @returns {RawImage|null} A 4-channel RawImage built from the canvas image
 *   data, or null when there is no stream or the video reports no width yet.
 */
function captureImage() {
  const isReady = Boolean(stream) && Boolean(video.videoWidth);
  if (!isReady) {
    console.warn("Video stream not ready for capture.");
    return null;
  }

  // Size the canvas to the video's intrinsic dimensions before drawing.
  canvas.width = video.videoWidth;
  canvas.height = video.videoHeight;

  const ctx = canvas.getContext("2d", { willReadFrequently: true });
  ctx.drawImage(video, 0, 0, canvas.width, canvas.height);

  const pixels = ctx.getImageData(0, 0, canvas.width, canvas.height);
  return new RawImage(pixels.data, pixels.width, pixels.height, 4);
}
211
+
212
/**
 * Runs one image + text prompt through the loaded processor and model.
 *
 * @param {RawImage} imgElement - Image passed to the processor alongside the prompt.
 * @param {string} instruction - Text placed in the user message.
 * @returns {Promise<string>} The first decoded output string, trimmed.
 */
async function runLocalVisionInference(imgElement, instruction) {
  // Single user turn: an image placeholder followed by the instruction text.
  const userTurn = {
    role: "user",
    content: [{ type: "image" }, { type: "text", text: instruction }],
  };
  const prompt = processor.apply_chat_template([userTurn], {
    add_generation_prompt: true,
  });

  const inputs = await processor(prompt, [imgElement], {
    do_image_splitting: false,
  });

  const generatedIds = await model.generate({
    ...inputs,
    max_new_tokens: 100,
  });

  // Slice from the input length onward along the last axis; presumably this
  // drops the echoed prompt tokens so only newly generated ones are decoded.
  const promptLength = inputs.input_ids.dims.at(-1);
  const newTokenIds = generatedIds.slice(null, [promptLength, null]);

  const decoded = processor.batch_decode(newTokenIds, {
    skip_special_tokens: true,
  });
  return decoded[0].trim();
}
235
+
236
/**
 * Performs one capture-and-infer step and writes the outcome to the response
 * text area. Does nothing when processing is not active.
 *
 * Inference errors are logged and shown as `Error: <message>`; they are not
 * rethrown.
 *
 * @returns {Promise<void>}
 */
async function sendData() {
  if (!isProcessing) {
    return;
  }

  const instruction = instructionText.value;
  const frame = captureImage();
  if (!frame) {
    responseText.value = "Capture failed";
    return;
  }

  let outcome;
  try {
    outcome = await runLocalVisionInference(frame, instruction);
  } catch (e) {
    console.error(e);
    outcome = `Error: ${e.message}`;
  }
  responseText.value = outcome;
}
252
+
253
/**
 * Returns a promise that resolves after roughly `ms` milliseconds.
 *
 * @param {number} ms - Delay passed to setTimeout.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

// True while a processingLoop() invocation is still iterating. Guards against
// starting a second loop when the user clicks Stop and then Start while the
// previous loop is still awaiting an inference or its sleep.
let isLoopActive = false;

/**
 * Repeatedly calls sendData() while `isProcessing` is true, pausing between
 * calls for the interval currently selected in the interval dropdown.
 *
 * Only one loop runs at a time: if a loop is already active, this call returns
 * immediately and the existing loop keeps going (it will observe
 * `isProcessing` being true again). The interval is re-read on every
 * iteration so a surviving loop picks up a value changed between Stop and
 * Start.
 *
 * @returns {Promise<void>} Resolves when the loop exits, or at once if another
 *   loop is already running.
 */
async function processingLoop() {
  if (isLoopActive) {
    return;
  }
  isLoopActive = true;
  try {
    while (isProcessing) {
      await sendData();
      if (!isProcessing) break;
      await sleep(Number.parseInt(intervalSelect.value, 10));
    }
  } finally {
    // Reset synchronously on exit so a later Start can launch a fresh loop.
    isLoopActive = false;
  }
}
265
+
266
/**
 * Switches the UI into the running state and kicks off the processing loop.
 * Refuses to start (with a message and an alert) when no camera stream exists.
 */
function handleStart() {
  if (!stream) {
    responseText.value = "Camera not available. Cannot start.";
    alert("Camera not available. Please grant permission first.");
    return;
  }

  isProcessing = true;

  // The button now acts as a Stop control.
  startButton.textContent = "Stop";
  startButton.classList.replace("start", "stop");

  // Lock the inputs while the loop is running.
  for (const control of [instructionText, intervalSelect]) {
    control.disabled = true;
  }

  responseText.value = "Processing started...";

  // Deliberately not awaited: the loop runs in the background until stopped.
  void processingLoop();
}
283
+
284
/**
 * Switches the UI back to the idle state and signals the processing loop to
 * stop by clearing `isProcessing`.
 */
function handleStop() {
  isProcessing = false;

  // The button now acts as a Start control again.
  startButton.textContent = "Start";
  startButton.classList.replace("stop", "start");

  // Unlock the inputs.
  for (const control of [instructionText, intervalSelect]) {
    control.disabled = false;
  }

  // Only replace the placeholder status; leave any other text untouched.
  const showsStartedPlaceholder = responseText.value.startsWith(
    "Processing started..."
  );
  if (showsStartedPlaceholder) {
    responseText.value = "Processing stopped.";
  }
}
295
+
296
// The single button toggles between starting and stopping, based on whether
// processing is currently active.
startButton.addEventListener("click", () => {
  const action = isProcessing ? handleStop : handleStart;
  action();
});
303
+
304
// Page bootstrap: verify WebGPU is present, then load the model and open the
// camera, in that order.
window.addEventListener("DOMContentLoaded", async () => {
  // Check for WebGPU support
  if (!navigator.gpu) {
    const videoElement = document.getElementById("videoFeed");
    const warningElement = document.createElement("p");
    warningElement.textContent = "WebGPU is not available in this browser.";
    warningElement.style.color = "red";
    warningElement.style.textAlign = "center";
    videoElement.parentNode.insertBefore(
      warningElement,
      videoElement.nextSibling
    );
    responseText.value = "WebGPU is not available in this browser.";
    // initModel() requests device "webgpu", so loading cannot succeed here;
    // stop instead of starting a model load that is bound to fail.
    return;
  }

  try {
    await initModel();
    await initCamera();
  } catch (err) {
    // Without this, a failed load would surface only as an unhandled
    // promise rejection from the event listener.
    console.error("Initialization failed:", err);
    responseText.value = `Initialization failed: ${err.message}`;
  }
});
322
+
323
// Stop every track of the camera stream when the page is about to unload.
window.addEventListener("beforeunload", () => {
  if (!stream) {
    return;
  }
  for (const track of stream.getTracks()) {
    track.stop();
  }
});
328
+ </script>
329
+ </body>
330
+ </html>