Spaces:
Running
Running
add quantized models support (#4)
Browse files- add quantized models support (e5f217ff864b7c048b430487ac42ad6e0ca23095)
Co-authored-by: Radamés Ajna <[email protected]>
- build/m.d.ts +9 -2
- build/m.js +72 -9
- build/m_bg.wasm +2 -2
- build/m_bg.wasm.d.ts +2 -1
- index.html +59 -40
- whisperWorker.js +57 -14
build/m.d.ts
CHANGED
|
@@ -8,8 +8,14 @@ export class Decoder {
|
|
| 8 |
* @param {Uint8Array} weights
|
| 9 |
* @param {Uint8Array} tokenizer
|
| 10 |
* @param {Uint8Array} mel_filters
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
*/
|
| 12 |
-
constructor(weights: Uint8Array, tokenizer: Uint8Array, mel_filters: Uint8Array);
|
| 13 |
/**
|
| 14 |
* @param {Uint8Array} wav_input
|
| 15 |
* @returns {string}
|
|
@@ -22,11 +28,12 @@ export type InitInput = RequestInfo | URL | Response | BufferSource | WebAssembl
|
|
| 22 |
export interface InitOutput {
|
| 23 |
readonly memory: WebAssembly.Memory;
|
| 24 |
readonly __wbg_decoder_free: (a: number) => void;
|
| 25 |
-
readonly decoder_new: (a: number, b: number, c: number, d: number, e: number, f: number, g: number) => void;
|
| 26 |
readonly decoder_decode: (a: number, b: number, c: number, d: number) => void;
|
| 27 |
readonly main: (a: number, b: number) => number;
|
| 28 |
readonly __wbindgen_add_to_stack_pointer: (a: number) => number;
|
| 29 |
readonly __wbindgen_malloc: (a: number, b: number) => number;
|
|
|
|
| 30 |
readonly __wbindgen_free: (a: number, b: number, c: number) => void;
|
| 31 |
readonly __wbindgen_start: () => void;
|
| 32 |
}
|
|
|
|
| 8 |
* @param {Uint8Array} weights
|
| 9 |
* @param {Uint8Array} tokenizer
|
| 10 |
* @param {Uint8Array} mel_filters
|
| 11 |
+
* @param {Uint8Array} config
|
| 12 |
+
* @param {boolean} quantized
|
| 13 |
+
* @param {boolean} is_multilingual
|
| 14 |
+
* @param {boolean} timestamps
|
| 15 |
+
* @param {string | undefined} task
|
| 16 |
+
* @param {string | undefined} language
|
| 17 |
*/
|
| 18 |
+
constructor(weights: Uint8Array, tokenizer: Uint8Array, mel_filters: Uint8Array, config: Uint8Array, quantized: boolean, is_multilingual: boolean, timestamps: boolean, task?: string, language?: string);
|
| 19 |
/**
|
| 20 |
* @param {Uint8Array} wav_input
|
| 21 |
* @returns {string}
|
|
|
|
| 28 |
export interface InitOutput {
|
| 29 |
readonly memory: WebAssembly.Memory;
|
| 30 |
readonly __wbg_decoder_free: (a: number) => void;
|
| 31 |
+
readonly decoder_new: (a: number, b: number, c: number, d: number, e: number, f: number, g: number, h: number, i: number, j: number, k: number, l: number, m: number, n: number, o: number, p: number) => void;
|
| 32 |
readonly decoder_decode: (a: number, b: number, c: number, d: number) => void;
|
| 33 |
readonly main: (a: number, b: number) => number;
|
| 34 |
readonly __wbindgen_add_to_stack_pointer: (a: number) => number;
|
| 35 |
readonly __wbindgen_malloc: (a: number, b: number) => number;
|
| 36 |
+
readonly __wbindgen_realloc: (a: number, b: number, c: number, d: number) => number;
|
| 37 |
readonly __wbindgen_free: (a: number, b: number, c: number) => void;
|
| 38 |
readonly __wbindgen_start: () => void;
|
| 39 |
}
|
build/m.js
CHANGED
|
@@ -42,6 +42,63 @@ function passArray8ToWasm0(arg, malloc) {
|
|
| 42 |
return ptr;
|
| 43 |
}
|
| 44 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
let cachedInt32Memory0 = null;
|
| 46 |
|
| 47 |
function getInt32Memory0() {
|
|
@@ -91,8 +148,14 @@ export class Decoder {
|
|
| 91 |
* @param {Uint8Array} weights
|
| 92 |
* @param {Uint8Array} tokenizer
|
| 93 |
* @param {Uint8Array} mel_filters
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
*/
|
| 95 |
-
constructor(weights, tokenizer, mel_filters) {
|
| 96 |
try {
|
| 97 |
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
| 98 |
const ptr0 = passArray8ToWasm0(weights, wasm.__wbindgen_malloc);
|
|
@@ -101,7 +164,13 @@ export class Decoder {
|
|
| 101 |
const len1 = WASM_VECTOR_LEN;
|
| 102 |
const ptr2 = passArray8ToWasm0(mel_filters, wasm.__wbindgen_malloc);
|
| 103 |
const len2 = WASM_VECTOR_LEN;
|
| 104 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
var r0 = getInt32Memory0()[retptr / 4 + 0];
|
| 106 |
var r1 = getInt32Memory0()[retptr / 4 + 1];
|
| 107 |
var r2 = getInt32Memory0()[retptr / 4 + 2];
|
|
@@ -183,15 +252,9 @@ function __wbg_get_imports() {
|
|
| 183 |
const ret = new Error(getStringFromWasm0(arg0, arg1));
|
| 184 |
return addHeapObject(ret);
|
| 185 |
};
|
| 186 |
-
imports.wbg.
|
| 187 |
console.log(getStringFromWasm0(arg0, arg1));
|
| 188 |
};
|
| 189 |
-
imports.wbg.__wbg_time_fa135a7c2786e907 = function(arg0, arg1) {
|
| 190 |
-
console.time(getStringFromWasm0(arg0, arg1));
|
| 191 |
-
};
|
| 192 |
-
imports.wbg.__wbg_timeEnd_594d82f147c9776f = function(arg0, arg1) {
|
| 193 |
-
console.timeEnd(getStringFromWasm0(arg0, arg1));
|
| 194 |
-
};
|
| 195 |
imports.wbg.__wbindgen_throw = function(arg0, arg1) {
|
| 196 |
throw new Error(getStringFromWasm0(arg0, arg1));
|
| 197 |
};
|
|
|
|
| 42 |
return ptr;
|
| 43 |
}
|
| 44 |
|
| 45 |
+
const cachedTextEncoder = (typeof TextEncoder !== 'undefined' ? new TextEncoder('utf-8') : { encode: () => { throw Error('TextEncoder not available') } } );
|
| 46 |
+
|
| 47 |
+
const encodeString = (typeof cachedTextEncoder.encodeInto === 'function'
|
| 48 |
+
? function (arg, view) {
|
| 49 |
+
return cachedTextEncoder.encodeInto(arg, view);
|
| 50 |
+
}
|
| 51 |
+
: function (arg, view) {
|
| 52 |
+
const buf = cachedTextEncoder.encode(arg);
|
| 53 |
+
view.set(buf);
|
| 54 |
+
return {
|
| 55 |
+
read: arg.length,
|
| 56 |
+
written: buf.length
|
| 57 |
+
};
|
| 58 |
+
});
|
| 59 |
+
|
| 60 |
+
function passStringToWasm0(arg, malloc, realloc) {
|
| 61 |
+
|
| 62 |
+
if (realloc === undefined) {
|
| 63 |
+
const buf = cachedTextEncoder.encode(arg);
|
| 64 |
+
const ptr = malloc(buf.length, 1) >>> 0;
|
| 65 |
+
getUint8Memory0().subarray(ptr, ptr + buf.length).set(buf);
|
| 66 |
+
WASM_VECTOR_LEN = buf.length;
|
| 67 |
+
return ptr;
|
| 68 |
+
}
|
| 69 |
+
|
| 70 |
+
let len = arg.length;
|
| 71 |
+
let ptr = malloc(len, 1) >>> 0;
|
| 72 |
+
|
| 73 |
+
const mem = getUint8Memory0();
|
| 74 |
+
|
| 75 |
+
let offset = 0;
|
| 76 |
+
|
| 77 |
+
for (; offset < len; offset++) {
|
| 78 |
+
const code = arg.charCodeAt(offset);
|
| 79 |
+
if (code > 0x7F) break;
|
| 80 |
+
mem[ptr + offset] = code;
|
| 81 |
+
}
|
| 82 |
+
|
| 83 |
+
if (offset !== len) {
|
| 84 |
+
if (offset !== 0) {
|
| 85 |
+
arg = arg.slice(offset);
|
| 86 |
+
}
|
| 87 |
+
ptr = realloc(ptr, len, len = offset + arg.length * 3, 1) >>> 0;
|
| 88 |
+
const view = getUint8Memory0().subarray(ptr + offset, ptr + len);
|
| 89 |
+
const ret = encodeString(arg, view);
|
| 90 |
+
|
| 91 |
+
offset += ret.written;
|
| 92 |
+
}
|
| 93 |
+
|
| 94 |
+
WASM_VECTOR_LEN = offset;
|
| 95 |
+
return ptr;
|
| 96 |
+
}
|
| 97 |
+
|
| 98 |
+
function isLikeNone(x) {
|
| 99 |
+
return x === undefined || x === null;
|
| 100 |
+
}
|
| 101 |
+
|
| 102 |
let cachedInt32Memory0 = null;
|
| 103 |
|
| 104 |
function getInt32Memory0() {
|
|
|
|
| 148 |
* @param {Uint8Array} weights
|
| 149 |
* @param {Uint8Array} tokenizer
|
| 150 |
* @param {Uint8Array} mel_filters
|
| 151 |
+
* @param {Uint8Array} config
|
| 152 |
+
* @param {boolean} quantized
|
| 153 |
+
* @param {boolean} is_multilingual
|
| 154 |
+
* @param {boolean} timestamps
|
| 155 |
+
* @param {string | undefined} task
|
| 156 |
+
* @param {string | undefined} language
|
| 157 |
*/
|
| 158 |
+
constructor(weights, tokenizer, mel_filters, config, quantized, is_multilingual, timestamps, task, language) {
|
| 159 |
try {
|
| 160 |
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
| 161 |
const ptr0 = passArray8ToWasm0(weights, wasm.__wbindgen_malloc);
|
|
|
|
| 164 |
const len1 = WASM_VECTOR_LEN;
|
| 165 |
const ptr2 = passArray8ToWasm0(mel_filters, wasm.__wbindgen_malloc);
|
| 166 |
const len2 = WASM_VECTOR_LEN;
|
| 167 |
+
const ptr3 = passArray8ToWasm0(config, wasm.__wbindgen_malloc);
|
| 168 |
+
const len3 = WASM_VECTOR_LEN;
|
| 169 |
+
var ptr4 = isLikeNone(task) ? 0 : passStringToWasm0(task, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
| 170 |
+
var len4 = WASM_VECTOR_LEN;
|
| 171 |
+
var ptr5 = isLikeNone(language) ? 0 : passStringToWasm0(language, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
| 172 |
+
var len5 = WASM_VECTOR_LEN;
|
| 173 |
+
wasm.decoder_new(retptr, ptr0, len0, ptr1, len1, ptr2, len2, ptr3, len3, quantized, is_multilingual, timestamps, ptr4, len4, ptr5, len5);
|
| 174 |
var r0 = getInt32Memory0()[retptr / 4 + 0];
|
| 175 |
var r1 = getInt32Memory0()[retptr / 4 + 1];
|
| 176 |
var r2 = getInt32Memory0()[retptr / 4 + 2];
|
|
|
|
| 252 |
const ret = new Error(getStringFromWasm0(arg0, arg1));
|
| 253 |
return addHeapObject(ret);
|
| 254 |
};
|
| 255 |
+
imports.wbg.__wbg_log_0d9af0379e7a06b8 = function(arg0, arg1) {
|
| 256 |
console.log(getStringFromWasm0(arg0, arg1));
|
| 257 |
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 258 |
imports.wbg.__wbindgen_throw = function(arg0, arg1) {
|
| 259 |
throw new Error(getStringFromWasm0(arg0, arg1));
|
| 260 |
};
|
build/m_bg.wasm
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:393c1add1a180c1f0403cf5bb26db587ec59d19bec0c756b613f89b5e12fa512
|
| 3 |
+
size 4070269
|
build/m_bg.wasm.d.ts
CHANGED
|
@@ -2,10 +2,11 @@
|
|
| 2 |
/* eslint-disable */
|
| 3 |
export const memory: WebAssembly.Memory;
|
| 4 |
export function __wbg_decoder_free(a: number): void;
|
| 5 |
-
export function decoder_new(a: number, b: number, c: number, d: number, e: number, f: number, g: number): void;
|
| 6 |
export function decoder_decode(a: number, b: number, c: number, d: number): void;
|
| 7 |
export function main(a: number, b: number): number;
|
| 8 |
export function __wbindgen_add_to_stack_pointer(a: number): number;
|
| 9 |
export function __wbindgen_malloc(a: number, b: number): number;
|
|
|
|
| 10 |
export function __wbindgen_free(a: number, b: number, c: number): void;
|
| 11 |
export function __wbindgen_start(): void;
|
|
|
|
| 2 |
/* eslint-disable */
|
| 3 |
export const memory: WebAssembly.Memory;
|
| 4 |
export function __wbg_decoder_free(a: number): void;
|
| 5 |
+
export function decoder_new(a: number, b: number, c: number, d: number, e: number, f: number, g: number, h: number, i: number, j: number, k: number, l: number, m: number, n: number, o: number, p: number): void;
|
| 6 |
export function decoder_decode(a: number, b: number, c: number, d: number): void;
|
| 7 |
export function main(a: number, b: number): number;
|
| 8 |
export function __wbindgen_add_to_stack_pointer(a: number): number;
|
| 9 |
export function __wbindgen_malloc(a: number, b: number): number;
|
| 10 |
+
export function __wbindgen_realloc(a: number, b: number, c: number, d: number): number;
|
| 11 |
export function __wbindgen_free(a: number, b: number, c: number): void;
|
| 12 |
export function __wbindgen_start(): void;
|
index.html
CHANGED
|
@@ -6,7 +6,7 @@
|
|
| 6 |
<body></body>
|
| 7 |
</html>
|
| 8 |
|
| 9 |
-
<!
|
| 10 |
<html>
|
| 11 |
<head>
|
| 12 |
<meta charset="UTF-8" />
|
|
@@ -26,9 +26,30 @@
|
|
| 26 |
|
| 27 |
// models base url
|
| 28 |
const MODELS = {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
tiny_en: {
|
| 30 |
base_url:
|
| 31 |
-
"https://huggingface.co/openai/whisper-tiny.en/resolve/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
},
|
| 33 |
};
|
| 34 |
const whisperWorker = new Worker("./whisperWorker.js", {
|
|
@@ -39,6 +60,7 @@
|
|
| 39 |
weightsURL, // URL to the weights file
|
| 40 |
modelID, // model ID
|
| 41 |
tokenizerURL, // URL to the tokenizer file
|
|
|
|
| 42 |
mel_filtersURL, // URL to the mel filters file
|
| 43 |
audioURL, // URL to the audio file
|
| 44 |
updateStatus // function to update the status
|
|
@@ -48,21 +70,25 @@
|
|
| 48 |
weightsURL,
|
| 49 |
modelID,
|
| 50 |
tokenizerURL,
|
|
|
|
| 51 |
mel_filtersURL,
|
| 52 |
audioURL,
|
| 53 |
});
|
| 54 |
-
|
| 55 |
console.log(event.data);
|
| 56 |
if ("status" in event.data) {
|
| 57 |
updateStatus(event.data);
|
| 58 |
}
|
| 59 |
if ("error" in event.data) {
|
|
|
|
| 60 |
reject(new Error(event.data.error));
|
| 61 |
}
|
| 62 |
if (event.data.status === "complete") {
|
|
|
|
| 63 |
resolve(event.data);
|
| 64 |
}
|
| 65 |
-
}
|
|
|
|
| 66 |
});
|
| 67 |
}
|
| 68 |
|
|
@@ -125,13 +151,16 @@
|
|
| 125 |
return;
|
| 126 |
}
|
| 127 |
const modelID = document.querySelector("#model").value;
|
| 128 |
-
const
|
| 129 |
-
const
|
|
|
|
|
|
|
| 130 |
|
| 131 |
classifyAudio(
|
| 132 |
modelURL,
|
| 133 |
modelID,
|
| 134 |
tokenizerURL,
|
|
|
|
| 135 |
"mel_filters.safetensors",
|
| 136 |
audioURL,
|
| 137 |
updateStatus
|
|
@@ -175,8 +204,7 @@
|
|
| 175 |
<a
|
| 176 |
href="https://huggingface.co/openai/"
|
| 177 |
target="_blank"
|
| 178 |
-
class="underline hover:text-blue-500 hover:no-underline"
|
| 179 |
-
>
|
| 180 |
OpenAI Whisper models
|
| 181 |
</a>
|
| 182 |
and WASM runtime built with
|
|
@@ -193,37 +221,38 @@
|
|
| 193 |
<label for="model" class="font-medium">Models Options: </label>
|
| 194 |
<select
|
| 195 |
id="model"
|
| 196 |
-
class="border-2 border-gray-500 rounded-md font-light"
|
| 197 |
-
|
| 198 |
<option value="tiny_en" selected>tiny.en (151 MB)</option>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 199 |
</select>
|
| 200 |
</div>
|
| 201 |
<!-- drag and drop area -->
|
| 202 |
<div class="relative">
|
| 203 |
<div
|
| 204 |
id="drop-area"
|
| 205 |
-
class="flex flex-col items-center justify-center border-2 border-gray-300 border-dashed rounded-xl relative h-48 w-full overflow-hidden"
|
| 206 |
-
>
|
| 207 |
<div
|
| 208 |
-
class="flex flex-col items-center justify-center space-y-1 text-center"
|
| 209 |
-
>
|
| 210 |
<svg
|
| 211 |
width="25"
|
| 212 |
height="25"
|
| 213 |
viewBox="0 0 25 25"
|
| 214 |
fill="none"
|
| 215 |
-
xmlns="http://www.w3.org/2000/svg"
|
| 216 |
-
>
|
| 217 |
<path
|
| 218 |
d="M3.5 24.3a3 3 0 0 1-1.9-.8c-.5-.5-.8-1.2-.8-1.9V2.9c0-.7.3-1.3.8-1.9.6-.5 1.2-.7 2-.7h18.6c.7 0 1.3.2 1.9.7.5.6.7 1.2.7 2v18.6c0 .7-.2 1.4-.7 1.9a3 3 0 0 1-2 .8H3.6Zm0-2.7h18.7V2.9H3.5v18.7Zm2.7-2.7h13.3c.3 0 .5 0 .6-.3v-.7l-3.7-5a.6.6 0 0 0-.6-.2c-.2 0-.4 0-.5.3l-3.5 4.6-2.4-3.3a.6.6 0 0 0-.6-.3c-.2 0-.4.1-.5.3l-2.7 3.6c-.1.2-.2.4 0 .7.1.2.3.3.6.3Z"
|
| 219 |
-
fill="#000"
|
| 220 |
-
/>
|
| 221 |
</svg>
|
| 222 |
<div class="flex text-sm text-gray-600">
|
| 223 |
<label
|
| 224 |
for="file-upload"
|
| 225 |
-
class="relative cursor-pointer bg-white rounded-md font-medium text-blue-950 hover:text-blue-700"
|
| 226 |
-
>
|
| 227 |
<span>Drag and drop your audio here</span>
|
| 228 |
<span class="block text-xs">or</span>
|
| 229 |
<span class="block text-xs">Click to upload</span>
|
|
@@ -234,15 +263,13 @@
|
|
| 234 |
name="file-upload"
|
| 235 |
type="file"
|
| 236 |
accept="audio/*"
|
| 237 |
-
class="sr-only"
|
| 238 |
-
/>
|
| 239 |
</div>
|
| 240 |
<audio
|
| 241 |
id="audio"
|
| 242 |
hidden
|
| 243 |
controls
|
| 244 |
-
class="w-full p-2 select-none"
|
| 245 |
-
></audio>
|
| 246 |
</div>
|
| 247 |
</div>
|
| 248 |
<div>
|
|
@@ -250,43 +277,37 @@
|
|
| 250 |
<h3 class="font-medium">Examples:</h3>
|
| 251 |
<button
|
| 252 |
data-value="samples_jfk.wav"
|
| 253 |
-
class="text-gray-500 border border-gray-500 rounded-md p-2 underline hover:no-underline"
|
| 254 |
-
>
|
| 255 |
<span>jfk.wav</span>
|
| 256 |
<span class="text-xs block"> (352 kB)</span>
|
| 257 |
</button>
|
| 258 |
<button
|
| 259 |
data-value="samples_a13.wav"
|
| 260 |
-
class="text-gray-500 border border-gray-500 rounded-md p-2 underline hover:no-underline"
|
| 261 |
-
>
|
| 262 |
<span>a13.wav</span>
|
| 263 |
<span class="text-xs block"> (960 kB)</span>
|
| 264 |
</button>
|
| 265 |
<button
|
| 266 |
data-value="samples_mm0.wav"
|
| 267 |
-
class="text-gray-500 border border-gray-500 rounded-md p-2 underline hover:no-underline"
|
| 268 |
-
>
|
| 269 |
<span>mm0.wav</span>
|
| 270 |
<span class="text-xs block new"> (957 kB)</span>
|
| 271 |
</button>
|
| 272 |
<button
|
| 273 |
data-value="samples_gb0.wav"
|
| 274 |
-
class="text-gray-500 border border-gray-500 rounded-md p-2 underline hover:no-underline"
|
| 275 |
-
>
|
| 276 |
<span>gb0.wav </span>
|
| 277 |
<span class="text-xs block">(4.08 MB)</span>
|
| 278 |
</button>
|
| 279 |
<button
|
| 280 |
data-value="samples_gb1.wav"
|
| 281 |
-
class="text-gray-500 border border-gray-500 rounded-md p-2 underline hover:no-underline"
|
| 282 |
-
>
|
| 283 |
<span>gb1.wav </span>
|
| 284 |
<span class="text-xs block">(6.36 MB)</span>
|
| 285 |
</button>
|
| 286 |
<button
|
| 287 |
data-value="samples_hp0.wav"
|
| 288 |
-
class="text-gray-500 border border-gray-500 rounded-md p-2 underline hover:no-underline"
|
| 289 |
-
>
|
| 290 |
<span>hp0.wav </span>
|
| 291 |
<span class="text-xs block">(8.75 MB)</span>
|
| 292 |
</button>
|
|
@@ -297,16 +318,14 @@
|
|
| 297 |
<button
|
| 298 |
id="detect"
|
| 299 |
disabled
|
| 300 |
-
class="bg-gray-700 hover:bg-gray-800 text-white font-normal py-2 px-4 rounded disabled:bg-gray-300 disabled:cursor-not-allowed"
|
| 301 |
-
>
|
| 302 |
Transcribe Audio
|
| 303 |
</button>
|
| 304 |
</div>
|
| 305 |
<div>
|
| 306 |
<h3 class="font-medium">Transcription:</h3>
|
| 307 |
<div
|
| 308 |
-
class="min-h-[250px] bg-slate-100 text-gray-500 p-4 rounded-md flex flex-col gap-2"
|
| 309 |
-
>
|
| 310 |
<p hidden id="output-generation" class="grid-rows-2"></p>
|
| 311 |
<span id="output-status" class="m-auto font-light"
|
| 312 |
>No transcription results yet</span
|
|
|
|
| 6 |
<body></body>
|
| 7 |
</html>
|
| 8 |
|
| 9 |
+
<!DOCTYPE html>
|
| 10 |
<html>
|
| 11 |
<head>
|
| 12 |
<meta charset="UTF-8" />
|
|
|
|
| 26 |
|
| 27 |
// models base url
|
| 28 |
const MODELS = {
|
| 29 |
+
tiny_multilingual: {
|
| 30 |
+
base_url: "https://huggingface.co/openai/whisper-tiny/resolve/main/",
|
| 31 |
+
model: "model.safetensors",
|
| 32 |
+
tokenizer: "tokenizer.json",
|
| 33 |
+
config: "config.json",
|
| 34 |
+
},
|
| 35 |
tiny_en: {
|
| 36 |
base_url:
|
| 37 |
+
"https://huggingface.co/openai/whisper-tiny.en/resolve/main/",
|
| 38 |
+
model: "model.safetensors",
|
| 39 |
+
tokenizer: "tokenizer.json",
|
| 40 |
+
config: "config.json",
|
| 41 |
+
},
|
| 42 |
+
tiny_quantized_multilingual_q80: {
|
| 43 |
+
base_url: "https://huggingface.co/lmz/candle-whisper/resolve/main/",
|
| 44 |
+
model: "model-tiny-q80.gguf",
|
| 45 |
+
tokenizer: "tokenizer-tiny.json",
|
| 46 |
+
config: "config-tiny.json",
|
| 47 |
+
},
|
| 48 |
+
tiny_en_quantized_q80: {
|
| 49 |
+
base_url: "https://huggingface.co/lmz/candle-whisper/resolve/main/",
|
| 50 |
+
model: "model-tiny-q80.gguf",
|
| 51 |
+
tokenizer: "tokenizer-tiny-en.json",
|
| 52 |
+
config: "config-tiny-en.json",
|
| 53 |
},
|
| 54 |
};
|
| 55 |
const whisperWorker = new Worker("./whisperWorker.js", {
|
|
|
|
| 60 |
weightsURL, // URL to the weights file
|
| 61 |
modelID, // model ID
|
| 62 |
tokenizerURL, // URL to the tokenizer file
|
| 63 |
+
configURL, // model config URL
|
| 64 |
mel_filtersURL, // URL to the mel filters file
|
| 65 |
audioURL, // URL to the audio file
|
| 66 |
updateStatus // function to update the status
|
|
|
|
| 70 |
weightsURL,
|
| 71 |
modelID,
|
| 72 |
tokenizerURL,
|
| 73 |
+
configURL,
|
| 74 |
mel_filtersURL,
|
| 75 |
audioURL,
|
| 76 |
});
|
| 77 |
+
function messageHandler(event) {
|
| 78 |
console.log(event.data);
|
| 79 |
if ("status" in event.data) {
|
| 80 |
updateStatus(event.data);
|
| 81 |
}
|
| 82 |
if ("error" in event.data) {
|
| 83 |
+
whisperWorker.removeEventListener("message", messageHandler);
|
| 84 |
reject(new Error(event.data.error));
|
| 85 |
}
|
| 86 |
if (event.data.status === "complete") {
|
| 87 |
+
whisperWorker.removeEventListener("message", messageHandler);
|
| 88 |
resolve(event.data);
|
| 89 |
}
|
| 90 |
+
}
|
| 91 |
+
whisperWorker.addEventListener("message", messageHandler);
|
| 92 |
});
|
| 93 |
}
|
| 94 |
|
|
|
|
| 151 |
return;
|
| 152 |
}
|
| 153 |
const modelID = document.querySelector("#model").value;
|
| 154 |
+
const model = MODELS[modelID];
|
| 155 |
+
const modelURL = model.base_url + model.model;
|
| 156 |
+
const tokenizerURL = model.base_url + model.tokenizer;
|
| 157 |
+
const configURL = model.base_url + model.config;
|
| 158 |
|
| 159 |
classifyAudio(
|
| 160 |
modelURL,
|
| 161 |
modelID,
|
| 162 |
tokenizerURL,
|
| 163 |
+
configURL,
|
| 164 |
"mel_filters.safetensors",
|
| 165 |
audioURL,
|
| 166 |
updateStatus
|
|
|
|
| 204 |
<a
|
| 205 |
href="https://huggingface.co/openai/"
|
| 206 |
target="_blank"
|
| 207 |
+
class="underline hover:text-blue-500 hover:no-underline">
|
|
|
|
| 208 |
OpenAI Whisper models
|
| 209 |
</a>
|
| 210 |
and WASM runtime built with
|
|
|
|
| 221 |
<label for="model" class="font-medium">Models Options: </label>
|
| 222 |
<select
|
| 223 |
id="model"
|
| 224 |
+
class="border-2 border-gray-500 rounded-md font-light">
|
| 225 |
+
<option value="tiny_multilingual" selected>tiny.en (151 MB)</option>
|
| 226 |
<option value="tiny_en" selected>tiny.en (151 MB)</option>
|
| 227 |
+
<option value="tiny_quantized_multilingual_q80">
|
| 228 |
+
tiny quantized q80 (41.5 MB)
|
| 229 |
+
</option>
|
| 230 |
+
<option value="tiny_en_quantized_q80">
|
| 231 |
+
tiny.en quantized q80 (41.8 MB)
|
| 232 |
+
</option>
|
| 233 |
</select>
|
| 234 |
</div>
|
| 235 |
<!-- drag and drop area -->
|
| 236 |
<div class="relative">
|
| 237 |
<div
|
| 238 |
id="drop-area"
|
| 239 |
+
class="flex flex-col items-center justify-center border-2 border-gray-300 border-dashed rounded-xl relative h-48 w-full overflow-hidden">
|
|
|
|
| 240 |
<div
|
| 241 |
+
class="flex flex-col items-center justify-center space-y-1 text-center">
|
|
|
|
| 242 |
<svg
|
| 243 |
width="25"
|
| 244 |
height="25"
|
| 245 |
viewBox="0 0 25 25"
|
| 246 |
fill="none"
|
| 247 |
+
xmlns="http://www.w3.org/2000/svg">
|
|
|
|
| 248 |
<path
|
| 249 |
d="M3.5 24.3a3 3 0 0 1-1.9-.8c-.5-.5-.8-1.2-.8-1.9V2.9c0-.7.3-1.3.8-1.9.6-.5 1.2-.7 2-.7h18.6c.7 0 1.3.2 1.9.7.5.6.7 1.2.7 2v18.6c0 .7-.2 1.4-.7 1.9a3 3 0 0 1-2 .8H3.6Zm0-2.7h18.7V2.9H3.5v18.7Zm2.7-2.7h13.3c.3 0 .5 0 .6-.3v-.7l-3.7-5a.6.6 0 0 0-.6-.2c-.2 0-.4 0-.5.3l-3.5 4.6-2.4-3.3a.6.6 0 0 0-.6-.3c-.2 0-.4.1-.5.3l-2.7 3.6c-.1.2-.2.4 0 .7.1.2.3.3.6.3Z"
|
| 250 |
+
fill="#000" />
|
|
|
|
| 251 |
</svg>
|
| 252 |
<div class="flex text-sm text-gray-600">
|
| 253 |
<label
|
| 254 |
for="file-upload"
|
| 255 |
+
class="relative cursor-pointer bg-white rounded-md font-medium text-blue-950 hover:text-blue-700">
|
|
|
|
| 256 |
<span>Drag and drop your audio here</span>
|
| 257 |
<span class="block text-xs">or</span>
|
| 258 |
<span class="block text-xs">Click to upload</span>
|
|
|
|
| 263 |
name="file-upload"
|
| 264 |
type="file"
|
| 265 |
accept="audio/*"
|
| 266 |
+
class="sr-only" />
|
|
|
|
| 267 |
</div>
|
| 268 |
<audio
|
| 269 |
id="audio"
|
| 270 |
hidden
|
| 271 |
controls
|
| 272 |
+
class="w-full p-2 select-none"></audio>
|
|
|
|
| 273 |
</div>
|
| 274 |
</div>
|
| 275 |
<div>
|
|
|
|
| 277 |
<h3 class="font-medium">Examples:</h3>
|
| 278 |
<button
|
| 279 |
data-value="samples_jfk.wav"
|
| 280 |
+
class="text-gray-500 border border-gray-500 rounded-md p-2 underline hover:no-underline">
|
|
|
|
| 281 |
<span>jfk.wav</span>
|
| 282 |
<span class="text-xs block"> (352 kB)</span>
|
| 283 |
</button>
|
| 284 |
<button
|
| 285 |
data-value="samples_a13.wav"
|
| 286 |
+
class="text-gray-500 border border-gray-500 rounded-md p-2 underline hover:no-underline">
|
|
|
|
| 287 |
<span>a13.wav</span>
|
| 288 |
<span class="text-xs block"> (960 kB)</span>
|
| 289 |
</button>
|
| 290 |
<button
|
| 291 |
data-value="samples_mm0.wav"
|
| 292 |
+
class="text-gray-500 border border-gray-500 rounded-md p-2 underline hover:no-underline">
|
|
|
|
| 293 |
<span>mm0.wav</span>
|
| 294 |
<span class="text-xs block new"> (957 kB)</span>
|
| 295 |
</button>
|
| 296 |
<button
|
| 297 |
data-value="samples_gb0.wav"
|
| 298 |
+
class="text-gray-500 border border-gray-500 rounded-md p-2 underline hover:no-underline">
|
|
|
|
| 299 |
<span>gb0.wav </span>
|
| 300 |
<span class="text-xs block">(4.08 MB)</span>
|
| 301 |
</button>
|
| 302 |
<button
|
| 303 |
data-value="samples_gb1.wav"
|
| 304 |
+
class="text-gray-500 border border-gray-500 rounded-md p-2 underline hover:no-underline">
|
|
|
|
| 305 |
<span>gb1.wav </span>
|
| 306 |
<span class="text-xs block">(6.36 MB)</span>
|
| 307 |
</button>
|
| 308 |
<button
|
| 309 |
data-value="samples_hp0.wav"
|
| 310 |
+
class="text-gray-500 border border-gray-500 rounded-md p-2 underline hover:no-underline">
|
|
|
|
| 311 |
<span>hp0.wav </span>
|
| 312 |
<span class="text-xs block">(8.75 MB)</span>
|
| 313 |
</button>
|
|
|
|
| 318 |
<button
|
| 319 |
id="detect"
|
| 320 |
disabled
|
| 321 |
+
class="bg-gray-700 hover:bg-gray-800 text-white font-normal py-2 px-4 rounded disabled:bg-gray-300 disabled:cursor-not-allowed">
|
|
|
|
| 322 |
Transcribe Audio
|
| 323 |
</button>
|
| 324 |
</div>
|
| 325 |
<div>
|
| 326 |
<h3 class="font-medium">Transcription:</h3>
|
| 327 |
<div
|
| 328 |
+
class="min-h-[250px] bg-slate-100 text-gray-500 p-4 rounded-md flex flex-col gap-2">
|
|
|
|
| 329 |
<p hidden id="output-generation" class="grid-rows-2"></p>
|
| 330 |
<span id="output-status" class="m-auto font-light"
|
| 331 |
>No transcription results yet</span
|
whisperWorker.js
CHANGED
|
@@ -17,23 +17,46 @@ class Whisper {
|
|
| 17 |
static instance = {};
|
| 18 |
// Retrieve the Whisper model. When called for the first time,
|
| 19 |
// this will load the model and save it for future use.
|
| 20 |
-
static async getInstance(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
// load individual modelID only once
|
| 22 |
if (!this.instance[modelID]) {
|
| 23 |
await init();
|
| 24 |
|
| 25 |
self.postMessage({ status: "loading", message: "Loading Model" });
|
| 26 |
-
const [
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
this.instance[modelID] = new Decoder(
|
| 34 |
weightsArrayU8,
|
| 35 |
tokenizerArrayU8,
|
| 36 |
-
mel_filtersArrayU8
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
);
|
| 38 |
} else {
|
| 39 |
self.postMessage({ status: "loading", message: "Model Already Loaded" });
|
|
@@ -43,17 +66,37 @@ class Whisper {
|
|
| 43 |
}
|
| 44 |
|
| 45 |
self.addEventListener("message", async (event) => {
|
| 46 |
-
const {
|
| 47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
try {
|
| 49 |
self.postMessage({ status: "decoding", message: "Starting Decoder" });
|
| 50 |
-
|
| 51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
weightsURL,
|
| 53 |
modelID,
|
| 54 |
tokenizerURL,
|
| 55 |
-
mel_filtersURL
|
| 56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
|
| 58 |
self.postMessage({ status: "decoding", message: "Loading Audio" });
|
| 59 |
const audioArrayU8 = await fetchArrayBuffer(audioURL);
|
|
|
|
| 17 |
static instance = {};
|
| 18 |
// Retrieve the Whisper model. When called for the first time,
|
| 19 |
// this will load the model and save it for future use.
|
| 20 |
+
static async getInstance(params) {
|
| 21 |
+
const {
|
| 22 |
+
weightsURL,
|
| 23 |
+
modelID,
|
| 24 |
+
tokenizerURL,
|
| 25 |
+
mel_filtersURL,
|
| 26 |
+
configURL,
|
| 27 |
+
quantized,
|
| 28 |
+
is_multilingual,
|
| 29 |
+
timestamps,
|
| 30 |
+
task,
|
| 31 |
+
language,
|
| 32 |
+
} = params;
|
| 33 |
// load individual modelID only once
|
| 34 |
if (!this.instance[modelID]) {
|
| 35 |
await init();
|
| 36 |
|
| 37 |
self.postMessage({ status: "loading", message: "Loading Model" });
|
| 38 |
+
const [
|
| 39 |
+
weightsArrayU8,
|
| 40 |
+
tokenizerArrayU8,
|
| 41 |
+
mel_filtersArrayU8,
|
| 42 |
+
configArrayU8,
|
| 43 |
+
] = await Promise.all([
|
| 44 |
+
fetchArrayBuffer(weightsURL),
|
| 45 |
+
fetchArrayBuffer(tokenizerURL),
|
| 46 |
+
fetchArrayBuffer(mel_filtersURL),
|
| 47 |
+
fetchArrayBuffer(configURL),
|
| 48 |
+
]);
|
| 49 |
|
| 50 |
this.instance[modelID] = new Decoder(
|
| 51 |
weightsArrayU8,
|
| 52 |
tokenizerArrayU8,
|
| 53 |
+
mel_filtersArrayU8,
|
| 54 |
+
configArrayU8,
|
| 55 |
+
quantized,
|
| 56 |
+
is_multilingual,
|
| 57 |
+
timestamps,
|
| 58 |
+
task,
|
| 59 |
+
language
|
| 60 |
);
|
| 61 |
} else {
|
| 62 |
self.postMessage({ status: "loading", message: "Model Already Loaded" });
|
|
|
|
| 66 |
}
|
| 67 |
|
| 68 |
self.addEventListener("message", async (event) => {
|
| 69 |
+
const {
|
| 70 |
+
weightsURL,
|
| 71 |
+
modelID,
|
| 72 |
+
tokenizerURL,
|
| 73 |
+
configURL,
|
| 74 |
+
mel_filtersURL,
|
| 75 |
+
audioURL,
|
| 76 |
+
} = event.data;
|
| 77 |
try {
|
| 78 |
self.postMessage({ status: "decoding", message: "Starting Decoder" });
|
| 79 |
+
let quantized = false;
|
| 80 |
+
if (modelID.includes("quantized")) {
|
| 81 |
+
quantized = true;
|
| 82 |
+
}
|
| 83 |
+
let is_multilingual = false;
|
| 84 |
+
if (modelID.includes("multilingual")) {
|
| 85 |
+
is_multilingual = true;
|
| 86 |
+
}
|
| 87 |
+
let timestamps = true;
|
| 88 |
+
const decoder = await Whisper.getInstance({
|
| 89 |
weightsURL,
|
| 90 |
modelID,
|
| 91 |
tokenizerURL,
|
| 92 |
+
mel_filtersURL,
|
| 93 |
+
configURL,
|
| 94 |
+
quantized,
|
| 95 |
+
is_multilingual,
|
| 96 |
+
timestamps,
|
| 97 |
+
task: null,
|
| 98 |
+
language: null,
|
| 99 |
+
});
|
| 100 |
|
| 101 |
self.postMessage({ status: "decoding", message: "Loading Audio" });
|
| 102 |
const audioArrayU8 = await fetchArrayBuffer(audioURL);
|