Update README.md
Browse files
README.md
CHANGED
|
@@ -208,19 +208,17 @@ The "<|en|>" token is used to specify that the speech is in english and should b
|
|
| 208 |
>>> processor = WhisperProcessor.from_pretrained("openai/whisper-large")
|
| 209 |
>>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large")
|
| 210 |
|
| 211 |
-
>>> decoder_input_ids = processor.tokenizer.encode("<|startoftranscript|><|en|><|transcribe|><|notimestamps|>", return_tensors="pt")
|
| 212 |
-
|
| 213 |
>>> # load dummy dataset and read soundfiles
|
| 214 |
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
| 215 |
|
| 216 |
>>> # tokenize
|
| 217 |
>>> input_features = processor(ds[0]["audio"]["array"], return_tensors="pt").input_features
|
| 218 |
>>> # retrieve logits
|
| 219 |
-
>>> logits = model(input_features, decoder_input_ids=decoder_input_ids).logits
|
| 220 |
>>> # take argmax and decode
|
| 221 |
>>> predicted_ids = torch.argmax(logits, dim=-1)
|
| 222 |
>>> transcription = processor.batch_decode(predicted_ids)
|
| 223 |
-
['<|en|><|transcribe|><|notimestamps|> Mr']
|
| 224 |
```
|
| 225 |
|
| 226 |
### French to French
|
|
|
|
| 208 |
>>> processor = WhisperProcessor.from_pretrained("openai/whisper-large")
|
| 209 |
>>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large")
|
| 210 |
|
|
|
|
|
|
|
| 211 |
>>> # load dummy dataset and read soundfiles
|
| 212 |
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
| 213 |
|
| 214 |
>>> # tokenize
|
| 215 |
>>> input_features = processor(ds[0]["audio"]["array"], return_tensors="pt").input_features
|
| 216 |
>>> # retrieve logits
|
| 217 |
+
>>> logits = model(input_features).logits
|
| 218 |
>>> # take argmax and decode
|
| 219 |
>>> predicted_ids = torch.argmax(logits, dim=-1)
|
| 220 |
>>> transcription = processor.batch_decode(predicted_ids)
|
| 221 |
+
['<|startoftranscript|><|en|><|notimestamps|> Mr']
|
| 222 |
```
|
| 223 |
|
| 224 |
### French to French
|