Gijs Wijngaard committed on
Commit
57cdedc
·
1 Parent(s): bf043e5
Files changed (1) hide show
  1. app.py +3 -4
app.py CHANGED
@@ -24,7 +24,7 @@ def _load_audio_mono_16k(file_path: str) -> torch.Tensor:
24
  return wav
25
 
26
  @spaces.GPU
27
- def process_audio(audio_path: str, instruction: str, max_tokens: int = 200) -> str:
28
  if not audio_path:
29
  return "Please upload an audio file."
30
 
@@ -47,7 +47,7 @@ def process_audio(audio_path: str, instruction: str, max_tokens: int = 200) -> s
47
  model_inputs = processor(prompt, wav, device=device, return_tensors="pt").to(device)
48
  outputs = model.generate(
49
  **model_inputs,
50
- max_new_tokens=int(max_tokens),
51
  do_sample=False,
52
  num_beams=1,
53
  )
@@ -69,12 +69,11 @@ with gr.Blocks(title="Granite Speech Demo") as demo:
69
  label="Instruction",
70
  value="can you transcribe the speech into a written format?",
71
  )
72
- max_tokens = gr.Slider(50, 1000, value=200, step=50, label="Max Output Tokens")
73
  submit_btn = gr.Button("Transcribe", variant="primary")
74
  with gr.Column():
75
  output_text = gr.Textbox(label="Output", lines=12)
76
 
77
- submit_btn.click(process_audio, [audio_input, instruction, max_tokens], output_text)
78
 
79
 
80
  if __name__ == "__main__":
 
24
  return wav
25
 
26
  @spaces.GPU
27
+ def process_audio(audio_path: str, instruction: str) -> str:
28
  if not audio_path:
29
  return "Please upload an audio file."
30
 
 
47
  model_inputs = processor(prompt, wav, device=device, return_tensors="pt").to(device)
48
  outputs = model.generate(
49
  **model_inputs,
50
+ max_new_tokens=4096,
51
  do_sample=False,
52
  num_beams=1,
53
  )
 
69
  label="Instruction",
70
  value="can you transcribe the speech into a written format?",
71
  )
 
72
  submit_btn = gr.Button("Transcribe", variant="primary")
73
  with gr.Column():
74
  output_text = gr.Textbox(label="Output", lines=12)
75
 
76
+ submit_btn.click(process_audio, [audio_input, instruction], output_text)
77
 
78
 
79
  if __name__ == "__main__":