ACloudCenter committed
Commit ec2f83b · verified · 1 Parent(s): 1f6640c

Update app.py

Files changed (1): app.py +10 -2
app.py CHANGED

@@ -15,6 +15,11 @@ torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
 model = MoonshineForConditionalGeneration.from_pretrained('UsefulSensors/moonshine-tiny').to(device).to(torch_dtype)
 processor = AutoProcessor.from_pretrained('UsefulSensors/moonshine-tiny')
 
+# --- Longer token limits (simple) ---
+TOKENS_PER_SEC = 12.0        # was ~7.0 before
+MIN_NEW_TOKENS = 48          # was 24; gives short clips more room
+MAX_NEW_TOKENS_CAP = 3200    # generous cap to avoid runaway
+
 # Define transcription function using HF Zero GPU
 @spaces.GPU
 def transcribe_audio(audio_file):
@@ -38,9 +43,12 @@ def transcribe_audio(audio_file):
         return_tensors="pt"
     ).to(device, torch_dtype)
 
-    # Duration-based max_new_tokens calculation
+    # Duration-based max_new_tokens calculation (longer limits)
     duration_sec = len(audio_array) / float(target_sr)
-    max_new_tokens = max(24, int(math.ceil(duration_sec * 7.0)))
+    max_new_tokens = min(
+        MAX_NEW_TOKENS_CAP,
+        max(MIN_NEW_TOKENS, int(math.ceil(duration_sec * TOKENS_PER_SEC)))
+    )
 
     # Generate transcription with adjusted max_new_tokens
     generated_ids = model.generate(**inputs, do_sample=False, max_new_tokens=max_new_tokens)
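
For reference, a minimal standalone sketch of the clamped token-budget logic this commit introduces. The helper name estimate_max_new_tokens and the 16 kHz sample rate used in the sanity checks are illustrative assumptions, not part of the commit itself:

import math

# Constants as introduced by this commit
TOKENS_PER_SEC = 12.0        # token budget per second of audio
MIN_NEW_TOKENS = 48          # floor so short clips still get room
MAX_NEW_TOKENS_CAP = 3200    # ceiling to avoid runaway generation

def estimate_max_new_tokens(num_samples: int, sample_rate: int) -> int:
    """Clamp a duration-proportional token budget to [MIN_NEW_TOKENS, MAX_NEW_TOKENS_CAP]."""
    duration_sec = num_samples / float(sample_rate)
    return min(MAX_NEW_TOKENS_CAP,
               max(MIN_NEW_TOKENS, int(math.ceil(duration_sec * TOKENS_PER_SEC))))

# Sanity checks (assuming 16 kHz audio):
assert estimate_max_new_tokens(2 * 16000, 16000) == 48      # 2 s: ceil(2 * 12) = 24 -> floor 48
assert estimate_max_new_tokens(10 * 16000, 16000) == 120    # 10 s: ceil(10 * 12) = 120
assert estimate_max_new_tokens(600 * 16000, 16000) == 3200  # 10 min: 7200 -> capped at 3200

The floor keeps clips shorter than 4 s (48 tokens at 12 tokens per second) from being truncated, while the cap bounds generation time on very long uploads.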