tenet committed on
Commit
72165a4
·
verified ·
1 Parent(s): 4dcf554

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -6
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import gradio as gr
2
  from transformers import pipeline
3
-
4
  # ----------------
5
  # TEXT MODELS
6
  # ----------------
@@ -42,9 +42,11 @@ segmentation_pipeline = pipeline(
42
 
43
  def segment_image(image):
44
  results = segmentation_pipeline(image)
45
- # Gradio AnnotatedImage expects (image, annotations)
46
- ann = [(image, r["mask"]) for r in results]
47
- return (image, ann)
 
 
48
 
49
 
50
  # ----------------
@@ -52,10 +54,13 @@ def segment_image(image):
52
  # ----------------
53
  asr_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
54
 
55
- def transcribe(audio):
56
- return asr_pipeline(audio)["text"]
57
 
58
 
 
 
 
 
 
59
  # ----------------
60
  # GRADIO APP
61
  # ----------------
 
1
  import gradio as gr
2
  from transformers import pipeline
3
+ import librosa
4
  # ----------------
5
  # TEXT MODELS
6
  # ----------------
 
42
 
43
  def segment_image(image):
44
  results = segmentation_pipeline(image)
45
+ # Combine masks into a single image with labels
46
+ annotated = {}
47
+ for r in results:
48
+ annotated[r["label"]] = r["mask"] # label → mask
49
+ return (image, annotated)
50
 
51
 
52
  # ----------------
 
54
  # ----------------
55
  asr_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
56
 
 
 
57
 
58
 
59
def transcribe(audio):
    """Transcribe the start of an audio recording with the ASR pipeline.

    Args:
        audio: Audio input accepted by ``librosa.load`` (presumably a file
            path supplied by the Gradio audio component; verify against the
            UI definition). ``None`` means no recording was provided.

    Returns:
        The ``"text"`` field of the pipeline output, or an empty string when
        ``audio`` is ``None``.
    """
    if audio is None:
        # No recording submitted: avoid passing None to librosa.load.
        return ""

    # Resample to 16 kHz and keep at most the first 30 seconds of audio.
    speech, sr = librosa.load(audio, sr=16000, duration=30)
    result = asr_pipeline(
        {"array": speech, "sampling_rate": sr},
        return_timestamps=True,
    )
    return result["text"]
63
+
64
  # ----------------
65
  # GRADIO APP
66
  # ----------------