aiconq committed on
Commit
848785c
·
verified ·
1 Parent(s): b29f1c8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -30
app.py CHANGED
@@ -1,47 +1,64 @@
1
- import sys
2
  import whisper
3
  from transformers import MarianMTModel, MarianTokenizer
4
  from gtts import gTTS
 
5
  import os
6
-
7
  import certifi
 
8
 
9
  os.environ["SSL_CERT_FILE"] = certifi.where()
10
 
11
def process_audio(input_path, output_path, target_language):
    """Transcribe an audio file, translate the text, and save it as speech.

    The pipeline has three stages: Whisper ("tiny") speech-to-text, a
    MarianMT translation model chosen by ``target_language``, and gTTS
    text-to-speech written to ``output_path``.

    Args:
        input_path: Path of the audio file to transcribe.
        output_path: Path the synthesised speech is saved to.
        target_language: Language code of the translation; one of ``'hi'``,
            ``'es'``, ``'fr'`` or ``'bn'``. The same code is passed to gTTS.

    Raises:
        ValueError: If ``target_language`` is not one of the supported codes.
            This is raised before any model is loaded.
    """
    translation_models = {
        'hi': "Helsinki-NLP/opus-mt-en-hi",   # Hindi
        'es': "Helsinki-NLP/opus-mt-en-es",   # Spanish
        'fr': "Helsinki-NLP/opus-mt-en-fr",   # French
        'bn': "shhossain/opus-mt-en-to-bn",   # Bengali
    }

    # Fail fast: reject an unsupported language before paying for the
    # Whisper model load and the transcription.
    if target_language not in translation_models:
        raise ValueError(f"Unsupported target language: {target_language}")
    model_name = translation_models[target_language]

    # Model 1: Speech-to-Text using Whisper
    speech_model = whisper.load_model("tiny")
    text = speech_model.transcribe(input_path)["text"]

    # Model 2: Translation
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    translation_model = MarianMTModel.from_pretrained(model_name)
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    outputs = translation_model.generate(**inputs)
    translation = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Model 3: Text-to-Speech using gTTS, saved as an audio file
    tts = gTTS(translation, lang=target_language)
    tts.save(output_path)


if __name__ == "__main__":
    # Expected arguments: input audio path, output audio path, and the
    # target language passed from the backend.
    if len(sys.argv) < 4:
        sys.exit("Usage: app.py INPUT_AUDIO OUTPUT_AUDIO TARGET_LANGUAGE")
    input_file, output_file, target_language = sys.argv[1:4]
    process_audio(input_file, output_file, target_language)
 
 
1
  import whisper
2
  from transformers import MarianMTModel, MarianTokenizer
3
  from gtts import gTTS
4
+ import tempfile
5
  import os
 
6
  import certifi
7
+ import gradio as gr
8
 
9
  os.environ["SSL_CERT_FILE"] = certifi.where()
10
 
11
def process_audio(audio_file, target_language):
    """Transcribe English audio, translate it, and synthesise the translation.

    Args:
        audio_file: The uploaded audio, either as a filesystem path or as a
            binary file-like object exposing ``read()``. ``None`` (nothing
            uploaded) is reported as a message instead of raising.
        target_language: Language code of the translation; one of ``'hi'``,
            ``'es'``, ``'fr'`` or ``'bn'``. The same code is passed to gTTS.

    Returns:
        A ``(text, audio_path)`` tuple. On success ``text`` is the translated
        text and ``audio_path`` is the path of a newly created ``.mp3`` file
        (the caller owns it; it is not deleted here). When the request cannot
        be processed, ``text`` is a human-readable message and ``audio_path``
        is ``None``.
    """
    lang_map = {
        'hi': "Helsinki-NLP/opus-mt-en-hi",
        'es': "Helsinki-NLP/opus-mt-en-es",
        'fr': "Helsinki-NLP/opus-mt-en-fr",
        'bn': "shhossain/opus-mt-en-to-bn",
    }

    # Validate the cheap inputs first so a bad request never triggers a
    # model load or leaves a temporary file behind.
    if target_language not in lang_map:
        return "Unsupported language selected", None
    if audio_file is None:
        return "No audio file provided", None

    temp_copy = None
    try:
        if isinstance(audio_file, (str, os.PathLike)):
            # A path can be handed to Whisper as-is; no copy is needed.
            audio_path = os.fspath(audio_file)
        else:
            # File-like input: copy it to a named temp file for Whisper.
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
                temp_copy = tmp.name
                tmp.write(audio_file.read())
            audio_path = temp_copy

        # 1. Transcribe with Whisper
        whisper_model = whisper.load_model("tiny")
        text = whisper_model.transcribe(audio_path)["text"]
    finally:
        # The temp copy was created with delete=False, so remove it here.
        if temp_copy is not None:
            os.remove(temp_copy)

    # Nothing to translate or speak; gTTS is expected to reject empty text.
    if not text.strip():
        return "No speech detected in the audio", None

    # 2. Translate text
    model_name = lang_map[target_language]
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    translation_model = MarianMTModel.from_pretrained(model_name)

    # NOTE(review): truncation=True presumably drops text beyond the model's
    # input limit, so long clips may be translated only in part — confirm.
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    outputs = translation_model.generate(**inputs)
    translated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # 3. Convert to speech with gTTS
    tts = gTTS(translated_text, lang=target_language)
    # Reserve the output name and close the handle explicitly rather than
    # relying on garbage collection; delete=False keeps the file for the caller.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as out:
        output_path = out.name
    tts.save(output_path)

    return translated_text, output_path
47
+
48
# Gradio front end: an audio upload and a language selector go in; the
# translated text and the synthesised speech come out.
# NOTE(review): `source=` and `type="file"` look like Gradio 3.x-era
# arguments — confirm they match the Gradio version this app is pinned to.
_audio_input = gr.Audio(source="upload", type="file", label="Upload English Audio")
_language_input = gr.Dropdown(["hi", "es", "fr", "bn"], label="Target Language")

_text_output = gr.Textbox(label="Translated Text")
_speech_output = gr.Audio(label="Output Audio")

iface = gr.Interface(
    fn=process_audio,
    inputs=[_audio_input, _language_input],
    outputs=[_text_output, _speech_output],
    title="Audio Translator",
    description="Upload an English audio clip to transcribe it, translate it to another language, and hear it spoken.",
)

# Start the web UI only when executed as a script, not on import.
if __name__ == "__main__":
    iface.launch()