# aiapp/processing.py — created by aiconq (commit 4a76fcb)
import whisper
from transformers import MarianMTModel, MarianTokenizer
from gtts import gTTS
import tempfile
import os
import certifi
# Point Python's SSL stack at certifi's CA bundle — presumably to fix HTTPS
# certificate verification for model/voice downloads on systems with a broken
# or missing system cert store (NOTE(review): confirm this is still needed).
os.environ["SSL_CERT_FILE"] = certifi.where()
# Translation model checkpoints, keyed by target language code.
_MODEL_MAP = {
    'hi': "Helsinki-NLP/opus-mt-en-hi",
    'es': "Helsinki-NLP/opus-mt-en-es",
    'fr': "Helsinki-NLP/opus-mt-en-fr",
    'bn': "shhossain/opus-mt-en-to-bn",
}

# Lazily-populated caches so the heavyweight models are loaded once per
# process instead of on every call (the original reloaded them each time).
_whisper_model = None
_translators = {}


def _get_whisper_model():
    """Load (once) and return the Whisper "tiny" ASR model."""
    global _whisper_model
    if _whisper_model is None:
        _whisper_model = whisper.load_model("tiny")
    return _whisper_model


def _get_translator(target_language):
    """Load (once per language) and return the (model, tokenizer) pair."""
    if target_language not in _translators:
        checkpoint = _MODEL_MAP[target_language]
        _translators[target_language] = (
            MarianMTModel.from_pretrained(checkpoint),
            MarianTokenizer.from_pretrained(checkpoint),
        )
    return _translators[target_language]


def transcribe_translate_speak(audio_path, target_language):
    """Transcribe an audio file, translate the text, and synthesize speech.

    Parameters
    ----------
    audio_path : str
        Path to the input audio file (any format Whisper can decode).
    target_language : str
        Target language code; one of 'hi', 'es', 'fr', 'bn'.

    Returns
    -------
    tuple[str, str, str]
        (transcription, translated text, path to the generated MP3 file).
        The caller is responsible for deleting the MP3 when done.

    Raises
    ------
    ValueError
        If ``target_language`` is not a supported language code.
    """
    # Validate up front, before any expensive model loading or transcription.
    if target_language not in _MODEL_MAP:
        raise ValueError(f"Unsupported language: {target_language}")

    # Transcribe.
    result = _get_whisper_model().transcribe(audio_path)
    transcription = result["text"]

    # Translate.
    trans_model, tokenizer = _get_translator(target_language)
    inputs = tokenizer(transcription, return_tensors="pt", padding=True, truncation=True)
    outputs = trans_model.generate(**inputs)
    translated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # TTS: reserve a temp path via a context manager so the handle is closed
    # (the original leaked an open file descriptor); delete=False keeps the
    # file on disk for gTTS to overwrite and for the caller to consume.
    tts = gTTS(translated_text, lang=target_language)
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        tts_path = tmp.name
    tts.save(tts_path)
    return transcription, translated_text, tts_path