Spaces:
Running
Running
| #%% | |
| import azure.cognitiveservices.speech as speechsdk | |
| import re | |
| import os | |
| import hashlib | |
| import random | |
| from dotenv import load_dotenv | |
# Load SPEECH_KEY / SPEECH_REGION from a local .env file into the environment.
load_dotenv(".env")

# Report only whether the key is configured: echoing the secret itself would
# leak the credential into stdout and any captured logs.
print(f"SPEECH_KEY set: {bool(os.environ.get('SPEECH_KEY'))}")
print(os.environ.get('SPEECH_REGION'))

# Shared Azure Speech configuration used by get_audio() below.
speech_config = speechsdk.SpeechConfig(
    subscription=os.environ.get('SPEECH_KEY'),
    region=os.environ.get('SPEECH_REGION'),
)
def do_cleanup(dir='wavs', num_files=100):
    """Trim *dir* so that at most *num_files* regular files remain.

    The oldest files (by modification time) are deleted first, so the most
    recently written files are the ones that survive.

    Args:
        dir: Directory to prune. (The name shadows the builtin but is kept
            because callers may pass it by keyword.)
        num_files: Maximum number of files to keep.
    """
    candidates = [os.path.join(dir, name) for name in os.listdir(dir)]
    # Only regular files are counted and removed; os.remove() cannot delete
    # a sub-directory and would raise on one.
    files = [path for path in candidates if os.path.isfile(path)]

    excess = len(files) - num_files
    if excess <= 0:
        return

    # os.listdir() returns entries in arbitrary order, so sort explicitly:
    # oldest first, which makes the slice below drop the oldest files.
    files.sort(key=os.path.getmtime)
    for path in files[:excess]:
        os.remove(path)
def add_sukun(text):
    """Append a sukun to every word of *text* that ends in a bare Arabic letter.

    The text is split on whitespace. For each word, one trailing punctuation
    character (if present) is set aside; when the character before it is an
    Arabic letter or a shadda, a sukun is inserted after that character.
    Words ending in anything else are left untouched. The words are then
    re-joined with single spaces, so runs of whitespace collapse.

    Args:
        text: Input string.

    Returns:
        The processed string; an empty or all-whitespace input yields ''.
    """
    sukun = 'ْ'
    shadda = 'ّ'
    # Characters that may receive a sukun: the Arabic letters plus shadda.
    eligible = 'اأإآةبتثجحخدذرزسشصضطظعغفقكلمنهوي' + shadda
    marks = '.,;!?،؛؟'

    def process_word(token):
        # Peel off a single trailing punctuation mark, if there is one.
        tail = token[-1] if token[-1] in marks else ''
        stem = token[:-1] if tail else token
        # The sukun goes right after the final letter, before the punctuation.
        if stem and stem[-1] in eligible:
            return stem + sukun + tail
        return token

    tokens = re.findall(r'\S+|[.,;!?،؛؟]', text)
    return ' '.join(map(process_word, tokens))
def get_ssml(text, voice='de-DE-SeraphinaMultilingualNeural'):
    """Build an SSML document that speaks *text* with *voice*, tagged as ar-SA.

    Both *text* and *voice* are XML-escaped, so characters such as ``&``,
    ``<`` and ``>`` in the input cannot produce malformed SSML. As a
    consequence *text* is treated as plain text: any SSML markup embedded in
    it is spoken literally rather than interpreted.

    Args:
        text: Plain text to be spoken.
        voice: Value for the ``name`` attribute of the ``<voice>`` element.

    Returns:
        The SSML document as a single-line string.
    """
    # Function-scope import keeps this block self-contained.
    from xml.sax.saxutils import escape

    safe_text = escape(text)
    # The voice lands inside a double-quoted attribute, so quotes need
    # escaping in addition to &, < and >.
    safe_voice = escape(voice, {'"': '&quot;'})
    return (
        '<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="ar-SA">'
        f'<voice name="{safe_voice}">'
        f'<lang xml:lang="ar-SA">{safe_text}</lang>'
        '</voice></speak>'
    )
def get_audio(input_text, voice='de-DE-FlorianMultilingualNeural', use_ssml=True):
    """Synthesize *input_text* to a WAV file under ``wavs/`` and return its path.

    The text is first passed through add_sukun(). Results are cached on disk:
    the file name is an MD5 digest of the synthesis inputs, and an existing
    file is returned without calling the speech service again.

    Args:
        input_text: Text to speak.
        voice: Voice name placed in the SSML. It is only applied when
            *use_ssml* is true; the plain-text path uses whatever voice the
            shared ``speech_config`` is set to.
        use_ssml: If true, synthesize from the SSML built by get_ssml();
            otherwise send the raw text.

    Returns:
        Path of the WAV file, ``wavs/<digest>.wav``. If synthesis was
        cancelled the partial file is removed, so the returned path may not
        exist.
    """
    input_text = add_sukun(input_text)

    # The cache key must cover everything that changes the audio, otherwise
    # a request for a different voice would be answered with a stale file.
    key_parts = ("ssml", voice, input_text) if use_ssml else ("text", input_text)
    digest = hashlib.md5("|".join(key_parts).encode()).hexdigest()
    wav_path = f"wavs/{digest}.wav"
    if os.path.exists(wav_path):
        return wav_path

    os.makedirs("wavs", exist_ok=True)

    # Roughly once every 50 synthesis calls, clean up the wavs folder. This
    # runs before the new file is written so cleanup can never delete the
    # file this call is about to return.
    if random.randint(1, 50) == 1:
        do_cleanup()

    audio_config = speechsdk.audio.AudioOutputConfig(filename=wav_path)
    speech_config.set_speech_synthesis_output_format(
        speechsdk.SpeechSynthesisOutputFormat.Riff24Khz16BitMonoPcm
    )
    speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config,
                                                     audio_config=audio_config)

    if use_ssml:
        ssml = get_ssml(input_text, voice=voice)
        result = speech_synthesizer.speak_ssml_async(ssml).get()
    else:
        result = speech_synthesizer.speak_text_async(input_text).get()

    if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
        print("Speech synthesized for text [{}]".format(input_text))
    elif result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = result.cancellation_details
        print("Speech synthesis canceled: {}".format(cancellation_details.reason))
        if cancellation_details.reason == speechsdk.CancellationReason.Error:
            print("Error details: {}".format(cancellation_details.error_details))
        # Do not let a failed synthesis poison the cache: drop the partial
        # file so the next call with the same inputs retries.
        try:
            os.remove(wav_path)
        except OSError:
            # Best effort: the file may be missing or still held open.
            pass

    return wav_path