Video-Translator-with-Voice-Cloning-and-Subtitles

Paused

App Files Community

BoldActionMan committed on Aug 27, 2024

Commit

9558dda

verified ·

1 Parent(s): f86b927

Update app.py

Browse files

Files changed (1) hide show

app.py +4 -5

app.py CHANGED Viewed

@@ -58,7 +58,6 @@ def process_video(video_file, language_choice):
     audio, _ = load_audio(reference_audio, sr=df_state.sr())
     enhanced = enhance(model, df_state, audio)
     save_audio(reference_audio, enhanced, df_state.sr())
-    reference_speaker = reference_audio  # This is the voice you want to clone
     src_path = os.path.join(output_dir, "tmp.wav")
@@ -67,7 +66,7 @@ def process_video(video_file, language_choice):
     # Transcribe the original audio with timestamps
     sttmodel = whisper.load_model("base")
-    sttresult = sttmodel.transcribe(reference_speaker, verbose=True)
     # Print the original transcription
     print(sttresult["text"])
@@ -116,9 +115,7 @@ def process_video(video_file, language_choice):
         print(f"Final video with subtitles saved to: {final_video_with_subs_path}")
         return final_video_with_subs_path, "Video language and language selection are the same, audio not changed."
-    else:
-        target_se, audio_name = se_extractor.get_se(reference_speaker, tone_color_converter, vad=False)
         # Choose the target language for translation
         language = 'EN_NEWEST'
         match language_choice[0:2]:
@@ -178,6 +175,8 @@ def process_video(video_file, language_choice):
                     batch_segment_files = [item for sublist in batch_segment_files for item in sublist]  # Flatten the list
                 for segment_file, start, end, translated_text in batch_segment_files:
                     # Run the tone color converter
                     encode_message = "@MyShell"
                     tone_color_converter.convert(

     audio, _ = load_audio(reference_audio, sr=df_state.sr())
     enhanced = enhance(model, df_state, audio)
     save_audio(reference_audio, enhanced, df_state.sr())
     src_path = os.path.join(output_dir, "tmp.wav")
     # Transcribe the original audio with timestamps
     sttmodel = whisper.load_model("base")
+    sttresult = sttmodel.transcribe(reference_audio, verbose=True)
     # Print the original transcription
     print(sttresult["text"])
         print(f"Final video with subtitles saved to: {final_video_with_subs_path}")
         return final_video_with_subs_path, "Video language and language selection are the same, audio not changed."
+    else:
         # Choose the target language for translation
         language = 'EN_NEWEST'
         match language_choice[0:2]:
                     batch_segment_files = [item for sublist in batch_segment_files for item in sublist]  # Flatten the list
                 for segment_file, start, end, translated_text in batch_segment_files:
+                    reference_speaker = reference_audio[start:end]  # This is the voice you want to clone
+                    target_se, audio_name = se_extractor.get_se(reference_speaker, tone_color_converter, vad=False)
                     # Run the tone color converter
                     encode_message = "@MyShell"
                     tone_color_converter.convert(