Update app.py
Browse files
app.py
CHANGED
|
@@ -58,7 +58,6 @@ def process_video(video_file, language_choice):
|
|
| 58 |
audio, _ = load_audio(reference_audio, sr=df_state.sr())
|
| 59 |
enhanced = enhance(model, df_state, audio)
|
| 60 |
save_audio(reference_audio, enhanced, df_state.sr())
|
| 61 |
-
reference_speaker = reference_audio # This is the voice you want to clone
|
| 62 |
|
| 63 |
src_path = os.path.join(output_dir, "tmp.wav")
|
| 64 |
|
|
@@ -67,7 +66,7 @@ def process_video(video_file, language_choice):
|
|
| 67 |
|
| 68 |
# Transcribe the original audio with timestamps
|
| 69 |
sttmodel = whisper.load_model("base")
|
| 70 |
-
sttresult = sttmodel.transcribe(
|
| 71 |
|
| 72 |
# Print the original transcription
|
| 73 |
print(sttresult["text"])
|
|
@@ -116,9 +115,7 @@ def process_video(video_file, language_choice):
|
|
| 116 |
|
| 117 |
print(f"Final video with subtitles saved to: {final_video_with_subs_path}")
|
| 118 |
return final_video_with_subs_path, "Video language and language selection are the same, audio not changed."
|
| 119 |
-
else:
|
| 120 |
-
target_se, audio_name = se_extractor.get_se(reference_speaker, tone_color_converter, vad=False)
|
| 121 |
-
|
| 122 |
# Choose the target language for translation
|
| 123 |
language = 'EN_NEWEST'
|
| 124 |
match language_choice[0:2]:
|
|
@@ -178,6 +175,8 @@ def process_video(video_file, language_choice):
|
|
| 178 |
batch_segment_files = [item for sublist in batch_segment_files for item in sublist] # Flatten the list
|
| 179 |
|
| 180 |
for segment_file, start, end, translated_text in batch_segment_files:
|
|
|
|
|
|
|
| 181 |
# Run the tone color converter
|
| 182 |
encode_message = "@MyShell"
|
| 183 |
tone_color_converter.convert(
|
|
|
|
| 58 |
audio, _ = load_audio(reference_audio, sr=df_state.sr())
|
| 59 |
enhanced = enhance(model, df_state, audio)
|
| 60 |
save_audio(reference_audio, enhanced, df_state.sr())
|
|
|
|
| 61 |
|
| 62 |
src_path = os.path.join(output_dir, "tmp.wav")
|
| 63 |
|
|
|
|
| 66 |
|
| 67 |
# Transcribe the original audio with timestamps
|
| 68 |
sttmodel = whisper.load_model("base")
|
| 69 |
+
sttresult = sttmodel.transcribe(reference_audio, verbose=True)
|
| 70 |
|
| 71 |
# Print the original transcription
|
| 72 |
print(sttresult["text"])
|
|
|
|
| 115 |
|
| 116 |
print(f"Final video with subtitles saved to: {final_video_with_subs_path}")
|
| 117 |
return final_video_with_subs_path, "Video language and language selection are the same, audio not changed."
|
| 118 |
+
else:
|
|
|
|
|
|
|
| 119 |
# Choose the target language for translation
|
| 120 |
language = 'EN_NEWEST'
|
| 121 |
match language_choice[0:2]:
|
|
|
|
| 175 |
batch_segment_files = [item for sublist in batch_segment_files for item in sublist] # Flatten the list
|
| 176 |
|
| 177 |
for segment_file, start, end, translated_text in batch_segment_files:
|
| 178 |
+
reference_speaker = reference_audio[start:end] # This is the voice you want to clone
|
| 179 |
+
target_se, audio_name = se_extractor.get_se(reference_speaker, tone_color_converter, vad=False)
|
| 180 |
# Run the tone color converter
|
| 181 |
encode_message = "@MyShell"
|
| 182 |
tone_color_converter.convert(
|