Commit
·
19ae156
1
Parent(s):
946a55b
(wip)update html
Browse files
- models.py +0 -8
- templates/arena.html +8 -25
- tts.py +26 -4
models.py
CHANGED
|
@@ -433,14 +433,6 @@ def insert_initial_models():
|
|
| 433 |
# is_active=False, # ran out of credits
|
| 434 |
# model_url="https://cartesia.ai/",
|
| 435 |
# ),
|
| 436 |
-
# Model(
|
| 437 |
-
# id="spark-tts",
|
| 438 |
-
# name="Spark TTS",
|
| 439 |
-
# model_type=ModelType.TTS,
|
| 440 |
-
# is_open=False,
|
| 441 |
-
# is_active=False, # API stopped working
|
| 442 |
-
# model_url="https://github.com/SparkAudio/Spark-TTS",
|
| 443 |
-
# ),
|
| 444 |
Model(
|
| 445 |
id="spark-tts",
|
| 446 |
name="Spark TTS",
|
|
|
|
| 433 |
# is_active=False, # ran out of credits
|
| 434 |
# model_url="https://cartesia.ai/",
|
| 435 |
# ),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 436 |
Model(
|
| 437 |
id="spark-tts",
|
| 438 |
name="Spark TTS",
|
templates/arena.html
CHANGED
|
@@ -13,7 +13,7 @@
|
|
| 13 |
<div id="tts-tab" class="tab-content active">
|
| 14 |
<form class="input-container">
|
| 15 |
<div class="input-group">
|
| 16 |
-
<label for="voice-file"
|
| 17 |
<input type="file" id="voice-file" accept="audio/*">
|
| 18 |
<audio id="voice-preview" controls style="display:none;"></audio>
|
| 19 |
</div>
|
|
@@ -992,7 +992,7 @@
|
|
| 992 |
<script src="{{ url_for('static', filename='js/waveplayer.js') }}"></script>
|
| 993 |
<script>
|
| 994 |
document.addEventListener('DOMContentLoaded', function() {
|
| 995 |
-
//
|
| 996 |
const voiceFileInput = document.getElementById('voice-file');
|
| 997 |
const voicePreview = document.getElementById('voice-preview');
|
| 998 |
if (voiceFileInput && voicePreview) {
|
|
@@ -1036,7 +1036,7 @@
|
|
| 1036 |
let modelNames = { a: '', b: '' };
|
| 1037 |
let wavePlayers = { a: null, b: null };
|
| 1038 |
let cachedSentences = []; // To store sentences available in cache
|
| 1039 |
-
let hasVoted = false; //
|
| 1040 |
|
| 1041 |
// Initialize WavePlayers with mobile settings
|
| 1042 |
wavePlayerContainers.forEach(container => {
|
|
@@ -2039,22 +2039,6 @@
|
|
| 2039 |
{% block scripts %}
|
| 2040 |
{{ super() }}
|
| 2041 |
<script>
|
| 2042 |
-
// 1. 参考音色试听功能
|
| 2043 |
-
const voiceFileInput = document.getElementById('voice-file');
|
| 2044 |
-
const voicePreview = document.getElementById('voice-preview');
|
| 2045 |
-
voiceFileInput.addEventListener('change', function() {
|
| 2046 |
-
const file = this.files[0];
|
| 2047 |
-
if (file) {
|
| 2048 |
-
const url = URL.createObjectURL(file);
|
| 2049 |
-
voicePreview.src = url;
|
| 2050 |
-
voicePreview.style.display = 'inline-block';
|
| 2051 |
-
voicePreview.load();
|
| 2052 |
-
} else {
|
| 2053 |
-
voicePreview.src = '';
|
| 2054 |
-
voicePreview.style.display = 'none';
|
| 2055 |
-
}
|
| 2056 |
-
});
|
| 2057 |
-
|
| 2058 |
// 2. 阻止输入框Enter触发合成,只允许点击按钮合成
|
| 2059 |
const ttsForm = document.querySelector('#tts-tab form.input-container');
|
| 2060 |
const textInput = ttsForm.querySelector('.text-input');
|
|
@@ -2062,16 +2046,15 @@ const synthBtn = ttsForm.querySelector('.synth-btn');
|
|
| 2062 |
|
| 2063 |
textInput.addEventListener('keydown', function(e) {
|
| 2064 |
if (e.key === 'Enter') {
|
| 2065 |
-
e.preventDefault(); //
|
| 2066 |
}
|
| 2067 |
});
|
| 2068 |
-
//
|
| 2069 |
ttsForm.addEventListener('submit', function(e) {
|
| 2070 |
-
e.preventDefault(); //
|
| 2071 |
-
//
|
| 2072 |
if (document.activeElement === synthBtn || e.submitter === synthBtn) {
|
| 2073 |
-
//
|
| 2074 |
-
// 例如:triggerSynthesize();
|
| 2075 |
if (typeof window.triggerSynthesize === 'function') {
|
| 2076 |
window.triggerSynthesize();
|
| 2077 |
}
|
|
|
|
| 13 |
<div id="tts-tab" class="tab-content active">
|
| 14 |
<form class="input-container">
|
| 15 |
<div class="input-group">
|
| 16 |
+
<label for="voice-file">Upload reference voice:</label>
|
| 17 |
<input type="file" id="voice-file" accept="audio/*">
|
| 18 |
<audio id="voice-preview" controls style="display:none;"></audio>
|
| 19 |
</div>
|
|
|
|
| 992 |
<script src="{{ url_for('static', filename='js/waveplayer.js') }}"></script>
|
| 993 |
<script>
|
| 994 |
document.addEventListener('DOMContentLoaded', function() {
|
| 995 |
+
// Reference voice preview function
|
| 996 |
const voiceFileInput = document.getElementById('voice-file');
|
| 997 |
const voicePreview = document.getElementById('voice-preview');
|
| 998 |
if (voiceFileInput && voicePreview) {
|
|
|
|
| 1036 |
let modelNames = { a: '', b: '' };
|
| 1037 |
let wavePlayers = { a: null, b: null };
|
| 1038 |
let cachedSentences = []; // To store sentences available in cache
|
| 1039 |
+
let hasVoted = false; // Prevent duplicate voting
|
| 1040 |
|
| 1041 |
// Initialize WavePlayers with mobile settings
|
| 1042 |
wavePlayerContainers.forEach(container => {
|
|
|
|
| 2039 |
{% block scripts %}
|
| 2040 |
{{ super() }}
|
| 2041 |
<script>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2042 |
// 2. 阻止输入框Enter触发合成,只允许点击按钮合成
|
| 2043 |
const ttsForm = document.querySelector('#tts-tab form.input-container');
|
| 2044 |
const textInput = ttsForm.querySelector('.text-input');
|
|
|
|
| 2046 |
|
| 2047 |
textInput.addEventListener('keydown', function(e) {
|
| 2048 |
if (e.key === 'Enter') {
|
| 2049 |
+
e.preventDefault(); // Prevent Enter submit
|
| 2050 |
}
|
| 2051 |
});
|
| 2052 |
+
// Optional: prevent form Enter auto submit
|
| 2053 |
ttsForm.addEventListener('submit', function(e) {
|
| 2054 |
+
e.preventDefault(); // Prevent default submit
|
| 2055 |
+
// Only trigger synth when clicking the synth button
|
| 2056 |
if (document.activeElement === synthBtn || e.submitter === synthBtn) {
|
| 2057 |
+
// Call original synth logic if exists
|
|
|
|
| 2058 |
if (typeof window.triggerSynthesize === 'function') {
|
| 2059 |
window.triggerSynthesize();
|
| 2060 |
}
|
tts.py
CHANGED
|
@@ -51,10 +51,10 @@ model_mapping = {
|
|
| 51 |
# "provider": "styletts",
|
| 52 |
# "model": "styletts2",
|
| 53 |
# },
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
# "papla-p1": {
|
| 59 |
# "provider": "papla",
|
| 60 |
# "model": "papla_p1",
|
|
@@ -225,6 +225,26 @@ def predict_spark_tts(text, reference_audio_path=None):
|
|
| 225 |
return result
|
| 226 |
|
| 227 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 228 |
def predict_tts(text, model, reference_audio_path=None):
|
| 229 |
global client
|
| 230 |
print(f"Predicting TTS for {model}")
|
|
@@ -239,6 +259,8 @@ def predict_tts(text, model, reference_audio_path=None):
|
|
| 239 |
return predict_index_tts(text, reference_audio_path)
|
| 240 |
elif model == "spark-tts":
|
| 241 |
return predict_spark_tts(text, reference_audio_path)
|
|
|
|
|
|
|
| 242 |
|
| 243 |
if not model in model_mapping:
|
| 244 |
raise ValueError(f"Model {model} not found")
|
|
|
|
| 51 |
# "provider": "styletts",
|
| 52 |
# "model": "styletts2",
|
| 53 |
# },
|
| 54 |
+
"cosyvoice-2.0": {
|
| 55 |
+
"provider": "cosyvoice",
|
| 56 |
+
"model": "cosyvoice_2_0",
|
| 57 |
+
},
|
| 58 |
# "papla-p1": {
|
| 59 |
# "provider": "papla",
|
| 60 |
# "model": "papla_p1",
|
|
|
|
| 225 |
return result
|
| 226 |
|
| 227 |
|
| 228 |
+
def predict_cosyvoice_tts(text, reference_audio_path=None):
|
| 229 |
+
from gradio_client import Client, file
|
| 230 |
+
client = Client("https://iic-cosyvoice2-0-5b.ms.show/")
|
| 231 |
+
if not reference_audio_path:
|
| 232 |
+
raise ValueError("cosyvoice-2.0 需要 reference_audio_path")
|
| 233 |
+
prompt_wav = file(reference_audio_path)
|
| 234 |
+
result = client.predict(
|
| 235 |
+
tts_text=text,
|
| 236 |
+
mode_checkbox_group="3s极速复刻",
|
| 237 |
+
prompt_text="",
|
| 238 |
+
prompt_wav_upload=prompt_wav,
|
| 239 |
+
prompt_wav_record=prompt_wav,
|
| 240 |
+
instruct_text="",
|
| 241 |
+
seed=0,
|
| 242 |
+
stream="false",
|
| 243 |
+
api_name="/generate_audio"
|
| 244 |
+
)
|
| 245 |
+
return result
|
| 246 |
+
|
| 247 |
+
|
| 248 |
def predict_tts(text, model, reference_audio_path=None):
|
| 249 |
global client
|
| 250 |
print(f"Predicting TTS for {model}")
|
|
|
|
| 259 |
return predict_index_tts(text, reference_audio_path)
|
| 260 |
elif model == "spark-tts":
|
| 261 |
return predict_spark_tts(text, reference_audio_path)
|
| 262 |
+
elif model == "cosyvoice-2.0":
|
| 263 |
+
return predict_cosyvoice_tts(text, reference_audio_path)
|
| 264 |
|
| 265 |
if not model in model_mapping:
|
| 266 |
raise ValueError(f"Model {model} not found")
|