Spaces:
Running
Running
New TTS: Chatterbox
Browse files
- app/models.py +21 -0
- test_tts_chatterbox.py +50 -0
- test_tts_zonos.py +4 -3
app/models.py
CHANGED
|
@@ -115,6 +115,9 @@ AVAILABLE_MODELS = {
|
|
| 115 |
# Dia
|
| 116 |
# 'nari-labs/Dia-1.6B': 'nari-labs/Dia-1.6B', # single speaker hallucinates
|
| 117 |
|
|
|
|
|
|
|
|
|
|
| 118 |
# HF TTS w issues
|
| 119 |
# 'LeeSangHoon/HierSpeech_TTS': 'LeeSangHoon/HierSpeech_TTS', # irresponsive to exclamation marks # 4.29
|
| 120 |
# 'PolyAI/pheme': '/predict#0', # sleepy HF Space
|
|
@@ -543,6 +546,15 @@ HF_SPACES = {
|
|
| 543 |
'is_zero_gpu_space': True,
|
| 544 |
'series': 'Dia',
|
| 545 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 546 |
}
|
| 547 |
|
| 548 |
# for zero-shot TTS - voice sample used by XTTS (11 seconds)
|
|
@@ -866,6 +878,15 @@ OVERRIDE_INPUTS = {
|
|
| 866 |
'cfg_filter_top_k': 35, # Top k filter for CFG guidance.
|
| 867 |
'speed_factor': 0.94, # Adjusts the speed of the generated audio (1.0 = original speed).
|
| 868 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 869 |
}
|
| 870 |
|
| 871 |
# minor mods to model from the same space
|
|
|
|
| 115 |
# Dia
|
| 116 |
# 'nari-labs/Dia-1.6B': 'nari-labs/Dia-1.6B', # single speaker hallucinates
|
| 117 |
|
| 118 |
+
# Chatterbox
|
| 119 |
+
'ResembleAI/Chatterbox': 'ResembleAI/Chatterbox',
|
| 120 |
+
|
| 121 |
# HF TTS w issues
|
| 122 |
# 'LeeSangHoon/HierSpeech_TTS': 'LeeSangHoon/HierSpeech_TTS', # irresponsive to exclamation marks # 4.29
|
| 123 |
# 'PolyAI/pheme': '/predict#0', # sleepy HF Space
|
|
|
|
| 546 |
'is_zero_gpu_space': True,
|
| 547 |
'series': 'Dia',
|
| 548 |
},
|
| 549 |
+
|
| 550 |
+
'ResembleAI/Chatterbox' : {
|
| 551 |
+
'name': 'Chatterbox',
|
| 552 |
+
'function': '/generate_tts_audio',
|
| 553 |
+
'text_param_index': 'text_input',
|
| 554 |
+
'return_audio_index': 0,
|
| 555 |
+
'is_zero_gpu_space': True,
|
| 556 |
+
'series': 'Chatterbox',
|
| 557 |
+
},
|
| 558 |
}
|
| 559 |
|
| 560 |
# for zero-shot TTS - voice sample used by XTTS (11 seconds)
|
|
|
|
| 878 |
'cfg_filter_top_k': 35, # Top k filter for CFG guidance.
|
| 879 |
'speed_factor': 0.94, # Adjusts the speed of the generated audio (1.0 = original speed).
|
| 880 |
},
|
| 881 |
+
|
| 882 |
+
# Chatterbox
|
| 883 |
+
'ResembleAI/Chatterbox': {
|
| 884 |
+
'audio_prompt_path_input': DEFAULT_VOICE_SAMPLE, # voice
|
| 885 |
+
'exaggeration_input': 0.5, # 1-2
|
| 886 |
+
'temperature_input': 0.8, # Lower values make the output more deterministic, higher values increase randomness.
|
| 887 |
+
'seed_num_input': 1, # Seed for random number generation, can be any integer.
|
| 888 |
+
'cfgw_input': 0.5, # CFG/Pace weight, can be any float value.
|
| 889 |
+
}
|
| 890 |
}
|
| 891 |
|
| 892 |
# minor mods to model from the same space
|
test_tts_chatterbox.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from test_overrides import _get_param_examples, _override_params
|
| 3 |
+
from gradio_client import Client, file
|
| 4 |
+
|
| 5 |
+
model = "ResembleAI/Chatterbox"
|
| 6 |
+
client = Client(model, hf_token=os.getenv('HF_TOKEN'))
|
| 7 |
+
endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
|
| 8 |
+
# print(endpoints)
|
| 9 |
+
|
| 10 |
+
api_name = '/generate_tts_audio'
|
| 11 |
+
fn_index = None
|
| 12 |
+
end_parameters = None
|
| 13 |
+
text = 'This is what my voice sounds like.'
|
| 14 |
+
|
| 15 |
+
end_parameters = _get_param_examples(
|
| 16 |
+
endpoints['named_endpoints'][api_name]['parameters']
|
| 17 |
+
)
|
| 18 |
+
print(end_parameters)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
space_inputs = end_parameters
|
| 22 |
+
# override some or all default parameters
|
| 23 |
+
space_inputs = _override_params(end_parameters, model)
|
| 24 |
+
|
| 25 |
+
if(type(space_inputs) == dict):
|
| 26 |
+
space_inputs['text_input'] = text
|
| 27 |
+
result = client.predict(
|
| 28 |
+
**space_inputs,
|
| 29 |
+
api_name=api_name,
|
| 30 |
+
fn_index=fn_index
|
| 31 |
+
)
|
| 32 |
+
else:
|
| 33 |
+
space_inputs[0] = text
|
| 34 |
+
result = client.predict(
|
| 35 |
+
*space_inputs,
|
| 36 |
+
api_name=api_name,
|
| 37 |
+
fn_index=fn_index
|
| 38 |
+
)
|
| 39 |
+
# space_inputs = {str(i): value for i, value in enumerate(space_inputs)}
|
| 40 |
+
|
| 41 |
+
print(space_inputs)
|
| 42 |
+
# print(*space_inputs)
|
| 43 |
+
# print(**space_inputs)
|
| 44 |
+
|
| 45 |
+
# result = client.predict(
|
| 46 |
+
# **space_inputs,
|
| 47 |
+
# api_name=api_name,
|
| 48 |
+
# fn_index=fn_index
|
| 49 |
+
# )
|
| 50 |
+
print(result)
|
test_tts_zonos.py
CHANGED
|
@@ -2,7 +2,8 @@ import os
|
|
| 2 |
from test_overrides import _get_param_examples, _override_params
|
| 3 |
from gradio_client import Client, file
|
| 4 |
|
| 5 |
-
model = "Steveeeeeeen/Zonos
|
|
|
|
| 6 |
# client = Client("Pendrokar/Zonos", hf_token=os.getenv('HF_TOKEN'))
|
| 7 |
client = Client("Steveeeeeeen/Zonos", hf_token=os.getenv('HF_TOKEN'))
|
| 8 |
# client = Client(model, hf_token=os.getenv('HF_TOKEN'))
|
|
@@ -12,7 +13,7 @@ endpoints = client.view_api(all_endpoints=True, print_info=False, return_format=
|
|
| 12 |
api_name = '/generate_audio'
|
| 13 |
fn_index = None
|
| 14 |
end_parameters = None
|
| 15 |
-
text = '
|
| 16 |
|
| 17 |
end_parameters = _get_param_examples(
|
| 18 |
endpoints['named_endpoints'][api_name]['parameters']
|
|
@@ -20,7 +21,7 @@ end_parameters = _get_param_examples(
|
|
| 20 |
print(end_parameters)
|
| 21 |
|
| 22 |
|
| 23 |
-
space_inputs = end_parameters
|
| 24 |
# override some or all default parameters
|
| 25 |
space_inputs = _override_params(end_parameters, model)
|
| 26 |
|
|
|
|
| 2 |
from test_overrides import _get_param_examples, _override_params
|
| 3 |
from gradio_client import Client, file
|
| 4 |
|
| 5 |
+
model = "Steveeeeeeen/Zonos"
|
| 6 |
+
# model = "Steveeeeeeen/Zonos/hybrid"
|
| 7 |
# client = Client("Pendrokar/Zonos", hf_token=os.getenv('HF_TOKEN'))
|
| 8 |
client = Client("Steveeeeeeen/Zonos", hf_token=os.getenv('HF_TOKEN'))
|
| 9 |
# client = Client(model, hf_token=os.getenv('HF_TOKEN'))
|
|
|
|
| 13 |
api_name = '/generate_audio'
|
| 14 |
fn_index = None
|
| 15 |
end_parameters = None
|
| 16 |
+
text = 'The young girl.'
|
| 17 |
|
| 18 |
end_parameters = _get_param_examples(
|
| 19 |
endpoints['named_endpoints'][api_name]['parameters']
|
|
|
|
| 21 |
print(end_parameters)
|
| 22 |
|
| 23 |
|
| 24 |
+
# space_inputs = end_parameters
|
| 25 |
# override some or all default parameters
|
| 26 |
space_inputs = _override_params(end_parameters, model)
|
| 27 |
|