pnnbao-ump committed on
Commit
f01210e
·
1 Parent(s): ac26d96

add more examples

Browse files
utils/__pycache__/__init__.cpython-312.pyc CHANGED
Binary files a/utils/__pycache__/__init__.cpython-312.pyc and b/utils/__pycache__/__init__.cpython-312.pyc differ
 
utils/__pycache__/normalize_text.cpython-312.pyc CHANGED
Binary files a/utils/__pycache__/normalize_text.cpython-312.pyc and b/utils/__pycache__/normalize_text.cpython-312.pyc differ
 
utils/__pycache__/phonemize_text.cpython-312.pyc CHANGED
Binary files a/utils/__pycache__/phonemize_text.cpython-312.pyc and b/utils/__pycache__/phonemize_text.cpython-312.pyc differ
 
vieneu_tts.py CHANGED
@@ -37,7 +37,7 @@ def _linear_overlap_add(frames: list[np.ndarray], stride: int) -> np.ndarray:
37
  class VieNeuTTS:
38
  def __init__(
39
  self,
40
- backbone_repo="pnnbao-ump/VieNeu-TTS-1000h",
41
  backbone_device="cpu",
42
  codec_repo="neuphonic/neucodec",
43
  codec_device="cpu",
@@ -57,9 +57,6 @@ class VieNeuTTS:
57
  self._is_quantized_model = False
58
  self._is_onnx_codec = False
59
 
60
- # backbone repo
61
- self.advanced_model = backbone_repo.endswith("1000h")
62
-
63
  # HF tokenizer
64
  self.tokenizer = None
65
 
@@ -194,10 +191,7 @@ class VieNeuTTS:
194
  return recon[0, 0, :]
195
 
196
  def _apply_chat_template(self, ref_codes: list[int], ref_text: str, input_text: str) -> list[int]:
197
- if self.advanced_model:
198
- input_text = phonemize_with_dict(ref_text) + " " + phonemize_with_dict(input_text)
199
- else:
200
- input_text = phonemize_text(ref_text) + " " + phonemize_text(input_text)
201
 
202
  speech_replace = self.tokenizer.convert_tokens_to_ids("<|SPEECH_REPLACE|>")
203
  speech_gen_start = self.tokenizer.convert_tokens_to_ids("<|SPEECH_GENERATION_START|>")
@@ -246,12 +240,8 @@ class VieNeuTTS:
246
  return output_str
247
 
248
  def _infer_ggml(self, ref_codes: list[int], ref_text: str, input_text: str) -> str:
249
- if self.advanced_model:
250
- ref_text = phonemize_with_dict(ref_text)
251
- input_text = phonemize_with_dict(input_text)
252
- else:
253
- ref_text = phonemize_text(ref_text)
254
- input_text = phonemize_text(input_text)
255
 
256
  codes_str = "".join([f"<|speech_{idx}|>" for idx in ref_codes])
257
  prompt = (
@@ -269,12 +259,8 @@ class VieNeuTTS:
269
  return output_str
270
 
271
  def _infer_stream_ggml(self, ref_codes: torch.Tensor, ref_text: str, input_text: str) -> Generator[np.ndarray, None, None]:
272
- if self.advanced_model:
273
- ref_text = phonemize_with_dict(ref_text)
274
- input_text = phonemize_with_dict(input_text)
275
- else:
276
- ref_text = phonemize_text(ref_text)
277
- input_text = phonemize_text(input_text)
278
 
279
  codes_str = "".join([f"<|speech_{idx}|>" for idx in ref_codes])
280
  prompt = (
 
37
  class VieNeuTTS:
38
  def __init__(
39
  self,
40
+ backbone_repo="pnnbao-ump/VieNeu-TTS",
41
  backbone_device="cpu",
42
  codec_repo="neuphonic/neucodec",
43
  codec_device="cpu",
 
57
  self._is_quantized_model = False
58
  self._is_onnx_codec = False
59
 
 
 
 
60
  # HF tokenizer
61
  self.tokenizer = None
62
 
 
191
  return recon[0, 0, :]
192
 
193
  def _apply_chat_template(self, ref_codes: list[int], ref_text: str, input_text: str) -> list[int]:
194
+ input_text = phonemize_with_dict(ref_text) + " " + phonemize_with_dict(input_text)
 
 
 
195
 
196
  speech_replace = self.tokenizer.convert_tokens_to_ids("<|SPEECH_REPLACE|>")
197
  speech_gen_start = self.tokenizer.convert_tokens_to_ids("<|SPEECH_GENERATION_START|>")
 
240
  return output_str
241
 
242
  def _infer_ggml(self, ref_codes: list[int], ref_text: str, input_text: str) -> str:
243
+ ref_text = phonemize_with_dict(ref_text)
244
+ input_text = phonemize_with_dict(input_text)
 
 
 
 
245
 
246
  codes_str = "".join([f"<|speech_{idx}|>" for idx in ref_codes])
247
  prompt = (
 
259
  return output_str
260
 
261
  def _infer_stream_ggml(self, ref_codes: torch.Tensor, ref_text: str, input_text: str) -> Generator[np.ndarray, None, None]:
262
+ ref_text = phonemize_with_dict(ref_text)
263
+ input_text = phonemize_with_dict(input_text)
 
 
 
 
264
 
265
  codes_str = "".join([f"<|speech_{idx}|>" for idx in ref_codes])
266
  prompt = (