KasunUoM committed on
Commit
2b669b0
·
verified ·
1 Parent(s): fa4fd3c

Initial upload

Browse files
Files changed (3) hide show
  1. app.py +153 -0
  2. requirements.txt +6 -0
  3. romanizer.py +131 -0
app.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Created by Kasun Ranasinghe (@kasunUoM) | Oct.2025
2
+ #
3
+ # Gradio App for SinhalaVITS TTS Inference
4
+ # Converted from Streamlit by Gemini
5
+ # ===========================================================
6
+
7
+ import gradio as gr
8
+ from TTS.utils.synthesizer import Synthesizer
9
+ from romanizer import sinhala_to_roman
10
+ from huggingface_hub import hf_hub_download
11
+ import os
12
+
13
+ # -------------------------------
14
+ # Load Multiple Speaker Models
15
+ # -------------------------------
16
+ # This function will run once when the app starts.
17
def load_models():
    """Download each speaker's checkpoint and config and build its synthesizer.

    For every entry in the speaker table, the checkpoint and config files are
    fetched with ``hf_hub_download`` and passed to ``Synthesizer`` with
    ``use_cuda=False``.

    Returns:
        dict: speaker name -> ``Synthesizer`` instance, in table order.
    """
    # Speaker name -> Hub repository and the two files needed from it.
    model_sources = {
        "Roshan": {
            "repo": "dialoglk/SinhalaVITS-TTS-M2",
            "model_file": "Roshan_270000.pth",
            "config_file": "Roshan_config.json",
        },
        "Nipunika": {
            "repo": "dialoglk/SinhalaVITS-TTS-F1",
            "model_file": "Nipunika_210000.pth",
            "config_file": "Nipunika_config.json",
        },
        "Sanjaya": {
            "repo": "dialoglk/SinhalaVITS-TTS-M1",
            "model_file": "Sanjaya_170000.pth",
            "config_file": "Sanjaya_config.json",
        },
        "Sanuki": {
            "repo": "dialoglk/SinhalaVITS-TTS-F2",
            "model_file": "Sanuki_190000.pth",
            "config_file": "Sanuki_config.json",
        },
    }

    loaded = {}
    print("Downloading and loading models...")
    for spk, info in model_sources.items():
        print(f"Loading speaker: {spk}")
        ckpt_path = hf_hub_download(info["repo"], info["model_file"])
        cfg_path = hf_hub_download(info["repo"], info["config_file"])

        loaded[spk] = Synthesizer(
            tts_checkpoint=ckpt_path,
            tts_config_path=cfg_path,
            use_cuda=False,  # Set to False for CPU-based HF Spaces
        )
    print("All models loaded successfully.")
    return loaded
55
+
56
# Built once when the module is executed; generate_speech() looks speakers up
# in this mapping instead of reloading a model per request.
MODELS = load_models()
58
+
59
+ # -------------------------------
60
+ # The Core Inference Function
61
+ # -------------------------------
62
+ # This is the function that Gradio will call
63
def generate_speech(sinhala_text, speaker):
    """Synthesize Sinhala text with the selected speaker.

    Args:
        sinhala_text: Text in Sinhala script; ``None`` or blank text is
            rejected without touching any model.
        speaker: Key into ``MODELS`` selecting the voice.

    Returns:
        ``(sample_rate, samples)`` where ``samples`` is a float32 NumPy
        array, or ``None`` when the text is blank or synthesis fails.
    """
    # This callback feeds exactly one output component, so "no audio" must be
    # a single None. A (None, None) tuple would be read as (sample_rate, data).
    if not sinhala_text or not sinhala_text.strip():
        print("Warning: Empty text submitted.")
        return None

    print(f"Generating speech for speaker: {speaker}")
    try:
        # 1. Convert Sinhala → Roman
        roman_text = sinhala_to_roman(sinhala_text)
        print(f"Romanized text: {roman_text}")

        # 2. Select model
        model = MODELS[speaker]

        # 3. Generate audio
        wav = model.tts(roman_text)
        sample_rate = model.output_sample_rate

        # 4. Hand back (sample_rate, ndarray).
        # NOTE(review): tts() appears to return a plain sequence of floats in
        # some TTS releases; asarray() is a no-op cost if it is already an
        # array and guarantees the array type otherwise.
        import numpy as np

        samples = np.asarray(wav, dtype=np.float32)
        print("Speech generated successfully.")
        return (sample_rate, samples)

    except Exception as e:
        # Top-level UI boundary: log the failure and clear the player rather
        # than propagating into the request handler.
        print(f"Error during synthesis: {e}")
        return None
89
+
90
+ # -------------------------------
91
+ # GRADIO UI
92
+ # -------------------------------
93
+ # We use gr.Blocks() for a custom layout
94
+
95
# Custom theme matching the original app's look.
# "Segoe UI" is a system font rather than a Google Fonts family, so it is
# given as a plain font name instead of being wrapped in GoogleFont (which
# would try to load it from Google Fonts).
theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="blue",
    font=["Segoe UI", "sans-serif"],
).set(
    button_primary_background_fill="#007BFF",
    button_primary_background_fill_hover="#0056b3",
)
104
+
105
with gr.Blocks(theme=theme, title="SinhalaVITS Client 🎙️") as demo:
    gr.Markdown(
        """
        # SinhalaVITS Client 🎙️
        ## Text to Speech in Sinhala | by Dialog & UoM
        """
    )

    with gr.Row(variant="panel"):
        # Left column: text entry, voice selection and the trigger button.
        with gr.Column(scale=2):
            sinhala_text = gr.Textbox(
                label="Enter Sinhala text:",
                placeholder="කරුණාකර මෙහි ටයිප් කරන්න...",
                lines=4
            )
            speaker = gr.Dropdown(
                label="Choose a speaker",
                # Derived from the loaded models so the list cannot drift
                # from the keys generate_speech() looks up.
                choices=list(MODELS),
                value="Roshan"
            )
            generate_btn = gr.Button("Generate Speech", variant="primary")

        # Right column: audio player for the synthesized result.
        with gr.Column(scale=1):
            audio_output = gr.Audio(
                label="Generated Speech",
                type="numpy"  # 'numpy' is for input, output just needs (sr, data)
            )
            gr.Markdown("The audio player includes a ⬇️ download button.")

    # Connect the UI elements to the function
    generate_btn.click(
        fn=generate_speech,
        inputs=[sinhala_text, speaker],
        outputs=[audio_output]
    )

    # Clickable sample inputs that fill the text box and speaker dropdown.
    gr.Examples(
        examples=[
            ["ආයුබෝවන්.", "Sanjaya"],
            ["මම ගෙදර යනවා.", "Roshan"],
            ["ඔබට කොහොමද?", "Nipunika"],
        ],
        inputs=[sinhala_text, speaker]
    )

# Launch the app
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio
2
+ TTS==0.11.1
3
+ huggingface-hub
4
+ torch==2.5.1
5
+ torchaudio==2.5.1
6
+ numpy==1.22.4
romanizer.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This helps converting Sinhala text to standard Romanized text
2
+
3
+ import re
4
+
5
+ # -- Specials (vowels, diacritics, standalone signs) --
6
+
7
# [sinhala, roman] pairs for independent vowels and standalone signs.
# Some Sinhala keys appear twice with different romanizations; going from
# Sinhala to Roman the first pair replaces every occurrence, so the second
# pair for the same key never matches.
ro_specials = [
    ['ඓ', 'ai'],
    ['ඖ', 'au'],
    ['ඍ', 'ṛ'],
    ['ඎ', 'ṝ'],
    ['ඐ', 'ḹ'],
    ['අ', 'a'],
    ['ආ', 'ā'],
    ['ඇ', 'æ'], ['ඇ', 'Æ'],
    ['ඈ', 'ǣ'],
    ['ඉ', 'i'],
    ['ඊ', 'ī'],
    ['උ', 'u'],
    ['ඌ', 'ū'],
    ['එ', 'e'],
    ['ඒ', 'ē'],
    ['ඔ', 'o'],
    ['ඕ', 'ō'],
    ['ඞ්', 'ṅ'],
    ['ං', 'ṁ'], ['ං', 'ṃ'],
    ['ඃ', 'ḥ'], ['ඃ', 'Ḥ'],
]
29
+
30
+ # -- Consonants --
31
+
32
# [sinhala, roman] pairs for bare consonants (no inherent vowel attached);
# create_conso_combi() appends the vowel part to the roman side.
ro_consonants = [
    ['ඛ', 'kh'],
    ['ඨ', 'ṭh'],
    ['ඝ', 'gh'],
    ['ඡ', 'ch'],
    ['ඣ', 'jh'],
    ['ඦ', 'ñj'],
    ['ඪ', 'ḍh'],
    ['ඬ', 'ṇḍ'],
    ['ථ', 'th'],
    ['ධ', 'dh'],
    ['ඵ', 'ph'],
    ['භ', 'bh'],
    ['ඹ', 'mb'],
    ['ඳ', 'ṉd'],
    ['ඟ', 'ṉg'],
    ['ඥ', 'gn'],
    ['ක', 'k'],
    ['ග', 'g'],
    ['ච', 'c'],
    ['ජ', 'j'],
    ['ඤ', 'ñ'],
    ['ට', 'ṭ'],
    ['ඩ', 'ḍ'],
    ['ණ', 'ṇ'],
    ['ත', 't'],
    ['ද', 'd'],
    ['න', 'n'],
    ['ප', 'p'],
    ['බ', 'b'],
    ['ම', 'm'],
    ['ය', 'y'],
    ['ර', 'r'],
    ['ල', 'l'],
    ['ව', 'v'],
    ['ශ', 'ś'],
    ['ෂ', 'ş'], ['ෂ', 'ṣ'],
    ['ස', 's'],
    ['හ', 'h'],
    ['ළ', 'ḷ'],
    ['ෆ', 'f']
]
74
+
75
+ # -- Combinations (consonant + vowel signs) --
76
+
77
# [roman prefix, roman vowel, sinhala vowel sign] triples.
# Row 0 is the hal sign (consonant with no vowel); row 1 has no sign at all
# and yields the inherent 'a'.
ro_combinations = [
    ['', '', '්'],
    ['', 'a', ''],
    ['', 'ā', 'ා'],
    ['', 'æ', 'ැ'],
    ['', 'ǣ', 'ෑ'],
    ['', 'i', 'ි'],
    ['', 'ī', 'ී'],
    ['', 'u', 'ු'],
    ['', 'ū', 'ූ'],
    ['', 'e', 'ෙ'],
    ['', 'ē', 'ේ'],
    ['', 'ai', 'ෛ'],
    ['', 'o', 'ො'],
    ['', 'ō', 'ෝ'],
    ['', 'ṛ', 'ෘ'],
    ['', 'ṝ', 'ෲ'],
    ['', 'au', 'ෞ'],
    ['', 'ḹ', 'ෳ']
]
96
+ ]
97
+
98
+ # -- Generate consonant+vowel combos --
99
+
100
def create_conso_combi(combinations, consonants):
    """Build every consonant + vowel-sign pairing.

    Args:
        combinations: Rows of ``[roman_prefix, roman_vowel, sinhala_sign]``.
        consonants: Rows of ``[sinhala_consonant, roman_consonant]``.

    Returns:
        list[tuple[str, str]]: ``(sinhala, roman)`` pairs, grouped by
        combination row and, within each group, in consonant order.
    """
    return [
        (conso[0] + combi[2], combi[0] + conso[1] + combi[1])
        for combi in combinations
        for conso in consonants
    ]
108
+
109
# Precomputed once at import: every consonant paired with every vowel sign.
ro_conso_combi = create_conso_combi(ro_combinations, ro_consonants)
110
+
111
+ # -- Core replace function --
112
def replace_all(text, mapping):
    """Apply every ``(source, replacement)`` pair in *mapping* to *text*.

    Pairs are applied longest source first so that a multi-character source
    is consumed before any shorter source it contains; pairs of equal length
    keep their original relative order.

    Args:
        text: String to transform.
        mapping: Iterable of two-item sequences ``(source, replacement)``.

    Returns:
        str: *text* after all replacements.
    """
    # sort by length (to handle longest matches first)
    ordered = sorted(mapping, key=lambda pair: len(pair[0]), reverse=True)
    for sinh, rom in ordered:
        if not sinh:
            # An empty source would insert `rom` between every character.
            continue
        # Literal replacement: keys and values are plain text, so they must
        # not be interpreted as regex patterns or replacement templates.
        text = text.replace(sinh, rom)
    return text
119
+
120
+ # -- Main Sinhala → Roman Function --
121
def sinhala_to_roman(text):
    """Romanize Sinhala-script *text* using the module's mapping tables.

    Args:
        text: Input string; characters with no table entry pass through
            unchanged.

    Returns:
        str: The romanized text.
    """
    import unicodedata

    # remove ZWJ (zero-width joiner)
    text = text.replace("\u200D", "")

    # Compose canonically-equivalent sequences (e.g. a two-part vowel sign
    # typed as two code points) so they match the single-code-point table keys.
    text = unicodedata.normalize("NFC", text)

    # do consonant+vowel combos first
    text = replace_all(text, ro_conso_combi)

    # then specials
    return replace_all(text, ro_specials)