Spaces:

dialoglk
/

SinhalaVITS

Running

App Files Files Community

KasunUoM committed 19 days ago

Commit

2b669b0

verified ·

1 Parent(s): fa4fd3c

Initial upload

Browse files

Files changed (3) hide show

app.py +153 -0
requirements.txt +6 -0
romanizer.py +131 -0

app.py ADDED Viewed

	@@ -0,0 +1,153 @@

+# Created by Kasun Ranasinghe (@kasunUoM) | Oct.2025
+#
+# Gradio App for SinhalaVITS TTS Inference
+# Converted from Streamlit by Gemini
+# ===========================================================
+import gradio as gr
+from TTS.utils.synthesizer import Synthesizer
+from romanizer import sinhala_to_roman
+from huggingface_hub import hf_hub_download
+import os
+# -------------------------------
+# Load Multiple Speaker Models
+# -------------------------------
+# This function will run once when the app starts.
+def load_models():
+    model_sources = {
+        "Roshan": {
+            "repo": "dialoglk/SinhalaVITS-TTS-M2",
+            "model_file": "Roshan_270000.pth",
+            "config_file": "Roshan_config.json"
+        },
+        "Nipunika": {
+            "repo": "dialoglk/SinhalaVITS-TTS-F1",
+            "model_file": "Nipunika_210000.pth",
+            "config_file": "Nipunika_config.json"
+        },
+        "Sanjaya": {
+            "repo": "dialoglk/SinhalaVITS-TTS-M1",
+            "model_file": "Sanjaya_170000.pth",
+            "config_file": "Sanjaya_config.json"
+        },
+        "Sanuki": {
+            "repo": "dialoglk/SinhalaVITS-TTS-F2",
+            "model_file": "Sanuki_190000.pth",
+            "config_file": "Sanuki_config.json"
+        }
+    }
+    loaded = {}
+    print("Downloading and loading models...")
+    for spk, info in model_sources.items():
+        print(f"Loading speaker: {spk}")
+        ckpt_path = hf_hub_download(info["repo"], info["model_file"])
+        cfg_path = hf_hub_download(info["repo"], info["config_file"])
+        loaded[spk] = Synthesizer(
+            tts_checkpoint=ckpt_path,
+            tts_config_path=cfg_path,
+            use_cuda=False  # Set to False for CPU-based HF Spaces
+        )
+    print("All models loaded successfully.")
+    return loaded
+# Load models globally when the script runs.
+# This executes at import time, so every speaker model is fetched and built
+# before the UI below is defined; generate_speech() looks speakers up here.
+MODELS = load_models()
+# -------------------------------
+# The Core Inference Function
+# -------------------------------
+# This is the function that Gradio will call
+def generate_speech(sinhala_text, speaker):
+    if not sinhala_text.strip():
+        print("Warning: Empty text submitted.")
+        return (None, None) # Return None for both audio and status
+    print(f"Generating speech for speaker: {speaker}")
+    try:
+        # 1. Convert Sinhala → Roman
+        roman_text = sinhala_to_roman(sinhala_text)
+        print(f"Romanized text: {roman_text}")
+        # 2. Select model
+        model = MODELS[speaker]
+        # 3. Generate audio
+        wav = model.tts(roman_text)
+        sample_rate = model.output_sample_rate
+        # 4. Return the audio (sample_rate, numpy_array)
+        # Gradio's Audio component handles this format perfectly
+        print("Speech generated successfully.")
+        return (sample_rate, wav)
+    except Exception as e:
+        print(f"Error during synthesis: {e}")
+        return (None, None)
+# -------------------------------
+# GRADIO UI
+# -------------------------------
+# We use gr.Blocks() for a custom layout
+# Define a custom theme to match the original app's modern feel:
+# the Soft theme with blue hues, then explicit overrides for the primary
+# button's normal and hover fill colours.
+# NOTE(review): "Segoe UI" is a Windows system font and is presumably not
+# served by Google Fonts — a plain "Segoe UI" string may be what is intended
+# here instead of GoogleFont(...); confirm.
+theme = gr.themes.Soft(
+    primary_hue="blue",
+    secondary_hue="blue",
+    font=[gr.themes.GoogleFont("Segoe UI"), "sans-serif"],
+).set(
+    button_primary_background_fill="#007BFF",
+    button_primary_background_fill_hover="#0056b3",
+)
+# Page layout: a heading, then one row holding an input column (scale=2)
+# and an output column (scale=1).
+with gr.Blocks(theme=theme, title="SinhalaVITS Client 🎙️") as demo:
+    gr.Markdown(
+        """
+        # SinhalaVITS Client 🎙️
+        ## Text to Speech in Sinhala | by Dialog & UoM
+        """
+    )
+    with gr.Row(variant="panel"):
+        # Input column: text box, speaker selector and the trigger button.
+        with gr.Column(scale=2):
+            sinhala_text = gr.Textbox(
+                label="Enter Sinhala text:",
+                placeholder="කරුණාකර මෙහි ටයිප් කරන්න...",
+                lines=4
+            )
+            # The choices must match the speaker names used as keys when the
+            # models are loaded; generate_speech() indexes MODELS with them.
+            speaker = gr.Dropdown(
+                label="Choose a speaker",
+                choices=["Roshan", "Nipunika", "Sanjaya", "Sanuki"],
+                value="Roshan"
+            )
+            generate_btn = gr.Button("Generate Speech", variant="primary")
+        # Output column: the audio player for the synthesized speech.
+        with gr.Column(scale=1):
+            audio_output = gr.Audio(
+                label="Generated Speech",
+                type="numpy" # 'numpy' is for input, output just needs (sr, data)
+            )
+            gr.Markdown("The audio player includes a ⬇️ download button.")
+    # Connect the UI elements to the function.
+    # There is exactly one output component, so whatever generate_speech
+    # returns is handed to audio_output as its value.
+    generate_btn.click(
+        fn=generate_speech,
+        inputs=[sinhala_text, speaker],
+        outputs=[audio_output]
+    )
+    # Example rows are [text, speaker] pairs targeting the two input
+    # components; no fn is passed here, so synthesis is still triggered
+    # by the button.
+    gr.Examples(
+        examples=[
+            ["ආයුබෝවන්.", "Sanjaya"],
+            ["මම ගෙදර යනවා.", "Roshan"],
+            ["ඔබට කොහොමද?", "Nipunika"],
+        ],
+        inputs=[sinhala_text, speaker]
+    )
+# Launch the app only when this file is executed as a script.
+if __name__ == "__main__":
+    demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+gradio
+TTS==0.11.1
+huggingface-hub
+torch==2.5.1
+torchaudio==2.5.1
+numpy==1.22.4

romanizer.py ADDED Viewed

	@@ -0,0 +1,131 @@

+# Helpers for converting Sinhala text to standard romanized (Latin-script) text
+import re
+# -- Specials (vowels, diacritics, standalone signs) --
+# Each row is [sinhala, roman]. sinhala_to_roman() applies this table after
+# the consonant+vowel combinations, via replace_all().
+# Some Sinhala keys appear twice with different romanizations. replace_all()
+# keeps the listed order for keys of equal length, so the first row for a key
+# replaces every occurrence and the second row never matches anything.
+# NOTE(review): the extra rows presumably exist for a roman -> sinhala
+# direction that is not in this file — confirm before removing them.
+ro_specials = [
+    ['ඓ', 'ai'],
+    ['ඖ', 'au'],
+    ['ඍ', 'ṛ'],
+    ['ඎ', 'ṝ'],
+    ['ඐ', 'ḹ'],
+    ['අ', 'a'],
+    ['ආ', 'ā'],
+    ['ඇ', 'æ'], ['ඇ', 'Æ'],
+    ['ඈ', 'ǣ'],
+    ['ඉ', 'i'],
+    ['ඊ', 'ī'],
+    ['උ', 'u'],
+    ['ඌ', 'ū'],
+    ['එ', 'e'],
+    ['ඒ', 'ē'],
+    ['ඔ', 'o'],
+    ['ඕ', 'ō'],
+    ['ඞ්', 'ṅ'],
+    ['ං', 'ṁ'], ['ං', 'ṃ'],
+    ['ඃ', 'ḥ'], ['ඃ', 'Ḥ'],
+]
+# -- Consonants --
+# Each row is [sinhala consonant, roman consonant]. These rows are not used
+# directly for replacement: create_conso_combi() crosses them with
+# ro_combinations to produce every consonant(+vowel sign) syllable.
+# 'ෂ' is listed twice; as with the specials, only the first romanization
+# ('ş') ever takes effect in the sinhala -> roman direction.
+ro_consonants = [
+    ['ඛ', 'kh'],
+    ['ඨ', 'ṭh'],
+    ['ඝ', 'gh'],
+    ['ඡ', 'ch'],
+    ['ඣ', 'jh'],
+    ['ඦ', 'ñj'],
+    ['ඪ', 'ḍh'],
+    ['ඬ', 'ṇḍ'],
+    ['ථ', 'th'],
+    ['ධ', 'dh'],
+    ['ඵ', 'ph'],
+    ['භ', 'bh'],
+    ['ඹ', 'mb'],
+    ['ඳ', 'ṉd'],
+    ['ඟ', 'ṉg'],
+    ['ඥ', 'gn'],
+    ['ක', 'k'],
+    ['ග', 'g'],
+    ['ච', 'c'],
+    ['ජ', 'j'],
+    ['ඤ', 'ñ'],
+    ['ට', 'ṭ'],
+    ['ඩ', 'ḍ'],
+    ['ණ', 'ṇ'],
+    ['ත', 't'],
+    ['ද', 'd'],
+    ['න', 'n'],
+    ['ප', 'p'],
+    ['බ', 'b'],
+    ['ම', 'm'],
+    ['ය', 'y'],
+    ['ර', 'r'],
+    ['ල', 'l'],
+    ['ව', 'v'],
+    ['ශ', 'ś'],
+    ['ෂ', 'ş'], ['ෂ', 'ṣ'],
+    ['ස', 's'],
+    ['හ', 'h'],
+    ['ළ', 'ḷ'],
+    ['ෆ', 'f']
+]
+# -- Combinations (consonant + vowel signs) --
+# Each row is [roman prefix, roman vowel suffix, sinhala sign].
+# create_conso_combi() builds, for every consonant:
+#   sinhala = consonant + sign
+#   roman   = prefix + consonant romanization + vowel suffix
+# The first row ('්' with an empty suffix) yields the bare consonant; the
+# second row (no sign) yields the consonant followed by 'a'.
+# The prefix column is empty in every row.
+ro_combinations = [
+    ['', '', '්'],
+    ['', 'a', ''],
+    ['', 'ā', 'ා'],
+    ['', 'æ', 'ැ'],
+    ['', 'ǣ', 'ෑ'],
+    ['', 'i', 'ි'],
+    ['', 'ī', 'ී'],
+    ['', 'u', 'ු'],
+    ['', 'ū', 'ූ'],
+    ['', 'e', 'ෙ'],
+    ['', 'ē', 'ේ'],
+    ['', 'ai', 'ෛ'],
+    ['', 'o', 'ො'],
+    ['', 'ō', 'ෝ'],
+    ['', 'ṛ', 'ෘ'],
+    ['', 'ṝ', 'ෲ'],
+    ['', 'au', 'ෞ'],
+    ['', 'ḹ', 'ෳ']
+]
+# -- Generate consonant+vowel combos --
+def create_conso_combi(combinations, consonants):
+    conso_combi = []
+    for combi in combinations:
+        for conso in consonants:
+            base_sinh = conso[0] + combi[2]
+            base_rom = combi[0] + conso[1] + combi[1]
+            conso_combi.append((base_sinh, base_rom))
+    return conso_combi
+# Built once at import: every (sinhala consonant[+sign], roman syllable) pair,
+# used by sinhala_to_roman() as its first replacement table.
+ro_conso_combi = create_conso_combi(ro_combinations, ro_consonants)
+# -- Core replace function --
+def replace_all(text, mapping):
+    # sort by length (to handle longest matches first)
+    mapping = sorted(mapping, key=lambda x: len(x[0]), reverse=True)
+    for sinh, rom in mapping:
+        text = re.sub(sinh, rom, text)
+    return text
+# -- Main Sinhala → Roman Function --
+def sinhala_to_roman(text):
+    # remove ZWJ (zero-width joiner)
+    text = text.replace("\u200D", "")
+    # do consonant+vowel combos first
+    text = replace_all(text, ro_conso_combi)
+    # then specials
+    text = replace_all(text, ro_specials)
+    return text