kunalpro379 committed on
Commit
6fea906
·
verified ·
1 Parent(s): ecb7b63

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -30
app.py CHANGED
@@ -1,31 +1,55 @@
import gradio as gr
import subprocess
import os
import uuid

MODEL = "en_US-amy-medium.onnx"
MODEL_PATH = f"./models/en_US/{MODEL}"
CONFIG_PATH = "./models/en_US/en_US-amy-medium.onnx.json"

# Download the Piper voice model + config from Hugging Face on first run.
if not os.path.exists(MODEL_PATH):
    os.makedirs("./models/en_US", exist_ok=True)
    # check=True: fail loudly on a broken download instead of leaving a
    # truncated/empty model file that piper would choke on later.
    subprocess.run(["wget", "-O", MODEL_PATH,
                    "https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/amy-medium.onnx"],
                   check=True)
    subprocess.run(["wget", "-O", CONFIG_PATH,
                    "https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/en_US-amy-medium.onnx.json"],
                   check=True)


def tts_piper(text):
    """Synthesize *text* with the piper CLI and return the path to a WAV file.

    A UUID-based filename avoids collisions between concurrent requests.
    NOTE(review): output files are never cleaned up; a periodic cleanup or
    tempfile-based scheme may be needed for long-running deployments.
    """
    output_file = f"output_{uuid.uuid4().hex}.wav"
    command = [
        "piper",
        "--model", MODEL_PATH,
        "--config", CONFIG_PATH,
        "--output_file", output_file,
        "--text", text,
    ]
    # check=True: surface piper failures instead of handing gradio a
    # path to a WAV that was never written.
    subprocess.run(command, check=True)
    return output_file


demo = gr.Interface(fn=tts_piper, inputs="text", outputs="audio",
                    title="Piper TTS - Hugging Face Demo")
demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import torch
from transformers import AutoModel, AutoTokenizer
import gradio as gr
import soundfile as sf
import numpy as np
import tempfile

# Use the GPU when one is present, otherwise fall back to CPU.  The original
# hard-coded "cpu" even though its comment said 'or "cuda" if available'.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Multi-speaker VITS TTS for Indian languages.  The hub repo ships custom
# modeling code, hence trust_remote_code=True.
# NOTE(review): trust_remote_code executes code fetched from the hub repo —
# acceptable only because the model source is explicitly trusted.
model = AutoModel.from_pretrained("ai4bharat/vits_rasa_13", trust_remote_code=True).to(device)
tokenizer = AutoTokenizer.from_pretrained("ai4bharat/vits_rasa_13", trust_remote_code=True)

# Language code -> speaker_id in the model's speaker table.
LANG_SPEAKER_MAP = {
    "asm": 0, "ben": 2, "brx": 4, "doi": 6,
    "kan": 8, "mai": 10, "mal": 11,
    "mar": 13, "nep": 14, "pan": 16,
    "san": 17, "tam": 18, "tel": 19,
    "hin": 13,  # no dedicated Hindi voice; Marathi male voice is the closest match
}

# Fixed speaking style passed to the model as emotion_id (0 == ALEXA).
DEFAULT_STYLE_ID = 0
24
+
25
+ def tts_from_json(json_input):
26
+ try:
27
+ text = json_input["text"]
28
+ lang = json_input["language"].lower()
29
+
30
+ speaker_id = LANG_SPEAKER_MAP.get(lang)
31
+ if speaker_id is None:
32
+ return f"Language '{lang}' not supported."
33
+
34
+ inputs = tokenizer(text=text, return_tensors="pt").to(device)
35
+ outputs = model(inputs['input_ids'], speaker_id=speaker_id, emotion_id=DEFAULT_STYLE_ID)
36
+
37
+ waveform = outputs.waveform.squeeze().cpu().numpy()
38
+ sample_rate = model.config.sampling_rate
39
+
40
+ # Save to temp file for Gradio playback
41
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
42
+ sf.write(f.name, waveform, sample_rate)
43
+ return sample_rate, waveform
44
+ except Exception as e:
45
+ return f"Error: {str(e)}"
46
+
47
+ iface = gr.Interface(
48
+ fn=tts_from_json,
49
+ inputs=gr.JSON(label="Input JSON: {'text': '...', 'language': 'mar/hin/san'}"),
50
+ outputs=gr.Audio(label="Generated Audio"),
51
+ title="VITS TTS for Indian Languages (Marathi, Hindi, Sanskrit)",
52
+ description="Uses ai4bharat/vits_rasa_13. Supports Marathi, Hindi, and Sanskrit."
53
+ )
54
+
55
+ iface.launch()