kunalpro379 committed on
Commit
6fea906
·
verified ·
1 Parent(s): ecb7b63

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -30
app.py CHANGED
@@ -1,31 +1,55 @@
import gradio as gr
import subprocess
import os
import uuid

MODEL = "en_US-amy-medium.onnx"
MODEL_PATH = f"./models/en_US/{MODEL}"
CONFIG_PATH = "./models/en_US/en_US-amy-medium.onnx.json"

# Download the Piper voice model + config from Hugging Face on first run.
if not os.path.exists(MODEL_PATH):
    os.makedirs("./models/en_US", exist_ok=True)
    # check=True: fail loudly on a broken download instead of leaving a
    # truncated/empty model file that piper would choke on later.
    subprocess.run(["wget", "-O", MODEL_PATH,
                    "https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/amy-medium.onnx"],
                   check=True)
    subprocess.run(["wget", "-O", CONFIG_PATH,
                    "https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/en_US-amy-medium.onnx.json"],
                   check=True)


def tts_piper(text):
    """Synthesize *text* with the piper CLI and return the path to a WAV file.

    A UUID-based filename avoids collisions between concurrent requests.
    NOTE(review): output files are never cleaned up; a periodic cleanup or
    tempfile-based scheme may be needed for long-running deployments.
    """
    output_file = f"output_{uuid.uuid4().hex}.wav"
    command = [
        "piper",
        "--model", MODEL_PATH,
        "--config", CONFIG_PATH,
        "--output_file", output_file,
        "--text", text,
    ]
    # check=True: surface piper failures instead of handing gradio a
    # path to a WAV that was never written.
    subprocess.run(command, check=True)
    return output_file


demo = gr.Interface(fn=tts_piper, inputs="text", outputs="audio",
                    title="Piper TTS - Hugging Face Demo")
demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import torch
from transformers import AutoModel, AutoTokenizer
import gradio as gr
import soundfile as sf
import numpy as np
import tempfile

# Use the GPU when one is present, otherwise fall back to CPU.  The original
# hard-coded "cpu" even though its comment said 'or "cuda" if available'.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Multi-speaker VITS TTS for Indian languages.  The hub repo ships custom
# modeling code, hence trust_remote_code=True.
# NOTE(review): trust_remote_code executes code fetched from the hub repo —
# acceptable only because the model source is explicitly trusted.
model = AutoModel.from_pretrained("ai4bharat/vits_rasa_13", trust_remote_code=True).to(device)
tokenizer = AutoTokenizer.from_pretrained("ai4bharat/vits_rasa_13", trust_remote_code=True)

# Language code -> speaker_id in the model's speaker table.
LANG_SPEAKER_MAP = {
    "asm": 0, "ben": 2, "brx": 4, "doi": 6,
    "kan": 8, "mai": 10, "mal": 11,
    "mar": 13, "nep": 14, "pan": 16,
    "san": 17, "tam": 18, "tel": 19,
    "hin": 13,  # no dedicated Hindi voice; Marathi male voice is the closest match
}

# Fixed speaking style passed to the model as emotion_id (0 == ALEXA).
DEFAULT_STYLE_ID = 0
24
+
25
+ def tts_from_json(json_input):
26
+ try:
27
+ text = json_input["text"]
28
+ lang = json_input["language"].lower()
29
+
30
+ speaker_id = LANG_SPEAKER_MAP.get(lang)
31
+ if speaker_id is None:
32
+ return f"Language '{lang}' not supported."
33
+
34
+ inputs = tokenizer(text=text, return_tensors="pt").to(device)
35
+ outputs = model(inputs['input_ids'], speaker_id=speaker_id, emotion_id=DEFAULT_STYLE_ID)
36
+
37
+ waveform = outputs.waveform.squeeze().cpu().numpy()
38
+ sample_rate = model.config.sampling_rate
39
+
40
+ # Save to temp file for Gradio playback
41
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
42
+ sf.write(f.name, waveform, sample_rate)
43
+ return sample_rate, waveform
44
+ except Exception as e:
45
+ return f"Error: {str(e)}"
46
+
47
+ iface = gr.Interface(
48
+ fn=tts_from_json,
49
+ inputs=gr.JSON(label="Input JSON: {'text': '...', 'language': 'mar/hin/san'}"),
50
+ outputs=gr.Audio(label="Generated Audio"),
51
+ title="VITS TTS for Indian Languages (Marathi, Hindi, Sanskrit)",
52
+ description="Uses ai4bharat/vits_rasa_13. Supports Marathi, Hindi, and Sanskrit."
53
+ )
54
+
55
+ iface.launch()