Update app.py
Browse files
app.py
CHANGED
@@ -111,7 +111,7 @@ def normalize_text(text):
|
|
111 |
|
112 |
return text
|
113 |
|
114 |
-
@spaces.GPU(duration
|
115 |
def text_to_speech(text, audio_file=None):
|
116 |
# Normalize the input text
|
117 |
normalized_text = normalize_text(text)
|
@@ -120,9 +120,18 @@ def text_to_speech(text, audio_file=None):
|
|
120 |
|
121 |
speaker_embeddings = default_embedding
|
122 |
|
|
|
123 |
speech = model.generate_speech(inputs["input_ids"], speaker_embeddings.unsqueeze(0), vocoder=vocoder)
|
124 |
-
|
125 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
126 |
|
127 |
iface = gr.Interface(
|
128 |
fn=text_to_speech,
|
|
|
111 |
|
112 |
return text
|
113 |
|
114 |
+
@spaces.GPU(duration=60)
|
115 |
def text_to_speech(text, audio_file=None):
|
116 |
# Normalize the input text
|
117 |
normalized_text = normalize_text(text)
|
|
|
120 |
|
121 |
speaker_embeddings = default_embedding
|
122 |
|
123 |
+
# Generate speech
|
124 |
speech = model.generate_speech(inputs["input_ids"], speaker_embeddings.unsqueeze(0), vocoder=vocoder)
|
125 |
+
|
126 |
+
# Convert the generated speech to numpy array format
|
127 |
+
speech_np = speech.cpu().numpy()
|
128 |
+
|
129 |
+
# Write the output to a fixed-name WAV file (output.wav), not a temporary file
|
130 |
+
output_file = "output.wav"
|
131 |
+
sf.write(output_file, speech_np, samplerate=16000)
|
132 |
+
|
133 |
+
# Return the numpy array and the sample rate
|
134 |
+
return speech_np, 16000
|
135 |
|
136 |
iface = gr.Interface(
|
137 |
fn=text_to_speech,
|