Spaces:

emirhanbilgic
/

Text-to-speech-Turkish

Running

emirhanbilgic committed on Aug 31, 2024

Commit

7ca15a8

verified ·

1 Parent(s): 6ce3012

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -111,7 +111,7 @@ def normalize_text(text):
     return text
-@spaces.GPU(duration = 60)
 def text_to_speech(text, audio_file=None):
     # Normalize the input text
     normalized_text = normalize_text(text)
@@ -120,9 +120,18 @@ def text_to_speech(text, audio_file=None):
     speaker_embeddings = default_embedding
     speech = model.generate_speech(inputs["input_ids"], speaker_embeddings.unsqueeze(0), vocoder=vocoder)
-    sf.write("output.wav", speech.cpu().numpy(), samplerate=16000)
-    return "output.wav", normalized_text
 iface = gr.Interface(
     fn=text_to_speech,

     return text
+@spaces.GPU(duration=60)
 def text_to_speech(text, audio_file=None):
     # Normalize the input text
     normalized_text = normalize_text(text)
     speaker_embeddings = default_embedding
+    # Generate speech
     speech = model.generate_speech(inputs["input_ids"], speaker_embeddings.unsqueeze(0), vocoder=vocoder)
+    # Convert the generated speech to numpy array format
+    speech_np = speech.cpu().numpy()
+    # Write the output to a temporary file
+    output_file = "output.wav"
+    sf.write(output_file, speech_np, samplerate=16000)
+    # Return the numpy array and the sample rate
+    return speech_np, 16000
 iface = gr.Interface(
     fn=text_to_speech,