Spaces:

emirhanbilgic
/

Text-to-speech-Turkish

Running

emirhanbilgic committed on Sep 1, 2024

Commit

3625755

verified ·

1 Parent(s): c089867

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -126,23 +126,17 @@ def text_to_speech(text, audio_file=None):
     with torch.no_grad():
         speech = model.generate_speech(inputs["input_ids"], speaker_embeddings.unsqueeze(0), vocoder=vocoder)
-    # Convert the generated speech to numpy array format
     speech_np = speech.cpu().numpy()
-    # Save the speech to a temporary file in WAV format
-    output_file = "output.wav"
-    sf.write(output_file, speech_np, 16000)
-    # Return the path to the audio file
-    return output_file
 iface = gr.Interface(
     fn=text_to_speech,
     inputs=[
         gr.Textbox(label="Enter Turkish text to convert to speech")
     ],
     outputs=[
-        gr.Audio(label="Generated Speech")
     ],
     title="Turkish SpeechT5 Text-to-Speech Demo",
     description="Enter Turkish text, and listen to the generated speech."

     with torch.no_grad():
         speech = model.generate_speech(inputs["input_ids"], speaker_embeddings.unsqueeze(0), vocoder=vocoder)
     speech_np = speech.cpu().numpy()
+    speech_np = speech_np / np.max(np.abs(speech_np))
+    return (16000, speech_np)
 iface = gr.Interface(
     fn=text_to_speech,
     inputs=[
         gr.Textbox(label="Enter Turkish text to convert to speech")
     ],
     outputs=[
+        gr.Audio(label="Generated Speech", type="numpy")
     ],
     title="Turkish SpeechT5 Text-to-Speech Demo",
     description="Enter Turkish text, and listen to the generated speech."