Spaces:

emirhanbilgic
/

Text-to-speech-Turkish

Runtime error

emirhanbilgic committed on Aug 29, 2024

Commit

2b8ee16

verified ·

1 Parent(s): 4997d29

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -48,15 +48,7 @@ default_embedding = prepare_default_embedding(default_example)
 def text_to_speech(text, audio_file=None):
     inputs = processor(text=text, return_tensors="pt").to(device)
-    if audio_file is not None:
-        # Load the audio file and create speaker embedding
-        waveform, sample_rate = sf.read(audio_file)
-        if len(waveform.shape) > 1:
-            waveform = waveform[:, 0]  # Take the first channel if stereo
-        speaker_embeddings = create_speaker_embedding(waveform)
-    else:
-        # Use default embedding if no audio file is provided
-        speaker_embeddings = default_embedding
     speech = model.generate_speech(inputs["input_ids"], speaker_embeddings.unsqueeze(0), vocoder=vocoder)
     sf.write("output.wav", speech.cpu().numpy(), samplerate=16000)
@@ -65,8 +57,7 @@ def text_to_speech(text, audio_file=None):
 iface = gr.Interface(
     fn=text_to_speech,
     inputs=[
-        gr.Textbox(label="Enter Turkish text to convert to speech"),
-        gr.Audio(label="Upload a short audio sample of the target speaker (optional)", type="filepath")
     ],
     outputs=gr.Audio(label="Generated Speech"),
     title="Turkish SpeechT5 Text-to-Speech Demo with Optional Custom Voice",

 def text_to_speech(text, audio_file=None):
     inputs = processor(text=text, return_tensors="pt").to(device)
+    speaker_embeddings = default_embedding
     speech = model.generate_speech(inputs["input_ids"], speaker_embeddings.unsqueeze(0), vocoder=vocoder)
     sf.write("output.wav", speech.cpu().numpy(), samplerate=16000)
 iface = gr.Interface(
     fn=text_to_speech,
     inputs=[
+        gr.Textbox(label="Enter Turkish text to convert to speech")
     ],
     outputs=gr.Audio(label="Generated Speech"),
     title="Turkish SpeechT5 Text-to-Speech Demo with Optional Custom Voice",