Update app.py: remove the optional speaker-audio upload and always use the default speaker embedding
Browse files
app.py
CHANGED
@@ -48,15 +48,7 @@ default_embedding = prepare_default_embedding(default_example)
|
|
48 |
def text_to_speech(text, audio_file=None):
|
49 |
inputs = processor(text=text, return_tensors="pt").to(device)
|
50 |
|
51 |
-
|
52 |
-
# Load the audio file and create speaker embedding
|
53 |
-
waveform, sample_rate = sf.read(audio_file)
|
54 |
-
if len(waveform.shape) > 1:
|
55 |
-
waveform = waveform[:, 0] # Take the first channel if stereo
|
56 |
-
speaker_embeddings = create_speaker_embedding(waveform)
|
57 |
-
else:
|
58 |
-
# Use default embedding if no audio file is provided
|
59 |
-
speaker_embeddings = default_embedding
|
60 |
|
61 |
speech = model.generate_speech(inputs["input_ids"], speaker_embeddings.unsqueeze(0), vocoder=vocoder)
|
62 |
sf.write("output.wav", speech.cpu().numpy(), samplerate=16000)
|
@@ -65,8 +57,7 @@ def text_to_speech(text, audio_file=None):
|
|
65 |
iface = gr.Interface(
|
66 |
fn=text_to_speech,
|
67 |
inputs=[
|
68 |
-
gr.Textbox(label="Enter Turkish text to convert to speech")
|
69 |
-
gr.Audio(label="Upload a short audio sample of the target speaker (optional)", type="filepath")
|
70 |
],
|
71 |
outputs=gr.Audio(label="Generated Speech"),
|
72 |
title="Turkish SpeechT5 Text-to-Speech Demo with Optional Custom Voice",
|
|
|
48 |
def text_to_speech(text, audio_file=None):
|
49 |
inputs = processor(text=text, return_tensors="pt").to(device)
|
50 |
|
51 |
+
speaker_embeddings = default_embedding
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
|
53 |
speech = model.generate_speech(inputs["input_ids"], speaker_embeddings.unsqueeze(0), vocoder=vocoder)
|
54 |
sf.write("output.wav", speech.cpu().numpy(), samplerate=16000)
|
|
|
57 |
iface = gr.Interface(
|
58 |
fn=text_to_speech,
|
59 |
inputs=[
|
60 |
+
gr.Textbox(label="Enter Turkish text to convert to speech")
|
|
|
61 |
],
|
62 |
outputs=gr.Audio(label="Generated Speech"),
|
63 |
title="Turkish SpeechT5 Text-to-Speech Demo with Optional Custom Voice",
|