emirhanbilgic committed on
Commit
2b8ee16
·
verified ·
1 Parent(s): 4997d29

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -11
app.py CHANGED
@@ -48,15 +48,7 @@ default_embedding = prepare_default_embedding(default_example)
48
  def text_to_speech(text, audio_file=None):
49
  inputs = processor(text=text, return_tensors="pt").to(device)
50
 
51
- if audio_file is not None:
52
- # Load the audio file and create speaker embedding
53
- waveform, sample_rate = sf.read(audio_file)
54
- if len(waveform.shape) > 1:
55
- waveform = waveform[:, 0] # Take the first channel if stereo
56
- speaker_embeddings = create_speaker_embedding(waveform)
57
- else:
58
- # Use default embedding if no audio file is provided
59
- speaker_embeddings = default_embedding
60
 
61
  speech = model.generate_speech(inputs["input_ids"], speaker_embeddings.unsqueeze(0), vocoder=vocoder)
62
  sf.write("output.wav", speech.cpu().numpy(), samplerate=16000)
@@ -65,8 +57,7 @@ def text_to_speech(text, audio_file=None):
65
  iface = gr.Interface(
66
  fn=text_to_speech,
67
  inputs=[
68
- gr.Textbox(label="Enter Turkish text to convert to speech"),
69
- gr.Audio(label="Upload a short audio sample of the target speaker (optional)", type="filepath")
70
  ],
71
  outputs=gr.Audio(label="Generated Speech"),
72
  title="Turkish SpeechT5 Text-to-Speech Demo with Optional Custom Voice",
 
48
  def text_to_speech(text, audio_file=None):
49
  inputs = processor(text=text, return_tensors="pt").to(device)
50
 
51
+ speaker_embeddings = default_embedding
 
 
 
 
 
 
 
 
52
 
53
  speech = model.generate_speech(inputs["input_ids"], speaker_embeddings.unsqueeze(0), vocoder=vocoder)
54
  sf.write("output.wav", speech.cpu().numpy(), samplerate=16000)
 
57
  iface = gr.Interface(
58
  fn=text_to_speech,
59
  inputs=[
60
+ gr.Textbox(label="Enter Turkish text to convert to speech")
 
61
  ],
62
  outputs=gr.Audio(label="Generated Speech"),
63
  title="Turkish SpeechT5 Text-to-Speech Demo with Optional Custom Voice",