emirhanbilgic committed on
Commit
7ca15a8
·
verified ·
1 Parent(s): 6ce3012

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -3
app.py CHANGED
@@ -111,7 +111,7 @@ def normalize_text(text):
111
 
112
  return text
113
 
114
- @spaces.GPU(duration = 60)
115
  def text_to_speech(text, audio_file=None):
116
  # Normalize the input text
117
  normalized_text = normalize_text(text)
@@ -120,9 +120,18 @@ def text_to_speech(text, audio_file=None):
120
 
121
  speaker_embeddings = default_embedding
122
 
 
123
  speech = model.generate_speech(inputs["input_ids"], speaker_embeddings.unsqueeze(0), vocoder=vocoder)
124
- sf.write("output.wav", speech.cpu().numpy(), samplerate=16000)
125
- return "output.wav", normalized_text
 
 
 
 
 
 
 
 
126
 
127
  iface = gr.Interface(
128
  fn=text_to_speech,
 
111
 
112
  return text
113
 
114
+ @spaces.GPU(duration=60)
115
  def text_to_speech(text, audio_file=None):
116
  # Normalize the input text
117
  normalized_text = normalize_text(text)
 
120
 
121
  speaker_embeddings = default_embedding
122
 
123
+ # Generate speech
124
  speech = model.generate_speech(inputs["input_ids"], speaker_embeddings.unsqueeze(0), vocoder=vocoder)
125
+
126
+ # Convert the generated speech to numpy array format
127
+ speech_np = speech.cpu().numpy()
128
+
129
+ # Write the output to a temporary file
130
+ output_file = "output.wav"
131
+ sf.write(output_file, speech_np, samplerate=16000)
132
+
133
+ # Return the numpy array and the sample rate
134
+ return speech_np, 16000
135
 
136
  iface = gr.Interface(
137
  fn=text_to_speech,