Spaces:

Shamik
/

cascaded_speech_to_speech_translation

Runtime error

shamik committed on Feb 4, 2024

Commit

d33896e

1 Parent(s): 8949488

Modified the app.

Files changed (1) hide show

app.py CHANGED Viewed

@@ -31,11 +31,15 @@ def synthesise(text):
     speech = model.generate_speech(inputs["input_ids"].to(device), speaker_embeddings.to(device), vocoder=vocoder)
     return speech.cpu()
 def speech_to_speech_translation(audio):
     translated_text = translate(audio)
     synthesised_speech = synthesise(translated_text)
-    synthesised_speech = (synthesised_speech.numpy() * 32767).astype(np.int16)
     return 16000, synthesised_speech

     speech = model.generate_speech(inputs["input_ids"].to(device), speaker_embeddings.to(device), vocoder=vocoder)
     return speech.cpu()
+# Convert the output audio array to int16, which is the format Gradio expects.
+target_dtype = np.int16
+max_range = np.iinfo(target_dtype).max
 def speech_to_speech_translation(audio):
     translated_text = translate(audio)
     synthesised_speech = synthesise(translated_text)
+    # converting for gradio
+    synthesised_speech = (synthesised_speech.numpy() * max_range).astype(np.int16)
     return 16000, synthesised_speech