kokoro-onnx-api-test

Running

bcci committed on Feb 8

Commit

e334297

verified ·

1 Parent(s): b452b41

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -249,20 +249,14 @@ def tts_full(text: str, voice: str = "af_heart", speed: float = 1.0, format: str
         speed=np.ones(1, dtype=np.float32),
     ))[0]
-    print(audio)
     # Write the concatenated audio to an in-memory WAV or Opus file.
     sample_rate = 24000
-    # Normalize audio data to the range [-1.0, 1.0]
-    # audio_normalized = audio / np.max(np.abs(audio))
-    # Scale to 16-bit integer range
-    audio_scaled = np.int16(audio * 32767)
-    print(audio_scaled)
-    if np.any(audio_scaled < -32768) or np.any(audio_scaled > 32767):
-        raise ValueError("Scaled audio data is outside the valid range for 16-bit WAV files.")
     if format.lower() == "wav":
@@ -270,14 +264,14 @@ def tts_full(text: str, voice: str = "af_heart", speed: float = 1.0, format: str
         wav_io = io.BytesIO()
         # Write the audio data to the buffer in WAV format
-        write_wav(wav_io, sample_rate, audio_scaled)
         # Seek to the beginning of the buffer
         wav_io.seek(0)
         return Response(content=wav_io.read(), media_type="audio/wav")
     elif format.lower() == "opus":
-        opus_data = audio_tensor_to_opus_bytes(torch.from_numpy(audio_scaled), sample_rate=sample_rate)
         return Response(content=opus_data, media_type="audio/opus")
     else:
         raise HTTPException(status_code=400, detail=f"Unsupported audio format: {format}")

         speed=np.ones(1, dtype=np.float32),
     ))[0]
     # Write the concatenated audio to an in-memory WAV or Opus file.
     sample_rate = 24000
+    audio = np.array(audio, dtype=np.float32)  # Ensure it's float32 first
+    audio = (audio * 32767).astype(np.int16)   # Scale to int16 range
+    # Flatten the array if it's 2D
+    audio = audio.flatten()
     if format.lower() == "wav":
         wav_io = io.BytesIO()
         # Write the audio data to the buffer in WAV format
+        write_wav(wav_io, sample_rate, audio)
         # Seek to the beginning of the buffer
         wav_io.seek(0)
         return Response(content=wav_io.read(), media_type="audio/wav")
     elif format.lower() == "opus":
+        opus_data = audio_tensor_to_opus_bytes(torch.from_numpy(audio), sample_rate=sample_rate)
         return Response(content=opus_data, media_type="audio/opus")
     else:
         raise HTTPException(status_code=400, detail=f"Unsupported audio format: {format}")