bcci committed (verified)
Commit 64b0296 · 1 Parent(s): bc93f92

Update app.py

Files changed (1)
  1. app.py +46 -46
app.py CHANGED
@@ -139,52 +139,52 @@ def tts_streaming(text: str, voice: str = "af_heart", speed: float = 1.0, format



-@app.get("/tts/full", summary="Full TTS")
-def tts_full(text: str, voice: str = "af_heart", speed: float = 1.0, format: str = "wav"):
-    """
-    Full TTS endpoint (no streaming). Synthesizes the entire text and returns
-    a complete WAV or Opus file.
-    """
-    # Use newline-based splitting. This is the *original* KPipeline,
-    # which is better for full synthesis. It's important to use
-    # the right pipeline for the right task.
-    from kokoro.pipeline import KPipeline  # Import here to avoid circular import
-    full_pipeline = KPipeline(lang_code="a")
-
-    results = list(full_pipeline(text, voice=voice, speed=speed, split_pattern=r"\n+"))
-    audio_segments = []
-    for result in results:
-        if result.audio is not None:
-            audio_np = result.audio.cpu().numpy()
-            if audio_np.ndim > 1:
-                audio_np = audio_np.flatten()
-            audio_segments.append(audio_np)
-
-    if not audio_segments:
-        raise HTTPException(status_code=500, detail="No audio generated.")
-
-    # Concatenate all audio segments.
-    full_audio = np.concatenate(audio_segments)
-
-    # Write the concatenated audio to an in-memory WAV or Opus file.
-    sample_rate = 24000
-    num_channels = 1
-    sample_width = 2  # 16-bit PCM -> 2 bytes per sample
-    if format.lower() == "wav":
-        wav_io = io.BytesIO()
-        with wave.open(wav_io, "wb") as wav_file:
-            wav_file.setnchannels(num_channels)
-            wav_file.setsampwidth(sample_width)
-            wav_file.setframerate(sample_rate)
-            full_audio_int16 = np.int16(full_audio * 32767)
-            wav_file.writeframes(full_audio_int16.tobytes())
-        wav_io.seek(0)
-        return Response(content=wav_io.read(), media_type="audio/wav")
-    elif format.lower() == "opus":
-        opus_data = audio_tensor_to_opus_bytes(torch.from_numpy(full_audio), sample_rate=sample_rate)
-        return Response(content=opus_data, media_type="audio/opus")
-    else:
-        raise HTTPException(status_code=400, detail=f"Unsupported audio format: {format}")
+# @app.get("/tts/full", summary="Full TTS")
+# def tts_full(text: str, voice: str = "af_heart", speed: float = 1.0, format: str = "wav"):
+#     """
+#     Full TTS endpoint (no streaming). Synthesizes the entire text and returns
+#     a complete WAV or Opus file.
+#     """
+#     # Use newline-based splitting. This is the *original* KPipeline,
+#     # which is better for full synthesis. It's important to use
+#     # the right pipeline for the right task.
+#     from kokoro.pipeline import KPipeline  # Import here to avoid circular import
+#     full_pipeline = KPipeline(lang_code="a")
+
+#     results = list(full_pipeline(text, voice=voice, speed=speed, split_pattern=r"\n+"))
+#     audio_segments = []
+#     for result in results:
+#         if result.audio is not None:
+#             audio_np = result.audio.cpu().numpy()
+#             if audio_np.ndim > 1:
+#                 audio_np = audio_np.flatten()
+#             audio_segments.append(audio_np)
+
+#     if not audio_segments:
+#         raise HTTPException(status_code=500, detail="No audio generated.")
+
+#     # Concatenate all audio segments.
+#     full_audio = np.concatenate(audio_segments)
+
+#     # Write the concatenated audio to an in-memory WAV or Opus file.
+#     sample_rate = 24000
+#     num_channels = 1
+#     sample_width = 2  # 16-bit PCM -> 2 bytes per sample
+#     if format.lower() == "wav":
+#         wav_io = io.BytesIO()
+#         with wave.open(wav_io, "wb") as wav_file:
+#             wav_file.setnchannels(num_channels)
+#             wav_file.setsampwidth(sample_width)
+#             wav_file.setframerate(sample_rate)
+#             full_audio_int16 = np.int16(full_audio * 32767)
+#             wav_file.writeframes(full_audio_int16.tobytes())
+#         wav_io.seek(0)
+#         return Response(content=wav_io.read(), media_type="audio/wav")
+#     elif format.lower() == "opus":
+#         opus_data = audio_tensor_to_opus_bytes(torch.from_numpy(full_audio), sample_rate=sample_rate)
+#         return Response(content=opus_data, media_type="audio/opus")
+#     else:
+#         raise HTTPException(status_code=400, detail=f"Unsupported audio format: {format}")



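Note: before this commit, the /tts/full endpoint removed above could be exercised with a plain HTTP GET against the running app. A minimal client sketch for reference; the base URL, port, and output filename below are illustrative assumptions, not values taken from the repository:

# Minimal client sketch for the /tts/full endpoint as it existed before this commit.
# Assumes the FastAPI app is reachable at http://localhost:7860 (an assumed host/port);
# adjust the URL, text, and output path for your setup.
import requests

params = {
    "text": "Hello from Kokoro.",  # text to synthesize (example value)
    "voice": "af_heart",           # defaults taken from the endpoint signature
    "speed": 1.0,
    "format": "wav",               # "wav" or "opus" were the supported formats
}
resp = requests.get("http://localhost:7860/tts/full", params=params, timeout=120)
resp.raise_for_status()

# The endpoint returned the complete, non-streamed audio file in the response body.
with open("tts_full_output.wav", "wb") as f:
    f.write(resp.content)

With format="opus", the response body was instead Opus audio (media type audio/opus) produced by audio_tensor_to_opus_bytes.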