bcci committed (verified)
Commit 64b0296 · 1 Parent(s): bc93f92

Update app.py

Files changed (1)
  1. app.py +46 -46
app.py CHANGED
@@ -139,52 +139,52 @@ def tts_streaming(text: str, voice: str = "af_heart", speed: float = 1.0, format



-@app.get("/tts/full", summary="Full TTS")
-def tts_full(text: str, voice: str = "af_heart", speed: float = 1.0, format: str = "wav"):
-    """
-    Full TTS endpoint (no streaming). Synthesizes the entire text and returns
-    a complete WAV or Opus file.
-    """
-    # Use newline-based splitting. This is the *original* KPipeline,
-    # which is better for full synthesis. It's important to use
-    # the right pipeline for the right task.
-    from kokoro.pipeline import KPipeline  # Import here to avoid circular import
-    full_pipeline = KPipeline(lang_code="a")
-
-    results = list(full_pipeline(text, voice=voice, speed=speed, split_pattern=r"\n+"))
-    audio_segments = []
-    for result in results:
-        if result.audio is not None:
-            audio_np = result.audio.cpu().numpy()
-            if audio_np.ndim > 1:
-                audio_np = audio_np.flatten()
-            audio_segments.append(audio_np)
-
-    if not audio_segments:
-        raise HTTPException(status_code=500, detail="No audio generated.")
-
-    # Concatenate all audio segments.
-    full_audio = np.concatenate(audio_segments)
-
-    # Write the concatenated audio to an in-memory WAV or Opus file.
-    sample_rate = 24000
-    num_channels = 1
-    sample_width = 2  # 16-bit PCM -> 2 bytes per sample
-    if format.lower() == "wav":
-        wav_io = io.BytesIO()
-        with wave.open(wav_io, "wb") as wav_file:
-            wav_file.setnchannels(num_channels)
-            wav_file.setsampwidth(sample_width)
-            wav_file.setframerate(sample_rate)
-            full_audio_int16 = np.int16(full_audio * 32767)
-            wav_file.writeframes(full_audio_int16.tobytes())
-        wav_io.seek(0)
-        return Response(content=wav_io.read(), media_type="audio/wav")
-    elif format.lower() == "opus":
-        opus_data = audio_tensor_to_opus_bytes(torch.from_numpy(full_audio), sample_rate=sample_rate)
-        return Response(content=opus_data, media_type="audio/opus")
-    else:
-        raise HTTPException(status_code=400, detail=f"Unsupported audio format: {format}")
+# @app.get("/tts/full", summary="Full TTS")
+# def tts_full(text: str, voice: str = "af_heart", speed: float = 1.0, format: str = "wav"):
+#     """
+#     Full TTS endpoint (no streaming). Synthesizes the entire text and returns
+#     a complete WAV or Opus file.
+#     """
+#     # Use newline-based splitting. This is the *original* KPipeline,
+#     # which is better for full synthesis. It's important to use
+#     # the right pipeline for the right task.
+#     from kokoro.pipeline import KPipeline  # Import here to avoid circular import
+#     full_pipeline = KPipeline(lang_code="a")
+
+#     results = list(full_pipeline(text, voice=voice, speed=speed, split_pattern=r"\n+"))
+#     audio_segments = []
+#     for result in results:
+#         if result.audio is not None:
+#             audio_np = result.audio.cpu().numpy()
+#             if audio_np.ndim > 1:
+#                 audio_np = audio_np.flatten()
+#             audio_segments.append(audio_np)
+
+#     if not audio_segments:
+#         raise HTTPException(status_code=500, detail="No audio generated.")
+
+#     # Concatenate all audio segments.
+#     full_audio = np.concatenate(audio_segments)
+
+#     # Write the concatenated audio to an in-memory WAV or Opus file.
+#     sample_rate = 24000
+#     num_channels = 1
+#     sample_width = 2  # 16-bit PCM -> 2 bytes per sample
+#     if format.lower() == "wav":
+#         wav_io = io.BytesIO()
+#         with wave.open(wav_io, "wb") as wav_file:
+#             wav_file.setnchannels(num_channels)
+#             wav_file.setsampwidth(sample_width)
+#             wav_file.setframerate(sample_rate)
+#             full_audio_int16 = np.int16(full_audio * 32767)
+#             wav_file.writeframes(full_audio_int16.tobytes())
+#         wav_io.seek(0)
+#         return Response(content=wav_io.read(), media_type="audio/wav")
+#     elif format.lower() == "opus":
+#         opus_data = audio_tensor_to_opus_bytes(torch.from_numpy(full_audio), sample_rate=sample_rate)
+#         return Response(content=opus_data, media_type="audio/opus")
+#     else:
+#         raise HTTPException(status_code=400, detail=f"Unsupported audio format: {format}")



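Note: before this commit, the /tts/full endpoint removed above could be exercised with a plain HTTP GET against the running app. A minimal client sketch for reference; the base URL, port, and output filename below are illustrative assumptions, not values taken from the repository:

# Minimal client sketch for the /tts/full endpoint as it existed before this commit.
# Assumes the FastAPI app is reachable at http://localhost:7860 (an assumed host/port);
# adjust the URL, text, and output path for your setup.
import requests

params = {
    "text": "Hello from Kokoro.",  # text to synthesize (example value)
    "voice": "af_heart",           # defaults taken from the endpoint signature
    "speed": 1.0,
    "format": "wav",               # "wav" or "opus" were the supported formats
}
resp = requests.get("http://localhost:7860/tts/full", params=params, timeout=120)
resp.raise_for_status()

# The endpoint returned the complete, non-streamed audio file in the response body.
with open("tts_full_output.wav", "wb") as f:
    f.write(resp.content)

With format="opus", the response body was instead Opus audio (media type audio/opus) produced by audio_tensor_to_opus_bytes.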