Commit
·
0bd106b
1
Parent(s):
4156639
Update app.py
Browse files
app.py
CHANGED
@@ -592,32 +592,31 @@ def generate_speech_for_sentence(history, chatbot_role, sentence, return_as_byte
|
|
592 |
# XTTS is actually using streaming response but we are playing audio by sentence
|
593 |
# If you want direct XTTS voice streaming (send each chunk to voice ) you may set DIRECT_STREAM=1 environment variable
|
594 |
if audio_stream is not None:
|
595 |
-
frame_length = 0
|
596 |
for chunk in audio_stream:
|
597 |
try:
|
598 |
wav_bytestream += chunk
|
599 |
-
frame_length += len(chunk)
|
600 |
except:
|
601 |
# hack to continue on playing. sometimes last chunk is empty , will be fixed on next TTS
|
602 |
continue
|
603 |
|
604 |
-
|
605 |
-
|
606 |
-
|
607 |
-
|
608 |
-
|
609 |
-
|
610 |
-
|
611 |
-
|
612 |
-
|
613 |
-
|
614 |
-
|
615 |
-
|
616 |
-
|
617 |
-
|
618 |
-
|
619 |
-
|
620 |
-
return (history, None)
|
621 |
|
622 |
|
623 |
except RuntimeError as e:
|
|
|
592 |
# XTTS is actually using streaming response but we are playing audio by sentence
|
593 |
# If you want direct XTTS voice streaming (send each chunk to voice ) you may set DIRECT_STREAM=1 environment variable
|
594 |
if audio_stream is not None:
|
595 |
+
# frame_length = 0
|
596 |
for chunk in audio_stream:
|
597 |
try:
|
598 |
wav_bytestream += chunk
|
599 |
+
# frame_length += len(chunk)
|
600 |
except:
|
601 |
# hack to continue on playing. sometimes last chunk is empty , will be fixed on next TTS
|
602 |
continue
|
603 |
|
604 |
+
# Filter output for better voice
|
605 |
+
filter_output=True
|
606 |
+
if filter_output:
|
607 |
+
data_s16 = np.frombuffer(wav_bytestream, dtype=np.int16, count=len(wav_bytestream)//2, offset=0)
|
608 |
+
float_data = data_s16 * 0.5**15
|
609 |
+
reduced_noise = nr.reduce_noise(y=float_data, sr=24000,prop_decrease =0.8,n_fft=1024)
|
610 |
+
wav_bytestream = (reduced_noise * 32767).astype(np.int16)
|
611 |
+
wav_bytestream = wav_bytestream.tobytes(
|
612 |
+
# Directly encode the WAV bytestream to base64
|
613 |
+
base64_audio = base64.b64encode(pcm_to_wav(wav_bytestream)).decode('utf8')
|
614 |
+
|
615 |
+
if audio_stream is not None:
|
616 |
+
return (history, base64_audio)
|
617 |
+
else:
|
618 |
+
# Handle the case where the audio stream is None (e.g., silent response)
|
619 |
+
return (history, None)
|
|
|
620 |
|
621 |
|
622 |
except RuntimeError as e:
|