Commit
·
8df21b1
1
Parent(s):
b967e55
Update app.py
Browse files
app.py
CHANGED
@@ -543,7 +543,7 @@ def generate_speech_from_history(history, chatbot_role, sentence):
|
|
543 |
# sentence = sentence[:-1] + " " + sentence[-1]
|
544 |
|
545 |
# regex does the job well
|
546 |
-
sentence = re.sub("([^\x00-\x7F]|\w)(
|
547 |
|
548 |
print("Sentence for speech:", sentence)
|
549 |
|
@@ -577,7 +577,8 @@ def generate_speech_from_history(history, chatbot_role, sentence):
|
|
577 |
)
|
578 |
else:
|
579 |
# likely got a ' or " or some other text without alphanumeric in it
|
580 |
-
audio_stream = None
|
|
|
581 |
|
582 |
# XTTS is actually using streaming response but we are playing audio by sentence
|
583 |
# If you want direct XTTS voice streaming (send each chunk to voice ) you may set DIRECT_STREAM=1 environment variable
|
@@ -595,13 +596,16 @@ def generate_speech_from_history(history, chatbot_role, sentence):
|
|
595 |
continue
|
596 |
|
597 |
# Filter output for better voice
|
598 |
-
filter_output=
|
599 |
if filter_output:
|
600 |
-
|
601 |
-
|
602 |
-
|
603 |
-
|
604 |
-
|
|
|
|
|
|
|
605 |
|
606 |
# Directly encode the WAV bytestream to base64
|
607 |
base64_audio = base64.b64encode(pcm_to_wav(sentence_wav_bytestream)).decode('utf8')
|
|
|
543 |
# sentence = sentence[:-1] + " " + sentence[-1]
|
544 |
|
545 |
# regex does the job well
|
546 |
+
sentence = re.sub("([^\x00-\x7F]|\w)([\.。?!]+)",r"\1 \2",sentence)
|
547 |
|
548 |
print("Sentence for speech:", sentence)
|
549 |
|
|
|
577 |
)
|
578 |
else:
|
579 |
# likely got a ' or " or some other text without alphanumeric in it
|
580 |
+
audio_stream = None
|
581 |
+
continue
|
582 |
|
583 |
# XTTS is actually using streaming response but we are playing audio by sentence
|
584 |
# If you want direct XTTS voice streaming (send each chunk to voice ) you may set DIRECT_STREAM=1 environment variable
|
|
|
596 |
continue
|
597 |
|
598 |
# Filter output for better voice
|
599 |
+
filter_output=True
|
600 |
if filter_output:
|
601 |
+
try:
|
602 |
+
data_s16 = np.frombuffer(sentence_wav_bytestream, dtype=np.int16, count=len(sentence_wav_bytestream)//2, offset=0)
|
603 |
+
float_data = data_s16 * 0.5**15
|
604 |
+
reduced_noise = nr.reduce_noise(y=float_data, sr=24000,prop_decrease =0.8,n_fft=1024)
|
605 |
+
sentence_wav_bytestream = (reduced_noise * 32767).astype(np.int16)
|
606 |
+
sentence_wav_bytestream = sentence_wav_bytestream.tobytes()
|
607 |
+
except:
|
608 |
+
print("failed to remove noise")
|
609 |
|
610 |
# Directly encode the WAV bytestream to base64
|
611 |
base64_audio = base64.b64encode(pcm_to_wav(sentence_wav_bytestream)).decode('utf8')
|