Spaces:

jbilcke-hf
/

ai-bedtime-story-server

Paused

App Files Files Community

jbilcke-hf HF Staff committed on Nov 21, 2023

Commit

8df21b1

1 Parent(s): b967e55

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -8

app.py CHANGED Viewed

@@ -543,7 +543,7 @@ def generate_speech_from_history(history, chatbot_role, sentence):
     #    sentence = sentence[:-1] + " " + sentence[-1]
     # regex does the job well
-    sentence = re.sub("([^\x00-\x7F]|\w)(\.|\。|\?|\!)",r"\1 \2\2",sentence)
     print("Sentence for speech:", sentence)
@@ -577,7 +577,8 @@ def generate_speech_from_history(history, chatbot_role, sentence):
                     )
             else:
                 # likely got a ' or " or some other text without alphanumeric in it
-                audio_stream = None
             # XTTS is actually using streaming response but we are playing audio by sentence
             # If you want direct XTTS voice streaming (send each chunk to voice ) you may set DIRECT_STREAM=1 environment variable
@@ -595,13 +596,16 @@ def generate_speech_from_history(history, chatbot_role, sentence):
                         continue
                 # Filter output for better voice
-                filter_output=False
                 if filter_output:
-                    data_s16 = np.frombuffer(sentence_wav_bytestream, dtype=np.int16, count=len(sentence_wav_bytestream)//2, offset=0)
-                    float_data = data_s16 * 0.5**15
-                    reduced_noise = nr.reduce_noise(y=float_data, sr=24000,prop_decrease =0.8,n_fft=1024)
-                    sentence_wav_bytestream = (reduced_noise * 32767).astype(np.int16)
-                    sentence_wav_bytestream = sentence_wav_bytestream.tobytes()
                 # Directly encode the WAV bytestream to base64
                 base64_audio = base64.b64encode(pcm_to_wav(sentence_wav_bytestream)).decode('utf8')

     #    sentence = sentence[:-1] + " " + sentence[-1]
     # regex does the job well
+    sentence = re.sub("([^\x00-\x7F]|\w)([\.。?!]+)",r"\1 \2",sentence)
     print("Sentence for speech:", sentence)
                     )
             else:
                 # likely got a ' or " or some other text without alphanumeric in it
+                audio_stream = None
+                continue
             # XTTS is actually using streaming response but we are playing audio by sentence
             # If you want direct XTTS voice streaming (send each chunk to voice ) you may set DIRECT_STREAM=1 environment variable
                         continue
                 # Filter output for better voice
+                filter_output=True
                 if filter_output:
+                    try:
+                        data_s16 = np.frombuffer(sentence_wav_bytestream, dtype=np.int16, count=len(sentence_wav_bytestream)//2, offset=0)
+                        float_data = data_s16 * 0.5**15
+                        reduced_noise = nr.reduce_noise(y=float_data, sr=24000,prop_decrease =0.8,n_fft=1024)
+                        sentence_wav_bytestream = (reduced_noise * 32767).astype(np.int16)
+                        sentence_wav_bytestream = sentence_wav_bytestream.tobytes()
+                    except:
+                        print("failed to remove noise")
                 # Directly encode the WAV bytestream to base64
                 base64_audio = base64.b64encode(pcm_to_wav(sentence_wav_bytestream)).decode('utf8')