Commit c2cf399 · Parent(s): fe3096b
Update app.py

app.py CHANGED
@@ -575,20 +575,17 @@ def generate_speech_for_sentence(history, chatbot_role, sentence, return_as_byte
         reduced_noise = nr.reduce_noise(y=float_data, sr=24000,prop_decrease =0.8,n_fft=1024)
         wav_bytestream = (reduced_noise * 32767).astype(np.int16)
         wav_bytestream = wav_bytestream.tobytes()
-
+
+        # Directly encode the WAV bytestream to base64
+        base64_audio = base64.b64encode(wav_bytestream).decode('utf8')
+
         if audio_stream is not None:
-
-
-
-
-
-
-                f.setframerate(24000)
-                f.writeframes(wav_bytestream)
-
-            return (history , gr.Audio(value=audio_unique_filename, autoplay=True))
-        else:
-            return (history , gr.Audio(value=wav_bytestream, autoplay=True))
+            return (history, base64_audio)
+        else:
+            # Handle the case where the audio stream is None (e.g., silent response)
+            return (history, None)
+
+
     except RuntimeError as e:
         if "device-side assert" in str(e):
             # cannot do anything on cuda device side error, need tor estart
@@ -634,14 +631,12 @@ def generate_story_and_speech(input_text, chatbot_role):
     history_tuples = [tuple(entry) for entry in last_history]

     synthesized_speech = generate_speech_for_sentence(history_tuples, chatbot_role, story_text, return_as_byte=True)
-
-
-
-
-
-
-        speech_audio_base64 = base64.b64encode(speech_audio_bytes).decode('utf8')
-        return {"text": story_text.strip(), "audio": speech_audio_base64}
+
+    if synthesized_speech:
+        # Retrieve the base64 audio string from the tuple
+        base64_audio = synthesized_speech[1]
+        return {"text": story_text.strip(), "audio": base64_audio}
+
     else:
         return {"text": "Failed to generate story (no synthesized speech)", "audio": None}

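Note for callers of generate_story_and_speech: after this commit the "audio" field carries a base64 string of the raw samples instead of a Gradio audio component. Because wav_bytestream is built straight from the int16 array and the wave-writer code was removed, the decoded bytes are headerless PCM at 24 kHz. Below is a minimal client-side sketch of how one might decode that field and wrap it in a playable WAV file; the helper name save_story_audio, the output path, and the mono/16-bit assumptions are illustrative and not part of this commit.

import base64
import wave

def save_story_audio(response: dict, out_path: str = "story.wav") -> None:
    # Decode the base64 "audio" field returned by generate_story_and_speech.
    # Assumed sample format (not spelled out in the diff): mono, 16-bit signed PCM, 24 kHz.
    if not response.get("audio"):
        raise ValueError("response contains no audio")
    pcm_bytes = base64.b64decode(response["audio"])

    # Wrap the raw samples in a WAV container so ordinary players can open them.
    with wave.open(out_path, "wb") as f:
        f.setnchannels(1)      # assumed mono
        f.setsampwidth(2)      # int16 samples, matching the encoder side
        f.setframerate(24000)  # matches sr=24000 used in nr.reduce_noise
        f.writeframes(pcm_bytes)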