Commit c2cf399
Parent: fe3096b
Update app.py

app.py CHANGED
@@ -575,20 +575,17 @@ def generate_speech_for_sentence(history, chatbot_role, sentence, return_as_byte
     reduced_noise = nr.reduce_noise(y=float_data, sr=24000,prop_decrease =0.8,n_fft=1024)
     wav_bytestream = (reduced_noise * 32767).astype(np.int16)
     wav_bytestream = wav_bytestream.tobytes()
-
+
+    # Directly encode the WAV bytestream to base64
+    base64_audio = base64.b64encode(wav_bytestream).decode('utf8')
+
     if audio_stream is not None:
-
-
-
-
-
-
-        f.setframerate(24000)
-        f.writeframes(wav_bytestream)
-
-        return (history , gr.Audio(value=audio_unique_filename, autoplay=True))
-    else:
-        return (history , gr.Audio(value=wav_bytestream, autoplay=True))
+        return (history, base64_audio)
+    else:
+        # Handle the case where the audio stream is None (e.g., silent response)
+        return (history, None)
+
+
 except RuntimeError as e:
     if "device-side assert" in str(e):
         # cannot do anything on cuda device side error, need tor estart
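Note that in the new path, wav_bytestream is raw int16 PCM at 24000 Hz: the wave-module calls that previously wrote a WAV header (f.setframerate, f.writeframes) are removed, so base64_audio encodes headerless samples rather than a complete .wav file. Below is a minimal sketch of how a consumer could wrap the decoded bytes back into a playable WAV container; it assumes mono audio and reuses the 24000 Hz rate and 16-bit width visible in the diff, and the helper name is illustrative, not part of app.py.

import base64
import io
import wave

def pcm_base64_to_wav_bytes(base64_audio: str, sample_rate: int = 24000, channels: int = 1) -> bytes:
    # Decode the base64 string back into the raw int16 PCM produced in app.py.
    pcm_bytes = base64.b64decode(base64_audio)

    # Wrap the raw samples in a WAV container so ordinary players can open them.
    buffer = io.BytesIO()
    with wave.open(buffer, "wb") as wav_file:
        wav_file.setnchannels(channels)      # mono is an assumption here
        wav_file.setsampwidth(2)             # 2 bytes per sample, matching .astype(np.int16)
        wav_file.setframerate(sample_rate)   # 24000 Hz, matching sr=24000 above
        wav_file.writeframes(pcm_bytes)
    return buffer.getvalue()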
@@ -634,14 +631,12 @@ def generate_story_and_speech(input_text, chatbot_role):
     history_tuples = [tuple(entry) for entry in last_history]

     synthesized_speech = generate_speech_for_sentence(history_tuples, chatbot_role, story_text, return_as_byte=True)
-
-
-
-
-
-
-        speech_audio_base64 = base64.b64encode(speech_audio_bytes).decode('utf8')
-        return {"text": story_text.strip(), "audio": speech_audio_base64}
+
+    if synthesized_speech:
+        # Retrieve the base64 audio string from the tuple
+        base64_audio = synthesized_speech[1]
+        return {"text": story_text.strip(), "audio": base64_audio}
+
     else:
         return {"text": "Failed to generate story (no synthesized speech)", "audio": None}

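With this change, generate_story_and_speech returns a dict holding the story text and the base64 audio string (or None when no speech was synthesized). A hedged usage sketch follows, assuming app.py's functions are in scope and reusing the pcm_base64_to_wav_bytes helper sketched above; the prompt text and the "Pirate" role are placeholder values, not taken from this commit.

# Hypothetical caller: the prompt and chatbot_role below are illustrative placeholders.
result = generate_story_and_speech("Tell me a short story.", "Pirate")

print(result["text"])

if result["audio"] is not None:
    # Decode the base64 PCM and save a playable file using the helper sketched above.
    with open("story.wav", "wb") as out_file:
        out_file.write(pcm_base64_to_wav_bytes(result["audio"]))
else:
    print("No audio was synthesized for this story.")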