Update app.py
app.py CHANGED

@@ -479,21 +479,7 @@ base_audio_drive = "/data/audio"
 # return stream, full_text, result
 
 
-# def transcribe_function(stream, new_chunk):
-#     sr, y = new_chunk[0], new_chunk[1]
-#     y = y.astype(np.float32) / np.max(np.abs(y))
-#     if stream is not None:
-#         stream = np.concatenate([stream, y])
-#     else:
-#         stream = y
-#     result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
-#     full_text = result.get("text", "")
-#     return stream, full_text  # Return the transcribed text
-
 def transcribe_function(stream, new_chunk):
-    # Reset conversational memory
-    conversational_memory.clear()
-
     sr, y = new_chunk[0], new_chunk[1]
     y = y.astype(np.float32) / np.max(np.abs(y))
     if stream is not None:
@@ -502,7 +488,9 @@ def transcribe_function(stream, new_chunk):
         stream = y
     result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
     full_text = result.get("text", "")
-    return stream, full_text
+    return stream, full_text  # Return the transcribed text
+
+
 
 def update_map_with_response(history):
     if not history:
@@ -681,6 +669,10 @@ def update_images():
     image_3 = generate_image(hardcoded_prompt_3)
     return image_1, image_2, image_3
 
+def clear_state_and_textbox():
+    conversational_memory.clear()
+    return "", None
+
 with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
     with gr.Row():
         with gr.Column():
@@ -709,8 +701,8 @@ with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
         clear_button.click(fn=clear_textbox, inputs=None, outputs=chat_input)
 
         audio_input = gr.Audio(sources=["microphone"], streaming=True, type='numpy')
-        audio_input.stream(transcribe_function, inputs=[state, audio_input], outputs=[state, chat_input], api_name="voice_query_to_text")
-
+        # audio_input.stream(transcribe_function, inputs=[state, audio_input], outputs=[state, chat_input], api_name="voice_query_to_text")
+        audio_input.start_streaming_fn(clear_state_and_textbox).stream(transcribe_function, inputs=[state, audio_input], outputs=[state, chat_input], api_name="voice_query_to_text")
 
 
 
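A note on the new wiring: the commit moves the conversational-memory reset out of transcribe_function (where it ran on every streamed chunk) into a dedicated clear_state_and_textbox callback that should fire once when recording starts. However, as far as I can tell, start_streaming_fn is not a documented event listener on gr.Audio, and Gradio event listeners return a dependency object rather than the component, so chaining .stream(...) onto the result may not work either. In Gradio 4.x (the sources=["microphone"] signature used here), the documented event for "microphone started recording" is start_recording. The following is a minimal sketch of the same intent under that assumption, not the committed code; transcribe_function and clear_state_and_textbox are the functions defined in the diff above, and since clear_state_and_textbox returns ("", None), its outputs are wired to the textbox and the stream state:

import gradio as gr

with gr.Blocks() as demo:
    state = gr.State()         # rolling audio buffer consumed by transcribe_function
    chat_input = gr.Textbox()  # transcription target, as in the diff
    audio_input = gr.Audio(sources=["microphone"], streaming=True, type='numpy')

    # When the microphone starts recording, clear the textbox ("") and reset
    # the audio buffer (None) via clear_state_and_textbox from the diff above.
    audio_input.start_recording(clear_state_and_textbox, inputs=None,
                                outputs=[chat_input, state])

    # Register the streaming transcription separately, as in the original line.
    audio_input.stream(transcribe_function, inputs=[state, audio_input],
                       outputs=[state, chat_input],
                       api_name="voice_query_to_text")

Registering the two listeners separately also makes each step's inputs and outputs explicit, which the chained form in the commit leaves unspecified for the reset step.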