Update app.py
app.py CHANGED

@@ -479,21 +479,7 @@ base_audio_drive = "/data/audio"
 # return stream, full_text, result
 
 
-# def transcribe_function(stream, new_chunk):
-#     sr, y = new_chunk[0], new_chunk[1]
-#     y = y.astype(np.float32) / np.max(np.abs(y))
-#     if stream is not None:
-#         stream = np.concatenate([stream, y])
-#     else:
-#         stream = y
-#     result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
-#     full_text = result.get("text", "")
-#     return stream, full_text  # Return the transcribed text
-
 def transcribe_function(stream, new_chunk):
-    # Reset conversational memory
-    conversational_memory.clear()
-
     sr, y = new_chunk[0], new_chunk[1]
     y = y.astype(np.float32) / np.max(np.abs(y))
     if stream is not None:
@@ -502,7 +488,9 @@ def transcribe_function(stream, new_chunk):
         stream = y
     result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
     full_text = result.get("text", "")
-    return stream, full_text
+    return stream, full_text  # Return the transcribed text
+
+
 
 def update_map_with_response(history):
     if not history:
@@ -681,6 +669,10 @@ def update_images():
     image_3 = generate_image(hardcoded_prompt_3)
     return image_1, image_2, image_3
 
+def clear_state_and_textbox():
+    conversational_memory.clear()
+    return "", None
+
 with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
     with gr.Row():
         with gr.Column():
@@ -709,8 +701,8 @@ with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
         clear_button.click(fn=clear_textbox, inputs=None, outputs=chat_input)
 
         audio_input = gr.Audio(sources=["microphone"], streaming=True, type='numpy')
-        audio_input.stream(transcribe_function, inputs=[state, audio_input], outputs=[state, chat_input], api_name="voice_query_to_text")
-
+        # audio_input.stream(transcribe_function, inputs=[state, audio_input], outputs=[state, chat_input], api_name="voice_query_to_text")
+        audio_input.start_streaming_fn(clear_state_and_textbox).stream(transcribe_function, inputs=[state, audio_input], outputs=[state, chat_input], api_name="voice_query_to_text")
 
 
 
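A note on the new wiring: the commit moves the conversational-memory reset out of transcribe_function (where it ran on every streamed chunk) into a dedicated clear_state_and_textbox callback that should fire once when recording starts. However, as far as I can tell, start_streaming_fn is not a documented event listener on gr.Audio, and Gradio event listeners return a dependency object rather than the component, so chaining .stream(...) onto the result may not work either. In Gradio 4.x (the sources=["microphone"] signature used here), the documented event for "microphone started recording" is start_recording. The following is a minimal sketch of the same intent under that assumption, not the committed code; transcribe_function and clear_state_and_textbox are the functions defined in the diff above, and since clear_state_and_textbox returns ("", None), its outputs are wired to the textbox and the stream state:

import gradio as gr

with gr.Blocks() as demo:
    state = gr.State()         # rolling audio buffer consumed by transcribe_function
    chat_input = gr.Textbox()  # transcription target, as in the diff
    audio_input = gr.Audio(sources=["microphone"], streaming=True, type='numpy')

    # When the microphone starts recording, clear the textbox ("") and reset
    # the audio buffer (None) via clear_state_and_textbox from the diff above.
    audio_input.start_recording(clear_state_and_textbox, inputs=None,
                                outputs=[chat_input, state])

    # Register the streaming transcription separately, as in the original line.
    audio_input.stream(transcribe_function, inputs=[state, audio_input],
                       outputs=[state, chat_input],
                       api_name="voice_query_to_text")

Registering the two listeners separately also makes each step's inputs and outputs explicit, which the chained form in the commit leaves unspecified for the reset step.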