Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -456,26 +456,37 @@ pipe_asr = pipeline("automatic-speech-recognition", model=model, tokenizer=proce
|
|
456 |
|
457 |
base_audio_drive = "/data/audio"
|
458 |
|
459 |
-
def transcribe_function(stream, new_chunk):
|
460 |
-
|
461 |
-
|
462 |
-
|
463 |
-
|
464 |
-
|
465 |
|
466 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
467 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
468 |
if stream is not None:
|
469 |
stream = np.concatenate([stream, y])
|
470 |
else:
|
471 |
stream = y
|
472 |
-
|
473 |
result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
|
474 |
-
|
475 |
-
full_text
|
476 |
-
|
477 |
-
return stream, full_text, result
|
478 |
-
|
479 |
|
480 |
def update_map_with_response(history):
|
481 |
if not history:
|
@@ -681,17 +692,26 @@ with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
|
|
681 |
clear_button = gr.Button("Clear")
|
682 |
clear_button.click(fn=clear_textbox, inputs=None, outputs=chat_input)
|
683 |
|
684 |
-
# audio_input = gr.Audio(sources=["microphone"], streaming=True, type='numpy')
|
685 |
-
# audio_input.stream(transcribe_function, inputs=[state, audio_input], outputs=[state, chat_input], api_name="voice_query_to_text")
|
|
|
|
|
|
|
686 |
|
|
|
|
|
|
|
|
|
687 |
audio_input = gr.Audio(sources=["microphone"], streaming=False, type='numpy')
|
688 |
-
|
|
|
|
|
689 |
|
690 |
send_chunk_button = gr.Button("Send chunk")
|
691 |
-
send_chunk_button.click(fn=lambda
|
692 |
|
693 |
-
|
694 |
|
|
|
695 |
# with gr.Column():
|
696 |
# weather_output = gr.HTML(value=fetch_local_weather())
|
697 |
# news_output = gr.HTML(value=fetch_local_news())
|
|
|
456 |
|
457 |
base_audio_drive = "/data/audio"
|
458 |
|
459 |
+
# def transcribe_function(stream, new_chunk):
|
460 |
+
# try:
|
461 |
+
# sr, y = new_chunk[0], new_chunk[1]
|
462 |
+
# except TypeError:
|
463 |
+
# print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
|
464 |
+
# return stream, "", None
|
465 |
|
466 |
+
# y = y.astype(np.float32) / np.max(np.abs(y))
|
467 |
+
|
468 |
+
# if stream is not None:
|
469 |
+
# stream = np.concatenate([stream, y])
|
470 |
+
# else:
|
471 |
+
# stream = y
|
472 |
+
|
473 |
+
# result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
|
474 |
|
475 |
+
# full_text = result.get("text","")
|
476 |
+
|
477 |
+
# return stream, full_text, result
|
478 |
+
|
479 |
+
|
480 |
+
def transcribe_function(stream, new_chunk):
    """Append a recorded audio chunk to the running buffer and transcribe it.

    Args:
        stream: Audio accumulated by earlier calls as a NumPy array, or
            ``None`` when nothing has been recorded yet. Any value that is
            not a NumPy array (for example an empty-string initial state) is
            treated as "no audio yet" instead of being concatenated.
        new_chunk: ``(sampling_rate, samples)`` pair, or ``None`` when the
            caller has no recording to hand over.

    Returns:
        A ``(stream, full_text)`` tuple: the updated audio buffer and the
        ``"text"`` entry of the ``pipe_asr`` result for the whole buffer.
        When there is nothing new to transcribe, the incoming ``stream`` is
        returned unchanged together with an empty string.
    """
    # No recording at all (e.g. the audio widget was cleared): nothing to do.
    if new_chunk is None:
        return stream, ""

    sr, y = new_chunk[0], new_chunk[1]
    y = np.asarray(y)
    # np.max raises on an empty array, so bail out before normalising.
    if y.size == 0:
        return stream, ""

    # Down-mix multi-channel input to mono so chunks concatenate as 1-D audio.
    if y.ndim > 1:
        y = y.mean(axis=1)

    # Convert to float *before* taking abs(): abs() on int16 overflows at -32768.
    y = y.astype(np.float32)
    peak = np.max(np.abs(y))
    # Pure silence has a peak of 0; dividing would fill the buffer with NaN/inf.
    if peak > 0:
        y = y / peak

    if isinstance(stream, np.ndarray):
        stream = np.concatenate([stream, y])
    else:
        stream = y

    result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
    full_text = result.get("text", "")
    return stream, full_text  # Return the audio buffer and the transcribed text
|
|
|
|
|
|
|
490 |
|
491 |
def update_map_with_response(history):
|
492 |
if not history:
|
|
|
692 |
clear_button = gr.Button("Clear")
|
693 |
clear_button.click(fn=clear_textbox, inputs=None, outputs=chat_input)
|
694 |
|
695 |
+
#P1 audio_input = gr.Audio(sources=["microphone"], streaming=True, type='numpy')
|
696 |
+
#P1 audio_input.stream(transcribe_function, inputs=[state, audio_input], outputs=[state, chat_input], api_name="voice_query_to_text")
|
697 |
+
|
698 |
+
#p2 audio_input = gr.Audio(sources=["microphone"], streaming=False, type='numpy')
|
699 |
+
#p2 audio_input.change(fn=transcribe_function, inputs=[state, audio_input], outputs=[state, chat_input], api_name="voice_query_to_text")
|
700 |
|
701 |
+
#p2 send_chunk_button = gr.Button("Send chunk")
|
702 |
+
#p2 send_chunk_button.click(fn=lambda state: state[1], inputs=[state], outputs=chat_input)
|
703 |
+
|
704 |
+
|
705 |
# Microphone recorder; with streaming=False each finished recording is
# delivered in one piece as a numpy payload.
audio_input = gr.Audio(sources=["microphone"], streaming=False, type='numpy')

# Two separate states: transcribe_function returns (audio buffer, text), and
# routing both into a single state lets one overwrite the other. The audio
# buffer starts as None because transcribe_function treats None as "no audio yet".
audio_stream_state = gr.State(value=None)  # Accumulated audio samples
transcription_state = gr.State(value="")  # Initialize a state to hold the transcribed text

audio_input.change(
    fn=transcribe_function,
    inputs=[audio_stream_state, audio_input],
    outputs=[audio_stream_state, transcription_state],
)

# Copy the latest transcription into the chat textbox on demand.
send_chunk_button = gr.Button("Send chunk")
send_chunk_button.click(fn=lambda text: text, inputs=[transcription_state], outputs=chat_input)
|
712 |
|
|
|
713 |
|
714 |
+
|
715 |
# with gr.Column():
|
716 |
# weather_output = gr.HTML(value=fetch_local_weather())
|
717 |
# news_output = gr.HTML(value=fetch_local_news())
|