Pijush2023 committed
Commit a2f185e · verified · 1 Parent(s): f2545fc

Update app.py

Files changed (1): app.py +9 -17
app.py CHANGED
@@ -479,21 +479,7 @@ base_audio_drive = "/data/audio"
 # return stream, full_text, result
 
 
-# def transcribe_function(stream, new_chunk):
-# sr, y = new_chunk[0], new_chunk[1]
-# y = y.astype(np.float32) / np.max(np.abs(y))
-# if stream is not None:
-# stream = np.concatenate([stream, y])
-# else:
-# stream = y
-# result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
-# full_text = result.get("text", "")
-# return stream, full_text  # Return the transcribed text
-
 def transcribe_function(stream, new_chunk):
-    # Reset conversational memory
-    conversational_memory.clear()
-
     sr, y = new_chunk[0], new_chunk[1]
     y = y.astype(np.float32) / np.max(np.abs(y))
     if stream is not None:
@@ -502,7 +488,9 @@ def transcribe_function(stream, new_chunk):
         stream = y
     result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
     full_text = result.get("text", "")
-    return stream, full_text
+    return stream, full_text  # Return the transcribed text
+
+
 
 def update_map_with_response(history):
     if not history:
@@ -681,6 +669,10 @@ def update_images():
     image_3 = generate_image(hardcoded_prompt_3)
     return image_1, image_2, image_3
 
+def clear_state_and_textbox():
+    conversational_memory.clear()
+    return "", None
+
 with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
     with gr.Row():
         with gr.Column():
@@ -709,8 +701,8 @@ with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
         clear_button.click(fn=clear_textbox, inputs=None, outputs=chat_input)
 
         audio_input = gr.Audio(sources=["microphone"], streaming=True, type='numpy')
-        audio_input.stream(transcribe_function, inputs=[state, audio_input], outputs=[state, chat_input], api_name="voice_query_to_text")
-
+        # audio_input.stream(transcribe_function, inputs=[state, audio_input], outputs=[state, chat_input], api_name="voice_query_to_text")
+        audio_input.start_streaming_fn(clear_state_and_textbox).stream(transcribe_function, inputs=[state, audio_input], outputs=[state, chat_input], api_name="voice_query_to_text")
 
 
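
For reference, here is a minimal sketch of the two functions as they read after this commit, reconstructed from the hunks above. The indentation is inferred, and pipe_asr, conversational_memory, and the Gradio components (state, chat_input, audio_input) are assumed to be defined elsewhere in app.py.

import numpy as np

# pipe_asr and conversational_memory are assumed to come from earlier in app.py.

def transcribe_function(stream, new_chunk):
    # Accumulate streamed audio chunks and re-run ASR over the whole buffer so far.
    sr, y = new_chunk[0], new_chunk[1]
    y = y.astype(np.float32) / np.max(np.abs(y))
    if stream is not None:
        stream = np.concatenate([stream, y])
    else:
        stream = y
    result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
    full_text = result.get("text", "")
    return stream, full_text  # Return the transcribed text


def clear_state_and_textbox():
    # Clear conversational memory and return an empty textbox value plus a reset stream state.
    conversational_memory.clear()
    return "", None

With this change the memory reset no longer runs inside transcribe_function, which fires on every streamed audio chunk; instead clear_state_and_textbox is chained ahead of the stream handler on audio_input, so the reset is tied to the start of a new voice query rather than to each chunk.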