Pijush2023 committed
Commit 1b35873 · verified · 1 Parent(s): 416ba83

Update app.py

Files changed (1):
  1. app.py +38 -18
app.py CHANGED
@@ -456,26 +456,37 @@ pipe_asr = pipeline("automatic-speech-recognition", model=model, tokenizer=proce
 
 base_audio_drive = "/data/audio"
 
-def transcribe_function(stream, new_chunk):
-    try:
-        sr, y = new_chunk[0], new_chunk[1]
-    except TypeError:
-        print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
-        return stream, "", None
+# def transcribe_function(stream, new_chunk):
+#     try:
+#         sr, y = new_chunk[0], new_chunk[1]
+#     except TypeError:
+#         print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
+#         return stream, "", None
 
-    y = y.astype(np.float32) / np.max(np.abs(y))
+#     y = y.astype(np.float32) / np.max(np.abs(y))
+
+#     if stream is not None:
+#         stream = np.concatenate([stream, y])
+#     else:
+#         stream = y
+
+#     result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
 
+#     full_text = result.get("text","")
+
+#     return stream, full_text, result
+
+
+def transcribe_function(stream, new_chunk):
+    sr, y = new_chunk[0], new_chunk[1]
+    y = y.astype(np.float32) / np.max(np.abs(y))
     if stream is not None:
         stream = np.concatenate([stream, y])
     else:
         stream = y
-
     result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
-
-    full_text = result.get("text","")
-
-    return stream, full_text, result
-
+    full_text = result.get("text", "")
+    return stream, full_text  # Return the transcribed text
 
 def update_map_with_response(history):
     if not history:
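Note on this hunk: the rewritten transcribe_function drops the old try/except, so it no longer guards its input. A silent chunk (all zeros) makes np.max(np.abs(y)) zero and the division fills y with NaN/inf rather than raising, and a None value from a cleared audio widget raises a TypeError again. A minimal defensive variant, as a sketch only (not part of this commit; it reuses the file's np and pipe_asr), could look like:

def transcribe_function_safe(stream, new_chunk):
    # Sketch: gr.Audio passes None when the recording is cleared.
    if new_chunk is None:
        return stream, ""
    sr, y = new_chunk
    y = y.astype(np.float32)
    peak = np.max(np.abs(y))
    if peak > 0:  # avoid NaN/inf from dividing a silent chunk by zero
        y = y / peak
    stream = y if stream is None else np.concatenate([stream, y])
    result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
    return stream, result.get("text", "")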
@@ -681,17 +692,26 @@ with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
     clear_button = gr.Button("Clear")
     clear_button.click(fn=clear_textbox, inputs=None, outputs=chat_input)
 
-    # audio_input = gr.Audio(sources=["microphone"], streaming=True, type='numpy')
-    # audio_input.stream(transcribe_function, inputs=[state, audio_input], outputs=[state, chat_input], api_name="voice_query_to_text")
+    #P1 audio_input = gr.Audio(sources=["microphone"], streaming=True, type='numpy')
+    #P1 audio_input.stream(transcribe_function, inputs=[state, audio_input], outputs=[state, chat_input], api_name="voice_query_to_text")
+
+    #p2 audio_input = gr.Audio(sources=["microphone"], streaming=False, type='numpy')
+    #p2 audio_input.change(fn=transcribe_function, inputs=[state, audio_input], outputs=[state, chat_input], api_name="voice_query_to_text")
 
+    #p2 send_chunk_button = gr.Button("Send chunk")
+    #p2 send_chunk_button.click(fn=lambda state: state[1], inputs=[state], outputs=chat_input)
+
+
     audio_input = gr.Audio(sources=["microphone"], streaming=False, type='numpy')
-    audio_input.change(fn=transcribe_function, inputs=[state, audio_input], outputs=[state, chat_input], api_name="voice_query_to_text")
+    transcription_state = gr.State(value="")  # Initialize a state to hold the transcribed text
+
+    audio_input.change(fn=transcribe_function, inputs=[transcription_state, audio_input], outputs=[transcription_state, transcription_state])
 
     send_chunk_button = gr.Button("Send chunk")
-    send_chunk_button.click(fn=lambda state: state[1], inputs=[state], outputs=chat_input)
+    send_chunk_button.click(fn=lambda transcription_state: transcription_state, inputs=[transcription_state], outputs=chat_input)
 
-
 
+
     # with gr.Column():
     # weather_output = gr.HTML(value=fetch_local_weather())
     # news_output = gr.HTML(value=fetch_local_news())
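One thing to watch in this hunk: transcription_state now plays two roles. It is fed to transcribe_function as stream, so it should hold the accumulated numpy audio, but it is also listed twice in outputs, where the second return value (the transcript string) presumably ends up stored and is what the Send chunk lambda copies into chat_input; on the next .change() the function would then receive a string where it expects an array. A sketch with two separate states keeps the roles apart (stream_state and text_state are illustrative names, not from the commit; transcribe_function and chat_input are assumed from the file):

import gradio as gr

with gr.Blocks() as demo:
    chat_input = gr.Textbox(label="Ask a question")
    stream_state = gr.State(value=None)  # accumulated numpy audio buffer
    text_state = gr.State(value="")      # latest transcript text

    audio_input = gr.Audio(sources=["microphone"], streaming=False, type='numpy')
    audio_input.change(fn=transcribe_function,
                       inputs=[stream_state, audio_input],
                       outputs=[stream_state, text_state])

    send_chunk_button = gr.Button("Send chunk")
    send_chunk_button.click(fn=lambda text: text, inputs=[text_state], outputs=chat_input)

With this split, the Send chunk button always reads plain text, and the audio buffer keeps its numpy dtype across calls.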
 