simonraj committed · Commit 86e368d · verified · 1 Parent(s): f0a39fa

Update app.py

Files changed (1): app.py +63 -25
app.py CHANGED
@@ -1,39 +1,77 @@
 import gradio as gr
-
-
-def click_js():
-    return """function audioRecord() {
-    var xPathRes = document.evaluate ('//*[contains(@class, "record")]', document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null);
-    xPathRes.singleNodeValue.click();}"""
-
-
-def action(btn):
-    """Changes button text on click"""
-    if btn == 'Speak': return 'Stop'
-    else: return 'Speak'
-
-
-def check_btn(btn):
-    """Checks for correct button text before invoking transcribe()"""
-    if btn != 'Speak': raise Exception('Recording...')
-
-
-def transcribe():
-    return 'Success'
-
-with gr.Blocks() as demo:
-    msg = gr.Textbox()
-    audio_box = gr.Audio(label="Audio", sources="microphone", type="filepath", elem_id='audio')
-
-    with gr.Row():
-        audio_btn = gr.Button('Speak')
-        clear = gr.Button("Clear")
-
-    audio_btn.click(fn=action, inputs=audio_btn, outputs=audio_btn).\
-        then(fn=lambda: None, js=click_js()).\
-        then(fn=check_btn, inputs=audio_btn).\
-        success(fn=transcribe, outputs=msg)
-
-    clear.click(lambda: None, None, msg, queue=False)
-
-demo.queue().launch(debug=True)
+from transformers import pipeline
+import numpy as np
+
+# Initialize the automatic speech recognition pipeline using a pre-trained model
+transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
+
+# Global variables to store the accumulated audio data and its streaming rate
+audio_data = None
+streaming_rate = None
+
+def capture_audio(stream, new_chunk):
+    """
+    Function to capture streaming audio and accumulate it in a global variable.
+
+    Args:
+        stream (numpy.ndarray): The accumulated audio data up to this point.
+        new_chunk (tuple): A tuple containing the sampling rate and the new audio data chunk.
+
+    Returns:
+        numpy.ndarray: The updated stream with the new chunk appended.
+    """
+    global audio_data
+    global streaming_rate
+
+    # Extract sampling rate and audio chunk, normalize the audio
+    sr, y = new_chunk
+    streaming_rate = sr
+    y = y.astype(np.float32)
+    y /= np.max(np.abs(y))
+
+    # Concatenate new audio chunk to the existing stream or start a new one
+    if stream is not None:
+        stream = np.concatenate([stream, y])
+    else:
+        stream = y
+
+    # Update the global variable with the new audio data
+    audio_data = stream
+    return stream
+
+def get_transcript():
+    """
+    Function to transcribe the accumulated audio data.
+
+    Returns:
+        str: The transcription of the accumulated audio data.
+    """
+    global audio_data
+    global streaming_rate
+
+    # Transcribe the audio data if available
+    if audio_data is not None and streaming_rate is not None:
+        transcript = transcriber({"sampling_rate": streaming_rate, "raw": audio_data})["text"]
+        return transcript
+    return ""
+
+# Building the Gradio interface using Blocks
+with gr.Blocks() as demo:
+    with gr.Row():
+        with gr.Column():
+            # State variable to manage the streaming data
+            state = gr.State()
+            # Audio component for real-time audio capture from the microphone
+            audio = gr.Audio(sources=["microphone"], streaming=True, type="numpy")
+            # Textbox for displaying the transcription
+            transcript_box = gr.Textbox(label="Transcript")
+            # Button to initiate transcription of the captured audio
+            rfrsh_btn = gr.Button("Refresh")
+
+    # Streaming setup to handle real-time audio capture
+    audio.stream(fn=capture_audio, inputs=[state, audio], outputs=[state])
+    # Button click setup to trigger transcription
+    rfrsh_btn.click(fn=get_transcript, outputs=[transcript_box])
+
+# Launch the Gradio interface
+demo.launch()
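
Editor's note on the added code: `y /= np.max(np.abs(y))` divides by zero whenever an incoming chunk is pure silence, turning the accumulated stream into NaNs, and the module-level `audio_data` / `streaming_rate` globals are shared across every connected session. A minimal sketch of one way around both, keeping the sample rate and audio together in the per-session `gr.State` value; this is a suggested variant, not part of the commit, and the (rate, audio) tuple layout in `state` is an assumption:

def capture_audio(stream, new_chunk):
    """Accumulate a (sample_rate, audio) pair in per-session state instead of globals."""
    sr, y = new_chunk
    y = y.astype(np.float32)
    peak = np.max(np.abs(y))
    if peak > 0:  # skip normalization for silent chunks to avoid dividing by zero
        y /= peak
    if stream is None:
        return (sr, y)
    return (sr, np.concatenate([stream[1], y]))

def get_transcript(stream):
    """Transcribe whatever this session has accumulated so far."""
    if stream is None:
        return ""
    sr, audio = stream
    return transcriber({"sampling_rate": sr, "raw": audio})["text"]

# Wiring keeps the same shape as the committed version, except the click
# handler now reads the session state:
# audio.stream(fn=capture_audio, inputs=[state, audio], outputs=[state])
# rfrsh_btn.click(fn=get_transcript, inputs=[state], outputs=[transcript_box])

Passing `inputs=[state]` to `rfrsh_btn.click` is the one wiring change this variant needs versus the committed `rfrsh_btn.click(fn=get_transcript, outputs=[transcript_box])`.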