Spaces:

simonraj
/

Audio

Sleeping

App Files Community

simonraj committed on Feb 3, 2024

Commit

86e368d

verified ·

1 Parent(s): f0a39fa

Update app.py

Browse files

Files changed (1) hide show

app.py +63 -25

app.py CHANGED Viewed

@@ -1,39 +1,77 @@
 import gradio as gr
-def click_js():
-    return """function audioRecord() {
-    var xPathRes = document.evaluate ('//*[contains(@class, "record")]', document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null);
-    xPathRes.singleNodeValue.click();}"""
-def action(btn):
-    """Changes button text on click"""
-    if btn == 'Speak': return 'Stop'
-    else: return 'Speak'
-def check_btn(btn):
-    """Checks for correct button text before invoking transcribe()"""
-    if btn != 'Speak': raise Exception('Recording...')
-def transcribe():
-    return 'Success'
-with gr.Blocks() as demo:
-    msg = gr.Textbox()
-    audio_box = gr.Audio(label="Audio", sources="microphone", type="filepath", elem_id='audio')
-    with gr.Row():
-        audio_btn = gr.Button('Speak')
-        clear = gr.Button("Clear")
-    audio_btn.click(fn=action, inputs=audio_btn, outputs=audio_btn).\
-              then(fn=lambda: None, js=click_js()).\
-              then(fn=check_btn, inputs=audio_btn).\
-              success(fn=transcribe, outputs=msg)
-    clear.click(lambda: None, None, msg, queue=False)
-demo.queue().launch(debug=True)

 import gradio as gr
+from transformers import pipeline
+import numpy as np
+# Load the pre-trained Whisper checkpoint once, at import time, as the
+# speech-to-text pipeline used by get_transcript()
+transcriber = pipeline(task="automatic-speech-recognition", model="openai/whisper-base.en")
+# Module-level holders for the audio accumulated so far and its sampling
+# rate; both stay None until capture_audio() receives the first chunk
+audio_data = streaming_rate = None
+def capture_audio(stream, new_chunk):
+    """
+    Append one streamed microphone chunk to the running audio buffer.
+
+    The chunk is converted to float32, down-mixed to mono when it carries a
+    channel axis, and peak-normalized. The updated buffer and the chunk's
+    sampling rate are also stored in the module-level ``audio_data`` and
+    ``streaming_rate`` globals so that ``get_transcript`` can read them.
+
+    Args:
+        stream (numpy.ndarray | None): The audio accumulated up to this point,
+            or None when this is the first chunk.
+        new_chunk (tuple): A tuple containing the sampling rate and the new audio data chunk.
+    Returns:
+        numpy.ndarray: The updated stream with the new chunk appended.
+    """
+    global audio_data
+    global streaming_rate
+    sr, y = new_chunk
+    streaming_rate = sr
+    # astype() copies, so the caller's array is never modified in place
+    y = y.astype(np.float32)
+    # The ASR pipeline needs single-channel audio; average the channels when
+    # present (assumes a (samples, channels) layout - TODO confirm)
+    if y.ndim > 1:
+        y = y.mean(axis=1)
+    # Peak-normalize, but skip empty or all-silent chunks: np.max() raises on
+    # an empty array, and dividing by a zero peak would fill the buffer with NaN
+    peak = np.max(np.abs(y)) if y.size else 0.0
+    if peak > 0:
+        y /= peak
+    # Concatenate new audio chunk to the existing stream or start a new one
+    if stream is not None:
+        stream = np.concatenate([stream, y])
+    else:
+        stream = y
+    # Mirror the buffer into the global read by get_transcript()
+    audio_data = stream
+    return stream
+def get_transcript():
+    """
+    Run speech recognition over everything captured so far.
+    Returns:
+        str: The text produced by the transcriber for the accumulated audio,
+        or an empty string when no audio has been captured yet.
+    """
+    # The globals are only read here, so no ``global`` declaration is needed
+    if audio_data is None or streaming_rate is None:
+        return ""
+    # Hand the raw samples and their sampling rate to the ASR pipeline
+    payload = {"sampling_rate": streaming_rate, "raw": audio_data}
+    result = transcriber(payload)
+    return result["text"]
+# Assemble the user interface with the Gradio Blocks API
+with gr.Blocks() as demo:
+    with gr.Row(), gr.Column():
+        # State component that carries the accumulated stream between
+        # successive capture_audio() calls
+        state = gr.State()
+        # Microphone input delivered to the callback as streamed numpy chunks
+        audio = gr.Audio(sources=["microphone"], streaming=True, type="numpy")
+        # Output field showing the transcription
+        transcript_box = gr.Textbox(label="Transcript")
+        # Button that requests a transcription of what has been captured
+        rfrsh_btn = gr.Button("Refresh")
+    # Event wiring: every streamed chunk extends the buffer held in `state`
+    audio.stream(fn=capture_audio, inputs=[state, audio], outputs=[state])
+    # Event wiring: clicking the button writes the transcript into the textbox
+    rfrsh_btn.click(fn=get_transcript, outputs=[transcript_box])
+# Start serving the app
+demo.launch()