import gradio as gr
from transformers import pipeline
import numpy as np

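# Load the Whisper ASR pipeline once at startup so the model is reused
# across calls instead of being reloaded for every transcription.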
transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
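
# Module-level state: the accumulated audio samples and their sampling rate,
# shared between the streaming callback and the transcription callback.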
audio_data = None
streaming_rate = None


def capture_audio(stream, new_chunk):
    """
    Capture streaming audio and accumulate it in a global variable.

    Args:
        stream (numpy.ndarray): The accumulated audio data up to this point.
        new_chunk (tuple): A tuple containing the sampling rate and the new
            audio data chunk.

    Returns:
        numpy.ndarray: The updated stream with the new chunk appended.
    """
    global audio_data
    global streaming_rate

    sr, y = new_chunk
    streaming_rate = sr

    # Convert the chunk to float32 and normalize it to [-1, 1]. Skip the
    # division for an all-zero (silent) chunk to avoid dividing by zero.
    y = y.astype(np.float32)
    max_abs = np.max(np.abs(y))
    if max_abs > 0:
        y /= max_abs

    # Append the new chunk to the running stream.
    if stream is not None:
        stream = np.concatenate([stream, y])
    else:
        stream = y

    audio_data = stream
    return stream


def get_transcript():
    """
    Transcribe the accumulated audio data.

    Returns:
        str: The transcription of the accumulated audio data, or an empty
            string if no audio has been captured yet.
    """
    global audio_data
    global streaming_rate

    if audio_data is not None and streaming_rate is not None:
        transcript = transcriber({"sampling_rate": streaming_rate, "raw": audio_data})["text"]
        return transcript
    return ""


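# Build the UI: a streaming microphone input, a transcript box, and a button
# that transcribes whatever audio has accumulated so far.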
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            # gr.State carries the accumulated stream between chunk events.
            state = gr.State()
            audio = gr.Audio(sources=["microphone"], streaming=True, type="numpy")
            transcript_box = gr.Textbox(label="Transcript")
            rfrsh_btn = gr.Button("Refresh")

    # Every new microphone chunk is routed through capture_audio, which
    # appends it to the stream held in `state`.
    audio.stream(fn=capture_audio, inputs=[state, audio], outputs=[state])
    # The Refresh button transcribes the audio captured so far.
    rfrsh_btn.click(fn=get_transcript, outputs=[transcript_box])

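# launch() serves the demo locally; the browser will ask for microphone access.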
demo.launch()