Spaces:
Sleeping
Sleeping
import streamlit as st | |
from streamlit_webrtc import webrtc_streamer, AudioProcessorBase, WebRtcMode | |
import numpy as np | |
import pydub | |
from transformers import pipeline | |
from asr import load_model, inference | |
# Custom audio processor that buffers microphone frames for later transcription
class AudioProcessor(AudioProcessorBase):
    """Accumulate incoming audio frames as int16 NumPy chunks.

    streamlit-webrtc hands frames to a processor through ``recv``. The
    original ``recv_audio`` entry point is kept for existing callers, and
    ``recv`` forwards to it so that frames are actually captured.
    """

    def __init__(self):
        super().__init__()
        # One int16 array per received frame, in arrival order.
        self.audio_data = []

    def recv_audio(self, frame):
        """Buffer one audio frame and return it unchanged.

        Args:
            frame: Audio frame exposing ``to_ndarray()`` (presumably an
                ``av.AudioFrame`` -- confirm against streamlit-webrtc).

        Returns:
            The same ``frame`` object that was passed in.
        """
        # Reinterprets the frame's raw sample buffer as int16.
        # NOTE(review): assumes the frame holds 16-bit PCM samples; any
        # other sample format would be misread here -- confirm.
        audio_array = np.frombuffer(frame.to_ndarray(), dtype=np.int16)
        self.audio_data.append(audio_array)
        return frame

    def recv(self, frame):
        """Frame hook invoked by streamlit-webrtc; delegates to ``recv_audio``."""
        return self.recv_audio(frame)

    def get_audio_data(self):
        """Return all buffered samples joined into one array.

        Returns:
            The concatenation of every buffered chunk, or ``None`` when no
            frame has been buffered yet.
        """
        if not self.audio_data:
            return None
        return np.concatenate(self.audio_data, axis=0)
# Title of the app
st.title("Real-Time Speech-to-Text")

# Streamlit re-executes this script on every interaction (including button
# clicks). Keeping the processor in session state makes every rerun see the
# same instance; creating it unconditionally here would hand later code a
# fresh, empty processor instead of the one given to the streamer.
if "asr_audio_processor" not in st.session_state:
    st.session_state["asr_audio_processor"] = AudioProcessor()
audio_processor = st.session_state["asr_audio_processor"]

# WebRTC streamer to capture microphone input (audio only, no video)
webrtc_streamer(
    key="audio",
    mode=WebRtcMode.SENDONLY,
    audio_processor_factory=lambda: audio_processor,
    media_stream_constraints={"audio": True, "video": False},
)
# Load the ASR model once and reuse it across Streamlit reruns
@st.cache_resource
def load_asr_model():
    """Return the ASR model produced by ``asr.load_model``.

    Cached with ``st.cache_resource`` because Streamlit re-runs the whole
    script on every interaction; without caching, the model would be loaded
    again on each rerun.
    """
    return load_model()


asr_model = load_asr_model()
# When the button is pressed, transcribe whatever audio has been buffered
if st.button("Transcribe Audio"):
    audio_data = audio_processor.get_audio_data()
    if audio_data is None:
        # The processor has not buffered any frames.
        st.warning("No audio data captured! Please speak into your microphone.")
    else:
        # Wrap the raw samples in a pydub segment.
        # NOTE(review): 16 kHz, 16-bit, mono is assumed here; the actual
        # rate and channel layout of the captured frames are never checked
        # -- confirm against what the stream really delivers.
        raw_pcm = audio_data.tobytes()
        audio_segment = pydub.AudioSegment(
            raw_pcm,
            frame_rate=16000,
            sample_width=2,
            channels=1,
        )

        # Run ASR on the segment's raw bytes and show the recognised text
        st.info("Transcribing audio...")
        transcription = inference(asr_model, audio_segment.raw_data)
        st.text_area("Transcription", transcription["text"], height=200)