try adding mic inp
app.py CHANGED
@@ -3,7 +3,9 @@ import datetime
 import logging
 import soundfile
 import streamlit as st
-
+from streamlit_webrtc import webrtc_streamer, AudioProcessorBase, WebRtcMode
+import numpy as np
+import pydub
 from pathlib import Path
 
 from asr import load_model, inference
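Note: the three added imports are third-party packages, so they presumably also need to be declared in the Space's requirements.txt (names below are the PyPI package names; version pins are left to the author):

    streamlit-webrtc
    pydub
    numpy

pydub additionally expects ffmpeg on the host for formats beyond plain WAV.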
@@ -12,6 +14,26 @@ LOG_DIR = "./logs"
 DATA_DIR = "./data"
 logger = logging.getLogger(__name__)
 
+
+# Define a custom audio processor to handle microphone input
+class AudioProcessor(AudioProcessorBase):
+    def __init__(self):
+        self.audio_data = []
+
+    def recv(self, frame):
+        # Convert the incoming audio frame to a NumPy array of 16-bit samples
+        audio_array = np.frombuffer(frame.to_ndarray(), dtype=np.int16)
+        self.audio_data.append(audio_array)
+        return frame
+
+    def get_audio_data(self):
+        # Combine all captured audio data
+        if self.audio_data:
+            combined = np.concatenate(self.audio_data, axis=0)
+            return combined
+        return None
+
+
 def upload_audio() -> Path:
     # Upload audio file
     uploaded_file = st.file_uploader("Choose an audio file (wav, mp3, flac)", type=['wav', 'mp3', 'flac'])
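The combined int16 array from get_audio_data() could be bridged into the app's existing file-based flow using the soundfile import that is already present. A minimal sketch, assuming a 48 kHz capture rate and a hypothetical save_mic_audio helper (neither is part of this commit):

    import numpy as np
    import soundfile
    from pathlib import Path

    def save_mic_audio(audio_data: np.ndarray, sample_rate: int = 48000) -> Path:
        # Assumption: WebRTC tracks commonly arrive at 48 kHz; pass the real frame rate if recorded
        out_path = Path("./data") / "mic_input.wav"
        # soundfile picks the PCM_16 subtype from the int16 dtype when writing WAV
        soundfile.write(out_path, audio_data, sample_rate)
        return out_path

The resulting path could then be handed to the same inference() call that the upload_audio() flow feeds.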
@@ -68,6 +90,44 @@ def main():
         st.success(f"Inference finished in {process_time}.")
         st.write(f"output: {output['text']}")
 
+    st.title("Microphone Input for ASR")
+
+    # Start the WebRTC session; keep the context to reach the live processor
+    webrtc_ctx = webrtc_streamer(
+        key="audio",
+        mode=WebRtcMode.SENDONLY,
+        audio_processor_factory=AudioProcessor,
+        media_stream_constraints={"audio": True, "video": False},
+    )
+
+    if st.button("Process Audio"):
+        audio_data = None
+        if webrtc_ctx.audio_processor:
+            audio_data = webrtc_ctx.audio_processor.get_audio_data()
+        if audio_data is not None:
+            # Convert the NumPy array to a WAV-like audio segment
+            audio_segment = pydub.AudioSegment(
+                audio_data.tobytes(),
+                frame_rate=16000,  # Assumed rate; WebRTC often delivers 48 kHz
+                sample_width=2,    # 16-bit audio
+                channels=1,        # Mono
+            )
+            # Save or process audio_segment as needed
+            st.success("Audio captured successfully!")
+            # st.audio(audio_segment.export(format="wav"), format="audio/wav")
+        else:
+            st.warning("No audio data captured!")
+
+    if st.button("Transcribe Audio"):
+        audio_data = None
+        if webrtc_ctx.audio_processor:
+            audio_data = webrtc_ctx.audio_processor.get_audio_data()
+        if audio_data is not None:
+            # Perform ASR on the raw captured samples
+            transcription = inference(generator, audio_data.tobytes())
+            st.text_area("Transcription", transcription["text"])
+        else:
+            st.warning("No audio data to transcribe!")
 
 if __name__ == "__main__":
     # Setting logger
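For reference, streamlit_webrtc can also queue incoming frames server-side without a custom processor in SENDONLY mode, via audio_receiver_size and the context's audio_receiver; this sidesteps the processor-handle bookkeeping across Streamlit reruns. A sketch under those assumptions (the key, buffer size, and session_state name are illustrative):

    import queue

    import streamlit as st
    from streamlit_webrtc import webrtc_streamer, WebRtcMode

    ctx = webrtc_streamer(
        key="audio-recv",
        mode=WebRtcMode.SENDONLY,
        audio_receiver_size=1024,  # frames queued server-side before old ones drop
        media_stream_constraints={"audio": True, "video": False},
    )

    if "mic_chunks" not in st.session_state:
        st.session_state["mic_chunks"] = []  # survives reruns within one session

    if ctx.audio_receiver:
        try:
            frames = ctx.audio_receiver.get_frames(timeout=1)
        except queue.Empty:
            frames = []
        for frame in frames:
            # av.AudioFrame.to_ndarray() returns the frame's samples; layout depends on format
            st.session_state["mic_chunks"].append(frame.to_ndarray())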