Spaces:
Sleeping
Sleeping
try again
Browse files
app.py
CHANGED
@@ -1,73 +1,99 @@
|
|
|
|
|
|
|
|
1 |
import streamlit as st
|
2 |
-
from streamlit_webrtc import webrtc_streamer, AudioProcessorBase, WebRtcMode
|
3 |
-
import numpy as np
|
4 |
-
import pydub
|
5 |
-
from transformers import pipeline
|
6 |
from asr import load_model, inference
|
|
|
7 |
|
8 |
|
9 |
-
# Define a custom audio processor to handle microphone input
|
10 |
-
class AudioProcessor(AudioProcessorBase):
|
11 |
-
def __init__(self):
|
12 |
-
self.audio_data = []
|
13 |
-
|
14 |
-
def recv_audio(self, frame):
|
15 |
-
# Convert the audio frame to a NumPy array
|
16 |
-
audio_array = np.frombuffer(frame.to_ndarray(), dtype=np.int16)
|
17 |
-
self.audio_data.append(audio_array)
|
18 |
-
return frame
|
19 |
-
|
20 |
-
def get_audio_data(self):
|
21 |
-
# Combine all captured audio data
|
22 |
-
if self.audio_data:
|
23 |
-
combined = np.concatenate(self.audio_data, axis=0)
|
24 |
-
return combined
|
25 |
-
return None
|
26 |
-
|
27 |
-
# Title of the app
|
28 |
-
st.title("Real-Time Speech-to-Text")
|
29 |
-
|
30 |
-
# Initialize the audio processor
|
31 |
-
audio_processor = AudioProcessor()
|
32 |
-
|
33 |
-
# WebRTC streamer to capture microphone input
|
34 |
-
webrtc_streamer(
|
35 |
-
key="audio",
|
36 |
-
mode=WebRtcMode.SENDONLY,
|
37 |
-
audio_processor_factory=lambda: audio_processor,
|
38 |
-
media_stream_constraints={"audio": True, "video": False},
|
39 |
-
rtc_configuration={
|
40 |
-
"iceServers": [
|
41 |
-
{"urls": "stun:stun.l.google.com:19302"} # Public STUN server
|
42 |
-
]
|
43 |
-
},
|
44 |
-
)
|
45 |
-
|
46 |
-
# Load a pre-trained ASR pipeline from Hugging Face
|
47 |
@st.cache_resource
|
48 |
def load_asr_model():
|
49 |
return load_model()
|
50 |
|
51 |
asr_model = load_asr_model()
|
52 |
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
)
|
64 |
|
65 |
-
#
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
st.
|
71 |
-
|
72 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import sys
|
3 |
+
import datetime
|
4 |
import streamlit as st
|
|
|
|
|
|
|
|
|
5 |
from asr import load_model, inference
|
6 |
+
from audio_recorder_streamlit import audio_recorder
|
7 |
|
8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
@st.cache_resource
def load_asr_model():
    """Load the ASR model once and memoize it for the Streamlit server's lifetime."""
    model = load_model()
    return model
|
12 |
|
13 |
asr_model = load_asr_model()
|
14 |
|
15 |
+
|
16 |
+
def transcribe(audio_file):
    """
    Transcribe an open audio file object with the locally loaded ASR model.

    :param audio_file: A file-like object containing audio data (binary mode)
    :return: The transcribed text
    """
    # BUG FIX: the previous body called `openai.Audio.transcribe("whisper-1", ...)`,
    # but `openai` is never imported in this file, so any call raised NameError.
    # Route through the local model instead, consistent with transcribe_audio().
    transcript = inference(asr_model, audio_file)
    return transcript
|
19 |
+
|
20 |
+
|
21 |
+
def save_audio_file(audio_bytes, file_extension):
    """
    Persist raw audio bytes to a timestamped file in the working directory.

    :param audio_bytes: Audio data in bytes
    :param file_extension: The extension of the output audio file
    :return: The name of the saved audio file
    """
    # Timestamped name keeps successive recordings from clobbering each other.
    stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    out_name = f"audio_{stamp}.{file_extension}"

    with open(out_name, "wb") as out:
        out.write(audio_bytes)

    return out_name
|
36 |
+
|
37 |
+
|
38 |
+
def transcribe_audio(file_path):
    """
    Transcribe the audio file at the specified path.

    :param file_path: The path of the audio file to transcribe
    :return: The transcribed text
    """
    # Open in binary mode; the ASR inference helper consumes the raw stream.
    with open(file_path, "rb") as audio_handle:
        return inference(asr_model, audio_handle)
|
48 |
+
|
49 |
+
|
50 |
+
def main():
    """
    Streamlit entry point: record or upload audio, then transcribe it.

    Renders two tabs (microphone recording and WAV upload), saves captured
    audio to a timestamped file via save_audio_file(), and — when the user
    presses "Transcribe" — runs the local ASR model on the most recent
    audio file, displaying and offering the transcript for download.
    """
    st.title("Anishinaabemowin Transcription")
    tab1, tab2 = st.tabs(["Record Audio", "Upload Audio"])

    # Record Audio tab
    with tab1:
        audio_bytes = audio_recorder()
        if audio_bytes:
            st.audio(audio_bytes, format="audio/wav")
            save_audio_file(audio_bytes, "wav")

    # Upload Audio tab
    with tab2:
        audio_file = st.file_uploader("Upload Audio", type=["wav"])
        if audio_file:
            file_extension = audio_file.type.split('/')[1]
            save_audio_file(audio_file.read(), file_extension)

    # Transcribe button action
    if st.button("Transcribe"):
        # BUG FIX: guard against pressing "Transcribe" before any audio
        # exists — max() on an empty sequence raised ValueError before.
        candidates = [f for f in os.listdir(".") if f.startswith("audio")]
        if not candidates:
            st.warning("No audio found. Record or upload a file first.")
            return

        # Find the newest audio file by creation time.
        audio_file_path = max(candidates, key=os.path.getctime)

        # Transcribe the audio file
        transcript_text = transcribe_audio(audio_file_path)

        # Display the transcript
        st.header("Transcript")
        st.write(transcript_text)

        # Save the transcript to a text file
        with open("transcript.txt", "w") as f:
            f.write(transcript_text)

        # Provide a download button for the transcript
        st.download_button("Download Transcript", transcript_text)
|
91 |
+
|
92 |
+
|
93 |
+
if __name__ == "__main__":
    # Make the app's own directory importable (needed for the local `asr` module).
    sys.path.append(os.path.dirname(os.path.abspath(__file__)))

    # Launch the Streamlit app.
    main()
|