Lguyogiro committed on
Commit
c734958
·
1 Parent(s): 8d2723f
Files changed (1) hide show
  1. app.py +86 -60
app.py CHANGED
@@ -1,73 +1,99 @@
 
 
 
1
  import streamlit as st
2
- from streamlit_webrtc import webrtc_streamer, AudioProcessorBase, WebRtcMode
3
- import numpy as np
4
- import pydub
5
- from transformers import pipeline
6
  from asr import load_model, inference
 
7
 
8
 
9
- # Define a custom audio processor to handle microphone input
10
- class AudioProcessor(AudioProcessorBase):
11
- def __init__(self):
12
- self.audio_data = []
13
-
14
- def recv_audio(self, frame):
15
- # Convert the audio frame to a NumPy array
16
- audio_array = np.frombuffer(frame.to_ndarray(), dtype=np.int16)
17
- self.audio_data.append(audio_array)
18
- return frame
19
-
20
- def get_audio_data(self):
21
- # Combine all captured audio data
22
- if self.audio_data:
23
- combined = np.concatenate(self.audio_data, axis=0)
24
- return combined
25
- return None
26
-
27
- # Title of the app
28
- st.title("Real-Time Speech-to-Text")
29
-
30
- # Initialize the audio processor
31
- audio_processor = AudioProcessor()
32
-
33
- # WebRTC streamer to capture microphone input
34
- webrtc_streamer(
35
- key="audio",
36
- mode=WebRtcMode.SENDONLY,
37
- audio_processor_factory=lambda: audio_processor,
38
- media_stream_constraints={"audio": True, "video": False},
39
- rtc_configuration={
40
- "iceServers": [
41
- {"urls": "stun:stun.l.google.com:19302"} # Public STUN server
42
- ]
43
- },
44
- )
45
-
46
- # Load a pre-trained ASR pipeline from Hugging Face
47
  @st.cache_resource
48
  def load_asr_model():
49
  return load_model()
50
 
51
  asr_model = load_asr_model()
52
 
53
- # Button to process audio and perform ASR
54
- if st.button("Transcribe Audio"):
55
- audio_data = audio_processor.get_audio_data()
56
- if audio_data is not None:
57
- # Convert the NumPy array to a WAV-like audio segment
58
- audio_segment = pydub.AudioSegment(
59
- audio_data.tobytes(),
60
- frame_rate=16000, # Default WebRTC audio frame rate
61
- sample_width=2, # 16-bit audio
62
- channels=1 # Mono
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  )
64
 
65
- # Perform ASR on the audio segment
66
- st.info("Transcribing audio...")
67
- transcription = inference(asr_model, audio_segment.raw_data)
68
-
69
- # Display transcription
70
- st.text_area("Transcription", transcription["text"], height=200)
71
- else:
72
- st.warning("No audio data captured! Please speak into your microphone.")
 
 
 
 
 
 
 
 
 
 
 
73
 
 
 
 
1
+ import os
2
+ import sys
3
+ import datetime
4
  import streamlit as st
 
 
 
 
5
  from asr import load_model, inference
6
+ from audio_recorder_streamlit import audio_recorder
7
 
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
@st.cache_resource
def load_asr_model():
    """Load the ASR model exactly once and reuse it across Streamlit reruns.

    :return: The model object produced by ``asr.load_model``.
    """
    return load_model()


# Module-level handle to the cached model, shared by the transcription helpers.
asr_model = load_asr_model()
14
 
15
+
16
def transcribe(audio_file):
    """Transcribe an open audio file with the locally loaded ASR model.

    Bug fix: the previous body called ``openai.Audio.transcribe`` but the
    ``openai`` module is never imported anywhere in this file, so any call
    raised ``NameError``. The function now delegates to the same
    ``inference`` helper used by ``transcribe_audio``, keeping the whole app
    on the local model.

    :param audio_file: A file-like object (opened in binary mode).
    :return: The transcription result produced by ``inference``.
    """
    transcript = inference(asr_model, audio_file)
    return transcript
19
+
20
+
21
def save_audio_file(audio_bytes, file_extension):
    """
    Persist raw audio bytes to a timestamped file in the working directory.

    :param audio_bytes: Audio data in bytes
    :param file_extension: The extension of the output audio file
    :return: The name of the saved audio file
    """
    stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    out_name = "audio_{}.{}".format(stamp, file_extension)

    with open(out_name, "wb") as out_file:
        out_file.write(audio_bytes)

    return out_name
36
+
37
+
38
def transcribe_audio(file_path):
    """
    Transcribe the audio file at the specified path.

    :param file_path: The path of the audio file to transcribe
    :return: The transcribed text
    """
    # Open in binary mode and hand the file object to the local ASR model.
    with open(file_path, "rb") as handle:
        result = inference(asr_model, handle)
    return result
48
+
49
+
50
def main():
    """Streamlit entry point: record or upload audio, then transcribe it.

    Renders two tabs (microphone recording and file upload), saves any
    captured audio to a timestamped ``audio_*`` file in the working
    directory, and on "Transcribe" runs the newest such file through the
    local ASR model, displaying and offering the transcript for download.
    """
    st.title("Anishinaabemowin Transcription")
    tab1, tab2 = st.tabs(["Record Audio", "Upload Audio"])

    # Record Audio tab
    with tab1:
        audio_bytes = audio_recorder()
        if audio_bytes:
            st.audio(audio_bytes, format="audio/wav")
            save_audio_file(audio_bytes, "wav")

    # Upload Audio tab
    with tab2:
        audio_file = st.file_uploader("Upload Audio", type=["wav"])
        if audio_file:
            # Derive the extension from the MIME type, e.g. "audio/wav" -> "wav".
            file_extension = audio_file.type.split('/')[1]
            save_audio_file(audio_file.read(), file_extension)

    # Transcribe button action
    if st.button("Transcribe"):
        # Bug fix: the original passed this list straight to max() without
        # checking it was non-empty, so clicking "Transcribe" before any
        # recording/upload raised ValueError and crashed the app.
        candidates = [f for f in os.listdir(".") if f.startswith("audio")]
        if not candidates:
            st.warning("No audio found. Record or upload a clip first.")
            return

        # Find the newest audio file by creation time.
        audio_file_path = max(candidates, key=os.path.getctime)

        # Transcribe the audio file with the local ASR model.
        transcript_text = transcribe_audio(audio_file_path)

        # Display the transcript
        st.header("Transcript")
        st.write(transcript_text)

        # Save the transcript to a text file.
        # NOTE(review): assumes `inference` returns a plain string — if it
        # returns a dict (e.g. {"text": ...}), f.write would raise TypeError;
        # confirm against asr.inference.
        with open("transcript.txt", "w") as f:
            f.write(transcript_text)

        # Provide a download button for the transcript
        st.download_button("Download Transcript", transcript_text)
91
+
92
+
93
+ if __name__ == "__main__":
94
+ # Set up the working directory
95
+ working_dir = os.path.dirname(os.path.abspath(__file__))
96
+ sys.path.append(working_dir)
97
 
98
+ # Run the main function
99
+ main()