Spaces:
Running
Running
File size: 1,242 Bytes
bbafd1c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
import streamlit as st
from transformers import pipeline
import soundfile as sf
import tempfile
# Load the ASR model from Hugging Face
@st.cache_resource
def load_model():
# Use Hugging Face's pipeline with your desired model
return pipeline("automatic-speech-recognition", model="fractalego/personal-speech-to-text-model")
# Initialize the model pipeline
pipe = load_model()
# Streamlit UI
st.title("Speech-to-Text Transcription App")
st.write("Upload an audio file, and the AI model will transcribe it.")
# Upload audio file
uploaded_file = st.file_uploader("Choose an audio file", type=["wav", "mp3", "m4a"])
if uploaded_file is not None:
st.audio(uploaded_file, format='audio/wav')
# Save uploaded file to a temporary location
with tempfile.NamedTemporaryFile(delete=False) as temp_file:
temp_file.write(uploaded_file.read())
temp_file_path = temp_file.name
# Read the audio file and transcribe
with st.spinner("Transcribing... Please wait..."):
transcription = pipe(temp_file_path)
# Display the transcription result
st.subheader("Transcription")
st.write(transcription['text'])
else:
st.info("Please upload an audio file to start transcription.")
|