import os
import tempfile

import streamlit as st
from transformers import pipeline


# Load the ASR model from Hugging Face and cache it across Streamlit reruns
@st.cache_resource
def load_model():
    # Use Hugging Face's pipeline with the desired model
    return pipeline(
        "automatic-speech-recognition",
        model="fractalego/personal-speech-to-text-model",
    )


# Initialize the model pipeline
pipe = load_model()

# Streamlit UI
st.title("Speech-to-Text Transcription App")
st.write("Upload an audio file, and the AI model will transcribe it.")

# Upload audio file
uploaded_file = st.file_uploader("Choose an audio file", type=["wav", "mp3", "m4a"])

if uploaded_file is not None:
    # Play back the uploaded audio using its own MIME type
    st.audio(uploaded_file, format=uploaded_file.type)

    # Save the uploaded file to a temporary location, preserving its extension
    # so the audio decoder can recognize the format
    suffix = os.path.splitext(uploaded_file.name)[1]
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
        temp_file.write(uploaded_file.read())
        temp_file_path = temp_file.name

    # Transcribe the audio file
    with st.spinner("Transcribing... Please wait..."):
        transcription = pipe(temp_file_path)

    # Display the transcription result
    st.subheader("Transcription")
    st.write(transcription["text"])
else:
    st.info("Please upload an audio file to start transcription.")
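
# To try the app locally (assuming this script is saved as app.py and the
# streamlit and transformers packages are installed), run:
#   streamlit run app.py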