import streamlit as st from pyannote.audio import Pipeline from transformers import pipeline import whisper # Title st.title("Multi-Speaker Audio Analyzer") # Upload Audio File uploaded_file = st.file_uploader("Upload an audio file (MP3/WAV)", type=["mp3", "wav"]) # Process Button if uploaded_file: st.audio(uploaded_file, format='audio/wav') # Load pre-trained models diarization_pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization") transcription_model = whisper.load_model("base") summarizer = pipeline("summarization", model="facebook/bart-large-cnn") # Perform Speaker Diarization st.write("Processing Speaker Diarization...") diarized_output = diarization_pipeline(uploaded_file) # Perform Speech-to-Text Transcription st.write("Transcribing Audio...") transcription = transcription_model.transcribe(uploaded_file) # Generate Summary st.write("Generating Summary...") summary = summarizer(transcription["text"]) # Display Outputs st.write("Speaker-Diarized Transcript:") st.text(diarized_output) st.write("Full Transcription:") st.text(transcription["text"]) st.write("Summary:") st.text(summary[0]['summary_text'])