Manyue-DataScientist's picture
Update app.py
2a6784d verified
raw
history blame
1.23 kB
import tempfile
from pathlib import Path

import streamlit as st
import whisper
from pyannote.audio import Pipeline
from transformers import pipeline
# Streamlit app: upload an audio file, run speaker diarization, transcribe it
# with Whisper, summarize the transcript, and show all three results.

# Title
st.title("Multi-Speaker Audio Analyzer")

# Upload Audio File
uploaded_file = st.file_uploader("Upload an audio file (MP3/WAV)", type=["mp3", "wav"])

# Processing starts as soon as a file has been uploaded.
if uploaded_file:
    # Use the MIME type reported for the upload so MP3 files are not
    # labelled as WAV in the audio player.
    st.audio(uploaded_file, format=uploaded_file.type or "audio/wav")

    # Load pre-trained models
    # NOTE(review): these are reloaded on every Streamlit rerun; consider
    # caching them (e.g. with st.cache_resource) if the deployed Streamlit
    # version provides it.
    diarization_pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization")
    transcription_model = whisper.load_model("base")
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

    # Whisper's transcribe() takes a file path (or an array/tensor), not a
    # file-like object, so persist the upload to a temporary file and hand
    # the same path to both models. The original extension is kept so the
    # audio decoders can recognise the container format.
    suffix = Path(uploaded_file.name).suffix or ".wav"
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp_audio:
        tmp_audio.write(uploaded_file.getvalue())
    audio_path = tmp_audio.name

    try:
        # Perform Speaker Diarization
        st.write("Processing Speaker Diarization...")
        diarized_output = diarization_pipeline(audio_path)

        # Perform Speech-to-Text Transcription
        st.write("Transcribing Audio...")
        transcription = transcription_model.transcribe(audio_path)
    finally:
        # Always remove the temporary copy, even if a model call fails.
        Path(audio_path).unlink(missing_ok=True)

    # Generate Summary
    st.write("Generating Summary...")
    transcript_text = transcription["text"]
    if transcript_text.strip():
        # truncation=True keeps long transcripts within the summarization
        # model's maximum input length instead of failing on them.
        summary = summarizer(transcript_text, truncation=True)
        summary_text = summary[0]["summary_text"]
    else:
        # Nothing was transcribed, so there is nothing to summarize.
        summary_text = ""

    # Display Outputs
    st.write("Speaker-Diarized Transcript:")
    st.text(str(diarized_output))
    st.write("Full Transcription:")
    st.text(transcript_text)
    st.write("Summary:")
    st.text(summary_text)