import io

import streamlit as st
from scipy.io import wavfile
# Note: in SpeechBrain >= 1.0 this import lives under speechbrain.inference.separation
from speechbrain.pretrained import SepformerSeparation as separator

# Load the pretrained Sepformer enhancement model (trained on WHAMR! at 8 kHz)
model = separator.from_hparams(
    source="speechbrain/sepformer-whamr-enhancement",
    savedir="pretrained_models/sepformer-whamr-enhancement",
)


# Define the Streamlit app
def app():
    st.title("Speech Enhancement using Sepformer")

    # Add a file uploader to allow the user to select an audio file
    uploaded_file = st.file_uploader("Choose an audio file", type=["wav"])

    # If an audio file is uploaded, perform speech enhancement and play the results
    if uploaded_file is not None:
        # Save the uploaded audio to disk so the model can read it from a path
        audio_bytes = uploaded_file.read()
        with open("uploaded_audio.wav", "wb") as f:
            f.write(audio_bytes)

        # Perform speech enhancement using the Sepformer model.
        # separate_file returns a tensor of shape [batch, time, n_sources];
        # for enhancement there is a single source, so take index 0.
        enhanced_speech = model.separate_file(path="uploaded_audio.wav")
        enhanced_signal = enhanced_speech[:, :, 0].detach().cpu().squeeze()

        # Write the enhanced signal to an in-memory WAV buffer at the model's 8 kHz rate
        enhanced_buffer = io.BytesIO()
        wavfile.write(enhanced_buffer, 8000, enhanced_signal.numpy())
        enhanced_buffer.seek(0)

        # Play the original and enhanced audio
        st.audio(audio_bytes, format="audio/wav")
        st.audio(enhanced_buffer.read(), format="audio/wav")


# Run the Streamlit app
if __name__ == "__main__":
    app()
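
# Usage sketch: Streamlit apps are launched from the command line rather than
# with `python`. Assuming this script is saved as app.py (filename is an
# assumption, not given in the original), run:
#
#   streamlit run app.py
#
# The app then opens in the browser, where a WAV file can be uploaded and the
# original and enhanced versions played back.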