# Noiz / app.py
# Last commit: "Update app.py" (fcfd302) by pnavin; file size 1.69 kB.
# (The "raw" / "history" / "blame" entries were page navigation links, not code.)
import streamlit as st
import torchaudio
import speechbrain as sb
from speechbrain.dataio.dataio import read_audio
from IPython.display import Audio
from speechbrain.pretrained import SepformerSeparation as separator
from scipy.io import wavfile
import io
# Module-level Sepformer enhancement model; app() uses it via the global `model`.
# NOTE(review): this runs every time the script is executed — consider caching
# the load if the hosting framework re-runs the script per interaction.
model = separator.from_hparams(
    source="speechbrain/sepformer-whamr-enhancement",
    savedir="pretrained_models/sepformer-whamr-enhancement",
)
# Define the Streamlit app
def app():
    """Render the speech-enhancement page.

    Shows a title and a WAV file uploader. When a file has been uploaded it
    is saved to ``uploaded_audio.wav``, run through the module-level
    Sepformer ``model``, and both the original and the enhanced audio are
    offered for playback. Returns ``None``.
    """
    st.title("Speech Enhancement using Sepformer")

    # Let the user pick an audio file; nothing else to do until one is given.
    uploaded_file = st.file_uploader("Choose an audio file", type=["wav"])
    if uploaded_file is None:
        return

    # The model is invoked with a file path, so persist the upload first.
    # NOTE(review): this fixed path is shared by every caller of app();
    # concurrent sessions could overwrite each other's upload — confirm
    # whether that matters for the deployment.
    audio_bytes = uploaded_file.read()
    with open("uploaded_audio.wav", "wb") as f:
        f.write(audio_bytes)

    # Perform speech enhancement using the Sepformer model and flatten the
    # result by dropping singleton dimensions.
    enhanced_speech = model.separate_file(path="uploaded_audio.wav")
    enhanced_signal = enhanced_speech.detach().cpu().squeeze()

    # Encode the enhanced signal as WAV directly in memory instead of
    # round-tripping through a shared file on disk.
    # NOTE(review): 8000 Hz is assumed to be the model's output sample rate —
    # confirm against the model's hyperparameters.
    enhanced_buffer = io.BytesIO()
    wavfile.write(enhanced_buffer, 8000, enhanced_signal.numpy())
    enhanced_byte = enhanced_buffer.getvalue()

    # Play the original and enhanced audio
    st.audio(audio_bytes, format="audio/wav")
    st.audio(enhanced_byte, format="audio/wav")
# Entry point: build the page only when this file is executed as a script.
if __name__ == "__main__":
    app()