# Spaces: Running
# NOTE: import order is kept as-is on purpose; the wildcard import below can
# rebind names, so later imports must continue to come after it.
import streamlit as st
import noisereduce as nr
from pedalboard.io import AudioFile
from pedalboard import *
import numpy as np
from io import BytesIO
import librosa
# Function to process audio
def process_audio(input_file, sr=44100):
    """Denoise an audio file and run it through a fixed effects chain.

    The audio is read in full, resampled to ``sr``, passed through
    stationary noise reduction and then through a pedalboard chain of
    noise gate, compressor, low-shelf filter and gain.

    Args:
        input_file: Audio source handed directly to ``AudioFile`` for
            reading (``main`` passes the Streamlit upload object).
        sr: Sampling rate in Hz used for resampling and for every
            processing stage. Defaults to 44100, the value that was
            previously hard-coded.

    Returns:
        A ``(processed_audio, sr)`` tuple: the output of the effects
        board and the sampling rate it was processed at.
    """
    # Read the whole file, resampled to the working rate.
    with AudioFile(input_file).resampled_to(sr) as f:
        audio = f.read(f.frames)

    # Reduce stationary noise
    reduced_noise = nr.reduce_noise(
        y=audio, sr=sr, stationary=True, prop_decrease=0.75)

    # Apply audio effects using pedalboard
    board = Pedalboard([
        NoiseGate(threshold_db=-30, ratio=1.5, release_ms=250),
        Compressor(threshold_db=-16, ratio=2.5),
        LowShelfFilter(cutoff_frequency_hz=400, gain_db=10, q=1),
        Gain(gain_db=10),
    ])
    processed_audio = board(reduced_noise, sr)
    return processed_audio, sr
# Function to detect plosives in audio
def detect_plosives(audio, sr):
    """Return the times (in seconds) of frames with strong low-frequency energy.

    The signal's power spectrogram is summed over the 0-200 Hz and
    200-500 Hz bands; every STFT frame whose summed energy exceeds half of
    the maximum frame energy is reported as a plosive.

    Args:
        audio: Audio samples. A 1-D array is used as-is. Arrays with more
            dimensions are averaged over their leading axes first
            (assumes a channels-first layout such as ``(channels, samples)``
            -- TODO confirm for callers other than ``main``).
        sr: Sampling rate of ``audio`` in Hz.

    Returns:
        A NumPy array of frame times in seconds; empty when the input is
        empty or no frame exceeds the threshold.
    """
    # Define frequency bands for plosive detection
    frequency_bands = [(0, 200), (200, 500)]
    # STFT geometry, spelled out once so the transform, the bin frequencies
    # and the frame-to-time conversion are guaranteed to agree.
    n_fft = 2048
    hop_length = 512

    audio = np.asarray(audio)
    if audio.size == 0:
        # Nothing to analyse; avoid handing an empty signal to the STFT.
        return np.array([])
    if audio.ndim > 1:
        # Downmix to mono. Without this the spectrogram gains a leading
        # channel axis and the frequency-bin indexing below would hit the
        # wrong axis.
        audio = audio.mean(axis=tuple(range(audio.ndim - 1)))

    # Calculate short-time Fourier transform (STFT)
    stft = librosa.stft(audio, n_fft=n_fft, hop_length=hop_length)
    # Convert amplitude to energy (power) spectrogram
    power = np.abs(stft) ** 2

    # Bin centre frequencies are the same for every band: compute them once.
    freqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)

    # Calculate energy in each frequency band
    band_energies = []
    for low, high in frequency_bands:
        in_band = (freqs >= low) & (freqs < high)
        if in_band.any():  # Skip bands that contain no FFT bin
            # Sum along frequency axis
            band_energies.append(power[in_band, :].sum(axis=0))

    if not band_energies:  # No band matched any bin: nothing to report
        return np.array([])

    # Calculate overall energy as the sum of energy in all bands
    overall_energy = np.sum(band_energies, axis=0)
    # Apply thresholding to detect plosives
    threshold = np.max(overall_energy) * 0.5
    plosive_frames = np.where(overall_energy > threshold)[0]
    # Convert frame indices to time in seconds
    return librosa.frames_to_time(plosive_frames, sr=sr, hop_length=hop_length)
# Main function
def main():
    """Run the Streamlit page: upload, enhance, play back, download, analyse.

    Once a WAV file is uploaded it is processed with ``process_audio``; the
    original and enhanced audio are offered for playback, the enhanced audio
    is offered as a WAV download, and a button runs ``detect_plosives`` on
    the enhanced audio and lists the detected times.
    """
    st.title("Audio Enhancement")

    # Upload audio file
    uploaded_file = st.file_uploader("Upload audio file", type=["wav"])
    if uploaded_file is None:
        # Nothing uploaded yet: render only the title and the uploader.
        return

    # Process the audio
    st.write("Processing audio...")
    processed_audio, sr = process_audio(uploaded_file)
    # Display processing progress
    st.write("Audio processed successfully!")

    # Allow user to play original and processed audio
    st.subheader("Play Original Audio")
    st.audio(uploaded_file, format='audio/wav', start_time=0)
    st.subheader("Play Enhanced Audio")
    st.audio(processed_audio, format='audio/wav',
             start_time=0, sample_rate=sr)

    # Allow user to download processed audio
    st.subheader("Download Enhanced Audio")
    processed_audio_bytes = BytesIO()
    # len(processed_audio) is the size of the first axis, passed as the
    # channel count (presumably the array is channels-first -- verify
    # against process_audio's output).
    with AudioFile(processed_audio_bytes, 'w', sr, len(processed_audio), format='wav') as f:
        f.write(processed_audio)
    st.download_button("Download", processed_audio_bytes.getvalue(),
                       file_name="processed_audio.wav", mime='audio/wav')

    # Button to detect plosives
    if st.button("Detect Plosives"):
        st.write("Detecting plosives...")
        plosives = detect_plosives(processed_audio, sr)
        # Explicit length check: the result may be a NumPy array, whose
        # truth value is ambiguous (raises ValueError) for several elements.
        if len(plosives) > 0:
            st.subheader("Detected Plosives")
            st.write("Plosives detected at time(s):", ", ".join(
                [str(round(p, 2)) for p in plosives]))
        else:
            st.write("No plosives detected.")
# Entry point of the program
if __name__ == "__main__":
    main()