Spaces:
Running
Running
File size: 3,866 Bytes
0acc83a 41fb2f1 dd632f3 41fb2f1 dd632f3 0acc83a 41fb2f1 dd632f3 41fb2f1 dd632f3 41fb2f1 dd632f3 41fb2f1 dd632f3 41fb2f1 dd632f3 41fb2f1 dd632f3 41fb2f1 dd632f3 41fb2f1 dd632f3 41fb2f1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 |
import streamlit as st
import noisereduce as nr
from pedalboard.io import AudioFile
from pedalboard import *
import numpy as np
from io import BytesIO
import librosa
def process_audio(input_file):
    """Denoise an audio file and run it through a fixed effects chain.

    The file is opened with pedalboard's ``AudioFile``, resampled to
    44.1 kHz and read in full. The samples then go through ``noisereduce``
    (stationary mode) followed by a noise gate, a compressor, a low-shelf
    filter and a gain stage.

    Args:
        input_file: Whatever ``pedalboard.io.AudioFile`` accepts for reading;
            the caller in this file passes a Streamlit upload object.

    Returns:
        A ``(processed_audio, sample_rate)`` tuple. ``sample_rate`` is always
        44100 because the input is resampled to that rate on load.
    """
    sample_rate = 44100

    # Pull the entire file into memory at a single, known sample rate.
    with AudioFile(input_file).resampled_to(sample_rate) as source:
        raw_audio = source.read(source.frames)

    # Stationary noise reduction with prop_decrease=0.75.
    denoised = nr.reduce_noise(
        y=raw_audio,
        sr=sample_rate,
        stationary=True,
        prop_decrease=0.75,
    )

    # Effects are applied in list order: gate, compress, boost lows, gain.
    effects = [
        NoiseGate(threshold_db=-30, ratio=1.5, release_ms=250),
        Compressor(threshold_db=-16, ratio=2.5),
        LowShelfFilter(cutoff_frequency_hz=400, gain_db=10, q=1),
        Gain(gain_db=10),
    ]
    chain = Pedalboard(effects)

    return chain(denoised, sample_rate), sample_rate
def detect_plosives(audio, sr):
    """Return the times (in seconds) of frames with high low-frequency energy.

    The signal is converted to a power spectrogram, the energy in the
    0-200 Hz and 200-500 Hz bands is summed per frame, and every frame whose
    summed energy exceeds half of the loudest frame's energy is reported.

    Because the threshold is relative to the clip's own maximum, the loudest
    low-frequency frames are always reported, even in a clip that contains
    no real plosives.

    Args:
        audio: Audio samples as a 1-D array, or a multi-dimensional array
            whose last axis is time (e.g. ``(channels, samples)``, the
            layout this file's caller passes). Multi-channel input is
            averaged down to mono before analysis.
        sr: Sample rate of ``audio`` in Hz.

    Returns:
        A NumPy array of frame start times in seconds; empty when nothing
        exceeds the threshold or when ``audio`` has no samples.
    """
    # STFT geometry, shared by every librosa call below so that frequency
    # bins and frame timestamps refer to the same spectrogram.
    n_fft = 2048
    hop_length = 512

    # Frequency bands (Hz, half-open) inspected for plosive energy.
    frequency_bands = [(0, 200), (200, 500)]

    samples = np.asarray(audio)
    if samples.ndim > 1:
        # librosa.stft keeps leading (channel) axes, which would make the
        # frequency-bin indexing below hit the channel axis instead.
        # Collapse every leading axis into one and average to mono.
        samples = samples.reshape(-1, samples.shape[-1]).mean(axis=0)
    if samples.size == 0:
        return np.array([])

    # Power spectrogram, shape (1 + n_fft // 2, n_frames).
    stft = librosa.stft(samples, n_fft=n_fft, hop_length=hop_length)
    power = np.abs(stft) ** 2

    # Centre frequency of each STFT bin; the same for every band.
    freq_range = librosa.fft_frequencies(sr=sr, n_fft=n_fft)

    band_energies = []
    for low_hz, high_hz in frequency_bands:
        in_band = (freq_range >= low_hz) & (freq_range < high_hz)
        if in_band.any():
            # Sum along the frequency axis -> one energy value per frame.
            band_energies.append(power[in_band, :].sum(axis=0))

    if not band_energies:
        return np.array([])

    # Per-frame energy across all inspected bands.
    overall_energy = np.sum(band_energies, axis=0)
    if overall_energy.size == 0:
        return np.array([])

    # Flag frames above half of the peak low-frequency energy.
    threshold = np.max(overall_energy) * 0.5
    plosive_frames = np.where(overall_energy > threshold)[0]

    # Convert frame indices to seconds using the same hop as the STFT.
    return librosa.frames_to_time(plosive_frames, sr=sr, hop_length=hop_length)
def main():
    """Render the Streamlit page: upload, enhance, play, download, analyse.

    Once a WAV file is uploaded it is passed to ``process_audio``. The page
    then offers playback of the original and the enhanced audio, a download
    button for the enhanced audio encoded as WAV, and a button that runs
    ``detect_plosives`` on the enhanced audio and lists the detected times.
    """
    st.title("Audio Enhancement")

    # Upload audio file; nothing else is rendered until one is provided.
    uploaded_file = st.file_uploader("Upload audio file", type=["wav"])
    if uploaded_file is None:
        return

    # Process the audio
    st.write("Processing audio...")
    processed_audio, sr = process_audio(uploaded_file)
    st.write("Audio processed successfully!")

    # Allow user to play original and processed audio
    st.subheader("Play Origional Audio")
    st.audio(uploaded_file, format='audio/wav', start_time=0)
    st.subheader("Play Enhanced Audio")
    st.audio(processed_audio, format='audio/wav',
             start_time=0, sample_rate=sr)

    # Allow user to download processed audio. The array is encoded to WAV
    # in memory; len(processed_audio) is its first dimension and is passed
    # as the channel count (assumes a (channels, samples) layout).
    st.subheader("Download Enhanced Audio")
    processed_audio_bytes = BytesIO()
    with AudioFile(processed_audio_bytes, 'w', sr, len(processed_audio), format='wav') as f:
        f.write(processed_audio)
    st.download_button("Download", processed_audio_bytes.getvalue(),
                       file_name="processed_audio.wav", mime='audio/wav')

    # Button to detect plosives
    if st.button("Detect Plosives"):
        st.write("Detecting plosives...")
        plosives = detect_plosives(processed_audio, sr)
        # Test the length explicitly: the result may be a NumPy array, whose
        # truth value is ambiguous for more than one element and is False
        # for a single detection at 0.0 seconds.
        if len(plosives) > 0:
            st.subheader("Detected Plosives")
            st.write("Plosives detected at time(s):", ", ".join(
                [str(round(p, 2)) for p in plosives]))
        else:
            st.write("No plosives detected.")
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|