File size: 3,866 Bytes
0acc83a
41fb2f1
dd632f3
 
41fb2f1
 
dd632f3
0acc83a
41fb2f1
 
dd632f3
 
 
41fb2f1
dd632f3
 
 
41fb2f1
dd632f3
 
 
 
 
 
 
41fb2f1
dd632f3
41fb2f1
 
 
 
dd632f3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41fb2f1
dd632f3
41fb2f1
 
 
 
 
 
 
 
 
 
dd632f3
 
 
 
 
 
 
 
 
 
 
 
41fb2f1
 
dd632f3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41fb2f1
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import streamlit as st
import noisereduce as nr
from pedalboard.io import AudioFile
from pedalboard import *
import numpy as np
from io import BytesIO
import librosa


def process_audio(input_file):
    """Denoise an audio file and run it through a fixed effects chain.

    The file is read in full at 44.1 kHz, passed through stationary noise
    reduction, and then through a pedalboard chain (noise gate, compressor,
    low-shelf filter, gain).

    Args:
        input_file: Whatever ``pedalboard.io.AudioFile`` accepts for reading;
            the caller in this file passes a Streamlit uploaded file.

    Returns:
        A ``(processed_audio, sr)`` tuple: the processed samples and the
        sample rate (always 44100) they were resampled to.
    """
    sample_rate = 44100

    # Read every frame, resampling on the fly to the working sample rate.
    with AudioFile(input_file).resampled_to(sample_rate) as reader:
        samples = reader.read(reader.frames)

    # Stationary noise reduction, removing 75% of the estimated noise.
    denoised = nr.reduce_noise(
        y=samples,
        sr=sample_rate,
        stationary=True,
        prop_decrease=0.75,
    )

    # Effects are applied in list order.
    effects = [
        NoiseGate(threshold_db=-30, ratio=1.5, release_ms=250),
        Compressor(threshold_db=-16, ratio=2.5),
        LowShelfFilter(cutoff_frequency_hz=400, gain_db=10, q=1),
        Gain(gain_db=10),
    ]
    chain = Pedalboard(effects)

    return chain(denoised, sample_rate), sample_rate


def detect_plosives(audio, sr):
    """Estimate when low-frequency energy bursts (plosives) occur.

    The signal is transformed with an STFT, the power in the 0-200 Hz and
    200-500 Hz bands is summed per frame, and every frame whose summed
    energy exceeds half of the loudest frame's energy is reported.

    Args:
        audio: Floating-point audio samples, either 1-D ``(samples,)`` or
            multi-channel with samples on the last axis, e.g.
            ``(channels, samples)``. Multi-channel input is averaged down
            to mono before analysis.
        sr: Sample rate of ``audio`` in Hz.

    Returns:
        A NumPy array of times in seconds, one per detected frame. An empty
        list is returned when ``audio`` is empty or when no frequency band
        maps onto an FFT bin.
    """
    # STFT geometry (librosa's defaults), spelled out so the STFT, the bin
    # frequencies and the frame-to-time conversion are guaranteed to agree.
    n_fft = 2048
    hop_length = 512

    # Frequency bands (Hz) in which plosive energy is measured.
    frequency_bands = [(0, 200), (200, 500)]

    samples = np.asarray(audio)
    if samples.size == 0:
        return []

    # Downmix to mono. A multi-channel STFT has shape
    # (channels, freq, frames), so indexing its first axis with frequency
    # bin indices would hit the channel axis instead of the frequency axis.
    if samples.ndim > 1:
        samples = samples.mean(axis=tuple(range(samples.ndim - 1)))

    # Power spectrogram, shape (freq_bins, frames).
    power = np.abs(
        librosa.stft(samples, n_fft=n_fft, hop_length=hop_length)) ** 2

    # Centre frequency of every FFT bin; identical for all bands, so it is
    # computed once rather than per band.
    bin_frequencies = librosa.fft_frequencies(sr=sr, n_fft=n_fft)

    # Per-frame energy of each band that covers at least one FFT bin.
    band_energies = []
    for low_hz, high_hz in frequency_bands:
        in_band = (bin_frequencies >= low_hz) & (bin_frequencies < high_hz)
        if in_band.any():
            band_energies.append(power[in_band, :].sum(axis=0))

    if not band_energies:
        return []

    # Total low-frequency energy per frame across all bands.
    overall_energy = np.sum(band_energies, axis=0)

    # A frame counts as a plosive when it exceeds half the peak energy.
    threshold = np.max(overall_energy) * 0.5
    plosive_frames = np.where(overall_energy > threshold)[0]

    # Convert frame indices to time in seconds.
    return librosa.frames_to_time(plosive_frames, sr=sr, hop_length=hop_length)


def main():
    """Render the Streamlit page for enhancing an uploaded WAV file.

    Once a file is uploaded it is processed with ``process_audio``; the page
    then offers playback of the original and enhanced audio, a download of
    the enhanced audio as WAV, and an on-demand plosive detection.
    """
    st.title("Audio Enhancement")

    # Upload audio file
    uploaded_file = st.file_uploader("Upload audio file", type=["wav"])
    if uploaded_file is None:
        # Nothing to do until the user provides a file.
        return

    # Process the audio
    st.write("Processing audio...")
    processed_audio, sr = process_audio(uploaded_file)
    st.write("Audio processed successfully!")

    # Allow user to play original and processed audio
    st.subheader("Play Original Audio")
    st.audio(uploaded_file, format='audio/wav', start_time=0)
    st.subheader("Play Enhanced Audio")
    st.audio(processed_audio, format='audio/wav',
             start_time=0, sample_rate=sr)

    # Allow user to download processed audio: encode it as WAV in memory.
    st.subheader("Download Enhanced Audio")
    processed_audio_bytes = BytesIO()
    # The fourth argument is the channel count; len() of the array is its
    # first-axis length (assumes a (channels, samples) layout - TODO confirm).
    with AudioFile(processed_audio_bytes, 'w', sr, len(processed_audio),
                   format='wav') as f:
        f.write(processed_audio)
    st.download_button("Download", processed_audio_bytes.getvalue(),
                       file_name="processed_audio.wav", mime='audio/wav')

    # Button to detect plosives
    if st.button("Detect Plosives"):
        st.write("Detecting plosives...")
        plosives = detect_plosives(processed_audio, sr)
        # detect_plosives may return a NumPy array, whose truth value is
        # ambiguous (raises ValueError) when it holds more than one element,
        # so test its length explicitly instead of `if plosives:`.
        if len(plosives) > 0:
            st.subheader("Detected Plosives")
            st.write("Plosives detected at time(s):", ", ".join(
                [str(round(p, 2)) for p in plosives]))
        else:
            st.write("No plosives detected.")

# Script entry point: build the page only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()