File size: 3,831 Bytes
0acc83a
41fb2f1
dd632f3
 
41fb2f1
 
dd632f3
0acc83a
9e0939e
 
41fb2f1
 
9e0939e
dd632f3
9e0939e
dd632f3
 
41fb2f1
dd632f3
 
 
41fb2f1
9e0939e
dd632f3
 
 
 
 
 
41fb2f1
dd632f3
41fb2f1
 
 
9e0939e
 
41fb2f1
dd632f3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9e0939e
 
dd632f3
41fb2f1
dd632f3
41fb2f1
 
 
 
 
 
 
 
 
 
dd632f3
 
 
9e0939e
dd632f3
 
 
 
41fb2f1
 
dd632f3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41fb2f1
 
9e0939e
41fb2f1
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import streamlit as st
import noisereduce as nr
from pedalboard.io import AudioFile
from pedalboard import *
import numpy as np
from io import BytesIO
import librosa

# Function to process audio


def process_audio(input_file, sr=44100):
    """Denoise an audio file and run it through a fixed effects chain.

    Args:
        input_file: Anything ``pedalboard.io.AudioFile`` can open for
            reading (``main`` passes the Streamlit upload object).
        sr: Sampling rate in Hz that the audio is resampled to before any
            processing. Defaults to 44100, the value that used to be
            hard-coded here.

    Returns:
        A ``(processed_audio, sr)`` tuple: the array produced by the effects
        board and the sampling rate it was processed at.
    """
    # Read the whole file in one go, resampled to the working rate so the
    # noise reduction and the effects below all see the same ``sr``.
    with AudioFile(input_file).resampled_to(sr) as f:
        audio = f.read(f.frames)

    # Reduce stationary noise; prop_decrease=0.75 removes 75% of the
    # estimated noise rather than all of it.
    reduced_noise = nr.reduce_noise(
        y=audio, sr=sr, stationary=True, prop_decrease=0.75)

    # Effects are applied in list order: gate, compress, boost lows, gain.
    board = Pedalboard([
        NoiseGate(threshold_db=-30, ratio=1.5, release_ms=250),
        Compressor(threshold_db=-16, ratio=2.5),
        LowShelfFilter(cutoff_frequency_hz=400, gain_db=10, q=1),
        Gain(gain_db=10),
    ])

    processed_audio = board(reduced_noise, sr)

    return processed_audio, sr

# Function to detect plosives in audio


def detect_plosives(audio, sr, threshold_ratio=0.5):
    """Return the times of frames whose low-frequency energy is unusually high.

    The signal's STFT power is summed over the 0-200 Hz and 200-500 Hz bands;
    every frame whose summed energy exceeds ``threshold_ratio`` times the
    loudest frame is reported.

    Args:
        audio: Audio samples. Multi-channel input is averaged down to one
            channel first (assumes the last axis is time, which is how
            ``main`` passes the pedalboard output -- TODO confirm for other
            callers).
        sr: Sampling rate of ``audio`` in Hz.
        threshold_ratio: Fraction of the peak band energy a frame must
            exceed to count as a plosive. Defaults to 0.5, the previously
            hard-coded value.

    Returns:
        A 1-D NumPy array of frame times in seconds; empty when the input is
        empty or no frame exceeds the threshold.
    """
    # Frequency bands (Hz) whose energy is inspected.
    frequency_bands = [(0, 200), (200, 500)]

    samples = np.asarray(audio)
    if samples.size == 0:
        # Nothing to analyse; avoid calling max() on an empty array below.
        return np.empty(0)

    # A multi-channel signal would give a 3-D STFT, and the band indexing
    # below would then select channels instead of frequency bins.
    if samples.ndim > 1:
        samples = samples.reshape(-1, samples.shape[-1]).mean(axis=0)

    # Power spectrogram, shape (frequency bins, frames).
    power = np.abs(librosa.stft(samples)) ** 2

    # Bin centre frequencies do not depend on the band, so compute them once.
    bin_freqs = librosa.fft_frequencies(sr=sr)

    band_energies = []
    for low_hz, high_hz in frequency_bands:
        in_band = (bin_freqs >= low_hz) & (bin_freqs < high_hz)
        if in_band.any():  # Skip bands that contain no FFT bin
            # Sum along the frequency axis -> one energy value per frame
            band_energies.append(power[in_band, :].sum(axis=0))

    if not band_energies:
        return np.empty(0)

    # Overall energy is the per-frame sum across all bands.
    overall_energy = np.sum(band_energies, axis=0)
    if overall_energy.size == 0:
        return np.empty(0)

    # The threshold is relative to the loudest frame, so all-zero energy
    # yields no detections (0 > 0 is false).
    threshold = overall_energy.max() * threshold_ratio
    plosive_frames = np.flatnonzero(overall_energy > threshold)

    # Convert frame indices to time in seconds
    return librosa.frames_to_time(plosive_frames, sr=sr)

# Main function


def main():
    """Render the Streamlit page for uploading and enhancing a WAV file.

    Once a file is uploaded, the page processes it with ``process_audio``,
    offers playback of the original and enhanced audio, provides a download
    of the enhanced audio as WAV, and runs ``detect_plosives`` on demand.
    """
    st.title("Audio Enhancement")

    # Upload audio file
    uploaded_file = st.file_uploader("Upload audio file", type=["wav"])
    if uploaded_file is None:
        # Nothing to do until the user provides a file.
        return

    # Process the audio
    st.write("Processing audio...")
    processed_audio, sr = process_audio(uploaded_file)
    st.write("Audio processed successfully!")

    # Allow user to play original and processed audio
    st.subheader("Play Original Audio")
    st.audio(uploaded_file, format='audio/wav', start_time=0)
    st.subheader("Play Enhanced Audio")
    st.audio(processed_audio, format='audio/wav',
             start_time=0, sample_rate=sr)

    # Allow user to download processed audio: encode it to WAV in memory.
    st.subheader("Download Enhanced Audio")
    processed_audio_bytes = BytesIO()
    # len(processed_audio) is passed as the channel count, i.e. the size of
    # the array's first axis.
    with AudioFile(processed_audio_bytes, 'w', sr, len(processed_audio), format='wav') as f:
        f.write(processed_audio)
    st.download_button("Download", processed_audio_bytes.getvalue(),
                       file_name="processed_audio.wav", mime='audio/wav')

    # Button to detect plosives
    if st.button("Detect Plosives"):
        st.write("Detecting plosives...")
        plosives = detect_plosives(processed_audio, sr)
        # Use an explicit length check: the truth value of a NumPy array
        # with several elements is ambiguous and raises ValueError, and a
        # lone plosive at 0.0 s would otherwise read as "none found".
        if len(plosives) > 0:
            st.subheader("Detected Plosives")
            st.write("Plosives detected at time(s):", ", ".join(
                [str(round(float(p), 2)) for p in plosives]))
        else:
            st.write("No plosives detected.")

# Entry point: build the page via main() only when this file is executed as
# a script, not when it is imported as a module.
if __name__ == "__main__":
    main()