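# Streamlit app: upload an audio file, convert it to a log-mel spectrogram,
# and classify it with a pre-trained Keras model. Assuming this file is saved
# as app.py (the name is yours to choose), run it with:
#   streamlit run app.py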
import streamlit as st
import tensorflow as tf
import numpy as np
import librosa
import matplotlib.pyplot as plt
import librosa.display
import tempfile
import os

# Load the trained model
@st.cache_resource
def load_model():
    model_path = "sound_classification_model.h5"  # Replace with the path to your .h5 file
    model = tf.keras.models.load_model(model_path)
    return model

model = load_model()
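# The model is expected to accept inputs shaped (128, 128, 1), matching the
# spectrograms produced by preprocess_audio below.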

# Preprocess audio into a log-mel spectrogram shaped (n_mels, fixed_time_steps, 1)
def preprocess_audio(file_path, n_mels=128, fixed_time_steps=128):
    try:
        y, sr = librosa.load(file_path, sr=None)  # keep the file's native sample rate
        mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels, fmax=sr / 2)
        log_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)
        # Normalize; the small epsilon guards against division by zero on silent audio
        log_spectrogram = log_spectrogram / (np.max(np.abs(log_spectrogram)) + 1e-8)
        # Pad or truncate the time axis to a fixed number of frames
        if log_spectrogram.shape[1] < fixed_time_steps:
            padding = fixed_time_steps - log_spectrogram.shape[1]
            log_spectrogram = np.pad(log_spectrogram, ((0, 0), (0, padding)), mode='constant')
        else:
            log_spectrogram = log_spectrogram[:, :fixed_time_steps]
        # Return the sample rate too, so the display code can label axes correctly
        return np.expand_dims(log_spectrogram, axis=-1), sr  # add channel dimension for CNNs
    except Exception as e:
        st.error(f"Error processing {file_path}: {e}")
        return None, None
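# Quick sanity check outside Streamlit (the file name here is a placeholder):
#   spec, sr = preprocess_audio("example.wav")
#   print(spec.shape)  # expected: (128, 128, 1)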

# Streamlit app UI
st.title("Audio Spectrogram Prediction")
st.write("Upload an audio file to generate a spectrogram and predict its class using your trained model.")

# File upload widget
uploaded_file = st.file_uploader("Choose an audio file", type=["wav", "mp3"])

if uploaded_file is not None:
    # Save the uploaded audio to a temporary file, keeping its original
    # extension so the audio backend can identify the format
    suffix = os.path.splitext(uploaded_file.name)[1] or ".wav"
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_audio_file:
        temp_audio_file.write(uploaded_file.read())
        temp_audio_path = temp_audio_file.name

    # Preprocess the audio into a spectrogram
    st.write("Processing audio into a spectrogram...")
    spectrogram, sr = preprocess_audio(temp_audio_path)

    if spectrogram is not None:
        # Display the spectrogram, using the file's actual sample rate so the
        # frequency axis is labeled correctly
        st.write("Generated Spectrogram:")
        fig = plt.figure(figsize=(10, 4))
        librosa.display.specshow(spectrogram[:, :, 0], sr=sr, x_axis='time', y_axis='mel', fmax=sr / 2, cmap='plasma')
        plt.colorbar(label='Normalized dB')
        plt.title('Mel-Spectrogram')
        plt.tight_layout()
        st.pyplot(fig)
        plt.close(fig)  # free the figure to avoid leaking memory across reruns

        # Predict using the model
        st.write("Predicting...")
        batch = np.expand_dims(spectrogram, axis=0)  # add batch dimension: (1, 128, 128, 1)
        predictions = model.predict(batch)
        predicted_class = np.argmax(predictions, axis=-1)  # assumes a classification head

        # Display the results
        st.write("Prediction Results:")
        st.write(f"Predicted Class: {int(predicted_class[0])}")
        st.write(f"Raw Model Output: {predictions}")
    else:
        st.error("Failed to process the audio file. Please try again with a different file.")

    # Clean up the temporary file
    os.remove(temp_audio_path)