# EcoSoundNet / app.py
import streamlit as st
import tensorflow as tf
import numpy as np
import librosa
import matplotlib.pyplot as plt
import librosa.display
import tempfile
import os
# Load the trained model once and cache it across Streamlit reruns
@st.cache_resource
def load_model():
    model_path = "sound_classification_model.h5"  # Replace with the path to your .h5 file
    model = tf.keras.models.load_model(model_path)
    return model

model = load_model()
# Preprocess audio into a normalized log-mel spectrogram
def preprocess_audio(file_path, n_mels=128, fixed_time_steps=128):
    try:
        y, sr = librosa.load(file_path, sr=None)
        mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels, fmax=sr / 2)
        log_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)
        # Normalize; the small epsilon guards against division by zero on silent input
        log_spectrogram = log_spectrogram / (np.max(np.abs(log_spectrogram)) + 1e-8)
        # Pad or truncate the time axis to a fixed number of frames
        if log_spectrogram.shape[1] < fixed_time_steps:
            padding = fixed_time_steps - log_spectrogram.shape[1]
            log_spectrogram = np.pad(log_spectrogram, ((0, 0), (0, padding)), mode='constant')
        else:
            log_spectrogram = log_spectrogram[:, :fixed_time_steps]
        # Add a channel dimension for CNN input, and return the sample rate
        # so the caller can label the spectrogram axes correctly
        return np.expand_dims(log_spectrogram, axis=-1), sr
    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return None, None
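# Note: preprocess_audio() yields arrays of shape (n_mels, fixed_time_steps, 1),
# i.e. (128, 128, 1) with the defaults above, so the model is expected to accept
# that input shape. A quick offline sanity check ("example.wav" is a hypothetical
# test clip; substitute any local audio file):
#
#   spec, sr = preprocess_audio("example.wav")
#   assert spec is not None and spec.shape == (128, 128, 1)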
# Streamlit app UI
st.title("Audio Spectrogram Prediction")
st.write("Upload an audio file to generate a spectrogram and predict its class using your trained model.")
# File upload widget
uploaded_file = st.file_uploader("Choose an audio file", type=["wav", "mp3"])
if uploaded_file is not None:
    # Save the upload to a temporary file, preserving its original extension
    # so MP3 uploads are not mislabeled as WAV during decoding
    suffix = os.path.splitext(uploaded_file.name)[1] or ".wav"
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_audio_file:
        temp_audio_file.write(uploaded_file.read())
        temp_audio_path = temp_audio_file.name
    # Preprocess the audio into a spectrogram
    st.write("Processing audio into a spectrogram...")
    spectrogram, sr = preprocess_audio(temp_audio_path)

    if spectrogram is not None:
        # Display the spectrogram, using the file's actual sample rate for the axes
        st.write("Generated Spectrogram:")
        fig, ax = plt.subplots(figsize=(10, 4))
        img = librosa.display.specshow(spectrogram[:, :, 0], sr=sr, x_axis='time',
                                       y_axis='mel', fmax=sr / 2, cmap='plasma', ax=ax)
        fig.colorbar(img, ax=ax)  # Values are normalized log-power, not raw dB
        ax.set_title('Mel-Spectrogram')
        fig.tight_layout()
        st.pyplot(fig)
        # Predict using the model
        st.write("Predicting...")
        batch = np.expand_dims(spectrogram, axis=0)  # Add batch dimension
        predictions = model.predict(batch)
        predicted_class = np.argmax(predictions, axis=-1)  # Assumes a classification head

        # Display the results
        st.write("Prediction Results:")
        st.write(f"Predicted Class: {int(predicted_class[0])}")
        st.write(f"Raw Model Output: {predictions}")
    else:
        st.write("Failed to process the audio file. Please try again with a different file.")

    # Clean up the temporary file
    os.remove(temp_audio_path)
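# To run the app locally (assuming Streamlit and the .h5 model file are present):
#   streamlit run app.py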