import streamlit as st | |
import tensorflow as tf | |
import numpy as np | |
import librosa | |
import matplotlib.pyplot as plt | |
import librosa.display | |
import tempfile | |
import os | |
# Load the trained model
# Streamlit re-executes this whole script on every widget interaction;
# cache_resource keeps one loaded model around instead of re-reading the
# .h5 file from disk on each rerun.
@st.cache_resource
def load_model():
    """Load the trained Keras classifier from disk.

    Returns:
        The model object returned by ``tf.keras.models.load_model``.
    """
    model_path = "sound_classification_model.h5"  # Replace with the path to your .h5 file
    return tf.keras.models.load_model(model_path)


model = load_model()
# Preprocess audio into a spectrogram
def preprocess_audio(file_path, n_mels=128, fixed_time_steps=128):
    """Convert an audio file into a fixed-width, normalized log-mel spectrogram.

    The audio is loaded at its native sample rate, turned into a mel
    spectrogram, converted to decibels relative to its own peak, scaled by
    its largest absolute value, and finally zero-padded or truncated along
    the time axis to exactly ``fixed_time_steps`` frames.

    Args:
        file_path: Path of the audio file to load with ``librosa.load``.
        n_mels: Number of mel bands to compute.
        fixed_time_steps: Number of time frames in the returned array.

    Returns:
        The spectrogram with a trailing channel axis added
        (``(n_mels, fixed_time_steps, 1)`` for mono input), or ``None`` if
        any step fails. The error is printed in that case.
    """
    try:
        y, sr = librosa.load(file_path, sr=None)
        mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels, fmax=sr / 2)
        log_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)

        # Silent (or constant-power) audio yields an all-zero dB array;
        # dividing by its zero peak would fill the spectrogram with NaN and
        # poison the model input, so only scale when there is a peak.
        peak = np.max(np.abs(log_spectrogram))
        if peak > 0:
            log_spectrogram = log_spectrogram / peak

        n_frames = log_spectrogram.shape[1]
        if n_frames < fixed_time_steps:
            padding = fixed_time_steps - n_frames
            log_spectrogram = np.pad(log_spectrogram, ((0, 0), (0, padding)), mode='constant')
        else:
            log_spectrogram = log_spectrogram[:, :fixed_time_steps]

        return np.expand_dims(log_spectrogram, axis=-1)  # Add channel dimension for CNNs
    except Exception as e:
        # Best-effort boundary: the caller treats None as "could not process".
        print(f"Error processing {file_path}: {e}")
        return None
# Streamlit app UI
st.title("Audio Spectrogram Prediction")
st.write("Upload an audio file to generate a spectrogram and predict its class using your trained model.")

# File upload widget
uploaded_file = st.file_uploader("Choose an audio file", type=["wav", "mp3"])

if uploaded_file is not None:
    # Save the uploaded audio file to a temporary location. Keep the upload's
    # own extension (falling back to .wav) so an mp3 is not written to disk
    # under a misleading .wav name.
    suffix = os.path.splitext(uploaded_file.name)[1] or ".wav"
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_audio_file:
        # getvalue() returns the full buffer regardless of the read position.
        temp_audio_file.write(uploaded_file.getvalue())
        temp_audio_path = temp_audio_file.name

    try:
        # Preprocess the audio into a spectrogram
        st.write("Processing audio into a spectrogram...")
        spectrogram = preprocess_audio(temp_audio_path)

        if spectrogram is not None:
            # Display the spectrogram on an explicit figure so nothing leaks
            # into pyplot's global state between Streamlit reruns.
            st.write("Generated Spectrogram:")
            fig, ax = plt.subplots(figsize=(10, 4))
            # NOTE(review): sr/fmax here are display-only assumptions; the
            # audio was loaded at its native rate and cropped/padded, so the
            # axis ticks are approximate.
            mesh = librosa.display.specshow(
                spectrogram[:, :, 0],
                sr=22050,
                x_axis='time',
                y_axis='mel',
                fmax=8000,
                cmap='plasma',
                ax=ax,
            )
            # The plotted values were scaled by their peak magnitude during
            # preprocessing, so they are unitless rather than dB.
            fig.colorbar(mesh, ax=ax, format='%+.2f', label='Normalized level')
            ax.set_title('Mel-Spectrogram')
            fig.tight_layout()
            st.pyplot(fig)
            plt.close(fig)

            # Predict using the model
            st.write("Predicting...")
            batch = np.expand_dims(spectrogram, axis=0)  # Add batch dimension
            predictions = model.predict(batch)
            predicted_class = np.argmax(predictions, axis=-1)  # Assuming classification

            # Display the results
            st.write("Prediction Results:")
            st.write(f"Predicted Class: {int(predicted_class[0])}")
            st.write(f"Raw Model Output: {predictions}")
        else:
            st.write("Failed to process the audio file. Please try again with a different file.")
    finally:
        # Always clean up the temporary file, even if plotting or prediction fails.
        os.remove(temp_audio_path)