# Spaces: Runtime error
import os | |
import librosa | |
import numpy as np | |
import tensorflow as tf | |
import gradio as gr | |
from sklearn.preprocessing import LabelEncoder | |
# Restore the trained Keras model from 'sound_to_text_model.h5'.
model_path = 'sound_to_text_model.h5'
model = tf.keras.models.load_model(model_path)

# Label encoder used to turn a predicted class index back into its label.
# It is created UNFITTED here: until it is fitted on the same labels that
# were used during training (e.g. ``encoder.fit(y)``), calling
# ``encoder.inverse_transform`` raises scikit-learn's NotFittedError.
encoder = LabelEncoder()
# Function to extract features from audio
def extract_features(file_path, *, duration=2.0, n_mfcc=13):
    """Summarise an audio file as a fixed-length MFCC feature vector.

    Args:
        file_path: Path of the audio file; passed straight to
            ``librosa.load``.
        duration: Maximum number of seconds of audio to load from the
            start of the file.
        n_mfcc: Number of MFCC coefficients to compute, which is also the
            length of the returned vector.

    Returns:
        A 1-D NumPy array of length ``n_mfcc`` holding each coefficient
        averaged over all frames of the loaded clip.

    NOTE(review): the defaults (2.0 s, 13 coefficients) presumably match the
    features the model was trained on -- confirm before overriding them.
    """
    y_audio, sr = librosa.load(file_path, duration=duration)
    mfccs = librosa.feature.mfcc(y=y_audio, sr=sr, n_mfcc=n_mfcc)
    # Averaging over the frame axis yields the same vector length no matter
    # how many frames the clip produced.
    return np.mean(mfccs.T, axis=0)
# Function to predict text from audio
def predict_sound_text(audio):
    """Predict the text label for an uploaded audio clip.

    Args:
        audio: Path to the audio file, as delivered by the ``gr.Audio``
            input configured with ``type="filepath"``. May be ``None`` when
            the form is submitted without any audio.

    Returns:
        The label decoded by the module-level ``encoder`` for the
        highest-scoring output of the module-level ``model``.

    Raises:
        gr.Error: If no audio was supplied, or if ``encoder`` has not been
            fitted and therefore cannot map class indices to labels.
    """
    # Without this guard a missing upload fails deep inside librosa with an
    # error that tells the user nothing.
    if audio is None:
        raise gr.Error("No audio received. Please upload an audio file first.")

    # A fitted LabelEncoder exposes ``classes_``; an unfitted one would make
    # inverse_transform raise NotFittedError on every request, so fail early
    # (before the feature extraction and inference work) with a clear message.
    if not hasattr(encoder, "classes_"):
        raise gr.Error(
            "The label encoder has not been fitted. Fit it on the training "
            "labels before running predictions."
        )

    features = extract_features(audio)  # ``audio`` is used directly as the file path
    # Wrap the single feature vector in a batch of size one for the model.
    prediction = model.predict(np.array([features]))
    label = encoder.inverse_transform([np.argmax(prediction)])
    return label[0]
# Build the Gradio UI: one audio upload in, one text result out.
interface = gr.Interface(
    title="Audio to Text Converter",
    description="Upload an audio file (MP3 format) and get the textual representation.",
    fn=predict_sound_text,
    # Only ``type`` is set, so the callback receives the upload as a file path.
    inputs=gr.Audio(type="filepath"),
    outputs="text",
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()