Spaces:

szili2011
/

sound-to-text-converter

Runtime error

szili2011 committed on Oct 27, 2024

Commit

f3ecf4e

verified ·

1 Parent(s): c75b241

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -3,11 +3,19 @@ import librosa
 import numpy as np
 import tensorflow as tf
 import gradio as gr
 # Load the pre-trained model
 model_path = 'sound_to_text_model.h5'
 model = tf.keras.models.load_model(model_path)
 # Function to extract features from audio
 def extract_features(file_path):
     y_audio, sr = librosa.load(file_path, duration=2.0)
@@ -16,7 +24,7 @@ def extract_features(file_path):
 # Function to predict text from audio
 def predict_sound_text(audio):
-    features = extract_features(audio.name)
     prediction = model.predict(np.array([features]))
     label = encoder.inverse_transform([np.argmax(prediction)])
     return label[0]
@@ -24,7 +32,7 @@ def predict_sound_text(audio):
 # Define Gradio interface
 interface = gr.Interface(
     fn=predict_sound_text,
-    inputs=gr.Audio(type="filepath"),  # Removed source parameter
     outputs="text",
     title="Audio to Text Converter",
     description="Upload an audio file (MP3 format) and get the textual representation."

 import numpy as np
 import tensorflow as tf
 import gradio as gr
+from sklearn.preprocessing import LabelEncoder
 # Load the pre-trained model
 model_path = 'sound_to_text_model.h5'
 model = tf.keras.models.load_model(model_path)
+# Initialize the label encoder (it must be fitted to your labels before it is used)
+# Note: encoder.inverse_transform() raises NotFittedError on an unfitted encoder, so fit it before predicting
+# For example, reuse the same fitted encoder (or the same list of labels) that you used during training
+encoder = LabelEncoder()
+# Assuming you have a list of labels used during training (e.g., y)
+# encoder.fit(y)  # Uncomment and run this if you haven't already fitted the encoder
 # Function to extract features from audio
 def extract_features(file_path):
     y_audio, sr = librosa.load(file_path, duration=2.0)
 # Function to predict text from audio
 def predict_sound_text(audio):
     # Gradio callback: turn one audio input into a single predicted label.
     # `audio` is handed unchanged to extract_features(); the interface defined
     # below uses gr.Audio(type="filepath"), so it is expected to be a path.
+    features = extract_features(audio)  # Use audio directly as the file path
     # Wrap the features in a list so the model receives a batch of one sample.
     prediction = model.predict(np.array([features]))
     # np.argmax picks the index of the highest score; the encoder maps that
     # index back to its label.
     # NOTE(review): in the visible code `encoder` is a fresh LabelEncoder()
     # whose fit() call is commented out - confirm it is fitted elsewhere,
     # otherwise inverse_transform() will fail at runtime.
     label = encoder.inverse_transform([np.argmax(prediction)])
     # inverse_transform returns an array; return its single element.
     return label[0]
 # Define Gradio interface
 interface = gr.Interface(
     fn=predict_sound_text,
+    inputs=gr.Audio(type="filepath"),  # Use only the type argument
     outputs="text",
     title="Audio to Text Converter",
     description="Upload an audio file (MP3 format) and get the textual representation."