szili2011 committed on
Commit
f3ecf4e
·
verified ·
1 Parent(s): c75b241

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -2
app.py CHANGED
@@ -3,11 +3,19 @@ import librosa
3
  import numpy as np
4
  import tensorflow as tf
5
  import gradio as gr
 
6
 
7
  # Load the pre-trained model
8
  model_path = 'sound_to_text_model.h5'
9
  model = tf.keras.models.load_model(model_path)
10
 
 
 
 
 
 
 
 
11
  # Function to extract features from audio
12
  def extract_features(file_path):
13
  y_audio, sr = librosa.load(file_path, duration=2.0)
@@ -16,7 +24,7 @@ def extract_features(file_path):
16
 
17
  # Function to predict text from audio
18
  def predict_sound_text(audio):
19
- features = extract_features(audio.name)
20
  prediction = model.predict(np.array([features]))
21
  label = encoder.inverse_transform([np.argmax(prediction)])
22
  return label[0]
@@ -24,7 +32,7 @@ def predict_sound_text(audio):
24
  # Define Gradio interface
25
  interface = gr.Interface(
26
  fn=predict_sound_text,
27
- inputs=gr.Audio(type="filepath"), # Removed source parameter
28
  outputs="text",
29
  title="Audio to Text Converter",
30
  description="Upload an audio file (MP3 format) and get the textual representation."
 
3
  import numpy as np
4
  import tensorflow as tf
5
  import gradio as gr
6
+ from sklearn.preprocessing import LabelEncoder
7
 
8
  # Load the pre-trained model
9
  model_path = 'sound_to_text_model.h5'
10
  model = tf.keras.models.load_model(model_path)
11
 
12
+ # Initialize the encoder (make sure it's fitted to your labels)
13
+ # Note: The encoder must be fitted to the same labels used during training before encoder.inverse_transform() is called; an unfitted LabelEncoder raises NotFittedError
14
+ # For example, you can use the same encoder you used during training
15
+ encoder = LabelEncoder()
16
+ # Assuming you have a list of labels used during training (e.g., y)
17
+ # encoder.fit(y) # Uncomment and run this if you haven't already fitted the encoder
18
+
19
  # Function to extract features from audio
20
  def extract_features(file_path):
21
  y_audio, sr = librosa.load(file_path, duration=2.0)
 
24
 
25
  # Function to predict text from audio
26
  def predict_sound_text(audio):
27
+ features = extract_features(audio) # Use audio directly as the file path
28
  prediction = model.predict(np.array([features]))
29
  label = encoder.inverse_transform([np.argmax(prediction)])
30
  return label[0]
 
32
  # Define Gradio interface
33
  interface = gr.Interface(
34
  fn=predict_sound_text,
35
+ inputs=gr.Audio(type="filepath"), # Use only the type argument
36
  outputs="text",
37
  title="Audio to Text Converter",
38
  description="Upload an audio file (MP3 format) and get the textual representation."