# File size: 1,151 Bytes
# 1d0caf2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import os
import librosa
import numpy as np
import tensorflow as tf
import gradio as gr

# Load the pre-trained Keras model once at import time; predict_sound_text
# reuses this module-level `model` for every request.
# NOTE(review): the path is relative, so it resolves against the process's
# working directory — confirm the app is started from the folder holding it.
model_path = 'sound_to_text_model.h5'
model = tf.keras.models.load_model(model_path)

# Function to extract features from audio
def extract_features(file_path, duration=2.0, n_mfcc=13):
    """Summarise an audio file as a fixed-length vector of mean MFCCs.

    Args:
        file_path: Path of the audio file to load with librosa.
        duration: Maximum number of seconds read from the start of the file.
        n_mfcc: Number of MFCC coefficients to compute.

    Returns:
        The MFCC matrix averaged over its frame axis, so clips of different
        lengths all yield a vector with one value per coefficient.

    Raises:
        ValueError: If the file decodes to zero audio samples.
    """
    # NOTE(review): the defaults presumably mirror the settings used when the
    # model was trained; override them only together with a matching model.
    y_audio, sr = librosa.load(file_path, duration=duration)
    if y_audio.size == 0:
        # Fail with a clear message instead of an obscure error raised from
        # inside the MFCC computation.
        raise ValueError(f"No audio samples could be read from {file_path!r}")
    mfccs = librosa.feature.mfcc(y=y_audio, sr=sr, n_mfcc=n_mfcc)
    return np.mean(mfccs.T, axis=0)  # Average over frames for a fixed size

# Function to predict text from audio
def predict_sound_text(audio):
    """Predict the text label for an uploaded audio clip.

    Args:
        audio: The value Gradio passes to the callback. The interface in this
            file uses ``gr.Audio(type="filepath")``, so this is normally a
            path string. An object exposing the path as ``.name`` is also
            accepted, and ``None`` means nothing was uploaded.

    Returns:
        The predicted label, or a short message when no audio was supplied.
        When no label encoder is available the predicted class index is
        returned as a string instead.
    """
    if audio is None:
        return "No audio file was provided."

    # A "filepath" audio input is a plain string, which has no `.name`
    # attribute; only fall back to `.name` for file-like objects.
    if isinstance(audio, (str, os.PathLike)):
        file_path = os.fspath(audio)
    else:
        file_path = getattr(audio, "name", None)
    if not file_path:
        return "No audio file was provided."

    features = extract_features(file_path)
    # The model expects a batch, so wrap the single feature vector.
    prediction = model.predict(np.array([features]))
    class_index = int(np.argmax(prediction))

    # `encoder` is not defined anywhere in this module, so referencing it
    # directly raised NameError on every call. Use it when something has
    # provided it as a module global; otherwise report the raw class index.
    # TODO(review): load the label encoder that was fitted at training time.
    label_encoder = globals().get("encoder")
    if label_encoder is None:
        return str(class_index)
    return label_encoder.inverse_transform([class_index])[0]

# Build the Gradio UI: one uploaded audio file in, the predicted text out.
# NOTE(review): `source=` is the spelling used by older Gradio releases;
# confirm it matches the installed version before upgrading.
interface = gr.Interface(
    fn=predict_sound_text,
    inputs=gr.Audio(source="upload", type="filepath"),
    outputs="text",
    title="Audio to Text Converter",
    description="Upload an audio file (MP3 format) and get the textual representation.",
)

# Start the Gradio server only when this file is executed as a script, so
# importing the module does not launch the UI.
if __name__ == "__main__":
    interface.launch()