"""Gradio app: speech emotion detection with a SpeechBrain wav2vec2 model.

Exposes two tabs — one to upload an audio file, one to pick a preloaded
clip from the local ``rec`` folder — and shows the predicted emotion.
"""
from speechbrain.pretrained.interfaces import foreign_class
import gradio as gr
import os
import warnings

warnings.filterwarnings("ignore")

# Load the SpeechBrain emotion-detection model (weights are downloaded on
# first use from the Hugging Face hub).
learner = foreign_class(
    source="speechbrain/emotion-recognition-wav2vec2-IEMOCAP",
    pymodule_file="custom_interface.py",
    classname="CustomEncoderWav2vec2Classifier",
)

# Map the model's short IEMOCAP class labels to human-readable names.
emotion_dict = {
    'sad': 'Sad',
    'hap': 'Happy',
    'ang': 'Anger',
    'fea': 'Fear',
    'sur': 'Surprised',
    'neu': 'Neutral',
}


def predict_emotion(audio):
    """Return the human-readable emotion label for an audio file.

    Args:
        audio: Path to an audio file. Gradio's ``Audio`` component with
            ``type="filepath"`` (and the preloaded-file dropdown) both
            supply a plain path string.

    Raises:
        KeyError: If the model emits a label not in ``emotion_dict``.
    """
    # classify_file returns (probabilities, score, index, text labels);
    # text_lab is a list whose first entry is the predicted short label.
    out_prob, score, index, text_lab = learner.classify_file(audio)
    return emotion_dict[text_lab[0]]


def predict_emotion_from_file(file_path):
    """Classify a preloaded audio file selected from the dropdown.

    Kept as a thin wrapper so the dropdown tab has its own entry point;
    the dropdown already yields a filesystem path string.
    """
    return predict_emotion(file_path)


# Shared UI metadata for both tabs.
outputs = "text"
title = "ML Speech Emotion Detection"
description = "Speechbrain powered wav2vec 2.0 pretrained model on IEMOCAP dataset using Gradio."

# First tab: upload an audio file. type="filepath" hands predict_emotion
# a path string (gr.inputs.* was removed in Gradio 3+).
inputs = gr.Audio(label="Input Audio", type="filepath")

# Second tab: choose a preloaded clip from the "rec" folder. Guard against
# the folder being absent so the app still starts without it.
rec_folder = "rec"
audio_files = []
if os.path.isdir(rec_folder):
    audio_files = [
        os.path.join(rec_folder, f)
        for f in os.listdir(rec_folder)
        if f.endswith(('.wav', '.mp3'))
    ]
file_dropdown = gr.Dropdown(label="Select Preloaded Audio File", choices=audio_files)

# Gradio has no Interface.add_tab / tab_name API; multi-tab apps are built
# with gr.TabbedInterface over individual Interface objects.
upload_tab = gr.Interface(
    fn=predict_emotion,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
)
preloaded_tab = gr.Interface(
    fn=predict_emotion_from_file,
    inputs=file_dropdown,
    outputs=outputs,
    title=title,
    description=description,
)
iface = gr.TabbedInterface(
    [upload_tab, preloaded_tab],
    tab_names=["Upload Audio", "Select Preloaded Audio"],
)

if __name__ == "__main__":
    # Launch only when run as a script, not on import.
    iface.launch()