"""Gradio app: speech emotion detection with a SpeechBrain wav2vec2 model.

Exposes two tabs — one to upload an audio file, one to pick a preloaded
clip from the local ``rec`` folder — and shows the predicted emotion.
"""
from speechbrain.pretrained.interfaces import foreign_class
import gradio as gr
import os
import warnings

warnings.filterwarnings("ignore")

# Load the SpeechBrain emotion-detection model (weights are downloaded on
# first use from the Hugging Face hub).
learner = foreign_class(
    source="speechbrain/emotion-recognition-wav2vec2-IEMOCAP",
    pymodule_file="custom_interface.py",
    classname="CustomEncoderWav2vec2Classifier",
)

# Map the model's short IEMOCAP class labels to human-readable names.
emotion_dict = {
    'sad': 'Sad',
    'hap': 'Happy',
    'ang': 'Anger',
    'fea': 'Fear',
    'sur': 'Surprised',
    'neu': 'Neutral',
}


def predict_emotion(audio):
    """Return the human-readable emotion label for an audio file.

    Args:
        audio: Path to an audio file. Gradio's ``Audio`` component with
            ``type="filepath"`` (and the preloaded-file dropdown) both
            supply a plain path string.

    Raises:
        KeyError: If the model emits a label not in ``emotion_dict``.
    """
    # classify_file returns (probabilities, score, index, text labels);
    # text_lab is a list whose first entry is the predicted short label.
    out_prob, score, index, text_lab = learner.classify_file(audio)
    return emotion_dict[text_lab[0]]


def predict_emotion_from_file(file_path):
    """Classify a preloaded audio file selected from the dropdown.

    Kept as a thin wrapper so the dropdown tab has its own entry point;
    the dropdown already yields a filesystem path string.
    """
    return predict_emotion(file_path)


# Shared UI metadata for both tabs.
outputs = "text"
title = "ML Speech Emotion Detection"
description = "Speechbrain powered wav2vec 2.0 pretrained model on IEMOCAP dataset using Gradio."

# First tab: upload an audio file. type="filepath" hands predict_emotion
# a path string (gr.inputs.* was removed in Gradio 3+).
inputs = gr.Audio(label="Input Audio", type="filepath")

# Second tab: choose a preloaded clip from the "rec" folder. Guard against
# the folder being absent so the app still starts without it.
rec_folder = "rec"
audio_files = []
if os.path.isdir(rec_folder):
    audio_files = [
        os.path.join(rec_folder, f)
        for f in os.listdir(rec_folder)
        if f.endswith(('.wav', '.mp3'))
    ]
file_dropdown = gr.Dropdown(label="Select Preloaded Audio File", choices=audio_files)

# Gradio has no Interface.add_tab / tab_name API; multi-tab apps are built
# with gr.TabbedInterface over individual Interface objects.
upload_tab = gr.Interface(
    fn=predict_emotion,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
)
preloaded_tab = gr.Interface(
    fn=predict_emotion_from_file,
    inputs=file_dropdown,
    outputs=outputs,
    title=title,
    description=description,
)
iface = gr.TabbedInterface(
    [upload_tab, preloaded_tab],
    tab_names=["Upload Audio", "Select Preloaded Audio"],
)

if __name__ == "__main__":
    # Launch only when run as a script, not on import.
    iface.launch()