import gradio as gr
import numpy as np
import pandas as pd
import torch
from transformers import pipeline
import librosa
import soundfile as sf


class EmotionRecognizer:
    def __init__(self):
        self.classifier = pipeline(
            "audio-classification",
            model="ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition",
            device=0 if torch.cuda.is_available() else -1,
        )
        self.target_sr = 16000  # Sample rate expected by the model
        self.max_duration = 10  # Max audio duration in seconds

    def process_audio(self, audio_path):
        try:
            # Load audio file with soundfile (works reliably in Hugging Face Spaces)
            audio, orig_sr = sf.read(audio_path)

            # Convert stereo to mono if needed
            if audio.ndim > 1:
                audio = np.mean(audio, axis=1)

            # Resample to the model's sample rate if necessary
            if orig_sr != self.target_sr:
                audio = librosa.resample(
                    y=audio.astype(np.float32),
                    orig_sr=orig_sr,
                    target_sr=self.target_sr,
                )
            else:
                audio = audio.astype(np.float32)

            # Normalize audio to unit peak amplitude
            audio = librosa.util.normalize(audio)

            # Trim or zero-pad audio to the maximum duration
            max_samples = self.max_duration * self.target_sr
            if len(audio) > max_samples:
                audio = audio[:max_samples]
            else:
                audio = np.pad(audio, (0, max_samples - len(audio)))

            # Run classification
            results = self.classifier(
                {"array": audio, "sampling_rate": self.target_sr}
            )

            # Format output
            labels = [res["label"] for res in results]
            scores = [res["score"] * 100 for res in results]

            text_output = "\n".join(
                f"{label}: {score:.2f}%" for label, score in zip(labels, scores)
            )

            # gr.BarPlot expects a DataFrame with the configured x/y columns
            plot_data = pd.DataFrame({"labels": labels, "values": scores})

            return text_output, plot_data

        except Exception as e:
            error_msg = f"Error processing audio: {e}"
            print(error_msg)
            return error_msg, None


def create_interface():
    recognizer = EmotionRecognizer()

    with gr.Blocks(title="Audio Emotion Recognition") as interface:
        gr.Markdown("# 🎙️ Audio Emotion Recognition")
        gr.Markdown("Record or upload audio (English speech, 3-10 seconds)")

        with gr.Row():
            with gr.Column():
                audio_input = gr.Audio(
                    sources=["microphone", "upload"],
                    type="filepath",
                    label="Input Audio",
                    waveform_options={"waveform_progress_color": "#FF0066"},
                )
                submit_btn = gr.Button("Analyze", variant="primary")

            with gr.Column():
                text_output = gr.Textbox(
                    label="Emotion Analysis Results",
                    interactive=False,
                )
                plot_output = gr.BarPlot(
                    label="Confidence Scores",
                    x="labels",
                    y="values",
                    color="labels",
                    height=300,
                )

        submit_btn.click(
            fn=recognizer.process_audio,
            inputs=audio_input,
            outputs=[text_output, plot_output],
        )

        gr.Examples(
            examples=[
                "https://huggingface.co/spaces/echalabres/emotion-recognition/raw/main/example_angry.wav",
                "https://huggingface.co/spaces/echalabres/emotion-recognition/raw/main/example_happy.wav",
            ],
            inputs=audio_input,
            outputs=[text_output, plot_output],
            fn=recognizer.process_audio,
            cache_examples=True,
        )

    return interface


if __name__ == "__main__":
    demo = create_interface()
    demo.launch()
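
# ---------------------------------------------------------------------------
# Note: the dependency list below is an assumption, not part of the original
# app; it sketches the requirements.txt this Space would plausibly need so the
# imports above (including pandas, used for the BarPlot DataFrame) resolve.
# Pin versions as appropriate for your environment.
#
#   gradio
#   transformers
#   torch
#   librosa
#   soundfile
#   numpy
#   pandas
# ---------------------------------------------------------------------------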