import gradio as gr
from transformers import pipeline
import torch

# Process-wide cache: pipeline construction downloads/loads model weights,
# which is far too expensive to repeat on every request.
_MODELS = None


def load_models():
    """Load (or return previously cached) speech and emotion pipelines.

    Returns:
        tuple: ``(transcriber, emotion_analyzer)`` HF pipelines on success,
        ``(None, None)`` if initialization failed. Successful loads are
        cached for the lifetime of the process; failures are NOT cached so
        a later call can retry.
    """
    global _MODELS
    if _MODELS is not None:
        return _MODELS
    try:
        device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {device}")

        # Whisper (tiny) for speech-to-text transcription
        transcriber = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-tiny",
            device=device
        )

        # DistilRoBERTa fine-tuned for text emotion classification
        emotion_analyzer = pipeline(
            "text-classification",
            model="j-hartmann/emotion-english-distilroberta-base",
            device=device
        )

        # Cache only on success so a transient failure can be retried.
        _MODELS = (transcriber, emotion_analyzer)
        return _MODELS
    except Exception as e:
        print(f"Error loading models: {str(e)}")
        return None, None


def analyze_audio(audio_path):
    """
    Analyze audio for emotional content with detailed output.

    Args:
        audio_path: Filesystem path to the recorded/uploaded audio clip,
            or None when Gradio passes no input.

    Returns:
        tuple[str, str]: ``(emotion_label, confidence)`` on success, or a
        pair of human-readable error strings on any failure (this function
        never raises — errors are surfaced through the UI outputs).
    """
    if audio_path is None:
        return "Please provide audio", "No audio detected"

    try:
        # Models are cached after the first call (see load_models).
        transcriber, emotion_analyzer = load_models()
        if transcriber is None or emotion_analyzer is None:
            return "Error loading models", "Model initialization failed"

        # Transcribe speech
        try:
            result = transcriber(audio_path)
            text = result["text"]

            if not text.strip():
                return "No speech detected", "Empty transcription"

            print(f"Transcribed text: {text}")  # Debug output

        except Exception as e:
            return f"Transcription error: {str(e)}", "Failed to process audio"

        # Analyze emotion of the transcribed text
        try:
            emotion_result = emotion_analyzer(text)[0]
            emotion = emotion_result["label"].title()  # Capitalize emotion
            confidence = f"{emotion_result['score']:.2%}"

            # Map technical emotion labels to more natural descriptions
            emotion_mapping = {
                "Joy": "Happy/Joyful",
                "Sadness": "Sad/Melancholic",
                "Anger": "Angry/Frustrated",
                "Fear": "Anxious/Fearful",
                "Surprise": "Surprised/Astonished",
                "Love": "Warm/Affectionate",
                "Neutral": "Neutral/Calm"
            }

            # Fall back to the raw label for any emotion not in the map.
            display_emotion = emotion_mapping.get(emotion, emotion)

            return display_emotion, confidence

        except Exception as e:
            return f"Emotion analysis error: {str(e)}", "Analysis failed"

    except Exception as e:
        return f"Unexpected error: {str(e)}", "Process failed"
# Create interface with better labeling
interface = gr.Interface(
    fn=analyze_audio,
    inputs=gr.Audio(
        # Allow both live microphone recording and file upload; the
        # handler receives a filesystem path (type="filepath").
        sources=["microphone", "upload"],
        type="filepath",
        label="Record or Upload Audio"
    ),
    outputs=[
        gr.Textbox(label="Detected Emotion"),
        gr.Textbox(label="Confidence Score")
    ],
    title="Speech Emotion Analyzer",
    description="""
    This tool analyzes the emotional tone of speech, detecting emotions like:
    - Happy/Joyful
    - Sad/Melancholic
    - Angry/Frustrated
    - Anxious/Fearful
    - Surprised/Astonished
    - Warm/Affectionate
    - Neutral/Calm
    """,
    theme=gr.themes.Base()
)

if __name__ == "__main__":
    # Bind on all interfaces so the app is reachable inside containers;
    # share=True additionally opens a public Gradio tunnel.
    interface.launch(
        debug=True,
        server_name="0.0.0.0",
        server_port=7860,
        share=True
    )