File size: 3,519 Bytes
00ae0ce
 
b921a8f
00ae0ce
b921a8f
 
f52a928
b921a8f
 
 
7ff24b4
f52a928
b921a8f
 
 
f52a928
 
7ff24b4
 
 
 
b921a8f
f52a928
 
7ff24b4
f52a928
b921a8f
 
0a29c8e
b921a8f
 
7ff24b4
b921a8f
 
7ff24b4
f52a928
b921a8f
 
7ff24b4
 
b921a8f
 
7ff24b4
b921a8f
 
 
 
 
7ff24b4
b921a8f
 
 
7ff24b4
b921a8f
7ff24b4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b921a8f
7ff24b4
b921a8f
dc5c04c
b921a8f
dc5c04c
7ff24b4
b921a8f
 
 
 
 
7ff24b4
b921a8f
 
7ff24b4
 
b921a8f
7ff24b4
 
 
 
 
 
 
 
 
 
 
 
b921a8f
00ae0ce
986b8c7
f52a928
b921a8f
 
 
 
986b8c7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import gradio as gr
from transformers import pipeline
import torch

# Holds the successfully loaded (transcriber, emotion_analyzer) pair so the
# pipelines are built once per process instead of once per request.
_MODEL_CACHE = {}


def load_models():
    """Load the speech-recognition and emotion pipelines, reusing a prior load.

    The first successful call builds both pipelines on CUDA when
    ``torch.cuda.is_available()`` is true, otherwise on CPU, and stores them
    in a module-level cache. Later calls return the cached objects without
    rebuilding them.

    Returns:
        A ``(transcriber, emotion_analyzer)`` tuple on success, or
        ``(None, None)`` if anything raised while loading. A failed load is
        not cached, so a subsequent call will try again.
    """
    cached = _MODEL_CACHE.get("models")
    if cached is not None:
        return cached

    try:
        device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {device}")

        # Load Whisper for speech recognition
        transcriber = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-tiny",
            device=device
        )

        # Load emotion recognition model
        emotion_analyzer = pipeline(
            "text-classification",
            model="j-hartmann/emotion-english-distilroberta-base",
            device=device
        )
    except Exception as e:
        # Best-effort: callers check for None instead of handling exceptions.
        print(f"Error loading models: {str(e)}")
        return None, None

    # Only cache once both pipelines were created successfully.
    models = (transcriber, emotion_analyzer)
    _MODEL_CACHE["models"] = models
    return models

def analyze_audio(audio_path):
    """
    Transcribe an audio file and classify the emotion of the spoken text.

    Args:
        audio_path: Path to the audio file to analyze, or None when no audio
            was supplied.

    Returns:
        A tuple of two strings. On success it is
        ``(emotion description, confidence)`` where confidence is the
        classifier score formatted as a percentage with two decimals.
        On any failure it is ``(message, short status)``; this function
        reports errors through its return value rather than raising.
    """
    if audio_path is None:
        return "Please provide audio", "No audio detected"

    # Keys are the classifier labels after ``str.title()``. "Disgust" is one
    # of the labels listed on the j-hartmann model card; without an entry it
    # would be shown as the raw label while every other emotion gets a
    # friendly description. "Love" is kept for backward compatibility.
    emotion_mapping = {
        "Joy": "Happy/Joyful",
        "Sadness": "Sad/Melancholic",
        "Anger": "Angry/Frustrated",
        "Fear": "Anxious/Fearful",
        "Disgust": "Disgusted/Repulsed",
        "Surprise": "Surprised/Astonished",
        "Love": "Warm/Affectionate",
        "Neutral": "Neutral/Calm"
    }

    try:
        # Load models
        transcriber, emotion_analyzer = load_models()
        if transcriber is None or emotion_analyzer is None:
            return "Error loading models", "Model initialization failed"

        # Transcribe speech
        try:
            result = transcriber(audio_path)
            text = result["text"]
            if not text.strip():
                # Whitespace-only output means nothing was recognized.
                return "No speech detected", "Empty transcription"
            print(f"Transcribed text: {text}")  # Debug output
        except Exception as e:
            return f"Transcription error: {str(e)}", "Failed to process audio"

        # Analyze emotion
        try:
            # The classifier returns a list; the first entry is used.
            emotion_result = emotion_analyzer(text)[0]
            emotion = emotion_result["label"].title()  # Capitalize emotion
            confidence = f"{emotion_result['score']:.2%}"

            # Unknown labels fall back to the title-cased label itself.
            display_emotion = emotion_mapping.get(emotion, emotion)
            return display_emotion, confidence
        except Exception as e:
            return f"Emotion analysis error: {str(e)}", "Analysis failed"

    except Exception as e:
        # Safety net for anything not handled by the stage-specific handlers.
        return f"Unexpected error: {str(e)}", "Process failed"

# Build the Gradio UI piece by piece: one audio input (microphone or upload,
# handed to the callback as a file path) and two text outputs.
_audio_input = gr.Audio(
    sources=["microphone", "upload"],
    type="filepath",
    label="Record or Upload Audio"
)

_result_boxes = [
    gr.Textbox(label="Detected Emotion"),
    gr.Textbox(label="Confidence Score")
]

# Description shown under the title in the UI.
_description = """
    This tool analyzes the emotional tone of speech, detecting emotions like:
    - Happy/Joyful
    - Sad/Melancholic
    - Angry/Frustrated
    - Anxious/Fearful
    - Surprised/Astonished
    - Warm/Affectionate
    - Neutral/Calm
    """

interface = gr.Interface(
    fn=analyze_audio,
    inputs=_audio_input,
    outputs=_result_boxes,
    title="Speech Emotion Analyzer",
    description=_description,
    theme=gr.themes.Base()
)

if __name__ == "__main__":
    # Launch settings gathered in one place: debug mode on, bound to all
    # interfaces on port 7860, with a share link requested.
    launch_options = {
        "debug": True,
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
    }
    interface.launch(**launch_options)