File size: 2,136 Bytes
00ae0ce
 
b921a8f
00ae0ce
8b1154e
 
f52a928
8b1154e
 
b921a8f
8b1154e
 
f52a928
 
8b1154e
 
7ff24b4
8b1154e
 
f52a928
 
8b1154e
f52a928
8b1154e
b921a8f
0a29c8e
8b1154e
 
 
 
 
b921a8f
8b1154e
 
 
 
 
 
 
 
 
b921a8f
8b1154e
 
7ff24b4
8b1154e
 
 
 
 
 
dc5c04c
8b1154e
 
dc5c04c
8b1154e
b921a8f
8b1154e
 
 
 
 
 
 
 
b921a8f
8b1154e
 
b921a8f
7ff24b4
8b1154e
b921a8f
00ae0ce
986b8c7
8b1154e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import gradio as gr
from transformers import pipeline
import torch

# Holds the loaded (speech_recognizer, emotion_classifier) pair. It stays None
# until a load succeeds, so a failed attempt is retried on the next call.
_ANALYZERS = None


def create_analyzers():
    """Return the speech-recognition and emotion-classification pipelines.

    The pipelines are built on the first successful call and reused on
    later calls, so callers that invoke this per request do not rebuild the
    models each time.

    Returns:
        A ``(speech_recognizer, emotion_classifier)`` tuple, or
        ``(None, None)`` if building either pipeline raised. The error is
        printed and nothing is cached in that case.
    """
    global _ANALYZERS
    if _ANALYZERS is not None:
        return _ANALYZERS

    try:
        # Tiny English Whisper checkpoint, chosen for speed and reliability.
        speech_recognizer = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-tiny.en",
            chunk_length_s=30,
        )

        # go_emotions text classifier, configured with top_k=1.
        emotion_classifier = pipeline(
            "text-classification",
            model="SamLowe/roberta-base-go_emotions",
            top_k=1,
        )
    except Exception as e:
        # Best-effort: report the problem and let the caller decide what to do.
        print(f"Model loading error: {e}")
        return None, None

    _ANALYZERS = (speech_recognizer, emotion_classifier)
    return _ANALYZERS

def analyze_tone(audio_file):
    """Transcribe an audio file and classify the emotion of the transcript.

    Args:
        audio_file: Input handed to the speech recognizer, or ``None`` when
            no audio was provided.

    Returns:
        An ``(emotion, confidence)`` pair of strings. ``emotion`` is the
        title-cased label with underscores replaced by spaces, and
        ``confidence`` is the score formatted as a percentage. When there is
        no input, no speech, or an error, ``emotion`` carries a status
        message and ``confidence`` is ``"N/A"``. This function does not
        raise; any exception is reported through the returned message.
    """
    if audio_file is None:
        return "No input", "N/A"

    try:
        speech_recognizer, emotion_classifier = create_analyzers()

        # create_analyzers() signals a failed load with (None, None). Report
        # that directly instead of failing later with an opaque
        # "'NoneType' object is not callable".
        if speech_recognizer is None or emotion_classifier is None:
            return "Error: models failed to load", "N/A"

        # Transcribe audio
        transcription = speech_recognizer(audio_file)
        text = transcription["text"]

        if not text.strip():
            return "No speech detected", "N/A"

        print(f"Transcribed text: {text}")  # For debugging

        # The classifier result is indexed as a nested list; take the first
        # prediction of the first (only) input.
        result = emotion_classifier(text)[0][0]
        emotion = result['label'].replace('_', ' ').title()
        confidence = f"{result['score']:.1%}"

        return emotion, confidence

    except Exception as e:
        # UI boundary: surface the failure as text rather than crashing.
        print(f"Analysis error: {e}")
        return f"Error: {str(e)}", "N/A"

# Assemble the UI: one audio input feeding analyze_tone, two text outputs.
_audio_input = gr.Audio(
    sources=["microphone", "upload"],
    type="filepath",
    label="Audio Input",
)
_emotion_output = gr.Textbox(label="Emotion")
_confidence_output = gr.Textbox(label="Confidence")

interface = gr.Interface(
    fn=analyze_tone,
    inputs=[_audio_input],
    outputs=[_emotion_output, _confidence_output],
    title="Speech Emotion Analyzer",
    description="Record or upload audio to detect the emotional tone.",
)

if __name__ == "__main__":
    # Start the app only when run as a script, not when imported.
    launch_options = {"server_name": "0.0.0.0", "share": True}
    interface.launch(**launch_options)