File size: 2,791 Bytes
00ae0ce
 
 
dc5c04c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
00ae0ce
dc5c04c
 
 
 
 
 
00ae0ce
dc5c04c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
00ae0ce
dc5c04c
 
 
 
 
 
 
 
00ae0ce
dc5c04c
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import gradio as gr
from transformers import pipeline

# Pipelines loaded by create_speech_analyzer(); kept at module level so that
# repeated calls reuse them instead of re-instantiating the models.
_SPEECH_MODELS = None


def create_speech_analyzer():
    """Load the speech-recognition and sentiment pipelines, reusing them once loaded.

    The first successful call builds both pipelines and remembers them;
    later calls return the same pair. A failed load is not remembered, so
    the next call tries again.

    Returns:
        tuple: ``(transcriber, sentiment_model)`` pipeline objects.

    Raises:
        RuntimeError: If either pipeline cannot be created. The original
            exception is attached as ``__cause__``.
    """
    global _SPEECH_MODELS

    if _SPEECH_MODELS is not None:
        return _SPEECH_MODELS

    try:
        # Load Whisper model for speech recognition
        transcriber = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-medium",
            max_new_tokens=128,
        )

        # Load sentiment analysis model
        sentiment_model = pipeline(
            "sentiment-analysis",
            model="distilbert-base-uncased-finetuned-sst-2-english",
        )
    except Exception as e:
        # Chain the cause so the underlying failure stays visible in tracebacks.
        raise RuntimeError(f"Error loading models: {str(e)}") from e

    _SPEECH_MODELS = (transcriber, sentiment_model)
    return _SPEECH_MODELS

def analyze_speech(audio_file):
    """
    Analyze speech audio for transcription and sentiment.

    Args:
        audio_file: Path to audio file or audio data. ``None`` or a blank
            path (nothing recorded or uploaded) produces an error result
            without touching the models.

    Returns:
        dict: Contains transcription, sentiment and confidence score under
        the keys ``"transcription"``, ``"sentiment"`` and ``"confidence"``
        (a percentage string). On failure, ``"transcription"`` carries the
        error message, ``"sentiment"`` is ``"ERROR"`` and ``"confidence"``
        is ``"0%"``.
    """

    def error_result(message):
        # Single place that defines the shape of a failed analysis.
        return {
            "transcription": f"Error processing audio: {message}",
            "sentiment": "ERROR",
            "confidence": "0%",
        }

    # Fail fast on missing input so no model is loaded just to report an
    # opaque library error back to the user.
    if audio_file is None or (
        isinstance(audio_file, str) and not audio_file.strip()
    ):
        return error_result("no audio provided")

    try:
        # Get model instances
        transcriber, sentiment_model = create_speech_analyzer()

        # Transcribe audio
        transcription = transcriber(audio_file)["text"]

        # Analyze sentiment
        sentiment_result = sentiment_model(transcription)[0]

        return {
            "transcription": transcription,
            "sentiment": sentiment_result["label"],
            "confidence": f"{sentiment_result['score']:.2%}",
        }

    except Exception as e:
        # Deliberately broad: any failure is reported through the result
        # dict instead of propagating to the caller.
        return error_result(str(e))

def create_interface():
    """Create and configure the Gradio interface.

    ``analyze_speech`` returns a dict, while the interface declares three
    separate output components that are filled from an ordered sequence of
    return values. A small adapter unpacks the dict into that order so each
    textbox receives its own value.

    Returns:
        gr.Interface: The configured interface (not launched).
    """

    def run_analysis(audio_file):
        """Return the analysis as (transcription, sentiment, confidence)."""
        result = analyze_speech(audio_file)
        return (
            result["transcription"],
            result["sentiment"],
            result["confidence"],
        )

    return gr.Interface(
        fn=run_analysis,
        inputs=gr.Audio(
            # NOTE(review): `source=` is the Gradio 3.x keyword and selects
            # microphone input only, although the label and description also
            # mention uploading. Gradio 4+ appears to use
            # `sources=["microphone", "upload"]` instead; confirm against
            # the Gradio version this app is pinned to.
            source="microphone",
            type="filepath",
            label="Upload or Record Audio"
        ),
        # Order must match the tuple returned by run_analysis.
        outputs=[
            gr.Textbox(label="Transcription"),
            gr.Textbox(label="Sentiment Analysis"),
            gr.Textbox(label="Confidence Score")
        ],
        title="Real-Time Speech Sentiment Analyzer",
        description="""
        This tool transcribes speech and analyzes its sentiment in real-time.
        Upload an audio file or record directly through your microphone.
        """,
        theme=gr.themes.Soft(),
        examples=[],  # Add example audio files here if desired
        cache_examples=True
    )

def main():
    """Build the Gradio interface and start serving it."""
    launch_options = {
        "share": True,  # Enable sharing via public URL
        "debug": True,  # Enable debug mode for better error messages
        "server_name": "0.0.0.0",  # Allow external connections
    }
    create_interface().launch(**launch_options)

# Start the app only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    main()