import gradio as gr
from transformers import pipeline
import torch

_models = None  # cache so the heavy pipelines are only loaded once per process

def load_models():
    """Load and verify models with error checking; results are cached after the first call"""
    global _models
    if _models is not None:
        return _models
    try:
        # Check CUDA availability
        device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {device}")
        
        # Load a smaller Whisper model for better compatibility
        transcriber = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-tiny",
            device=device
        )
        
        # Load a simpler sentiment model
        sentiment = pipeline(
            "sentiment-analysis",
            model="distilbert-base-uncased-finetuned-sst-2-english",
            device=device
        )
        
        _models = (transcriber, sentiment)
        return _models
    except Exception as e:
        print(f"Error loading models: {str(e)}")
        return None, None

def analyze_audio(audio_path):
    """
    Analyze audio tone with robust error handling
    """
    if audio_path is None:
        return "Please provide an audio input", "No audio detected"
        
    try:
        # Load models (cached after the first request)
        transcriber, sentiment = load_models()
        if transcriber is None or sentiment is None:
            return "Error loading models", "Model initialization failed"
            
        # Transcribe with error checking
        try:
            result = transcriber(audio_path)
            text = result["text"]
            if not text.strip():
                return "No speech detected", "Empty transcription"
        except Exception as e:
            return f"Transcription error: {str(e)}", "Failed to process audio"
            
        # Analyze sentiment (truncate long transcripts to the model's max input length)
        try:
            sentiment_result = sentiment(text, truncation=True)[0]
            tone = sentiment_result["label"]
            confidence = f"{sentiment_result['score']:.2%}"
            return tone, confidence
        except Exception as e:
            return f"Sentiment analysis error: {str(e)}", "Analysis failed"
            
    except Exception as e:
        return f"Unexpected error: {str(e)}", "Process failed"

# Create interface with simplified components
interface = gr.Interface(
    fn=analyze_audio,
    inputs=gr.Audio(
        sources=["microphone", "upload"],
        type="filepath",
    ),
    outputs=[
        gr.Textbox(label="Tone"),
        gr.Textbox(label="Confidence Level")
    ],
    title="Simple Speech Tone Analyzer",
    description="Record or upload audio to analyze its tone. The analysis may take a few moments.",
    examples=None,
    cache_examples=False,
    theme=gr.themes.Base(),
)

# Launch on all network interfaces (port 7860) with debug logging and a public share link
if __name__ == "__main__":
    interface.launch(
        debug=True,
        server_name="0.0.0.0",
        server_port=7860,
        share=True
    )
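
# Optional local smoke test: a minimal sketch for checking the pipelines without the UI.
# "sample.wav" is a hypothetical path; replace it with a real audio file before uncommenting.
# Running the module normally launches the Gradio interface above, so this stays commented out:
#
#     tone, confidence = analyze_audio("sample.wav")
#     print(f"Tone: {tone} ({confidence})")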