import gradio as gr
from transformers import pipeline
import torch
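
# Cache the loaded pipelines at module scope so each Gradio request does not
# reload them from disk (assumption: a single-process app, so a plain
# module-level global is sufficient).
_MODELS = None
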
def load_models():
    """Load and verify models with error checking (cached after first use)"""
    global _MODELS
    if _MODELS is not None:
        return _MODELS
    try:
        device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {device}")
        # Load Whisper for speech recognition
        transcriber = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-tiny",
            device=device
        )
        # Load emotion recognition model
        emotion_analyzer = pipeline(
            "text-classification",
            model="j-hartmann/emotion-english-distilroberta-base",
            device=device
        )
        _MODELS = (transcriber, emotion_analyzer)
        return _MODELS
    except Exception as e:
        print(f"Error loading models: {str(e)}")
        return None, None

def analyze_audio(audio_path):
"""
Analyze audio for emotional content with detailed output
"""
if audio_path is None:
return "Please provide audio", "No audio detected"
try:
# Load models
transcriber, emotion_analyzer = load_models()
if transcriber is None or emotion_analyzer is None:
return "Error loading models", "Model initialization failed"
# Transcribe speech
try:
result = transcriber(audio_path)
text = result["text"]
if not text.strip():
return "No speech detected", "Empty transcription"
print(f"Transcribed text: {text}") # Debug output
except Exception as e:
return f"Transcription error: {str(e)}", "Failed to process audio"
# Analyze emotion
try:
emotion_result = emotion_analyzer(text)[0]
emotion = emotion_result["label"].title() # Capitalize emotion
confidence = f"{emotion_result['score']:.2%}"
# Map technical emotion labels to more natural descriptions
emotion_mapping = {
"Joy": "Happy/Joyful",
"Sadness": "Sad/Melancholic",
"Anger": "Angry/Frustrated",
"Fear": "Anxious/Fearful",
"Surprise": "Surprised/Astonished",
"Love": "Warm/Affectionate",
"Neutral": "Neutral/Calm"
}
display_emotion = emotion_mapping.get(emotion, emotion)
return display_emotion, confidence
except Exception as e:
return f"Emotion analysis error: {str(e)}", "Analysis failed"
except Exception as e:
return f"Unexpected error: {str(e)}", "Process failed"
# Create interface with better labeling
interface = gr.Interface(
    fn=analyze_audio,
    inputs=gr.Audio(
        sources=["microphone", "upload"],
        type="filepath",
        label="Record or Upload Audio"
    ),
    outputs=[
        gr.Textbox(label="Detected Emotion"),
        gr.Textbox(label="Confidence Score")
    ],
    title="Speech Emotion Analyzer",
    description="""
    This tool analyzes the emotional tone of speech, detecting emotions like:
    - Happy/Joyful
    - Sad/Melancholic
    - Angry/Frustrated
    - Anxious/Fearful
    - Surprised/Astonished
    - Disgusted/Displeased
    - Neutral/Calm
    """,
    theme=gr.themes.Base()
)

if __name__ == "__main__":
    interface.launch(
        debug=True,
        server_name="0.0.0.0",
        server_port=7860,
        share=True
    )
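
# Usage: run `python app.py` and open http://localhost:7860, then record or
# upload a short audio clip; the app returns the detected emotion and the
# classifier's confidence score.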