# Boltz79's picture
# Update app.py
# 7ff24b4 verified
# raw
# history blame
# 3.52 kB
import gradio as gr
from transformers import pipeline
import torch
# Cache for the loaded pipelines so they are built only once per process.
# Only successful loads are cached; a failed load returns (None, None)
# without caching, so the next call retries (same retry behavior as before).
_MODEL_CACHE = None


def load_models():
    """Load (and cache) the speech-recognition and emotion pipelines.

    Returns:
        tuple: ``(transcriber, emotion_analyzer)`` pipelines on success,
        or ``(None, None)`` if either model fails to load.
    """
    global _MODEL_CACHE
    if _MODEL_CACHE is not None:
        # Reuse the already-loaded models instead of rebuilding them on
        # every request — model construction is far too slow to repeat.
        return _MODEL_CACHE
    try:
        # Prefer GPU when available; pipeline() accepts a device string.
        device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {device}")

        # Whisper (tiny) for speech-to-text.
        transcriber = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-tiny",
            device=device
        )

        # DistilRoBERTa fine-tuned for English emotion classification.
        emotion_analyzer = pipeline(
            "text-classification",
            model="j-hartmann/emotion-english-distilroberta-base",
            device=device
        )

        _MODEL_CACHE = (transcriber, emotion_analyzer)
        return _MODEL_CACHE
    except Exception as e:
        # Best effort: the UI surfaces a friendly message when this is None.
        print(f"Error loading models: {str(e)}")
        return None, None
def analyze_audio(audio_path):
    """Transcribe an audio recording and classify its emotional tone.

    Args:
        audio_path: Filesystem path to the recording (as provided by
            ``gr.Audio(type="filepath")``), or ``None`` if no audio was given.

    Returns:
        tuple[str, str]: ``(display_emotion, confidence)`` on success, or a
        pair of human-readable error messages on any failure.
    """
    if audio_path is None:
        return "Please provide audio", "No audio detected"
    try:
        # Load models (may be slow on first call)
        transcriber, emotion_analyzer = load_models()
        if transcriber is None or emotion_analyzer is None:
            return "Error loading models", "Model initialization failed"

        # Speech -> text
        try:
            result = transcriber(audio_path)
            text = result["text"]
            if not text.strip():
                return "No speech detected", "Empty transcription"
            print(f"Transcribed text: {text}")  # Debug output
        except Exception as e:
            return f"Transcription error: {str(e)}", "Failed to process audio"

        # Text -> emotion
        try:
            emotion_result = emotion_analyzer(text)[0]
            emotion = emotion_result["label"].title()  # e.g. "joy" -> "Joy"
            confidence = f"{emotion_result['score']:.2%}"

            # Map technical emotion labels to more natural descriptions.
            # NOTE(review): the classifier's label set includes "disgust",
            # which was previously missing here and fell through as a raw
            # label; it now gets a friendly name like every other class.
            emotion_mapping = {
                "Joy": "Happy/Joyful",
                "Sadness": "Sad/Melancholic",
                "Anger": "Angry/Frustrated",
                "Fear": "Anxious/Fearful",
                "Surprise": "Surprised/Astonished",
                "Disgust": "Disgusted/Repulsed",
                "Love": "Warm/Affectionate",
                "Neutral": "Neutral/Calm"
            }
            # Unknown labels fall back to the raw (title-cased) label.
            display_emotion = emotion_mapping.get(emotion, emotion)
            return display_emotion, confidence
        except Exception as e:
            return f"Emotion analysis error: {str(e)}", "Analysis failed"
    except Exception as e:
        # Last-resort guard so the UI always gets a string pair back.
        return f"Unexpected error: {str(e)}", "Process failed"
# Create interface with better labeling
interface = gr.Interface(
    fn=analyze_audio,
    # Accept either a live microphone recording or an uploaded file;
    # type="filepath" hands analyze_audio a path on disk, not raw samples.
    inputs=gr.Audio(
        sources=["microphone", "upload"],
        type="filepath",
        label="Record or Upload Audio"
    ),
    # Two text outputs matching analyze_audio's (emotion, confidence) tuple.
    outputs=[
        gr.Textbox(label="Detected Emotion"),
        gr.Textbox(label="Confidence Score")
    ],
    title="Speech Emotion Analyzer",
    description="""
This tool analyzes the emotional tone of speech, detecting emotions like:
- Happy/Joyful
- Sad/Melancholic
- Angry/Frustrated
- Anxious/Fearful
- Surprised/Astonished
- Warm/Affectionate
- Neutral/Calm
""",
    theme=gr.themes.Base()
)
if __name__ == "__main__":
    # Bind on all interfaces and the conventional port 7860 so the app is
    # reachable inside a container (e.g. a Hugging Face Space).
    interface.launch(
        debug=True,            # verbose errors in the server log
        server_name="0.0.0.0",
        server_port=7860,
        share=True             # also request a public share link
    )