# Speech Emotion Analyzer — Hugging Face Space
# Transcribes speech with Whisper and classifies its emotional tone.
import torch
from transformers import pipeline

import gradio as gr
def load_models():
    """Load the speech-recognition and emotion-classification pipelines.

    Successful loads are memoized on the function object, so repeated calls
    (one per audio submission from the UI) do not re-instantiate or
    re-download the models. Failures are NOT cached, so a later call can
    retry after a transient error.

    Returns:
        tuple: ``(transcriber, emotion_analyzer)`` pipelines on success,
        ``(None, None)`` if loading fails.
    """
    cached = getattr(load_models, "_cache", None)
    if cached is not None:
        return cached
    try:
        device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {device}")
        # Whisper (tiny) for speech-to-text.
        transcriber = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-tiny",
            device=device,
        )
        # DistilRoBERTa fine-tuned for English emotion classification.
        emotion_analyzer = pipeline(
            "text-classification",
            model="j-hartmann/emotion-english-distilroberta-base",
            device=device,
        )
        # Cache only on success so a failed load can be retried.
        load_models._cache = (transcriber, emotion_analyzer)
        return load_models._cache
    except Exception as e:  # boundary: report and degrade gracefully for the UI
        print(f"Error loading models: {str(e)}")
        return None, None
def analyze_audio(audio_path):
    """
    Transcribe an audio file and classify the emotional tone of the speech.

    Args:
        audio_path: Filesystem path to the recording (Gradio ``type="filepath"``),
            or ``None`` when nothing was recorded/uploaded.

    Returns:
        tuple[str, str]: ``(detected emotion, confidence percentage)``.
        On any failure, both fields carry human-readable error text instead.
    """
    if audio_path is None:
        return "Please provide audio", "No audio detected"
    try:
        # Load models (memoized inside load_models after the first call).
        transcriber, emotion_analyzer = load_models()
        if transcriber is None or emotion_analyzer is None:
            return "Error loading models", "Model initialization failed"
        # Speech -> text.
        try:
            result = transcriber(audio_path)
            text = result["text"]
            if not text.strip():
                return "No speech detected", "Empty transcription"
            print(f"Transcribed text: {text}")  # Debug output
        except Exception as e:
            return f"Transcription error: {str(e)}", "Failed to process audio"
        # Text -> emotion.
        try:
            emotion_result = emotion_analyzer(text)[0]
            emotion = emotion_result["label"].title()  # e.g. "joy" -> "Joy"
            confidence = f"{emotion_result['score']:.2%}"
            # Friendlier display names for the model's labels. The j-hartmann
            # checkpoint emits: anger, disgust, fear, joy, neutral, sadness,
            # surprise — "Disgust" was previously missing from this table and
            # leaked through as a raw label. "Love" is kept for compatibility
            # even though this model does not emit it.
            emotion_mapping = {
                "Joy": "Happy/Joyful",
                "Sadness": "Sad/Melancholic",
                "Anger": "Angry/Frustrated",
                "Disgust": "Disgusted/Repulsed",
                "Fear": "Anxious/Fearful",
                "Surprise": "Surprised/Astonished",
                "Love": "Warm/Affectionate",
                "Neutral": "Neutral/Calm",
            }
            # Fall back to the raw label for anything unmapped.
            display_emotion = emotion_mapping.get(emotion, emotion)
            return display_emotion, confidence
        except Exception as e:
            return f"Emotion analysis error: {str(e)}", "Analysis failed"
    except Exception as e:  # top-level safety net for the UI callback
        return f"Unexpected error: {str(e)}", "Process failed"
# Assemble the Gradio UI: one audio input (mic or file), two text outputs.
interface = gr.Interface(
    fn=analyze_audio,
    title="Speech Emotion Analyzer",
    description="""
    This tool analyzes the emotional tone of speech, detecting emotions like:
    - Happy/Joyful
    - Sad/Melancholic
    - Angry/Frustrated
    - Anxious/Fearful
    - Surprised/Astonished
    - Warm/Affectionate
    - Neutral/Calm
    """,
    inputs=gr.Audio(
        label="Record or Upload Audio",
        sources=["microphone", "upload"],
        type="filepath",
    ),
    outputs=[
        gr.Textbox(label="Detected Emotion"),
        gr.Textbox(label="Confidence Score"),
    ],
    theme=gr.themes.Base(),
)
if __name__ == "__main__":
    # Run the app directly: bind all interfaces on port 7860 with a public
    # share link and debug logging enabled.
    interface.launch(
        share=True,
        debug=True,
        server_port=7860,
        server_name="0.0.0.0",
    )