import gradio as gr
from transformers import pipeline
import torch

# Process-wide cache: pipeline construction downloads/loads model weights,
# which is far too expensive to repeat on every request.
_MODELS = None


def load_models():
    """Load (or return previously cached) speech and emotion pipelines.

    Returns:
        tuple: ``(transcriber, emotion_analyzer)`` HF pipelines on success,
        ``(None, None)`` if initialization failed. Successful loads are
        cached for the lifetime of the process; failures are NOT cached so
        a later call can retry.
    """
    global _MODELS
    if _MODELS is not None:
        return _MODELS
    try:
        device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {device}")

        # Whisper (tiny) for speech-to-text transcription
        transcriber = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-tiny",
            device=device
        )

        # DistilRoBERTa fine-tuned for text emotion classification
        emotion_analyzer = pipeline(
            "text-classification",
            model="j-hartmann/emotion-english-distilroberta-base",
            device=device
        )

        # Cache only on success so a transient failure can be retried.
        _MODELS = (transcriber, emotion_analyzer)
        return _MODELS
    except Exception as e:
        print(f"Error loading models: {str(e)}")
        return None, None


def analyze_audio(audio_path):
    """
    Analyze audio for emotional content with detailed output.

    Args:
        audio_path: Filesystem path to the recorded/uploaded audio clip,
            or None when Gradio passes no input.

    Returns:
        tuple[str, str]: ``(emotion_label, confidence)`` on success, or a
        pair of human-readable error strings on any failure (this function
        never raises — errors are surfaced through the UI outputs).
    """
    if audio_path is None:
        return "Please provide audio", "No audio detected"

    try:
        # Models are cached after the first call (see load_models).
        transcriber, emotion_analyzer = load_models()
        if transcriber is None or emotion_analyzer is None:
            return "Error loading models", "Model initialization failed"

        # Transcribe speech
        try:
            result = transcriber(audio_path)
            text = result["text"]

            if not text.strip():
                return "No speech detected", "Empty transcription"

            print(f"Transcribed text: {text}")  # Debug output

        except Exception as e:
            return f"Transcription error: {str(e)}", "Failed to process audio"

        # Analyze emotion of the transcribed text
        try:
            emotion_result = emotion_analyzer(text)[0]
            emotion = emotion_result["label"].title()  # Capitalize emotion
            confidence = f"{emotion_result['score']:.2%}"

            # Map technical emotion labels to more natural descriptions
            emotion_mapping = {
                "Joy": "Happy/Joyful",
                "Sadness": "Sad/Melancholic",
                "Anger": "Angry/Frustrated",
                "Fear": "Anxious/Fearful",
                "Surprise": "Surprised/Astonished",
                "Love": "Warm/Affectionate",
                "Neutral": "Neutral/Calm"
            }

            # Fall back to the raw label for any emotion not in the map.
            display_emotion = emotion_mapping.get(emotion, emotion)

            return display_emotion, confidence

        except Exception as e:
            return f"Emotion analysis error: {str(e)}", "Analysis failed"

    except Exception as e:
        return f"Unexpected error: {str(e)}", "Process failed"
# Create interface with better labeling
interface = gr.Interface(
    fn=analyze_audio,
    inputs=gr.Audio(
        # Allow both live microphone recording and file upload; the
        # handler receives a filesystem path (type="filepath").
        sources=["microphone", "upload"],
        type="filepath",
        label="Record or Upload Audio"
    ),
    outputs=[
        gr.Textbox(label="Detected Emotion"),
        gr.Textbox(label="Confidence Score")
    ],
    title="Speech Emotion Analyzer",
    description="""
    This tool analyzes the emotional tone of speech, detecting emotions like:
    - Happy/Joyful
    - Sad/Melancholic
    - Angry/Frustrated
    - Anxious/Fearful
    - Surprised/Astonished
    - Warm/Affectionate
    - Neutral/Calm
    """,
    theme=gr.themes.Base()
)

if __name__ == "__main__":
    # Bind on all interfaces so the app is reachable inside containers;
    # share=True additionally opens a public Gradio tunnel.
    interface.launch(
        debug=True,
        server_name="0.0.0.0",
        server_port=7860,
        share=True
    )