"""Gradio app that transcribes speech and reports the sentiment of the text."""

import functools
import logging

import gradio as gr
import numpy as np
from transformers import pipeline

logger = logging.getLogger(__name__)

# Display names for the sentiment model's labels, keyed by lowercased label.
# Both the generic ``LABEL_n`` ids and the human-readable names are accepted
# because the label format depends on the checkpoint's config.
# NOTE(review): the "-latest" checkpoint is documented to emit
# negative/neutral/positive -- confirm against the model card.
_SENTIMENT_LABELS = {
    "label_0": "Negative",
    "label_1": "Neutral",
    "label_2": "Positive",
    "negative": "Negative",
    "neutral": "Neutral",
    "positive": "Positive",
}

# Transcriptions with more words than this also get a per-chunk analysis.
_LONG_TEXT_WORD_COUNT = 50


@functools.lru_cache(maxsize=1)
def create_speech_analyzer():
    """Build the speech-to-text and sentiment-analysis pipelines.

    The result is memoized, so the models are loaded once per process instead
    of on every request. A failed load is not cached and is retried on the
    next call.

    Returns:
        A ``(transcriber, sentiment_model)`` tuple of ``transformers``
        pipelines.

    Raises:
        RuntimeError: If either pipeline cannot be created; the original
            exception is chained as the cause.
    """
    try:
        # Whisper "small" checkpoint: a smaller model for faster processing.
        transcriber = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-small",
            max_new_tokens=128,
            # Split long recordings into 30-second windows so audio longer
            # than a single Whisper window is transcribed in full.
            chunk_length_s=30,
        )

        # RoBERTa model for negative / neutral / positive sentiment.
        sentiment_model = pipeline(
            "sentiment-analysis",
            model="cardiffnlp/twitter-roberta-base-sentiment-latest",
        )

        return transcriber, sentiment_model
    except Exception as e:
        raise RuntimeError(f"Error loading models: {str(e)}") from e


def _error_result(message):
    """Return the result dict that ``analyze_speech`` reports on failure."""
    return {
        "transcription": f"Error in processing: {message}",
        "sentiment": "ERROR",
        "analysis": "Analysis failed",
    }


def analyze_speech(audio_file):
    """Transcribe an audio file and analyze the sentiment of the text.

    Args:
        audio_file: Path to the audio file to analyze, or ``None`` when no
            audio was supplied.

    Returns:
        A dict with the keys ``"transcription"``, ``"sentiment"`` and
        ``"analysis"``. Failures are reported through the same dict (with
        ``"sentiment"`` set to ``"ERROR"``) rather than raised.
    """
    if audio_file is None:
        return _error_result("No audio provided")

    try:
        transcriber, sentiment_model = create_speech_analyzer()

        transcription = transcriber(audio_file)["text"]

        # truncation=True keeps over-long text from exceeding the model's
        # maximum input length and raising.
        sentiment_result = sentiment_model(transcription, truncation=True)[0]

        raw_label = sentiment_result["label"]
        # Unknown labels are passed through unchanged.
        sentiment = _SENTIMENT_LABELS.get(raw_label.lower(), raw_label)
        confidence = sentiment_result["score"]

        if len(transcription.split()) > _LONG_TEXT_WORD_COUNT:
            # Longer texts: score each period-delimited chunk as well.
            chunks = [
                chunk for chunk in transcription.split(".") if chunk.strip()
            ]
            # One batched call instead of one pipeline call per chunk.
            chunk_sentiments = sentiment_model(chunks, truncation=True)

            # Spread of the per-chunk confidence scores; a smaller spread is
            # reported as a higher "stability".
            sentiment_variation = np.std(
                [result["score"] for result in chunk_sentiments]
            )

            detailed_analysis = (
                f"Overall Sentiment: {sentiment} ({confidence:.2%})\n"
                f"Sentiment Stability: {1 - sentiment_variation:.2%}\n"
                f"Text chunks analyzed: {len(chunk_sentiments)}"
            )
        else:
            detailed_analysis = f"Sentiment: {sentiment} ({confidence:.2%})"

        return {
            "transcription": transcription,
            "sentiment": sentiment,
            "analysis": detailed_analysis,
        }

    except Exception as e:
        # UI boundary: report the failure in the result, but keep the
        # traceback in the logs instead of discarding it.
        logger.exception("Speech analysis failed")
        return _error_result(str(e))


def _analyze_for_interface(audio_file):
    """Adapt ``analyze_speech`` to the interface's three output components.

    The interface expects one value per output component, in order, so the
    result dict is unpacked into a tuple.
    """
    result = analyze_speech(audio_file)
    return result["transcription"], result["sentiment"], result["analysis"]


def create_interface():
    """Create and configure the Gradio interface.

    Returns:
        A ``gr.Interface`` with one audio input (microphone or upload,
        delivered as a file path) and three text outputs: transcription,
        sentiment and detailed analysis.
    """
    return gr.Interface(
        fn=_analyze_for_interface,
        inputs=gr.Audio(
            sources=["microphone", "upload"],
            type="filepath",
            label="Upload or Record Audio",
        ),
        outputs=[
            gr.Textbox(label="Transcription", lines=3),
            gr.Textbox(label="Sentiment"),
            gr.Textbox(label="Detailed Analysis", lines=3),
        ],
        title="Advanced Speech Sentiment Analyzer",
        description=(
            "This tool performs advanced sentiment analysis on speech using "
            "RoBERTa. It provides detailed sentiment analysis for longer "
            "texts and handles both audio uploads and microphone recordings."
        ),
        theme=gr.themes.Soft(),
        examples=[],
    )


def main():
    """Build the interface and launch the application."""
    interface = create_interface()
    # NOTE(review): share=True, debug=True and binding to 0.0.0.0 make the
    # app reachable from outside this machine -- confirm this is intended
    # before deploying.
    interface.launch(
        share=True,
        debug=True,
        server_name="0.0.0.0",
    )


if __name__ == "__main__":
    main()