from functools import lru_cache

import gradio as gr
import numpy as np
from transformers import pipeline

@lru_cache(maxsize=1)
def create_speech_analyzer():
    """Build the speech-to-text and sentiment-analysis pipelines.

    The result is memoized: constructing the pipelines is expensive, and this
    function is called once per analysis request, so repeated calls return the
    same pair of pipeline objects instead of reloading the models. A failed
    load is not cached (exceptions propagate), so the next call retries.

    Returns:
        A ``(transcriber, sentiment_model)`` tuple of transformers pipelines.

    Raises:
        RuntimeError: If either pipeline cannot be constructed. The original
            exception is attached as ``__cause__``.
    """
    try:
        # Whisper "small" checkpoint via the standard transformers ASR
        # pipeline; the smaller model is used for faster processing.
        transcriber = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-small",
            max_new_tokens=128,
        )

        # RoBERTa-based three-class sentiment classifier.
        sentiment_model = pipeline(
            "sentiment-analysis",
            model="cardiffnlp/twitter-roberta-base-sentiment-latest",
        )
    except Exception as e:
        # Chain explicitly so the underlying failure stays in the traceback.
        raise RuntimeError(f"Error loading models: {str(e)}") from e

    return transcriber, sentiment_model

def _failure_result(message):
    """Build the result dict that analyze_speech returns when processing fails."""
    return {
        "transcription": f"Error in processing: {message}",
        "sentiment": "ERROR",
        "analysis": "Analysis failed",
    }


def analyze_speech(audio_file):
    """Transcribe an audio file and analyze the sentiment of the transcript.

    Args:
        audio_file: Audio input handed to the transcription pipeline (the
            Gradio interface in this file supplies a file path). ``None`` or
            a blank string means no audio was provided.

    Returns:
        A dict with the keys ``"transcription"``, ``"sentiment"`` and
        ``"analysis"``. Failures never raise: they are reported with
        ``"sentiment": "ERROR"`` and the error message placed in
        ``"transcription"``.
    """
    # Reject missing input up front so the user gets a clear message instead
    # of an opaque pipeline error, and no model needs to be loaded.
    if audio_file is None or (isinstance(audio_file, str) and not audio_file.strip()):
        return _failure_result("no audio was provided")

    # The classifier has a bounded input window; without truncation, longer
    # transcripts raise inside the model and the whole analysis fails.
    # max_length is given explicitly in case the tokenizer config does not
    # declare a limit of its own.
    classify_options = {"truncation": True, "max_length": 512}

    # Human-readable names for the classifier's labels. Both the generic
    # LABEL_n ids and lowercase class names are accepted (the "-latest"
    # checkpoint is believed to emit the latter); unknown labels pass through.
    sentiment_mapping = {
        'LABEL_0': 'Negative',
        'LABEL_1': 'Neutral',
        'LABEL_2': 'Positive',
        'negative': 'Negative',
        'neutral': 'Neutral',
        'positive': 'Positive',
    }

    try:
        transcriber, sentiment_model = create_speech_analyzer()

        transcription = transcriber(audio_file)["text"]

        # Nothing was recognized: classifying an empty string would only
        # produce a meaningless label.
        if not transcription.strip():
            return {
                "transcription": transcription,
                "sentiment": "N/A",
                "analysis": "No speech detected",
            }

        sentiment_result = sentiment_model(transcription, **classify_options)[0]
        label = sentiment_result['label']
        sentiment = sentiment_mapping.get(label, label)
        confidence = sentiment_result['score']

        # For longer transcripts, also classify each '.'-separated chunk and
        # report how much the per-chunk confidence scores vary.
        if len(transcription.split()) > 50:
            chunks = [chunk for chunk in transcription.split('.') if chunk.strip()]
            chunk_sentiments = [
                sentiment_model(chunk, **classify_options)[0] for chunk in chunks
            ]

            # Standard deviation of the top-label scores across chunks.
            sentiment_variation = np.std([s['score'] for s in chunk_sentiments])

            detailed_analysis = (
                f"Overall Sentiment: {sentiment} ({confidence:.2%})\n"
                f"Sentiment Stability: {1 - sentiment_variation:.2%}\n"
                f"Text chunks analyzed: {len(chunk_sentiments)}"
            )
        else:
            detailed_analysis = f"Sentiment: {sentiment} ({confidence:.2%})"

        return {
            "transcription": transcription,
            "sentiment": sentiment,
            "analysis": detailed_analysis,
        }

    except Exception as e:
        # UI boundary: surface the failure in the result instead of raising.
        return _failure_result(str(e))

def create_interface():
    """Create and configure the Gradio interface.

    Returns:
        A ``gr.Interface`` that takes one audio input (microphone or upload,
        delivered as a file path) and shows three text outputs:
        transcription, sentiment, and detailed analysis.
    """

    def run_analysis(audio_file):
        # With several output components, Gradio expects the function to
        # return one value per component, in order. analyze_speech returns a
        # string-keyed dict, which Gradio would treat as a
        # {component: value} mapping, so unpack it into a tuple here.
        result = analyze_speech(audio_file)
        return result["transcription"], result["sentiment"], result["analysis"]

    return gr.Interface(
        fn=run_analysis,
        inputs=gr.Audio(
            sources=["microphone", "upload"],
            type="filepath",
            label="Upload or Record Audio"
        ),
        # Order must match the tuple returned by run_analysis.
        outputs=[
            gr.Textbox(label="Transcription", lines=3),
            gr.Textbox(label="Sentiment"),
            gr.Textbox(label="Detailed Analysis", lines=3)
        ],
        title="Advanced Speech Sentiment Analyzer",
        description="""
        This tool performs advanced sentiment analysis on speech using RoBERTa.
        It provides detailed sentiment analysis for longer texts and handles
        both audio uploads and microphone recordings.
        """,
        theme=gr.themes.Soft(),
        examples=[]
    )

def main():
    """Build the Gradio app and start serving it."""
    # NOTE(review): these options request a share link, enable debug mode and
    # bind to all network interfaces ("0.0.0.0") - confirm this exposure is
    # intended for the target deployment.
    launch_options = {
        "share": True,
        "debug": True,
        "server_name": "0.0.0.0",
    }
    app = create_interface()
    app.launch(**launch_options)

# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()