import tempfile

import gradio as gr
import librosa
import soundfile as sf
import whisper
from transformers import pipeline

# Speech-to-text model (Whisper "base") and a GoEmotions classifier
# that scores text against 28 fine-grained emotion labels.
model = whisper.load_model("base")
sentiment_analysis = pipeline(
    "sentiment-analysis",
    framework="pt",
    model="SamLowe/roberta-base-go_emotions",
)


def analyze_sentiment(text):
    """Classify the text and return a {label: score} mapping."""
    results = sentiment_analysis(text)
    return {result["label"]: result["score"] for result in results}


def get_sentiment_emoji(sentiment):
    """Map a GoEmotions label to a representative emoji."""
    emoji_mapping = {
        "disappointment": "😞",
        "sadness": "😢",
        "annoyance": "😠",
        "neutral": "😐",
        "disapproval": "👎",
        "realization": "😮",
        "nervousness": "😬",
        "approval": "👍",
        "joy": "😄",
        "anger": "😡",
        "embarrassment": "😳",
        "caring": "🤗",
        "remorse": "😔",
        "disgust": "🤢",
        "grief": "😥",
        "confusion": "😕",
        "relief": "😌",
        "desire": "😍",
        "admiration": "😌",
        "optimism": "😊",
        "fear": "😨",
        "love": "❤️",
        "excitement": "🎉",
        "curiosity": "🤔",
        "amusement": "😄",
        "surprise": "😲",
        "gratitude": "🙏",
        "pride": "🦁",
    }
    return emoji_mapping.get(sentiment, "")


def display_sentiment_results(sentiment_results, option):
    """Format the results, optionally appending the confidence score."""
    sentiment_text = ""
    for sentiment, score in sentiment_results.items():
        emoji = get_sentiment_emoji(sentiment)
        score_percentage = score * 100
        if option == "Sentiment Only":
            sentiment_text += f"{sentiment} {emoji}\n"
        elif option == "Sentiment + Score":
            sentiment_text += f"{sentiment} {emoji}: {score_percentage:.2f}%\n"
    return sentiment_text


def load_and_resample_audio(file_path, target_sample_rate=16000):
    """Resample the uploaded audio to the 16 kHz rate Whisper expects."""
    audio, _ = librosa.load(file_path, sr=target_sample_rate)
    # Write to a unique temporary file rather than a hard-coded path,
    # so concurrent requests cannot clobber each other's audio.
    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    tmp.close()
    sf.write(tmp.name, audio, target_sample_rate)
    return tmp.name


def inference(audio_file_path, sentiment_option):
    """Transcribe the audio, detect its language, and score its emotions."""
    resampled_audio_path = load_and_resample_audio(audio_file_path)

    audio = whisper.load_audio(resampled_audio_path)
    audio = whisper.pad_or_trim(audio)
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    # Pick the most probable spoken language from the mel spectrogram.
    _, probs = model.detect_language(mel)
    lang = max(probs, key=probs.get)

    # Decode in fp32 so the app also runs on CPU-only machines.
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)

    sentiment_results = analyze_sentiment(result.text)
    sentiment_output = display_sentiment_results(sentiment_results, sentiment_option)

    return lang.upper(), result.text, sentiment_output


title = "🎤 Gradio UI"

block = gr.Blocks(title=title)
with block:
    gr.Markdown("# Mood Reader 🕵️‍♂️")
    gr.Markdown(
        "Your Words Whisper 🤫, But Emotions Shout 📢 – Discover What's Truly "
        "Behind Every Sentence with Mood Reader 🕵️‍♂️💬"
    )
    with gr.Column():
        audio = gr.Audio(label="Input Audio", type="filepath")
        sentiment_option = gr.Radio(
            choices=["Sentiment Only", "Sentiment + Score"],
            label="Select an option",
        )
        transcribe_btn = gr.Button("Transcribe")
        lang_str = gr.Textbox(label="Language")
        text = gr.Textbox(label="Transcription")
        sentiment_output = gr.Textbox(label="Sentiment Analysis Results")
        transcribe_btn.click(
            inference,
            inputs=[audio, sentiment_option],
            outputs=[lang_str, text, sentiment_output],
        )

block.launch()
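
# ----------------------------------------------------------------------
# Usage note (a sketch; the filename and environment are assumptions,
# not part of the original script). whisper.load_audio shells out to
# ffmpeg, so ffmpeg must be installed and on PATH. The pip package
# names below are the standard ones for the imports above:
#
#   pip install gradio openai-whisper transformers torch librosa soundfile
#   python app.py   # assuming this file is saved as app.py; then open
#                   # the local URL that Gradio prints to the console
# ----------------------------------------------------------------------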