File size: 5,560 Bytes
7268351
979706a
7268351
 
be89ae1
979706a
b83a640
be89ae1
7268351
 
 
 
 
 
be89ae1
 
7268351
be89ae1
 
f5b718b
7268351
be89ae1
7268351
5a94c8e
 
 
 
 
 
 
 
 
 
 
f763dd0
979706a
7268351
 
 
f16063a
 
7268351
 
 
 
 
 
 
 
 
979706a
5a94c8e
f763dd0
5a94c8e
f16063a
f5b718b
5a94c8e
f763dd0
5a94c8e
f763dd0
979706a
 
 
 
 
 
 
6e2dc41
 
979706a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7268351
 
5a94c8e
7268351
979706a
 
 
 
 
 
 
 
e332fa0
979706a
 
 
 
 
e332fa0
979706a
6e2dc41
 
e332fa0
979706a
 
 
 
e332fa0
979706a
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import os
import re
import streamlit as st
import google.generativeai as genai
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
from db import get_entry_by_index  # Helper to fetch a document by index

# Configure Gemini API key
GEMINI_API_KEY = os.getenv("gemini_api")
if GEMINI_API_KEY:
    genai.configure(api_key=GEMINI_API_KEY)
else:
    st.error("⚠️ Google API key is missing! Set it in Hugging Face Secrets.")

# Load pre-trained sentiment analysis model.
# Robustness fix: pre-initialize all model handles to None so that a failed
# load leaves the names defined. Previously a load failure left e.g.
# `sentiment_pipeline` undefined, and later calls surfaced a confusing
# NameError instead of an explicit "pipeline unavailable" condition.
MODEL_NAME = "cardiffnlp/twitter-roberta-base-sentiment"
tokenizer = None
model = None
sentiment_pipeline = None
topic_pipeline = None
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
    sentiment_pipeline = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
except Exception as e:
    st.error(f"❌ Error loading sentiment model: {e}")

# Load Topic Extraction Model (zero-shot classification over TOPIC_LABELS).
try:
    topic_pipeline = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
except Exception as e:
    st.error(f"❌ Error loading topic extraction model: {e}")

# Predefined topic labels for classification
TOPIC_LABELS = [
    "Technology", "Politics", "Business", "Sports", "Entertainment",
    "Health", "Science", "Education", "Finance", "Travel", "Food"
]

# Function to analyze sentiment using the pre-trained model
def analyze_sentiment(text):
    """Classify *text* with the RoBERTa sentiment pipeline.

    Returns a ``(label, confidence)`` tuple where label is one of
    "Negative", "Neutral" or "Positive" ("Unknown" for an unexpected
    model label). On any failure returns ``(error message, None)``.
    """
    # Map the model's raw LABEL_n outputs to human-readable names.
    label_names = {
        "LABEL_0": "Negative",
        "LABEL_1": "Neutral",
        "LABEL_2": "Positive",
    }
    try:
        result = sentiment_pipeline(text)[0]
        return label_names.get(result['label'], "Unknown"), result['score']
    except Exception as e:
        return f"Error analyzing sentiment: {e}", None

# Function to extract topic using zero-shot classification
def extract_topic(text):
    """Pick the most likely topic for *text* from TOPIC_LABELS.

    Returns a ``(topic, confidence)`` tuple taken from the top-ranked
    zero-shot result. On any failure returns ``(error message, None)``.
    """
    try:
        result = topic_pipeline(text, TOPIC_LABELS)
        # The pipeline returns labels/scores sorted best-first.
        return result["labels"][0], result["scores"][0]
    except Exception as e:
        return f"Error extracting topic: {e}", None

# Helper to detect if the user asks for a specific entry.
# Searches for patterns like "data entry 1" or "entry 2" (case-insensitive).
def get_entry_index(prompt):
    """Return the 0-based entry index requested in *prompt*, or None.

    Matches "entry N" / "data entry N" case-insensitively; the user-facing
    number is 1-based, so 1 is subtracted before returning.
    """
    found = re.search(r'(?:data entry|entry)\s*(\d+)', prompt, re.IGNORECASE)
    return int(found.group(1)) - 1 if found else None

# Helper to filter the generative response.
# We expect the response to contain:
#   "Let's break down this tweet-like MongoDB entry:" followed by text,
#   then "Conclusion:" followed by text.
# We remove any extra parts and remove the header "Conclusion:".
def filter_ai_response(ai_text):
    """Trim *ai_text* to the breakdown section plus the conclusion body.

    Keeps the breakdown marker line, the analysis text, and the conclusion
    text (with the "Conclusion:" header stripped). If the expected markers
    are absent — or out of order — the text is returned unchanged.
    """
    breakdown_marker = "Let's break down this tweet-like MongoDB entry:"
    conclusion_marker = "Conclusion:"
    if breakdown_marker in ai_text:
        body = ai_text.split(breakdown_marker, 1)[1]
        # Bug fix: the conclusion marker must appear *after* the breakdown
        # marker. The old code tested `conclusion_marker in ai_text`, so a
        # "Conclusion:" occurring only before the breakdown passed the check
        # but made the subsequent two-way unpack raise ValueError.
        if conclusion_marker in body:
            breakdown_part, conclusion_part = body.split(conclusion_marker, 1)
            # Rebuild output with the breakdown section and the conclusion
            # content (without the header).
            return (breakdown_marker + "\n" + breakdown_part.strip()
                    + "\n" + conclusion_part.strip())
    # Markers missing or misordered: return the original text.
    return ai_text

# Main function to generate AI response along with sentiment and category analysis.
# If the prompt asks for a specific entry, fetch its "text" from MongoDB and build a custom prompt.
def chatbot_response(user_prompt):
    """Answer *user_prompt* with a filtered Gemini reply plus analysis.

    Returns a 5-tuple:
    ``(response_text, sentiment_label, sentiment_confidence,
       topic_label, topic_confidence)``.
    An empty/falsy prompt yields five ``None`` values; lookup failures and
    exceptions yield an error string with four ``None`` values.
    """
    # Guard clause: nothing to do for an empty prompt.
    if not user_prompt:
        return None, None, None, None, None

    try:
        entry_index = get_entry_index(user_prompt)
        if entry_index is None:
            # Free-form question: analyze the prompt text itself.
            analysis_text = user_prompt
            combined_prompt = user_prompt
        else:
            # Entry query: fetch the document and build a structured prompt.
            entry = get_entry_by_index(entry_index)
            if entry is None:
                return "❌ No entry found for the requested index.", None, None, None, None
            analysis_text = entry.get("text", "No text available.")
            combined_prompt = (
                f"Provide analysis for the following MongoDB entry:\n\n"
                f"{analysis_text}\n\n"
                "Please respond in the following format:\n"
                "Let's break down this tweet-like MongoDB entry:\n[Your detailed analysis here]\n"
                "Conclusion:\n[Your conclusion here]"
            )

        # Sentiment and topic analysis run on the same text Gemini sees.
        sentiment_label, sentiment_confidence = analyze_sentiment(analysis_text)
        topic_label, topic_confidence = extract_topic(analysis_text)

        # Generate the AI response and keep only the required sections.
        gemini = genai.GenerativeModel("gemini-1.5-pro")
        raw_reply = gemini.generate_content(combined_prompt)
        return (
            filter_ai_response(raw_reply.text),
            sentiment_label,
            sentiment_confidence,
            topic_label,
            topic_confidence,
        )
    except Exception as e:
        return f"❌ Error: {e}", None, None, None, None