import os
import re

import streamlit as st
import google.generativeai as genai
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer

from db import get_entry_by_index, get_dataset_summary

# Configure Gemini API key (read from the environment / Hugging Face Secrets).
GEMINI_API_KEY = os.getenv("gemini_api")
if GEMINI_API_KEY:
    genai.configure(api_key=GEMINI_API_KEY)
else:
    st.error("⚠️ Google API key is missing! Set it in Hugging Face Secrets.")

# Load pre-trained sentiment analysis model
MODEL_NAME = "cardiffnlp/twitter-roberta-base-sentiment"
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
    # truncation=True keeps inputs within the model's max sequence length so
    # long tweets/summaries no longer raise at inference time.
    sentiment_pipeline = pipeline(
        "sentiment-analysis", model=model, tokenizer=tokenizer, truncation=True
    )
except Exception as e:
    st.error(f"❌ Error loading sentiment model: {e}")

# Load Topic Extraction Model
try:
    topic_pipeline = pipeline(
        "zero-shot-classification", model="facebook/bart-large-mnli"
    )
except Exception as e:
    st.error(f"❌ Error loading topic extraction model: {e}")

# Predefined topic labels for classification
TOPIC_LABELS = [
    "Technology 💻", "Politics 🏛️", "Business 💼", "Sports ⚽", "Entertainment 🎭",
    "Health 🩺", "Science 🔬", "Education 🎓", "Finance 💰", "Travel ✈️",
    "Food 🍔", "Environment 🌱", "Culture 🎎", "History 🏺", "Art 🎨",
    "Literature 📚", "Automotive 🚗", "Law ⚖️", "Music 🎵", "Movies 🎬"
]

# Raw model labels -> human-readable sentiment names.
# Hoisted to module level so the dict is not rebuilt on every call.
SENTIMENT_MAPPING = {
    "LABEL_0": "😞 Negative",
    "LABEL_1": "😐 Neutral",
    "LABEL_2": "😄 Positive",
}


def analyze_sentiment(text):
    """Classify the sentiment of *text*.

    Returns a (label, score) tuple where label is one of the
    SENTIMENT_MAPPING values (or "Unknown" for an unrecognized model label)
    and score is the model confidence. On any failure, returns an error
    message string and None instead.
    """
    try:
        result = sentiment_pipeline(text)[0]
        return SENTIMENT_MAPPING.get(result["label"], "Unknown"), result["score"]
    except Exception as e:
        return f"Error analyzing sentiment: {e}", None


def extract_topic(text):
    """Zero-shot classify *text* against TOPIC_LABELS.

    Returns (top_label, confidence) for the highest-scoring label. On any
    failure, returns an error message string and None instead.
    """
    try:
        result = topic_pipeline(text, TOPIC_LABELS)
        return result["labels"][0], result["scores"][0]
    except Exception as e:
        return f"Error extracting topic: {e}", None


# Helper: Extract entry index from prompt (e.g., "data entry 1" yields index 0)
# Matches phrases like "data entry 3" or "entry 12"; compiled once at import
# time instead of on every call.
_ENTRY_PATTERN = re.compile(r"(data entry|entry)\s+(\d+)", re.IGNORECASE)

# Keywords that signal a question about the dataset as a whole.
_DATASET_KEYWORDS = (
    "dataset summary",
    "total tweets",
    "sentiment distribution",
    "overall dataset",
    "data overview",
    "data summary",
)


def extract_entry_index(prompt):
    """Return the 0-based entry index named in *prompt*, or None.

    "data entry 1" yields 0. Bug fix: "entry 0" previously produced -1,
    which silently selected the wrong record via negative indexing;
    non-positive entry numbers now return None.
    """
    match = _ENTRY_PATTERN.search(prompt)
    if match:
        number = int(match.group(2))
        if number >= 1:
            return number - 1  # convert to 0-based index
    return None


def is_entry_query(prompt):
    """Return (True, index) if *prompt* asks for a specific dataset entry,
    otherwise (False, None)."""
    index = extract_entry_index(prompt)
    if index is not None:
        return True, index
    return False, None


def is_basic_dataset_question(prompt):
    """Return True if *prompt* looks like a basic whole-dataset question."""
    lower = prompt.lower()
    return any(keyword in lower for keyword in _DATASET_KEYWORDS)


def chatbot_response(user_prompt):
    """Produce an answer plus sentiment/topic analysis for *user_prompt*.

    Routing: dataset-summary questions are answered from the MongoDB
    summary, "entry N" questions from the stored document at that index,
    and everything else via the Gemini generative model.

    Returns a 5-tuple (ai_response, sentiment_label, sentiment_confidence,
    topic_label, topic_confidence). All five are None for an empty prompt;
    on failure the first element is an error message and the rest are None.
    """
    if not user_prompt:
        return None, None, None, None, None
    try:
        # Basic dataset question -> answer from the MongoDB summary.
        if is_basic_dataset_question(user_prompt):
            summary = get_dataset_summary()
            ai_response = "Dataset Summary:\n" + summary
            sentiment_label, sentiment_confidence = analyze_sentiment(summary)
            topic_label, topic_confidence = extract_topic(summary)
            return ai_response, sentiment_label, sentiment_confidence, topic_label, topic_confidence

        # Specific dataset entry -> fetch the document and describe it.
        entry_query, index = is_entry_query(user_prompt)
        if entry_query:
            entry = get_entry_by_index(index)
            if entry is None:
                return "❌ No entry found for the requested index.", None, None, None, None
            # Retrieve fields from the document; fall back to placeholders.
            entry_text = entry.get("text", "No text available.")
            entry_user = entry.get("user", "Unknown")
            entry_date = entry.get("date", "Unknown")
            # Static response message with blank lines between fields.
            ai_response = (
                "Let's break down this MongoDB entry:\n\n"
                f"**Tweet:** {entry_text}\n\n"
                f"**User:** {entry_user}\n\n"
                f"**Date:** {entry_date}"
            )
            sentiment_label, sentiment_confidence = analyze_sentiment(entry_text)
            topic_label, topic_confidence = extract_topic(entry_text)
            return ai_response, sentiment_label, sentiment_confidence, topic_label, topic_confidence

        # Anything else -> ask the generative model.
        model_gen = genai.GenerativeModel("gemini-1.5-pro")
        ai_response = model_gen.generate_content(user_prompt).text
        sentiment_label, sentiment_confidence = analyze_sentiment(user_prompt)
        topic_label, topic_confidence = extract_topic(user_prompt)
        return ai_response, sentiment_label, sentiment_confidence, topic_label, topic_confidence
    except Exception as e:
        return f"❌ Error: {e}", None, None, None, None