Spaces:

sharangrav24
/

SentimentAnalysis

Sleeping

App Files Files Community

KrSharangrav committed on Mar 15

Commit

84326e0

1 Parent(s): 4ec2156

change in logic

Browse files

Files changed (3) hide show

app.py +7 -5
chatbot.py +41 -26
db.py +18 -1

app.py CHANGED Viewed

@@ -3,15 +3,17 @@ import pandas as pd
 from db import insert_data_if_empty, get_mongo_client
 from chatbot import chatbot_response
-# Ensure that historical data is inserted if not already present.
 insert_data_if_empty()
-# Connect to MongoDB (optional: can be used for additional visualizations).
 collection = get_mongo_client()
-st.subheader("💬 Chatbot with Analysis for MongoDB Entries")
-# Updated hint: ask for analysis of a specific data entry.
-user_prompt = st.text_area("Ask me something (e.g., 'Provide analysis for the data entry 1 in the dataset'):")
 if st.button("Get AI Response"):
     ai_response, sentiment_label, sentiment_confidence, topic_label, topic_confidence = chatbot_response(user_prompt)

 from db import insert_data_if_empty, get_mongo_client
 from chatbot import chatbot_response
+# Ensure historical data is inserted into MongoDB if not already present.
 insert_data_if_empty()
+# Connect to MongoDB (optional: for additional visualizations)
 collection = get_mongo_client()
+st.subheader("💬 Chatbot with Dataset Analysis")
+# Updated hint to include examples for basic questions and entry queries.
+user_prompt = st.text_area(
+    "Ask me something (e.g., 'Provide analysis for data entry 1 in the dataset' or 'What is the dataset summary?'):"
+)
 if st.button("Get AI Response"):
     ai_response, sentiment_label, sentiment_confidence, topic_label, topic_confidence = chatbot_response(user_prompt)

chatbot.py CHANGED Viewed

@@ -3,7 +3,7 @@ import re
 import streamlit as st
 import google.generativeai as genai
 from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
-from db import get_entry_by_index  # For fetching a specific entry from MongoDB
 # Configure Gemini API key
 GEMINI_API_KEY = os.getenv("gemini_api")
@@ -56,55 +56,70 @@ def extract_topic(text):
     except Exception as e:
         return f"Error extracting topic: {e}", None
-# Helper: extract an entry index from a query string.
-# For example, "data entry 1" or "entry 2" will return index 0 or 1 respectively.
 def extract_entry_index(prompt):
     match = re.search(r'(data entry|entry)\s+(\d+)', prompt, re.IGNORECASE)
     if match:
-        index = int(match.group(2)) - 1  # Convert to 0-based index
         return index
     return None
 def chatbot_response(user_prompt):
     if not user_prompt:
         return None, None, None, None, None
     try:
-        # Check if the user query asks for a specific dataset entry.
-        entry_index = extract_entry_index(user_prompt)
-        if entry_index is not None:
-            # Fetch the requested entry from MongoDB.
-            entry = get_entry_by_index(entry_index)
             if entry is None:
                 return "❌ No entry found for the requested index.", None, None, None, None
-            # Extract the required fields.
             entry_text = entry.get("text", "No text available.")
             entry_user = entry.get("user", "Unknown")
             entry_date = entry.get("date", "Unknown")
-            # Build a static response message with the desired formatting.
             ai_response = (
                 "Let's break down this tweet-like MongoDB entry:\n\n"
                 f"Tweet: {entry_text}\n"
                 f"User: {entry_user}\n"
                 f"Date: {entry_date}"
             )
-            # Run sentiment and topic analysis on the entry's text.
             sentiment_label, sentiment_confidence = analyze_sentiment(entry_text)
             topic_label, topic_confidence = extract_topic(entry_text)
-            return ai_response, sentiment_label, sentiment_confidence, topic_label, topic_confidence
-        else:
-            # For all other queries, use the generative model flow.
-            model_gen = genai.GenerativeModel("gemini-1.5-pro")
-            ai_response_obj = model_gen.generate_content(user_prompt)
-            ai_response = ai_response_obj.text
-            # Perform sentiment and topic analysis on the user prompt.
-            sentiment_label, sentiment_confidence = analyze_sentiment(user_prompt)
-            topic_label, topic_confidence = extract_topic(user_prompt)
             return ai_response, sentiment_label, sentiment_confidence, topic_label, topic_confidence
     except Exception as e:
         return f"❌ Error: {e}", None, None, None, None

 import streamlit as st
 import google.generativeai as genai
 from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
+from db import get_entry_by_index, get_dataset_summary
 # Configure Gemini API key
 GEMINI_API_KEY = os.getenv("gemini_api")
     except Exception as e:
         return f"Error extracting topic: {e}", None
# Matches "entry <number>" as a whole word (so "data entry 1" and "Entry 2" match,
# while "sentry 3" or "reentry 5" do not).
_ENTRY_REF_RE = re.compile(r'\bentry\s+(\d+)', re.IGNORECASE)


# Helper: Extract entry index from prompt (e.g., "data entry 1" yields index 0)
def extract_entry_index(prompt):
    """Return the 0-based entry index referenced in *prompt*, or None.

    The prompt is searched case-insensitively for the word "entry" followed
    by whitespace and a decimal number; the number is treated as 1-based and
    converted to a 0-based index.

    Args:
        prompt: Free-form user text. Empty or None is accepted.

    Returns:
        The referenced number minus one, or None when the prompt is empty or
        contains no entry reference. Note that "entry 0" yields -1; callers
        should treat a negative index as "no such entry".
    """
    if not prompt:
        return None
    found = _ENTRY_REF_RE.search(prompt)
    if found is None:
        return None
    return int(found.group(1)) - 1  # convert 1-based number to 0-based index
# Helper: Report whether the prompt refers to one specific dataset entry.
def is_entry_query(prompt):
    """Return ``(True, index)`` when *prompt* names an entry, else ``(False, None)``.

    The index is whatever ``extract_entry_index`` produced for the prompt.
    """
    entry_index = extract_entry_index(prompt)
    refers_to_entry = entry_index is not None
    # When nothing was found, entry_index is already None, giving (False, None).
    return refers_to_entry, entry_index
# Helper: Recognise prompts that ask for dataset-level information.
# Examples: "What is the dataset summary?", "Show me the sentiment distribution", etc.
def is_basic_dataset_question(prompt):
    """Return True if the lower-cased *prompt* contains any known summary phrase."""
    normalized = prompt.lower()
    summary_phrases = (
        "dataset summary",
        "total tweets",
        "sentiment distribution",
        "overall dataset",
        "data overview",
        "data summary",
    )
    for phrase in summary_phrases:
        if phrase in normalized:
            return True
    return False
 def chatbot_response(user_prompt):
     """Answer *user_prompt* and classify the text the answer is based on.

     The prompt is routed, in this order:
       1. Basic dataset question -> summary string from get_dataset_summary().
       2. Reference to a specific entry -> that document is fetched with
          get_entry_by_index() and echoed in a fixed "Tweet/User/Date" layout.
       3. Anything else -> forwarded to the Gemini generative model.

     Sentiment and topic analysis run on the summary text, the entry text, or
     the user prompt respectively.

     Args:
         user_prompt: Text typed by the user.

     Returns:
         A 5-tuple ``(ai_response, sentiment_label, sentiment_confidence,
         topic_label, topic_confidence)``. All five are None for an empty or
         whitespace-only prompt. When the entry cannot be found or any step
         raises, the first element is a message starting with "❌" and the
         remaining four are None.
     """
     # Whitespace-only input carries no question; treat it like an empty prompt
     # instead of sending it to the remote model.
     if not user_prompt or not user_prompt.strip():
         return None, None, None, None, None
     try:
         # If the query is a basic dataset question, fetch summary from MongoDB.
         if is_basic_dataset_question(user_prompt):
             summary = get_dataset_summary()
             ai_response = "Dataset Summary:\n" + summary
             # Run analysis on the summary text
             sentiment_label, sentiment_confidence = analyze_sentiment(summary)
             topic_label, topic_confidence = extract_topic(summary)
             return ai_response, sentiment_label, sentiment_confidence, topic_label, topic_confidence

         # If the query is about a specific entry in the dataset...
         entry_query, index = is_entry_query(user_prompt)
         if entry_query:
             # A prompt such as "entry 0" converts to index -1. The lookup skips
             # `index` documents and a negative skip is rejected by the cursor,
             # so answer "not found" here rather than surfacing a raw error.
             entry = get_entry_by_index(index) if index >= 0 else None
             if entry is None:
                 return "❌ No entry found for the requested index.", None, None, None, None
             # Retrieve fields from the document
             entry_text = entry.get("text", "No text available.")
             entry_user = entry.get("user", "Unknown")
             entry_date = entry.get("date", "Unknown")
             # Build a static response message with the required format
             ai_response = (
                 "Let's break down this tweet-like MongoDB entry:\n\n"
                 f"Tweet: {entry_text}\n"
                 f"User: {entry_user}\n"
                 f"Date: {entry_date}"
             )
             sentiment_label, sentiment_confidence = analyze_sentiment(entry_text)
             topic_label, topic_confidence = extract_topic(entry_text)
             return ai_response, sentiment_label, sentiment_confidence, topic_label, topic_confidence

         # For other queries, use the generative model (this branch may be slower).
         model_gen = genai.GenerativeModel("gemini-1.5-pro")
         ai_response_obj = model_gen.generate_content(user_prompt)
         ai_response = ai_response_obj.text
         sentiment_label, sentiment_confidence = analyze_sentiment(user_prompt)
         topic_label, topic_confidence = extract_topic(user_prompt)
         return ai_response, sentiment_label, sentiment_confidence, topic_label, topic_confidence
     except Exception as e:
         # UI boundary: report the failure to the user instead of crashing the app.
         return f"❌ Error: {e}", None, None, None, None

db.py CHANGED Viewed

@@ -27,9 +27,26 @@ def insert_data_if_empty():
         except Exception as e:
             print(f"❌ Error loading dataset: {e}")
 def get_entry_by_index(index=0):
     collection = get_mongo_client()
-    # Fetch the document by skipping "index" entries.
     doc_cursor = collection.find({}, {"_id": 0}).skip(index).limit(1)
     docs = list(doc_cursor)
     if docs:

         except Exception as e:
             print(f"❌ Error loading dataset: {e}")
def get_dataset_summary():
    """Return a one-line summary of how many documents carry each ``target`` value.

    Documents are grouped by their ``target`` field. Codes 0, 2 and 4 are
    shown as "Negative", "Neutral" and "Positive"; any other value is shown
    verbatim.

    Returns:
        A string such as ``"Total tweets: 10. Negative: 4, Positive: 6."``.
        For an empty collection the result is ``"Total tweets: 0."``.
    """
    collection = get_mongo_client()
    stages = [
        {"$group": {"_id": "$target", "count": {"$sum": 1}}},
        # $group emits its groups in no guaranteed order; sort so the summary
        # text is the same on every call.
        {"$sort": {"_id": 1}},
    ]
    labels = {"0": "Negative", "2": "Neutral", "4": "Positive"}

    total = 0
    parts = []
    for doc in collection.aggregate(stages):
        target = doc["_id"]
        # Integral floats (e.g. 4.0) should map to the same label as 4.
        if isinstance(target, float) and target.is_integer():
            target = int(target)
        key = str(target)
        count = doc["count"]
        total += count
        parts.append(f"{labels.get(key, key)}: {count}")

    summary = f"Total tweets: {total}."
    if parts:
        # Only append the breakdown when there is one; avoids a dangling ". ."
        summary += " " + ", ".join(parts) + "."
    return summary
 def get_entry_by_index(index=0):
     collection = get_mongo_client()
     doc_cursor = collection.find({}, {"_id": 0}).skip(index).limit(1)
     docs = list(doc_cursor)
     if docs: