Spaces:

sharangrav24
/

SentimentAnalysis

Sleeping

App Files Files Community

KrSharangrav committed on Mar 15

Commit

867c886

1 Parent(s): be89ae1

changes done post new csv insertion

Browse files

Files changed (2) hide show

app.py +2 -2
chatbot.py +46 -36

app.py CHANGED Viewed

@@ -6,10 +6,10 @@ from chatbot import chatbot_response
 # Insert historical data into MongoDB if not already present
 insert_data_if_empty()
-# Connect to MongoDB (available for further extension or analysis)
 collection = get_mongo_client()
-st.subheader("💬 Chatbot with Sentiment, Topic Analysis, and Dataset Insights")
 user_prompt = st.text_area("Ask me something:")
 if st.button("Get AI Response"):

 # Insert historical data into MongoDB if not already present
 insert_data_if_empty()
+# Connect to MongoDB (for potential further use)
 collection = get_mongo_client()
+st.subheader("💬 Chatbot with Sentiment & Topic Analysis")
 user_prompt = st.text_area("Ask me something:")
 if st.button("Get AI Response"):

chatbot.py CHANGED Viewed

@@ -2,6 +2,8 @@ import os
 import streamlit as st
 import google.generativeai as genai
 from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
 # Configure Gemini API key
 GEMINI_API_KEY = os.getenv("gemini_api")
@@ -56,45 +58,53 @@ def extract_topic(text):
     except Exception as e:
         return f"Error extracting topic: {e}", None
-# Function to generate AI response along with sentiment and topic analysis.
-# Also, if the query relates to the dataset, fetch statistics from MongoDB.
 def chatbot_response(user_prompt):
     if not user_prompt:
         return None, None, None, None, None
-    try:
-        # Generate AI response using Gemini
-        model_gen = genai.GenerativeModel("gemini-1.5-pro")
-        ai_response = model_gen.generate_content(user_prompt)
-        # Perform sentiment analysis on the user prompt
         sentiment_label, sentiment_confidence = analyze_sentiment(user_prompt)
-        # Perform topic extraction on the user prompt
         topic_label, topic_confidence = extract_topic(user_prompt)
-        # If the prompt seems related to the dataset, get MongoDB statistics.
-        if any(keyword in user_prompt.lower() for keyword in ["sentiment140", "dataset", "historical", "mongodb", "stored data"]):
-            from db import get_mongo_client
-            collection = get_mongo_client()
-            # Aggregate counts by the 'target' field (assumed to be in the CSV)
-            pipeline = [
-                {"$group": {"_id": "$target", "count": {"$sum": 1}}}
-            ]
-            results = list(collection.aggregate(pipeline))
-            sentiment_map = {0: "Negative", 2: "Neutral", 4: "Positive"}
-            stats_str = ""
-            total = 0
-            for r in results:
-                key = sentiment_map.get(r["_id"], r["_id"])
-                count = r["count"]
-                total += count
-                stats_str += f"{key}: {count}\n"
-            stats_str += f"Total records: {total}"
-            ai_response_text = ai_response.text + "\n\nDataset Information:\n" + stats_str
-        else:
-            ai_response_text = ai_response.text
-        return ai_response_text, sentiment_label, sentiment_confidence, topic_label, topic_confidence
-    except Exception as e:
-        return f"❌ Error: {e}", None, None, None, None

 import streamlit as st
 import google.generativeai as genai
 from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
+import pandas as pd
+from db import get_mongo_client
 # Configure Gemini API key
 GEMINI_API_KEY = os.getenv("gemini_api")
     except Exception as e:
         return f"Error extracting topic: {e}", None
+# Function to determine if the user's query is about the dataset
+def is_dataset_query(text):
+    """Return True when *text* mentions any dataset-related keyword.
+
+    Matching is a case-insensitive substring test, so a keyword also
+    matches inside a longer word (for example "data" matches "database").
+
+    Args:
+        text: The user's prompt as a string.
+
+    Returns:
+        True if at least one keyword occurs in the lowercased text,
+        otherwise False.
+    """
+    keywords = ("dataset", "data", "historical", "csv", "stored")
+    text_lower = text.lower()
+    return any(keyword in text_lower for keyword in keywords)
+# Function to retrieve insights from the dataset stored in MongoDB
+def get_dataset_insights():
+    """Summarise the sentiment distribution of the records stored in MongoDB.
+
+    Reads the collection returned by ``get_mongo_client()``, maps each
+    record's ``target`` value to a sentiment name and counts the names.
+
+    Returns:
+        A human-readable string: the sentiment distribution, a notice that
+        the collection is empty or has no ``target`` field, or an error
+        message if anything fails. This function does not raise.
+    """
+    # Sentiment labels from sentiment140.csv: 0 -> Negative, 2 -> Neutral, 4 -> Positive.
+    sentiment_mapping = {0: "Negative", 2: "Neutral", 4: "Positive"}
+
+    def to_label(value):
+        # A missing or non-numeric target is counted as "Unknown" instead of
+        # aborting the whole summary with a conversion error.
+        try:
+            return sentiment_mapping.get(int(value), "Unknown")
+        except (TypeError, ValueError, OverflowError):
+            return "Unknown"
+
+    try:
+        collection = get_mongo_client()
+        # Project only `target`: the summary needs nothing else, so the other
+        # fields of every record are not transferred or held in memory.
+        data = list(collection.find({}, {"_id": 0, "target": 1}))
+        if not data:
+            return "The dataset in MongoDB is empty."
+        df = pd.DataFrame(data)
+        if "target" not in df.columns:
+            return "The dataset does not have a 'target' field."
+        summary = df["target"].apply(to_label).value_counts().to_dict()
+        summary_str = ", ".join(f"{k}: {v}" for k, v in summary.items())
+        return f"The dataset sentiment distribution is: {summary_str}."
+    except Exception as e:
+        # Top-level boundary for the chatbot: report the failure as text.
+        return f"Error retrieving dataset insights: {e}"
+# Function to generate AI response along with sentiment and topic analysis
 def chatbot_response(user_prompt):
+    """Answer *user_prompt* and analyse its sentiment and topic.
+
+    Dataset-related prompts (per ``is_dataset_query``) are answered with
+    ``get_dataset_insights()``; every other prompt is sent to the Gemini
+    model. Sentiment and topic are always computed on the prompt itself.
+
+    Args:
+        user_prompt: The text entered by the user.
+
+    Returns:
+        A 5-tuple ``(response_text, sentiment_label, sentiment_confidence,
+        topic_label, topic_confidence)``. All five are ``None`` for an empty
+        prompt. On any failure the first item is an error message and the
+        remaining four are ``None``.
+    """
     if not user_prompt:
         return None, None, None, None, None
+    # One handler covers both branches so a failure on the dataset path is
+    # reported the same way as a failure on the Gemini path.
+    try:
+        # Check if the query is about the dataset
+        if is_dataset_query(user_prompt):
+            response_text = get_dataset_insights()
+        else:
+            # Generate AI response using Gemini
+            model_gen = genai.GenerativeModel("gemini-1.5-pro")
+            response_text = model_gen.generate_content(user_prompt).text
+        sentiment_label, sentiment_confidence = analyze_sentiment(user_prompt)
+        topic_label, topic_confidence = extract_topic(user_prompt)
+        return response_text, sentiment_label, sentiment_confidence, topic_label, topic_confidence
+    except Exception as e:
+        return f"❌ Error: {e}", None, None, None, None