Spaces:

sharangrav24
/

SentimentAnalysis

Sleeping

App Files Files Community

KrSharangrav committed on Mar 15

Commit

e332fa0

1 Parent(s): 61ca946

change in the ai response part of chatbot

Browse files

Files changed (3) hide show

app.py +3 -3
chatbot.py +25 -44
db.py +23 -2

app.py CHANGED Viewed

@@ -3,13 +3,13 @@ import pandas as pd
 from db import insert_data_if_empty, get_mongo_client
 from chatbot import chatbot_response
-# Insert historical data into MongoDB if not already present
 insert_data_if_empty()
-# Connect to MongoDB (for potential further use)
 collection = get_mongo_client()
-st.subheader("💬 Chatbot with Sentiment & Topic Analysis")
 user_prompt = st.text_area("Ask me something:")
 if st.button("Get AI Response"):

 from db import insert_data_if_empty, get_mongo_client
 from chatbot import chatbot_response
+# Insert historical data into MongoDB if not already present.
 insert_data_if_empty()
+# Connect to MongoDB (this may be used for additional visualizations if needed).
 collection = get_mongo_client()
+st.subheader("💬 Chatbot with Sentiment, Topic Analysis, and Dataset Insights")
 user_prompt = st.text_area("Ask me something:")
 if st.button("Get AI Response"):

chatbot.py CHANGED Viewed

@@ -2,8 +2,7 @@ import os
 import streamlit as st
 import google.generativeai as genai
 from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
-import pandas as pd
-from db import get_mongo_client
 # Configure Gemini API key
 GEMINI_API_KEY = os.getenv("gemini_api")
@@ -58,53 +57,35 @@ def extract_topic(text):
     except Exception as e:
         return f"Error extracting topic: {e}", None
-# Function to determine if the user's query is about the dataset
-def is_dataset_query(text):
-    keywords = ["dataset", "data", "historical", "csv", "stored"]
-    text_lower = text.lower()
-    for keyword in keywords:
-        if keyword in text_lower:
-            return True
-    return False
-# Function to retrieve insights from the dataset stored in MongoDB
-def get_dataset_insights():
-    try:
-        collection = get_mongo_client()
-        data = list(collection.find({}, {"_id": 0}))
-        if not data:
-            return "The dataset in MongoDB is empty."
-        df = pd.DataFrame(data)
-        # Map the sentiment labels from sentiment140.csv: 0 -> Negative, 2 -> Neutral, 4 -> Positive.
-        sentiment_mapping = {0: "Negative", 2: "Neutral", 4: "Positive"}
-        if "target" in df.columns:
-            df['sentiment_label'] = df['target'].apply(lambda x: sentiment_mapping.get(int(x), "Unknown"))
-            summary = df['sentiment_label'].value_counts().to_dict()
-            summary_str = ", ".join([f"{k}: {v}" for k, v in summary.items()])
-            return f"The dataset sentiment distribution is: {summary_str}."
-        else:
-            return "The dataset does not have a 'target' field."
-    except Exception as e:
-        return f"Error retrieving dataset insights: {e}"
 # Function to generate AI response along with sentiment and topic analysis
 def chatbot_response(user_prompt):
     if not user_prompt:
         return None, None, None, None, None
-    # Check if the query is about the dataset
-    if is_dataset_query(user_prompt):
-        dataset_insights = get_dataset_insights()
         sentiment_label, sentiment_confidence = analyze_sentiment(user_prompt)
         topic_label, topic_confidence = extract_topic(user_prompt)
-        return dataset_insights, sentiment_label, sentiment_confidence, topic_label, topic_confidence
-    else:
-        try:
-            # Generate AI response using Gemini
-            model_gen = genai.GenerativeModel("gemini-1.5-pro")
-            ai_response = model_gen.generate_content(user_prompt)
-            sentiment_label, sentiment_confidence = analyze_sentiment(user_prompt)
-            topic_label, topic_confidence = extract_topic(user_prompt)
-            return ai_response.text, sentiment_label, sentiment_confidence, topic_label, topic_confidence
-        except Exception as e:
-            return f"❌ Error: {e}", None, None, None, None

 import streamlit as st
 import google.generativeai as genai
 from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
+from db import get_dataset_summary  # Import the dataset summary function
 # Configure Gemini API key
 GEMINI_API_KEY = os.getenv("gemini_api")
     except Exception as e:
         return f"Error extracting topic: {e}", None
+# Helper to check if the user query is about the dataset.
+# Matching is case-insensitive and substring-based, so "data" also matches
+# longer words that contain it (e.g. "dataset", "database", "metadata").
+def is_dataset_query(prompt):
+    keywords = ["dataset", "data", "csv", "mongodb", "historical"]
+    # Lowercase once up front instead of once per keyword.
+    lowered = prompt.lower()
+    return any(keyword in lowered for keyword in keywords)
 # Function to generate AI response along with sentiment and topic analysis
+# Returns a 5-tuple: (response text, sentiment label, sentiment confidence,
+# topic label, topic confidence).
+#   - Empty/falsy prompt: all five values are None.
+#   - Any exception inside the try block: an error string followed by four None.
 def chatbot_response(user_prompt):
     if not user_prompt:
         return None, None, None, None, None
+    try:
+        # If the query seems related to the dataset, fetch summary insights.
+        if is_dataset_query(user_prompt):
+            dataset_insights = get_dataset_summary()
+            # The summary is appended to the user's text so Gemini answers with
+            # the stored counts in view.
+            combined_prompt = (
+                f"{user_prompt}\n\nDataset Insights:\n{dataset_insights}\n\n"
+                "Provide a detailed answer that incorporates these dataset insights."
+            )
+        else:
+            combined_prompt = user_prompt
+        # Generate AI response using Gemini with the (possibly augmented) prompt.
+        model_gen = genai.GenerativeModel("gemini-1.5-pro")
+        ai_response = model_gen.generate_content(combined_prompt)
+        # Perform sentiment analysis and topic extraction on the original user prompt.
+        # NOTE(review): these run after the Gemini call inside the same try, so
+        # a Gemini failure also discards the sentiment/topic results.
         sentiment_label, sentiment_confidence = analyze_sentiment(user_prompt)
         topic_label, topic_confidence = extract_topic(user_prompt)
+        return ai_response.text, sentiment_label, sentiment_confidence, topic_label, topic_confidence
+    except Exception as e:
+        # Errors are reported as the response text rather than raised.
+        return f"❌ Error: {e}", None, None, None, None

db.py CHANGED Viewed

@@ -3,13 +3,13 @@ import requests
 import io
 from pymongo import MongoClient
-# Function to connect to MongoDB
 def get_mongo_client():
     client = MongoClient("mongodb+srv://groupA:[email protected]/?retryWrites=true&w=majority&appName=SentimentCluster")
     db = client["sentiment_db"]
     return db["tweets"]
-# Function to insert data if the collection is empty
 def insert_data_if_empty():
     collection = get_mongo_client()
     if collection.count_documents({}) == 0:
@@ -23,3 +23,24 @@ def insert_data_if_empty():
             print("✅ Data Inserted into MongoDB!")
         except Exception as e:
             print(f"❌ Error loading dataset: {e}")

 import io
 from pymongo import MongoClient
+# Function to connect to MongoDB.
 def get_mongo_client():
     client = MongoClient("mongodb+srv://groupA:[email protected]/?retryWrites=true&w=majority&appName=SentimentCluster")
     db = client["sentiment_db"]
     return db["tweets"]
+# Function to insert data if the collection is empty.
 def insert_data_if_empty():
     collection = get_mongo_client()
     if collection.count_documents({}) == 0:
             print("✅ Data Inserted into MongoDB!")
         except Exception as e:
             print(f"❌ Error loading dataset: {e}")
+# Function to get dataset summary from MongoDB.
+# Returns a one-line string such as
+# "Total tweets: 10. Negative: 4, Neutral: 1, Positive: 5."
+# or "The dataset in MongoDB is empty." when the collection has no documents.
+def get_dataset_summary():
+    collection = get_mongo_client()
+    # Aggregate counts for each sentiment target.
+    pipeline = [
+        {"$group": {"_id": "$target", "count": {"$sum": 1}}}
+    ]
+    results = list(collection.aggregate(pipeline))
+    if not results:
+        # Without this guard the summary would read "Total tweets: 0. .".
+        return "The dataset in MongoDB is empty."
+    # $group does not guarantee output order; sort so the summary is stable.
+    results.sort(key=lambda doc: str(doc["_id"]))
+    # Map the sentiment target values to labels; unknown targets are shown as-is.
+    mapping = {"0": "Negative", "2": "Neutral", "4": "Positive"}
+    total = sum(doc["count"] for doc in results)
+    summary_parts = []
+    for doc in results:
+        target = str(doc["_id"])
+        summary_parts.append(f"{mapping.get(target, target)}: {doc['count']}")
+    return f"Total tweets: {total}. " + ", ".join(summary_parts) + "."