KrSharangrav committed on
Commit
867c886
·
1 Parent(s): be89ae1

changes done post new csv insertion

Browse files
Files changed (2) hide show
  1. app.py +2 -2
  2. chatbot.py +46 -36
app.py CHANGED
@@ -6,10 +6,10 @@ from chatbot import chatbot_response
6
  # Insert historical data into MongoDB if not already present
7
  insert_data_if_empty()
8
 
9
- # Connect to MongoDB (available for further extension or analysis)
10
  collection = get_mongo_client()
11
 
12
- st.subheader("💬 Chatbot with Sentiment, Topic Analysis, and Dataset Insights")
13
  user_prompt = st.text_area("Ask me something:")
14
 
15
  if st.button("Get AI Response"):
 
6
  # Insert historical data into MongoDB if not already present
7
  insert_data_if_empty()
8
 
9
+ # Connect to MongoDB (for potential further use)
10
  collection = get_mongo_client()
11
 
12
+ st.subheader("💬 Chatbot with Sentiment & Topic Analysis")
13
  user_prompt = st.text_area("Ask me something:")
14
 
15
  if st.button("Get AI Response"):
chatbot.py CHANGED
@@ -2,6 +2,8 @@ import os
2
  import streamlit as st
3
  import google.generativeai as genai
4
  from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
 
 
5
 
6
  # Configure Gemini API key
7
  GEMINI_API_KEY = os.getenv("gemini_api")
@@ -56,45 +58,53 @@ def extract_topic(text):
56
  except Exception as e:
57
  return f"Error extracting topic: {e}", None
58
 
59
- # Function to generate AI response along with sentiment and topic analysis.
60
- # Also, if the query relates to the dataset, fetch statistics from MongoDB.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  def chatbot_response(user_prompt):
62
  if not user_prompt:
63
  return None, None, None, None, None
64
 
65
- try:
66
- # Generate AI response using Gemini
67
- model_gen = genai.GenerativeModel("gemini-1.5-pro")
68
- ai_response = model_gen.generate_content(user_prompt)
69
-
70
- # Perform sentiment analysis on the user prompt
71
  sentiment_label, sentiment_confidence = analyze_sentiment(user_prompt)
72
-
73
- # Perform topic extraction on the user prompt
74
  topic_label, topic_confidence = extract_topic(user_prompt)
75
-
76
- # If the prompt seems related to the dataset, get MongoDB statistics.
77
- if any(keyword in user_prompt.lower() for keyword in ["sentiment140", "dataset", "historical", "mongodb", "stored data"]):
78
- from db import get_mongo_client
79
- collection = get_mongo_client()
80
- # Aggregate counts by the 'target' field (assumed to be in the CSV)
81
- pipeline = [
82
- {"$group": {"_id": "$target", "count": {"$sum": 1}}}
83
- ]
84
- results = list(collection.aggregate(pipeline))
85
- sentiment_map = {0: "Negative", 2: "Neutral", 4: "Positive"}
86
- stats_str = ""
87
- total = 0
88
- for r in results:
89
- key = sentiment_map.get(r["_id"], r["_id"])
90
- count = r["count"]
91
- total += count
92
- stats_str += f"{key}: {count}\n"
93
- stats_str += f"Total records: {total}"
94
- ai_response_text = ai_response.text + "\n\nDataset Information:\n" + stats_str
95
- else:
96
- ai_response_text = ai_response.text
97
-
98
- return ai_response_text, sentiment_label, sentiment_confidence, topic_label, topic_confidence
99
- except Exception as e:
100
- return f"❌ Error: {e}", None, None, None, None
 
2
  import streamlit as st
3
  import google.generativeai as genai
4
  from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
5
+ import pandas as pd
6
+ from db import get_mongo_client
7
 
8
  # Configure Gemini API key
9
  GEMINI_API_KEY = os.getenv("gemini_api")
 
58
  except Exception as e:
59
  return f"Error extracting topic: {e}", None
60
 
61
+ # Function to determine if the user's query is about the dataset
62
def is_dataset_query(text):
    """Return True when *text* mentions one of the dataset-related keywords.

    Matching is a case-insensitive substring test, so a keyword embedded in a
    longer word (for example "data" inside "database") also counts.

    Args:
        text: The user's prompt. Anything that is not a string (including
            ``None``) is treated as "not a dataset query" instead of raising.

    Returns:
        bool: True if any keyword occurs in the lower-cased text.
    """
    keywords = ("dataset", "data", "historical", "csv", "stored")
    # Guard first: calling .lower() on None or a non-string would raise.
    if not isinstance(text, str):
        return False
    text_lower = text.lower()
    return any(keyword in text_lower for keyword in keywords)
69
+
70
+ # Function to retrieve insights from the dataset stored in MongoDB
71
def _label_for_target(raw_target):
    """Translate one raw ``target`` value into a sentiment label.

    Values that cannot be read as an integer (NaN filled in by pandas for
    documents lacking the field, ``None``, non-numeric strings) become
    "Unknown" instead of raising, so a few malformed documents cannot abort
    the whole summary.
    """
    # Label encoding from sentiment140.csv: 0 -> Negative, 2 -> Neutral, 4 -> Positive.
    sentiment_mapping = {0: "Negative", 2: "Neutral", 4: "Positive"}
    try:
        return sentiment_mapping.get(int(raw_target), "Unknown")
    except (TypeError, ValueError, OverflowError):
        return "Unknown"


def get_dataset_insights():
    """Summarise the sentiment distribution of the documents stored in MongoDB.

    Reads the ``target`` field of every document in the collection returned
    by ``get_mongo_client()`` and counts how many fall under each label.

    Returns:
        str: One of
            * "The dataset in MongoDB is empty." when no documents exist,
            * "The dataset does not have a 'target' field." when no document
              carries that field,
            * "The dataset sentiment distribution is: <label>: <count>, ..."
              with labels ordered by descending count,
            * "Error retrieving dataset insights: <exception>" on any failure.
        The function never raises; failures are reported in the returned text.
    """
    try:
        collection = get_mongo_client()
        # Only `target` is needed for the counts, so project it alone rather
        # than transferring every field of every stored document.
        data = list(collection.find({}, {"_id": 0, "target": 1}))
        if not data:
            return "The dataset in MongoDB is empty."
        df = pd.DataFrame(data)
        if "target" not in df.columns:
            return "The dataset does not have a 'target' field."
        labels = df["target"].map(_label_for_target)
        summary = labels.value_counts().to_dict()
        summary_str = ", ".join(f"{k}: {v}" for k, v in summary.items())
        return f"The dataset sentiment distribution is: {summary_str}."
    except Exception as e:
        # Boundary handler: the result is shown to the user as chat text.
        return f"Error retrieving dataset insights: {e}"
89
+
90
+ # Function to generate AI response along with sentiment and topic analysis
91
def chatbot_response(user_prompt):
    """Answer *user_prompt* and analyse its sentiment and topic.

    Prompts that ``is_dataset_query`` recognises are answered with
    ``get_dataset_insights()``; every other prompt is sent to the
    "gemini-1.5-pro" model. In both cases the prompt itself is then passed to
    ``analyze_sentiment`` and ``extract_topic``.

    Args:
        user_prompt: Text entered by the user.

    Returns:
        tuple: ``(response_text, sentiment_label, sentiment_confidence,
        topic_label, topic_confidence)``.
            * All five items are ``None`` when the prompt is empty or only
              whitespace.
            * On any exception the first item is "❌ Error: <exception>" and
              the remaining four are ``None``.
    """
    # A blank prompt carries nothing to answer or analyse.
    if not user_prompt or not user_prompt.strip():
        return None, None, None, None, None

    try:
        if is_dataset_query(user_prompt):
            response_text = get_dataset_insights()
        else:
            # Generate AI response using Gemini
            model_gen = genai.GenerativeModel("gemini-1.5-pro")
            response_text = model_gen.generate_content(user_prompt).text

        # The analysis is identical for both kinds of prompt, so it runs once
        # here, inside the same error handling as the response generation.
        sentiment_label, sentiment_confidence = analyze_sentiment(user_prompt)
        topic_label, topic_confidence = extract_topic(user_prompt)
        return response_text, sentiment_label, sentiment_confidence, topic_label, topic_confidence
    except Exception as e:
        return f"❌ Error: {e}", None, None, None, None