KrSharangrav committed on
Commit
6e2dc41
·
1 Parent(s): e332fa0

change to integrate mongodb

Browse files
Files changed (3) hide show
  1. app.py +4 -3
  2. chatbot.py +51 -21
  3. db.py +9 -5
app.py CHANGED
@@ -6,11 +6,12 @@ from chatbot import chatbot_response
6
  # Insert historical data into MongoDB if not already present.
7
  insert_data_if_empty()
8
 
9
- # Connect to MongoDB (this may be used for additional visualizations if needed).
10
  collection = get_mongo_client()
11
 
12
- st.subheader("πŸ’¬ Chatbot with Sentiment, Topic Analysis, and Dataset Insights")
13
- user_prompt = st.text_area("Ask me something:")
 
14
 
15
  if st.button("Get AI Response"):
16
  ai_response, sentiment_label, sentiment_confidence, topic_label, topic_confidence = chatbot_response(user_prompt)
 
6
  # Insert historical data into MongoDB if not already present.
7
  insert_data_if_empty()
8
 
9
+ # Connect to MongoDB (for additional visualizations if needed).
10
  collection = get_mongo_client()
11
 
12
+ st.subheader("πŸ’¬ Chatbot with Sentiment, Topic Analysis, and Dataset Entry Insights")
13
+ user_prompt = st.text_area(
14
+ "Ask me something (e.g., 'What is the sentiment and category for the first data entry in the dataset'):")
15
 
16
  if st.button("Get AI Response"):
17
  ai_response, sentiment_label, sentiment_confidence, topic_label, topic_confidence = chatbot_response(user_prompt)
chatbot.py CHANGED
@@ -2,7 +2,7 @@ import os
2
  import streamlit as st
3
  import google.generativeai as genai
4
  from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
5
- from db import get_dataset_summary # Import the dataset summary function
6
 
7
  # Configure Gemini API key
8
  GEMINI_API_KEY = os.getenv("gemini_api")
@@ -32,7 +32,6 @@ TOPIC_LABELS = [
32
  "Health", "Science", "Education", "Finance", "Travel", "Food"
33
  ]
34
 
35
- # Function to analyze sentiment using the pre-trained model
36
  def analyze_sentiment(text):
37
  try:
38
  sentiment_result = sentiment_pipeline(text)[0]
@@ -47,7 +46,6 @@ def analyze_sentiment(text):
47
  except Exception as e:
48
  return f"Error analyzing sentiment: {e}", None
49
 
50
- # Function to extract topic using zero-shot classification
51
  def extract_topic(text):
52
  try:
53
  topic_result = topic_pipeline(text, TOPIC_LABELS)
@@ -57,35 +55,67 @@ def extract_topic(text):
57
  except Exception as e:
58
  return f"Error extracting topic: {e}", None
59
 
60
- # Helper to check if the user query is about the dataset
61
  def is_dataset_query(prompt):
62
  keywords = ["dataset", "data", "csv", "mongodb", "historical"]
63
  return any(keyword in prompt.lower() for keyword in keywords)
64
 
65
- # Function to generate AI response along with sentiment and topic analysis
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  def chatbot_response(user_prompt):
67
  if not user_prompt:
68
  return None, None, None, None, None
69
 
70
- try:
71
- # If the query seems related to the dataset, fetch summary insights.
72
- if is_dataset_query(user_prompt):
73
- dataset_insights = get_dataset_summary()
 
 
74
  combined_prompt = (
75
- f"{user_prompt}\n\nDataset Insights:\n{dataset_insights}\n\n"
76
- "Provide a detailed answer that incorporates these dataset insights."
77
  )
 
 
 
 
 
 
78
  else:
79
- combined_prompt = user_prompt
80
 
81
- # Generate AI response using Gemini with the (possibly augmented) prompt.
82
- model_gen = genai.GenerativeModel("gemini-1.5-pro")
83
- ai_response = model_gen.generate_content(combined_prompt)
 
 
 
 
 
 
84
 
85
- # Perform sentiment analysis and topic extraction on the original user prompt.
86
- sentiment_label, sentiment_confidence = analyze_sentiment(user_prompt)
87
- topic_label, topic_confidence = extract_topic(user_prompt)
88
 
89
- return ai_response.text, sentiment_label, sentiment_confidence, topic_label, topic_confidence
90
- except Exception as e:
91
- return f"❌ Error: {e}", None, None, None, None
 
 
 
2
  import streamlit as st
3
  import google.generativeai as genai
4
  from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
5
+ from db import get_dataset_summary, get_entry_by_index
6
 
7
  # Configure Gemini API key
8
  GEMINI_API_KEY = os.getenv("gemini_api")
 
32
  "Health", "Science", "Education", "Finance", "Travel", "Food"
33
  ]
34
 
 
35
  def analyze_sentiment(text):
36
  try:
37
  sentiment_result = sentiment_pipeline(text)[0]
 
46
  except Exception as e:
47
  return f"Error analyzing sentiment: {e}", None
48
 
 
49
  def extract_topic(text):
50
  try:
51
  topic_result = topic_pipeline(text, TOPIC_LABELS)
 
55
  except Exception as e:
56
  return f"Error extracting topic: {e}", None
57
 
 
58
  def is_dataset_query(prompt):
59
  keywords = ["dataset", "data", "csv", "mongodb", "historical"]
60
  return any(keyword in prompt.lower() for keyword in keywords)
61
 
62
def extract_entry_index(prompt):
    """Extract a 0-based dataset-entry index from a natural-language prompt.

    Recognizes the ordinal words "first" through "fifth" as well as numeric
    ordinals of any size ("1st", "2nd", "23rd", "112th", ...).

    Args:
        prompt: the user's question, e.g. "what is the 3rd entry".

    Returns:
        The 0-based index as an int, or None if no ordinal is present.
    """
    import re  # local import: keeps this block self-contained

    lowered = prompt.lower()

    # Word ordinals, mapped to 0-based indices.
    word_ordinals = {
        "first": 0,
        "second": 1,
        "third": 2,
        "fourth": 3,
        "fifth": 4,
    }
    for word, index in word_ordinals.items():
        if word in lowered:
            return index

    # Numeric ordinals such as "3rd" or "12th". The word boundary fixes the
    # original substring bug where "1st" matched inside "21st" and returned 0.
    match = re.search(r"\b(\d+)(?:st|nd|rd|th)\b", lowered)
    if match:
        return int(match.group(1)) - 1
    return None
80
+
81
def chatbot_response(user_prompt):
    """Generate a Gemini response plus sentiment/topic analysis for a prompt.

    Routing:
      * A prompt naming a specific entry ("first", "2nd", ...) is answered
        from that MongoDB document; the analyses run on the entry text.
      * Other dataset-related prompts are augmented with summary insights.
      * Anything else is sent to Gemini unchanged; analyses run on the prompt.

    Args:
        user_prompt: the raw text entered by the user (may be empty).

    Returns:
        A 5-tuple (ai_text, sentiment_label, sentiment_confidence,
        topic_label, topic_confidence). All five are None for an empty
        prompt; on failure the first element is an error string and the
        remaining four are None.
    """
    if not user_prompt:
        return None, None, None, None, None

    try:
        # Specific-entry queries: answer from the stored document itself.
        entry_index = extract_entry_index(user_prompt)
        if entry_index is not None:
            entry_text = get_entry_by_index(entry_index)
            if not entry_text:
                return (f"❌ No entry found for index {entry_index+1}.",
                        None, None, None, None)
            combined_prompt = (
                f"Analyze the following dataset entry from MongoDB:\n\n{entry_text}\n\n"
                "Provide detailed insights, including sentiment analysis and category extraction."
            )
            ai_text = _generate_ai_text(combined_prompt)
            # Analyze the entry text, not the user's question about it.
            sentiment_label, sentiment_confidence = analyze_sentiment(entry_text)
            topic_label, topic_confidence = extract_topic(entry_text)
            return ai_text, sentiment_label, sentiment_confidence, topic_label, topic_confidence

        # General dataset questions get augmented with summary statistics.
        if is_dataset_query(user_prompt):
            dataset_insights = get_dataset_summary()
            combined_prompt = (
                f"{user_prompt}\n\nDataset Insights:\n{dataset_insights}\n\n"
                "Provide a detailed answer that incorporates these dataset insights."
            )
        else:
            combined_prompt = user_prompt

        ai_text = _generate_ai_text(combined_prompt)
        # Analyses run on the user's original wording, not the augmented prompt.
        sentiment_label, sentiment_confidence = analyze_sentiment(user_prompt)
        topic_label, topic_confidence = extract_topic(user_prompt)
        return ai_text, sentiment_label, sentiment_confidence, topic_label, topic_confidence
    except Exception as e:
        # Restores the error handling the pre-refactor version had: Gemini or
        # MongoDB failures surface as an error tuple, not an uncaught crash.
        return f"❌ Error: {e}", None, None, None, None


def _generate_ai_text(prompt):
    """Send *prompt* to Gemini 1.5 Pro and return the response text."""
    model_gen = genai.GenerativeModel("gemini-1.5-pro")
    return model_gen.generate_content(prompt).text
db.py CHANGED
@@ -3,13 +3,11 @@ import requests
3
  import io
4
  from pymongo import MongoClient
5
 
6
- # Function to connect to MongoDB.
7
  def get_mongo_client():
8
  client = MongoClient("mongodb+srv://groupA:[email protected]/?retryWrites=true&w=majority&appName=SentimentCluster")
9
  db = client["sentiment_db"]
10
  return db["tweets"]
11
 
12
- # Function to insert data if the collection is empty.
13
  def insert_data_if_empty():
14
  collection = get_mongo_client()
15
  if collection.count_documents({}) == 0:
@@ -24,15 +22,12 @@ def insert_data_if_empty():
24
  except Exception as e:
25
  print(f"❌ Error loading dataset: {e}")
26
 
27
- # Function to get dataset summary from MongoDB.
28
  def get_dataset_summary():
29
  collection = get_mongo_client()
30
- # Aggregate counts for each sentiment target.
31
  pipeline = [
32
  {"$group": {"_id": "$target", "count": {"$sum": 1}}}
33
  ]
34
  results = list(collection.aggregate(pipeline))
35
- # Map the sentiment target values to labels.
36
  mapping = {"0": "Negative", "2": "Neutral", "4": "Positive"}
37
  summary_parts = []
38
  total = 0
@@ -44,3 +39,12 @@ def get_dataset_summary():
44
  summary_parts.append(f"{label}: {count}")
45
  summary = f"Total tweets: {total}. " + ", ".join(summary_parts) + "."
46
  return summary
 
 
 
 
 
 
 
 
 
 
3
  import io
4
  from pymongo import MongoClient
5
 
 
6
def get_mongo_client():
    """Return the 'tweets' collection of the 'sentiment_db' MongoDB database.

    The connection string is taken from the MONGO_URI environment variable
    when set; otherwise it falls back to the historical hard-coded URI, so
    existing deployments keep working unchanged.

    SECURITY NOTE(review): credentials are embedded in source and in version
    history — rotate them and supply the URI exclusively via MONGO_URI.
    """
    import os  # local import: keeps this block self-contained

    uri = os.getenv(
        "MONGO_URI",
        "mongodb+srv://groupA:[email protected]/?retryWrites=true&w=majority&appName=SentimentCluster",
    )
    client = MongoClient(uri)
    db = client["sentiment_db"]
    return db["tweets"]
10
 
 
11
  def insert_data_if_empty():
12
  collection = get_mongo_client()
13
  if collection.count_documents({}) == 0:
 
22
  except Exception as e:
23
  print(f"❌ Error loading dataset: {e}")
24
 
 
25
  def get_dataset_summary():
26
  collection = get_mongo_client()
 
27
  pipeline = [
28
  {"$group": {"_id": "$target", "count": {"$sum": 1}}}
29
  ]
30
  results = list(collection.aggregate(pipeline))
 
31
  mapping = {"0": "Negative", "2": "Neutral", "4": "Positive"}
32
  summary_parts = []
33
  total = 0
 
39
  summary_parts.append(f"{label}: {count}")
40
  summary = f"Total tweets: {total}. " + ", ".join(summary_parts) + "."
41
  return summary
42
+
43
def get_entry_by_index(index):
    """Return the 'text' field of the index-th tweet (0-based), or None.

    Documents are ordered by _id ascending; this presumably reflects
    insertion order for this collection — TODO confirm against how
    insert_data_if_empty() loads the data.

    Args:
        index: 0-based position of the desired document.

    Returns:
        The document's 'text' value, or None when the index is out of
        range, negative, not an int, or the document lacks a 'text' field.
    """
    if not isinstance(index, int) or index < 0:
        # A negative value would make skip() raise; treat it as "not found".
        return None
    collection = get_mongo_client()
    cursor = (
        collection.find({}, {"_id": 0})
        .sort("_id", 1)
        .skip(index)
        .limit(1)
    )
    for document in cursor:
        return document.get("text")
    return None