KrSharangrav committed on
Commit
84326e0
·
1 Parent(s): 4ec2156

change in logic

Browse files
Files changed (3) hide show
  1. app.py +7 -5
  2. chatbot.py +41 -26
  3. db.py +18 -1
app.py CHANGED
@@ -3,15 +3,17 @@ import pandas as pd
3
  from db import insert_data_if_empty, get_mongo_client
4
  from chatbot import chatbot_response
5
 
6
- # Ensure that historical data is inserted if not already present.
7
  insert_data_if_empty()
8
 
9
- # Connect to MongoDB (optional: can be used for additional visualizations).
10
  collection = get_mongo_client()
11
 
12
- st.subheader("πŸ’¬ Chatbot with Analysis for MongoDB Entries")
13
- # Updated hint: ask for analysis of a specific data entry.
14
- user_prompt = st.text_area("Ask me something (e.g., 'Provide analysis for the data entry 1 in the dataset'):")
 
 
15
 
16
  if st.button("Get AI Response"):
17
  ai_response, sentiment_label, sentiment_confidence, topic_label, topic_confidence = chatbot_response(user_prompt)
 
3
  from db import insert_data_if_empty, get_mongo_client
4
  from chatbot import chatbot_response
5
 
6
+ # Ensure historical data is inserted into MongoDB if not already present.
7
  insert_data_if_empty()
8
 
9
+ # Connect to MongoDB (optional: for additional visualizations)
10
  collection = get_mongo_client()
11
 
12
+ st.subheader("πŸ’¬ Chatbot with Dataset Analysis")
13
+ # Updated hint to include examples for basic questions and entry queries.
14
+ user_prompt = st.text_area(
15
+ "Ask me something (e.g., 'Provide analysis for data entry 1 in the dataset' or 'What is the dataset summary?'):"
16
+ )
17
 
18
  if st.button("Get AI Response"):
19
  ai_response, sentiment_label, sentiment_confidence, topic_label, topic_confidence = chatbot_response(user_prompt)
chatbot.py CHANGED
@@ -3,7 +3,7 @@ import re
3
  import streamlit as st
4
  import google.generativeai as genai
5
  from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
6
- from db import get_entry_by_index # For fetching a specific entry from MongoDB
7
 
8
  # Configure Gemini API key
9
  GEMINI_API_KEY = os.getenv("gemini_api")
@@ -56,55 +56,70 @@ def extract_topic(text):
56
  except Exception as e:
57
  return f"Error extracting topic: {e}", None
58
 
59
- # Helper: extract an entry index from a query string.
60
- # For example, "data entry 1" or "entry 2" will return index 0 or 1 respectively.
61
  def extract_entry_index(prompt):
62
  match = re.search(r'(data entry|entry)\s+(\d+)', prompt, re.IGNORECASE)
63
  if match:
64
- index = int(match.group(2)) - 1 # Convert to 0-based index
65
  return index
66
  return None
67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  def chatbot_response(user_prompt):
69
  if not user_prompt:
70
  return None, None, None, None, None
71
 
72
  try:
73
- # Check if the user query asks for a specific dataset entry.
74
- entry_index = extract_entry_index(user_prompt)
75
- if entry_index is not None:
76
- # Fetch the requested entry from MongoDB.
77
- entry = get_entry_by_index(entry_index)
 
 
 
 
 
 
 
 
78
  if entry is None:
79
  return "❌ No entry found for the requested index.", None, None, None, None
80
- # Extract the required fields.
81
  entry_text = entry.get("text", "No text available.")
82
  entry_user = entry.get("user", "Unknown")
83
  entry_date = entry.get("date", "Unknown")
84
-
85
- # Build a static response message with the desired formatting.
86
  ai_response = (
87
  "Let's break down this tweet-like MongoDB entry:\n\n"
88
  f"Tweet: {entry_text}\n"
89
  f"User: {entry_user}\n"
90
  f"Date: {entry_date}"
91
  )
92
-
93
- # Run sentiment and topic analysis on the entry's text.
94
  sentiment_label, sentiment_confidence = analyze_sentiment(entry_text)
95
  topic_label, topic_confidence = extract_topic(entry_text)
96
-
97
- return ai_response, sentiment_label, sentiment_confidence, topic_label, topic_confidence
98
- else:
99
- # For all other queries, use the generative model flow.
100
- model_gen = genai.GenerativeModel("gemini-1.5-pro")
101
- ai_response_obj = model_gen.generate_content(user_prompt)
102
- ai_response = ai_response_obj.text
103
-
104
- # Perform sentiment and topic analysis on the user prompt.
105
- sentiment_label, sentiment_confidence = analyze_sentiment(user_prompt)
106
- topic_label, topic_confidence = extract_topic(user_prompt)
107
-
108
  return ai_response, sentiment_label, sentiment_confidence, topic_label, topic_confidence
 
 
 
 
 
 
 
 
 
109
  except Exception as e:
110
  return f"❌ Error: {e}", None, None, None, None
 
3
  import streamlit as st
4
  import google.generativeai as genai
5
  from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
6
+ from db import get_entry_by_index, get_dataset_summary
7
 
8
  # Configure Gemini API key
9
  GEMINI_API_KEY = os.getenv("gemini_api")
 
56
  except Exception as e:
57
  return f"Error extracting topic: {e}", None
58
 
59
# Helper: Extract a 1-based entry number from the prompt and return its
# 0-based index (e.g., "data entry 1" yields index 0).
def extract_entry_index(prompt):
    """Return the 0-based dataset index referenced in *prompt*, or None.

    Matches phrases like "entry 3" or "data entry 3" (case-insensitive).
    "entry 0" — or any non-positive number — has no 0-based counterpart,
    so it returns None rather than a negative index that would break the
    downstream MongoDB skip() lookup.
    """
    match = re.search(r'(data entry|entry)\s+(\d+)', prompt, re.IGNORECASE)
    if match:
        number = int(match.group(2))
        if number >= 1:
            return number - 1  # convert to 0-based index
    return None
66
 
67
# Helper: Detect if the query is asking for a specific dataset entry.
def is_entry_query(prompt):
    """Return (True, index) when *prompt* names a dataset entry, else (False, None)."""
    idx = extract_entry_index(prompt)
    return (idx is not None, idx)
73
+
74
# Helper: Detect if the query is a basic dataset question.
# Examples: "What is the dataset summary?", "Show me the sentiment distribution", etc.
def is_basic_dataset_question(prompt):
    """Return True when *prompt* contains a high-level dataset phrase (case-insensitive)."""
    phrases = (
        "dataset summary",
        "total tweets",
        "sentiment distribution",
        "overall dataset",
        "data overview",
        "data summary",
    )
    text = prompt.lower()
    for phrase in phrases:
        if phrase in text:
            return True
    return False
80
+
81
def chatbot_response(user_prompt):
    """Answer *user_prompt* and return a 5-tuple:
    (ai_response, sentiment_label, sentiment_confidence, topic_label, topic_confidence).

    Routing order: basic dataset questions are answered from the MongoDB
    summary; queries naming a specific entry are served from MongoDB;
    everything else falls through to the Gemini generative model. Returns
    all-None on empty input; any failure is reported as an error string.
    """
    if not user_prompt:
        return None, None, None, None, None

    try:
        # Basic dataset questions: answer from the MongoDB summary.
        if is_basic_dataset_question(user_prompt):
            summary = get_dataset_summary()
            response_text = "Dataset Summary:\n" + summary
            # Analysis runs over the summary text itself.
            sentiment_label, sentiment_confidence = analyze_sentiment(summary)
            topic_label, topic_confidence = extract_topic(summary)
            return response_text, sentiment_label, sentiment_confidence, topic_label, topic_confidence

        # Entry-specific questions: fetch the referenced document.
        is_entry, entry_idx = is_entry_query(user_prompt)
        if is_entry:
            entry = get_entry_by_index(entry_idx)
            if entry is None:
                return "❌ No entry found for the requested index.", None, None, None, None
            # Pull the displayed fields, with placeholder fallbacks.
            tweet_text = entry.get("text", "No text available.")
            tweet_user = entry.get("user", "Unknown")
            tweet_date = entry.get("date", "Unknown")
            # Static response message in the required format.
            response_text = (
                "Let's break down this tweet-like MongoDB entry:\n\n"
                f"Tweet: {tweet_text}\n"
                f"User: {tweet_user}\n"
                f"Date: {tweet_date}"
            )
            sentiment_label, sentiment_confidence = analyze_sentiment(tweet_text)
            topic_label, topic_confidence = extract_topic(tweet_text)
            return response_text, sentiment_label, sentiment_confidence, topic_label, topic_confidence

        # Fallback: generative model (this branch may be slower).
        generated = genai.GenerativeModel("gemini-1.5-pro").generate_content(user_prompt)
        response_text = generated.text
        sentiment_label, sentiment_confidence = analyze_sentiment(user_prompt)
        topic_label, topic_confidence = extract_topic(user_prompt)
        return response_text, sentiment_label, sentiment_confidence, topic_label, topic_confidence

    except Exception as e:
        return f"❌ Error: {e}", None, None, None, None
db.py CHANGED
@@ -27,9 +27,26 @@ def insert_data_if_empty():
27
  except Exception as e:
28
  print(f"❌ Error loading dataset: {e}")
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  def get_entry_by_index(index=0):
31
  collection = get_mongo_client()
32
- # Fetch the document by skipping "index" entries.
33
  doc_cursor = collection.find({}, {"_id": 0}).skip(index).limit(1)
34
  docs = list(doc_cursor)
35
  if docs:
 
27
  except Exception as e:
28
  print(f"❌ Error loading dataset: {e}")
29
 
30
def get_dataset_summary():
    """Build a one-line summary of the dataset stored in MongoDB.

    Groups documents by their ``target`` field and returns a string such
    as ``"Total tweets: 100. Negative: 40, Positive: 60."``. Parts are
    sorted by label so the text is deterministic — ``$group`` emits its
    groups in no guaranteed order, so the original concatenation order
    could change between runs.
    """
    collection = get_mongo_client()
    pipeline = [
        {"$group": {"_id": "$target", "count": {"$sum": 1}}}
    ]
    results = list(collection.aggregate(pipeline))
    # Target codes observed in this dataset: 0=Negative, 2=Neutral, 4=Positive;
    # unknown codes fall back to their raw string form.
    mapping = {"0": "Negative", "2": "Neutral", "4": "Positive"}
    summary_parts = []
    total = 0
    for doc in results:
        count = doc["count"]
        total += count
        label = mapping.get(str(doc["_id"]), str(doc["_id"]))
        summary_parts.append((label, count))
    body = ", ".join(f"{label}: {count}" for label, count in sorted(summary_parts))
    return f"Total tweets: {total}. " + body + "."
47
+
48
  def get_entry_by_index(index=0):
49
  collection = get_mongo_client()
 
50
  doc_cursor = collection.find({}, {"_id": 0}).skip(index).limit(1)
51
  docs = list(doc_cursor)
52
  if docs: