KrSharangrav committed on
Commit
979706a
·
1 Parent(s): 6e2dc41

change in the interaction

Browse files
Files changed (3) hide show
  1. app.py +6 -7
  2. chatbot.py +58 -53
  3. db.py +8 -23
app.py CHANGED
@@ -1,17 +1,16 @@
1
  import streamlit as st
2
- import pandas as pd
3
  from db import insert_data_if_empty, get_mongo_client
4
  from chatbot import chatbot_response
5
 
6
- # Insert historical data into MongoDB if not already present.
7
  insert_data_if_empty()
8
 
9
- # Connect to MongoDB (for additional visualizations if needed).
10
  collection = get_mongo_client()
11
 
12
- st.subheader("💬 Chatbot with Sentiment, Topic Analysis, and Dataset Entry Insights")
13
- user_prompt = st.text_area(
14
- "Ask me something (e.g., 'What is the sentiment and category for the first data entry in the dataset'):")
15
 
16
  if st.button("Get AI Response"):
17
  ai_response, sentiment_label, sentiment_confidence, topic_label, topic_confidence = chatbot_response(user_prompt)
@@ -23,4 +22,4 @@ if st.button("Get AI Response"):
23
  st.write("### Category Extraction:")
24
  st.write(f"**Detected Category:** {topic_label} ({topic_confidence:.2f} confidence)")
25
  else:
26
- st.warning("⚠️ Please enter a question or text for analysis.")
 
1
  import streamlit as st
 
2
  from db import insert_data_if_empty, get_mongo_client
3
  from chatbot import chatbot_response
4
 
5
+ # Ensure the historical data is inserted into MongoDB if not already present.
6
  insert_data_if_empty()
7
 
8
+ # (Optional) Connect to MongoDB for further visualization if needed.
9
  collection = get_mongo_client()
10
 
11
+ st.subheader("💬 Chatbot with Analysis for Specific MongoDB Entries")
12
+ st.write("Ask me something (e.g., 'Provide analysis for the data entry 1 in the dataset'): ")
13
+ user_prompt = st.text_area("Your Query:")
14
 
15
  if st.button("Get AI Response"):
16
  ai_response, sentiment_label, sentiment_confidence, topic_label, topic_confidence = chatbot_response(user_prompt)
 
22
  st.write("### Category Extraction:")
23
  st.write(f"**Detected Category:** {topic_label} ({topic_confidence:.2f} confidence)")
24
  else:
25
+ st.warning("⚠️ Please enter a valid query for analysis.")
chatbot.py CHANGED
@@ -1,8 +1,9 @@
1
  import os
 
2
  import streamlit as st
3
  import google.generativeai as genai
4
  from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
5
- from db import get_dataset_summary, get_entry_by_index
6
 
7
  # Configure Gemini API key
8
  GEMINI_API_KEY = os.getenv("gemini_api")
@@ -32,6 +33,7 @@ TOPIC_LABELS = [
32
  "Health", "Science", "Education", "Finance", "Travel", "Food"
33
  ]
34
 
 
35
  def analyze_sentiment(text):
36
  try:
37
  sentiment_result = sentiment_pipeline(text)[0]
@@ -46,6 +48,7 @@ def analyze_sentiment(text):
46
  except Exception as e:
47
  return f"Error analyzing sentiment: {e}", None
48
 
 
49
  def extract_topic(text):
50
  try:
51
  topic_result = topic_pipeline(text, TOPIC_LABELS)
@@ -55,67 +58,69 @@ def extract_topic(text):
55
  except Exception as e:
56
  return f"Error extracting topic: {e}", None
57
 
58
- def is_dataset_query(prompt):
59
- keywords = ["dataset", "data", "csv", "mongodb", "historical"]
60
- return any(keyword in prompt.lower() for keyword in keywords)
61
-
62
- def extract_entry_index(prompt):
63
- # Map ordinal words to indices (0-indexed)
64
- ordinals = {
65
- "first": 0,
66
- "1st": 0,
67
- "second": 1,
68
- "2nd": 1,
69
- "third": 2,
70
- "3rd": 2,
71
- "fourth": 3,
72
- "4th": 3,
73
- "fifth": 4,
74
- "5th": 4,
75
- }
76
- for word, index in ordinals.items():
77
- if word in prompt.lower():
78
- return index
79
  return None
80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  def chatbot_response(user_prompt):
82
  if not user_prompt:
83
  return None, None, None, None, None
84
 
85
- # Check if the query is about a specific dataset entry.
86
- entry_index = extract_entry_index(user_prompt)
87
- if entry_index is not None:
88
- entry_text = get_entry_by_index(entry_index)
89
- if entry_text:
90
- # Create a combined prompt for Gemini to generate detailed insights.
 
 
91
  combined_prompt = (
92
- f"Analyze the following dataset entry from MongoDB:\n\n{entry_text}\n\n"
93
- "Provide detailed insights, including sentiment analysis and category extraction."
 
 
 
94
  )
95
- model_gen = genai.GenerativeModel("gemini-1.5-pro")
96
- ai_response = model_gen.generate_content(combined_prompt)
97
- # Analyze the entry text.
98
  sentiment_label, sentiment_confidence = analyze_sentiment(entry_text)
99
  topic_label, topic_confidence = extract_topic(entry_text)
100
- return ai_response.text, sentiment_label, sentiment_confidence, topic_label, topic_confidence
101
  else:
102
- return f"❌ No entry found for index {entry_index+1}.", None, None, None, None
 
 
 
103
 
104
- # Otherwise, if the query is about the dataset in general.
105
- if is_dataset_query(user_prompt):
106
- dataset_insights = get_dataset_summary()
107
- combined_prompt = (
108
- f"{user_prompt}\n\nDataset Insights:\n{dataset_insights}\n\n"
109
- "Provide a detailed answer that incorporates these dataset insights."
110
- )
111
- else:
112
- combined_prompt = user_prompt
113
-
114
- model_gen = genai.GenerativeModel("gemini-1.5-pro")
115
- ai_response = model_gen.generate_content(combined_prompt)
116
-
117
- # Run sentiment analysis and topic extraction on the original user prompt.
118
- sentiment_label, sentiment_confidence = analyze_sentiment(user_prompt)
119
- topic_label, topic_confidence = extract_topic(user_prompt)
120
-
121
- return ai_response.text, sentiment_label, sentiment_confidence, topic_label, topic_confidence
 
1
  import os
2
+ import re
3
  import streamlit as st
4
  import google.generativeai as genai
5
  from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
6
+ from db import get_entry_by_index # Helper to fetch a document by index
7
 
8
  # Configure Gemini API key
9
  GEMINI_API_KEY = os.getenv("gemini_api")
 
33
  "Health", "Science", "Education", "Finance", "Travel", "Food"
34
  ]
35
 
36
+ # Function to analyze sentiment using the pre-trained model
37
  def analyze_sentiment(text):
38
  try:
39
  sentiment_result = sentiment_pipeline(text)[0]
 
48
  except Exception as e:
49
  return f"Error analyzing sentiment: {e}", None
50
 
51
+ # Function to extract topic using zero-shot classification
52
  def extract_topic(text):
53
  try:
54
  topic_result = topic_pipeline(text, TOPIC_LABELS)
 
58
  except Exception as e:
59
  return f"Error extracting topic: {e}", None
60
 
61
# Helper to detect whether the user is asking about a specific entry.
# Matches patterns such as "data entry 1" or "entry 2" (case-insensitive).
def get_entry_index(prompt):
    """Return the 0-based entry index requested in *prompt*, or None if absent."""
    found = re.search(r'(?:data entry|entry)\s*(\d+)', prompt.lower())
    if found is None:
        return None
    # The user counts entries from 1; internal indexing starts at 0.
    return int(found.group(1)) - 1
69
 
70
# Helper to filter the generative response.
# We expect the response to contain:
# "Let's break down this tweet-like MongoDB entry:" followed by text,
# then "Conclusion:" followed by text.
# We remove any extra parts and remove the header "Conclusion:".
def filter_ai_response(ai_text):
    """Trim a Gemini response to the breakdown section plus the conclusion body.

    Returns *ai_text* unchanged when the expected markers are not present in
    the expected order.
    """
    breakdown_marker = "Let's break down this tweet-like MongoDB entry:"
    conclusion_marker = "Conclusion:"
    if breakdown_marker not in ai_text:
        return ai_text
    after_breakdown = ai_text.split(breakdown_marker, 1)[1]
    # BUG FIX: the previous check (`conclusion_marker in ai_text`) crashed with
    # a ValueError when "Conclusion:" occurred only *before* the breakdown
    # marker, because the subsequent two-name unpack got a single-element
    # split. Check the slice that is actually split instead.
    if conclusion_marker not in after_breakdown:
        return ai_text
    breakdown_part, conclusion_part = after_breakdown.split(conclusion_marker, 1)
    # Rebuild output: breakdown section plus conclusion content (header removed).
    return breakdown_marker + "\n" + breakdown_part.strip() + "\n" + conclusion_part.strip()
88
+
89
# Main chatbot entry point. Returns a 5-tuple:
# (ai_text, sentiment_label, sentiment_confidence, topic_label, topic_confidence).
# When the prompt references a specific MongoDB entry ("entry N"), the
# analysis targets that document's "text" field; otherwise the raw user
# prompt itself is analyzed and forwarded to Gemini.
def chatbot_response(user_prompt):
    if not user_prompt:
        return None, None, None, None, None

    try:
        index = get_entry_index(user_prompt)
        if index is None:
            # General query: analyze the prompt and send it to Gemini verbatim.
            analysis_text = user_prompt
            gemini_prompt = user_prompt
        else:
            document = get_entry_by_index(index)
            if document is None:
                return "❌ No entry found for the requested index.", None, None, None, None
            analysis_text = document.get("text", "No text available.")
            # Ask Gemini for a structured answer so filter_ai_response can
            # trim it down to the breakdown and conclusion sections.
            gemini_prompt = (
                f"Provide analysis for the following MongoDB entry:\n\n"
                f"{analysis_text}\n\n"
                "Please respond in the following format:\n"
                "Let's break down this tweet-like MongoDB entry:\n[Your detailed analysis here]\n"
                "Conclusion:\n[Your conclusion here]"
            )

        # Local sentiment / topic analysis on whichever text was selected.
        sentiment_label, sentiment_confidence = analyze_sentiment(analysis_text)
        topic_label, topic_confidence = extract_topic(analysis_text)

        # Generate the AI response with Gemini, then keep only the sections we want.
        generated = genai.GenerativeModel("gemini-1.5-pro").generate_content(gemini_prompt)
        return (
            filter_ai_response(generated.text),
            sentiment_label,
            sentiment_confidence,
            topic_label,
            topic_confidence,
        )
    except Exception as e:
        # Surface any failure (DB, model, parsing) as a user-visible message.
        return f"❌ Error: {e}", None, None, None, None
 
 
 
 
 
 
 
 
 
 
db.py CHANGED
@@ -3,11 +3,13 @@ import requests
3
  import io
4
  from pymongo import MongoClient
5
 
 
6
  def get_mongo_client():
7
  client = MongoClient("mongodb+srv://groupA:[email protected]/?retryWrites=true&w=majority&appName=SentimentCluster")
8
  db = client["sentiment_db"]
9
  return db["tweets"]
10
 
 
11
  def insert_data_if_empty():
12
  collection = get_mongo_client()
13
  if collection.count_documents({}) == 0:
@@ -22,29 +24,12 @@ def insert_data_if_empty():
22
  except Exception as e:
23
  print(f"❌ Error loading dataset: {e}")
24
 
25
- def get_dataset_summary():
 
26
  collection = get_mongo_client()
27
- pipeline = [
28
- {"$group": {"_id": "$target", "count": {"$sum": 1}}}
29
- ]
30
- results = list(collection.aggregate(pipeline))
31
- mapping = {"0": "Negative", "2": "Neutral", "4": "Positive"}
32
- summary_parts = []
33
- total = 0
34
- for doc in results:
35
- target = str(doc["_id"])
36
- count = doc["count"]
37
- total += count
38
- label = mapping.get(target, target)
39
- summary_parts.append(f"{label}: {count}")
40
- summary = f"Total tweets: {total}. " + ", ".join(summary_parts) + "."
41
- return summary
42
-
43
- def get_entry_by_index(index):
44
- collection = get_mongo_client()
45
- # Sort by _id (assumes insertion order), skip to the requested index, and get one document.
46
- document = collection.find({}, {"_id": 0}).sort("_id", 1).skip(index).limit(1)
47
- docs = list(document)
48
  if docs:
49
- return docs[0].get("text", None)
50
  return None
 
3
  import io
4
  from pymongo import MongoClient
5
 
6
# Connect to MongoDB Atlas and return the tweets collection handle.
# NOTE(review): credentials are hard-coded in the connection URI; consider
# loading them from an environment variable or a secrets store.
def get_mongo_client():
    uri = "mongodb+srv://groupA:[email protected]/?retryWrites=true&w=majority&appName=SentimentCluster"
    return MongoClient(uri)["sentiment_db"]["tweets"]
11
 
12
+ # Function to insert data if the collection is empty.
13
  def insert_data_if_empty():
14
  collection = get_mongo_client()
15
  if collection.count_documents({}) == 0:
 
24
  except Exception as e:
25
  print(f"❌ Error loading dataset: {e}")
26
 
27
# Fetch the document at position `index` (0-based) from the tweets collection,
# with the Mongo `_id` field excluded from the projection.
def get_entry_by_index(index=0):
    """Return the document at *index*, or None when the index is out of range."""
    tweets = get_mongo_client()
    # Skip `index` documents and take at most one.
    cursor = tweets.find({}, {"_id": 0}).skip(index).limit(1)
    for document in cursor:
        return document
    return None