KrSharangrav committed on
Commit e332fa0 · 1 Parent(s): 61ca946

change in the ai response part of chatbot

Files changed (3)
  1. app.py +3 -3
  2. chatbot.py +25 -44
  3. db.py +23 -2
app.py CHANGED
@@ -3,13 +3,13 @@ import pandas as pd
 from db import insert_data_if_empty, get_mongo_client
 from chatbot import chatbot_response
 
-# Insert historical data into MongoDB if not already present
+# Insert historical data into MongoDB if not already present.
 insert_data_if_empty()
 
-# Connect to MongoDB (for potential further use)
+# Connect to MongoDB (this may be used for additional visualizations if needed).
 collection = get_mongo_client()
 
-st.subheader("💬 Chatbot with Sentiment & Topic Analysis")
+st.subheader("💬 Chatbot with Sentiment, Topic Analysis, and Dataset Insights")
 user_prompt = st.text_area("Ask me something:")
 
 if st.button("Get AI Response"):
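
The hunk ends at the button handler. For reference, here is a minimal sketch (not part of this commit) of how the five values returned by chatbot_response could be consumed below that line; the layout and messages are illustrative assumptions, not code from app.py.

    if st.button("Get AI Response"):
        # chatbot_response returns: AI text, sentiment label, sentiment confidence, topic label, topic confidence.
        ai_response, sentiment_label, sentiment_conf, topic_label, topic_conf = chatbot_response(user_prompt)
        if ai_response:
            st.write(ai_response)  # hypothetical display of the Gemini answer
            st.write(f"Sentiment: {sentiment_label} (confidence: {sentiment_conf})")
            st.write(f"Topic: {topic_label} (confidence: {topic_conf})")
        else:
            st.warning("Please enter a prompt first.")  # hypothetical empty-input message
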
chatbot.py CHANGED
@@ -2,8 +2,7 @@ import os
 import streamlit as st
 import google.generativeai as genai
 from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
-import pandas as pd
-from db import get_mongo_client
+from db import get_dataset_summary  # Import the dataset summary function
 
 # Configure Gemini API key
 GEMINI_API_KEY = os.getenv("gemini_api")
@@ -58,53 +57,35 @@ def extract_topic(text):
     except Exception as e:
         return f"Error extracting topic: {e}", None
 
-# Function to determine if the user's query is about the dataset
-def is_dataset_query(text):
-    keywords = ["dataset", "data", "historical", "csv", "stored"]
-    text_lower = text.lower()
-    for keyword in keywords:
-        if keyword in text_lower:
-            return True
-    return False
-
-# Function to retrieve insights from the dataset stored in MongoDB
-def get_dataset_insights():
-    try:
-        collection = get_mongo_client()
-        data = list(collection.find({}, {"_id": 0}))
-        if not data:
-            return "The dataset in MongoDB is empty."
-        df = pd.DataFrame(data)
-        # Map the sentiment labels from sentiment140.csv: 0 -> Negative, 2 -> Neutral, 4 -> Positive.
-        sentiment_mapping = {0: "Negative", 2: "Neutral", 4: "Positive"}
-        if "target" in df.columns:
-            df['sentiment_label'] = df['target'].apply(lambda x: sentiment_mapping.get(int(x), "Unknown"))
-            summary = df['sentiment_label'].value_counts().to_dict()
-            summary_str = ", ".join([f"{k}: {v}" for k, v in summary.items()])
-            return f"The dataset sentiment distribution is: {summary_str}."
-        else:
-            return "The dataset does not have a 'target' field."
-    except Exception as e:
-        return f"Error retrieving dataset insights: {e}"
+# Helper to check if the user query is about the dataset
+def is_dataset_query(prompt):
+    keywords = ["dataset", "data", "csv", "mongodb", "historical"]
+    return any(keyword in prompt.lower() for keyword in keywords)
 
 # Function to generate AI response along with sentiment and topic analysis
 def chatbot_response(user_prompt):
     if not user_prompt:
         return None, None, None, None, None
 
-    # Check if the query is about the dataset
-    if is_dataset_query(user_prompt):
-        dataset_insights = get_dataset_insights()
+    try:
+        # If the query seems related to the dataset, fetch summary insights.
+        if is_dataset_query(user_prompt):
+            dataset_insights = get_dataset_summary()
+            combined_prompt = (
+                f"{user_prompt}\n\nDataset Insights:\n{dataset_insights}\n\n"
+                "Provide a detailed answer that incorporates these dataset insights."
+            )
+        else:
+            combined_prompt = user_prompt
+
+        # Generate AI response using Gemini with the (possibly augmented) prompt.
+        model_gen = genai.GenerativeModel("gemini-1.5-pro")
+        ai_response = model_gen.generate_content(combined_prompt)
+
+        # Perform sentiment analysis and topic extraction on the original user prompt.
         sentiment_label, sentiment_confidence = analyze_sentiment(user_prompt)
         topic_label, topic_confidence = extract_topic(user_prompt)
-        return dataset_insights, sentiment_label, sentiment_confidence, topic_label, topic_confidence
-    else:
-        try:
-            # Generate AI response using Gemini
-            model_gen = genai.GenerativeModel("gemini-1.5-pro")
-            ai_response = model_gen.generate_content(user_prompt)
-            sentiment_label, sentiment_confidence = analyze_sentiment(user_prompt)
-            topic_label, topic_confidence = extract_topic(user_prompt)
-            return ai_response.text, sentiment_label, sentiment_confidence, topic_label, topic_confidence
-        except Exception as e:
-            return f"❌ Error: {e}", None, None, None, None
+
+        return ai_response.text, sentiment_label, sentiment_confidence, topic_label, topic_confidence
+    except Exception as e:
+        return f"❌ Error: {e}", None, None, None, None
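
With this change, dataset questions are no longer answered with a raw summary string; the summary from get_dataset_summary() is folded into the Gemini prompt, so every path returns ai_response.text. A minimal sketch of exercising the new flow (assumes the gemini_api environment variable is set and MongoDB is reachable; the prompts are illustrative):

    from chatbot import chatbot_response, is_dataset_query

    # Keyword routing: "dataset"/"data" match, so insights get prepended to the prompt.
    print(is_dataset_query("How is sentiment distributed in the dataset?"))  # True
    print(is_dataset_query("Write a haiku about rain"))                      # False, prompt goes to Gemini unchanged

    text, sentiment, sent_conf, topic, topic_conf = chatbot_response(
        "How is sentiment distributed in the dataset?"
    )
    print(text)  # Gemini's answer, grounded in the get_dataset_summary() output
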
db.py CHANGED
@@ -3,13 +3,13 @@ import requests
 import io
 from pymongo import MongoClient
 
-# Function to connect to MongoDB
+# Function to connect to MongoDB.
 def get_mongo_client():
     client = MongoClient("mongodb+srv://groupA:[email protected]/?retryWrites=true&w=majority&appName=SentimentCluster")
     db = client["sentiment_db"]
     return db["tweets"]
 
-# Function to insert data if the collection is empty
+# Function to insert data if the collection is empty.
 def insert_data_if_empty():
     collection = get_mongo_client()
     if collection.count_documents({}) == 0:
@@ -23,3 +23,24 @@ def insert_data_if_empty():
             print("✅ Data Inserted into MongoDB!")
         except Exception as e:
             print(f"❌ Error loading dataset: {e}")
+
+# Function to get dataset summary from MongoDB.
+def get_dataset_summary():
+    collection = get_mongo_client()
+    # Aggregate counts for each sentiment target.
+    pipeline = [
+        {"$group": {"_id": "$target", "count": {"$sum": 1}}}
+    ]
+    results = list(collection.aggregate(pipeline))
+    # Map the sentiment target values to labels.
+    mapping = {"0": "Negative", "2": "Neutral", "4": "Positive"}
+    summary_parts = []
+    total = 0
+    for doc in results:
+        target = str(doc["_id"])
+        count = doc["count"]
+        total += count
+        label = mapping.get(target, target)
+        summary_parts.append(f"{label}: {count}")
+    summary = f"Total tweets: {total}. " + ", ".join(summary_parts) + "."
+    return summary
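
The aggregation pushes counting into MongoDB rather than loading every document into pandas as the removed get_dataset_insights did, and _id is stringified before the lookup so the mapping works whether the stored target values are numbers or strings. A minimal usage sketch (assumes the cluster above is reachable and already populated; the printed counts are illustrative, not real results):

    from db import get_dataset_summary

    print(get_dataset_summary())
    # Illustrative output shape:
    # Total tweets: 10000. Negative: 5000, Positive: 5000.
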