KrSharangrav committed on
Commit
8d3fcda
·
1 Parent(s): 979706a

changes in the logic

Browse files
Files changed (3) hide show
  1. app.py +6 -6
  2. chatbot.py +28 -65
  3. db.py +0 -4
app.py CHANGED
@@ -1,16 +1,16 @@
1
  import streamlit as st
 
2
  from db import insert_data_if_empty, get_mongo_client
3
  from chatbot import chatbot_response
4
 
5
- # Ensure the historical data is inserted into MongoDB if not already present.
6
  insert_data_if_empty()
7
 
8
- # (Optional) Connect to MongoDB for further visualization if needed.
9
  collection = get_mongo_client()
10
 
11
- st.subheader("💬 Chatbot with Analysis for Specific MongoDB Entries")
12
- st.write("Ask me something (e.g., 'Provide analysis for the data entry 1 in the dataset'): ")
13
- user_prompt = st.text_area("Your Query:")
14
 
15
  if st.button("Get AI Response"):
16
  ai_response, sentiment_label, sentiment_confidence, topic_label, topic_confidence = chatbot_response(user_prompt)
@@ -22,4 +22,4 @@ if st.button("Get AI Response"):
22
  st.write("### Category Extraction:")
23
  st.write(f"**Detected Category:** {topic_label} ({topic_confidence:.2f} confidence)")
24
  else:
25
- st.warning("⚠️ Please enter a valid query for analysis.")
 
1
  import streamlit as st
2
+ import pandas as pd
3
  from db import insert_data_if_empty, get_mongo_client
4
  from chatbot import chatbot_response
5
 
6
+ # Insert the dataset into MongoDB if not already present.
7
  insert_data_if_empty()
8
 
9
+ # Connect to MongoDB (useful for potential visualizations)
10
  collection = get_mongo_client()
11
 
12
+ st.subheader("💬 Chatbot: Analyze MongoDB Entries")
13
+ user_prompt = st.text_area("Ask me something (e.g., 'Provide analysis for the data entry 1 in the dataset'):")
 
14
 
15
  if st.button("Get AI Response"):
16
  ai_response, sentiment_label, sentiment_confidence, topic_label, topic_confidence = chatbot_response(user_prompt)
 
22
  st.write("### Category Extraction:")
23
  st.write(f"**Detected Category:** {topic_label} ({topic_confidence:.2f} confidence)")
24
  else:
25
+ st.warning("⚠️ Please enter a question or text for analysis.")
chatbot.py CHANGED
@@ -3,7 +3,7 @@ import re
3
  import streamlit as st
4
  import google.generativeai as genai
5
  from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
6
- from db import get_entry_by_index # Helper to fetch a document by index
7
 
8
  # Configure Gemini API key
9
  GEMINI_API_KEY = os.getenv("gemini_api")
@@ -33,94 +33,57 @@ TOPIC_LABELS = [
33
  "Health", "Science", "Education", "Finance", "Travel", "Food"
34
  ]
35
 
36
- # Function to analyze sentiment using the pre-trained model
37
  def analyze_sentiment(text):
38
  try:
39
- sentiment_result = sentiment_pipeline(text)[0]
40
- label = sentiment_result['label']
41
- score = sentiment_result['score']
42
- sentiment_mapping = {
43
- "LABEL_0": "Negative",
44
- "LABEL_1": "Neutral",
45
- "LABEL_2": "Positive"
46
- }
47
- return sentiment_mapping.get(label, "Unknown"), score
48
  except Exception as e:
49
  return f"Error analyzing sentiment: {e}", None
50
 
51
- # Function to extract topic using zero-shot classification
52
  def extract_topic(text):
53
  try:
54
- topic_result = topic_pipeline(text, TOPIC_LABELS)
55
- top_topic = topic_result["labels"][0]
56
- confidence = topic_result["scores"][0]
57
  return top_topic, confidence
58
  except Exception as e:
59
  return f"Error extracting topic: {e}", None
60
 
61
- # Helper to detect if the user asks for a specific entry.
62
- # Searches for patterns like "data entry 1" or "entry 2" (case-insensitive).
63
- def get_entry_index(prompt):
64
- match = re.search(r'(?:data entry|entry)\s*(\d+)', prompt.lower())
65
  if match:
66
- # Convert to 0-indexed value.
67
- return int(match.group(1)) - 1
68
- return None
 
69
 
70
- # Helper to filter the generative response.
71
- # We expect the response to contain:
72
- # "Let's break down this tweet-like MongoDB entry:" followed by text,
73
- # then "Conclusion:" followed by text.
74
- # We remove any extra parts and remove the header "Conclusion:".
75
- def filter_ai_response(ai_text):
76
- breakdown_marker = "Let's break down this tweet-like MongoDB entry:"
77
- conclusion_marker = "Conclusion:"
78
- if breakdown_marker in ai_text and conclusion_marker in ai_text:
79
- # Split into two parts.
80
- parts = ai_text.split(breakdown_marker, 1)[1]
81
- breakdown_part, conclusion_part = parts.split(conclusion_marker, 1)
82
- # Rebuild output with the breakdown section and the conclusion content (without the header)
83
- filtered = breakdown_marker + "\n" + breakdown_part.strip() + "\n" + conclusion_part.strip()
84
- return filtered
85
- else:
86
- # If the markers aren't found, return the original text.
87
- return ai_text
88
-
89
- # Main function to generate AI response along with sentiment and category analysis.
90
- # If the prompt asks for a specific entry, fetch its "text" from MongoDB and build a custom prompt.
91
  def chatbot_response(user_prompt):
92
  if not user_prompt:
93
  return None, None, None, None, None
94
-
95
  try:
96
- entry_index = get_entry_index(user_prompt)
97
- if entry_index is not None:
98
- entry = get_entry_by_index(entry_index)
99
  if entry is None:
100
  return "❌ No entry found for the requested index.", None, None, None, None
101
  entry_text = entry.get("text", "No text available.")
102
- # Build a prompt instructing the Gemini model to provide analysis in a structured format.
103
- combined_prompt = (
104
- f"Provide analysis for the following MongoDB entry:\n\n"
105
- f"{entry_text}\n\n"
106
- "Please respond in the following format:\n"
107
- "Let's break down this tweet-like MongoDB entry:\n[Your detailed analysis here]\n"
108
- "Conclusion:\n[Your conclusion here]"
109
- )
110
- # Run sentiment and topic analysis on the entry's text.
111
  sentiment_label, sentiment_confidence = analyze_sentiment(entry_text)
112
  topic_label, topic_confidence = extract_topic(entry_text)
 
113
  else:
114
- # If not an entry query, use the user prompt directly.
115
- combined_prompt = user_prompt
 
116
  sentiment_label, sentiment_confidence = analyze_sentiment(user_prompt)
117
  topic_label, topic_confidence = extract_topic(user_prompt)
118
-
119
- # Generate AI response using Gemini.
120
- model_gen = genai.GenerativeModel("gemini-1.5-pro")
121
- ai_response = model_gen.generate_content(combined_prompt)
122
- # Filter the generative response to show only the required sections.
123
- filtered_response = filter_ai_response(ai_response.text)
124
- return filtered_response, sentiment_label, sentiment_confidence, topic_label, topic_confidence
125
  except Exception as e:
126
  return f"❌ Error: {e}", None, None, None, None
 
3
  import streamlit as st
4
  import google.generativeai as genai
5
  from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
6
+ from db import get_entry_by_index
7
 
8
  # Configure Gemini API key
9
  GEMINI_API_KEY = os.getenv("gemini_api")
 
33
  "Health", "Science", "Education", "Finance", "Travel", "Food"
34
  ]
35
 
 
36
  def analyze_sentiment(text):
37
  try:
38
+ result = sentiment_pipeline(text)[0]
39
+ label = result['label']
40
+ score = result['score']
41
+ mapping = {"LABEL_0": "Negative", "LABEL_1": "Neutral", "LABEL_2": "Positive"}
42
+ return mapping.get(label, "Unknown"), score
 
 
 
 
43
  except Exception as e:
44
  return f"Error analyzing sentiment: {e}", None
45
 
 
46
  def extract_topic(text):
47
  try:
48
+ result = topic_pipeline(text, TOPIC_LABELS)
49
+ top_topic = result["labels"][0]
50
+ confidence = result["scores"][0]
51
  return top_topic, confidence
52
  except Exception as e:
53
  return f"Error extracting topic: {e}", None
54
 
55
+ # Detect queries like "data entry 1" or "entry 3" (case-insensitive)
56
+ def is_entry_query(prompt):
57
+ pattern = r"(?:data entry|entry)\s*(\d+)"
58
+ match = re.search(pattern, prompt, re.IGNORECASE)
59
  if match:
60
+ # Convert to index (assuming user numbering starts at 1)
61
+ index = int(match.group(1)) - 1
62
+ return True, index
63
+ return False, None
64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  def chatbot_response(user_prompt):
66
  if not user_prompt:
67
  return None, None, None, None, None
 
68
  try:
69
+ entry_query, index = is_entry_query(user_prompt)
70
+ if entry_query:
71
+ entry = get_entry_by_index(index)
72
  if entry is None:
73
  return "❌ No entry found for the requested index.", None, None, None, None
74
  entry_text = entry.get("text", "No text available.")
75
+ # Fixed AI response for entry queries (as per instructions)
76
+ ai_response_text = "Let's break down this tweet-like MongoDB entry:"
77
+ # Analyze the entry's text
 
 
 
 
 
 
78
  sentiment_label, sentiment_confidence = analyze_sentiment(entry_text)
79
  topic_label, topic_confidence = extract_topic(entry_text)
80
+ return ai_response_text, sentiment_label, sentiment_confidence, topic_label, topic_confidence
81
  else:
82
+ # For non-entry queries, fallback to the generative model as usual.
83
+ model_gen = genai.GenerativeModel("gemini-1.5-pro")
84
+ ai_response = model_gen.generate_content(user_prompt)
85
  sentiment_label, sentiment_confidence = analyze_sentiment(user_prompt)
86
  topic_label, topic_confidence = extract_topic(user_prompt)
87
+ return ai_response.text, sentiment_label, sentiment_confidence, topic_label, topic_confidence
 
 
 
 
 
 
88
  except Exception as e:
89
  return f"❌ Error: {e}", None, None, None, None
db.py CHANGED
@@ -3,13 +3,11 @@ import requests
3
  import io
4
  from pymongo import MongoClient
5
 
6
- # Function to connect to MongoDB.
7
  def get_mongo_client():
8
  client = MongoClient("mongodb+srv://groupA:[email protected]/?retryWrites=true&w=majority&appName=SentimentCluster")
9
  db = client["sentiment_db"]
10
  return db["tweets"]
11
 
12
- # Function to insert data if the collection is empty.
13
  def insert_data_if_empty():
14
  collection = get_mongo_client()
15
  if collection.count_documents({}) == 0:
@@ -24,10 +22,8 @@ def insert_data_if_empty():
24
  except Exception as e:
25
  print(f"❌ Error loading dataset: {e}")
26
 
27
- # Function to fetch a specific entry (by index) from the dataset.
28
  def get_entry_by_index(index=0):
29
  collection = get_mongo_client()
30
- # Skip 'index' documents and return one document.
31
  doc_cursor = collection.find({}, {"_id": 0}).skip(index).limit(1)
32
  docs = list(doc_cursor)
33
  if docs:
 
3
  import io
4
  from pymongo import MongoClient
5
 
 
6
  def get_mongo_client():
7
  client = MongoClient("mongodb+srv://groupA:[email protected]/?retryWrites=true&w=majority&appName=SentimentCluster")
8
  db = client["sentiment_db"]
9
  return db["tweets"]
10
 
 
11
  def insert_data_if_empty():
12
  collection = get_mongo_client()
13
  if collection.count_documents({}) == 0:
 
22
  except Exception as e:
23
  print(f"❌ Error loading dataset: {e}")
24
 
 
25
  def get_entry_by_index(index=0):
26
  collection = get_mongo_client()
 
27
  doc_cursor = collection.find({}, {"_id": 0}).skip(index).limit(1)
28
  docs = list(doc_cursor)
29
  if docs: