Spaces:
Sleeping
Sleeping
KrSharangrav
committed on
Commit
·
979706a
1
Parent(s):
6e2dc41
change in the interaction
Browse files- app.py +6 -7
- chatbot.py +58 -53
- db.py +8 -23
app.py
CHANGED
@@ -1,17 +1,16 @@
|
|
1 |
import streamlit as st
|
2 |
-
import pandas as pd
|
3 |
from db import insert_data_if_empty, get_mongo_client
|
4 |
from chatbot import chatbot_response
|
5 |
|
6 |
-
#
|
7 |
insert_data_if_empty()
|
8 |
|
9 |
-
# Connect to MongoDB
|
10 |
collection = get_mongo_client()
|
11 |
|
12 |
-
st.subheader("💬 Chatbot with
|
13 |
-
|
14 |
-
|
15 |
|
16 |
if st.button("Get AI Response"):
|
17 |
ai_response, sentiment_label, sentiment_confidence, topic_label, topic_confidence = chatbot_response(user_prompt)
|
@@ -23,4 +22,4 @@ if st.button("Get AI Response"):
|
|
23 |
st.write("### Category Extraction:")
|
24 |
st.write(f"**Detected Category:** {topic_label} ({topic_confidence:.2f} confidence)")
|
25 |
else:
|
26 |
-
st.warning("⚠️ Please enter a
|
|
|
1 |
import streamlit as st
|
|
|
2 |
from db import insert_data_if_empty, get_mongo_client
|
3 |
from chatbot import chatbot_response
|
4 |
|
5 |
+
# Ensure the historical data is inserted into MongoDB if not already present.
|
6 |
insert_data_if_empty()
|
7 |
|
8 |
+
# (Optional) Connect to MongoDB for further visualization if needed.
|
9 |
collection = get_mongo_client()
|
10 |
|
11 |
+
st.subheader("💬 Chatbot with Analysis for Specific MongoDB Entries")
|
12 |
+
st.write("Ask me something (e.g., 'Provide analysis for the data entry 1 in the dataset'): ")
|
13 |
+
user_prompt = st.text_area("Your Query:")
|
14 |
|
15 |
if st.button("Get AI Response"):
|
16 |
ai_response, sentiment_label, sentiment_confidence, topic_label, topic_confidence = chatbot_response(user_prompt)
|
|
|
22 |
st.write("### Category Extraction:")
|
23 |
st.write(f"**Detected Category:** {topic_label} ({topic_confidence:.2f} confidence)")
|
24 |
else:
|
25 |
+
st.warning("⚠️ Please enter a valid query for analysis.")
|
chatbot.py
CHANGED
@@ -1,8 +1,9 @@
|
|
1 |
import os
|
|
|
2 |
import streamlit as st
|
3 |
import google.generativeai as genai
|
4 |
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
|
5 |
-
from db import
|
6 |
|
7 |
# Configure Gemini API key
|
8 |
GEMINI_API_KEY = os.getenv("gemini_api")
|
@@ -32,6 +33,7 @@ TOPIC_LABELS = [
|
|
32 |
"Health", "Science", "Education", "Finance", "Travel", "Food"
|
33 |
]
|
34 |
|
|
|
35 |
def analyze_sentiment(text):
|
36 |
try:
|
37 |
sentiment_result = sentiment_pipeline(text)[0]
|
@@ -46,6 +48,7 @@ def analyze_sentiment(text):
|
|
46 |
except Exception as e:
|
47 |
return f"Error analyzing sentiment: {e}", None
|
48 |
|
|
|
49 |
def extract_topic(text):
|
50 |
try:
|
51 |
topic_result = topic_pipeline(text, TOPIC_LABELS)
|
@@ -55,67 +58,69 @@ def extract_topic(text):
|
|
55 |
except Exception as e:
|
56 |
return f"Error extracting topic: {e}", None
|
57 |
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
"first": 0,
|
66 |
-
"1st": 0,
|
67 |
-
"second": 1,
|
68 |
-
"2nd": 1,
|
69 |
-
"third": 2,
|
70 |
-
"3rd": 2,
|
71 |
-
"fourth": 3,
|
72 |
-
"4th": 3,
|
73 |
-
"fifth": 4,
|
74 |
-
"5th": 4,
|
75 |
-
}
|
76 |
-
for word, index in ordinals.items():
|
77 |
-
if word in prompt.lower():
|
78 |
-
return index
|
79 |
return None
|
80 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
81 |
def chatbot_response(user_prompt):
|
82 |
if not user_prompt:
|
83 |
return None, None, None, None, None
|
84 |
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
|
|
|
|
91 |
combined_prompt = (
|
92 |
-
f"
|
93 |
-
"
|
|
|
|
|
|
|
94 |
)
|
95 |
-
|
96 |
-
ai_response = model_gen.generate_content(combined_prompt)
|
97 |
-
# Analyze the entry text.
|
98 |
sentiment_label, sentiment_confidence = analyze_sentiment(entry_text)
|
99 |
topic_label, topic_confidence = extract_topic(entry_text)
|
100 |
-
return ai_response.text, sentiment_label, sentiment_confidence, topic_label, topic_confidence
|
101 |
else:
|
102 |
-
|
|
|
|
|
|
|
103 |
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
combined_prompt = user_prompt
|
113 |
-
|
114 |
-
model_gen = genai.GenerativeModel("gemini-1.5-pro")
|
115 |
-
ai_response = model_gen.generate_content(combined_prompt)
|
116 |
-
|
117 |
-
# Run sentiment analysis and topic extraction on the original user prompt.
|
118 |
-
sentiment_label, sentiment_confidence = analyze_sentiment(user_prompt)
|
119 |
-
topic_label, topic_confidence = extract_topic(user_prompt)
|
120 |
-
|
121 |
-
return ai_response.text, sentiment_label, sentiment_confidence, topic_label, topic_confidence
|
|
|
1 |
import os
|
2 |
+
import re
|
3 |
import streamlit as st
|
4 |
import google.generativeai as genai
|
5 |
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
|
6 |
+
from db import get_entry_by_index # Helper to fetch a document by index
|
7 |
|
8 |
# Configure Gemini API key
|
9 |
GEMINI_API_KEY = os.getenv("gemini_api")
|
|
|
33 |
"Health", "Science", "Education", "Finance", "Travel", "Food"
|
34 |
]
|
35 |
|
36 |
+
# Function to analyze sentiment using the pre-trained model
|
37 |
def analyze_sentiment(text):
|
38 |
try:
|
39 |
sentiment_result = sentiment_pipeline(text)[0]
|
|
|
48 |
except Exception as e:
|
49 |
return f"Error analyzing sentiment: {e}", None
|
50 |
|
51 |
+
# Function to extract topic using zero-shot classification
|
52 |
def extract_topic(text):
|
53 |
try:
|
54 |
topic_result = topic_pipeline(text, TOPIC_LABELS)
|
|
|
58 |
except Exception as e:
|
59 |
return f"Error extracting topic: {e}", None
|
60 |
|
61 |
+
# Detect whether the user is asking about a specific dataset entry.
# Matches phrases such as "data entry 1" or "entry 2" (case-insensitive)
# and returns the zero-based index, or None when no entry is referenced.
def get_entry_index(prompt):
    found = re.search(r'(?:data entry|entry)\s*(\d+)', prompt.lower())
    if found is None:
        return None
    # User-facing numbering is 1-based; convert to a 0-based index.
    return int(found.group(1)) - 1
|
69 |
|
70 |
+
# Trim a generative response down to the sections we want to show.
# The model is prompted to answer as:
#   "Let's break down this tweet-like MongoDB entry:" <analysis>
#   "Conclusion:" <closing text>
# Everything before the breakdown marker is dropped, and the literal
# "Conclusion:" header is removed; if either marker is missing the
# response is returned unchanged.
def filter_ai_response(ai_text):
    breakdown_marker = "Let's break down this tweet-like MongoDB entry:"
    conclusion_marker = "Conclusion:"
    if breakdown_marker not in ai_text or conclusion_marker not in ai_text:
        # Markers absent — nothing to filter.
        return ai_text
    after_breakdown = ai_text.split(breakdown_marker, 1)[1]
    analysis, conclusion = after_breakdown.split(conclusion_marker, 1)
    # Reassemble: keep the breakdown header, drop the "Conclusion:" header.
    return "\n".join([breakdown_marker, analysis.strip(), conclusion.strip()])
|
88 |
+
|
89 |
+
# Produce the Gemini response together with sentiment and category analysis.
# When the prompt references a specific entry (e.g. "data entry 1"), that
# entry's "text" field is pulled from MongoDB and both the generative prompt
# and the analyses are based on it; otherwise the raw user prompt is used.
# Returns a 5-tuple: (response_text, sentiment_label, sentiment_confidence,
# topic_label, topic_confidence). All None for an empty prompt; on any
# failure the first element carries an error message and the rest are None.
def chatbot_response(user_prompt):
    if not user_prompt:
        return None, None, None, None, None

    try:
        entry_index = get_entry_index(user_prompt)
        if entry_index is None:
            # Generic query: analyze and forward the prompt as-is.
            analysis_text = user_prompt
            combined_prompt = user_prompt
        else:
            entry = get_entry_by_index(entry_index)
            if entry is None:
                return "❌ No entry found for the requested index.", None, None, None, None
            analysis_text = entry.get("text", "No text available.")
            # Ask Gemini for a structured breakdown of the entry so that
            # filter_ai_response can isolate the breakdown/conclusion parts.
            combined_prompt = (
                f"Provide analysis for the following MongoDB entry:\n\n"
                f"{analysis_text}\n\n"
                "Please respond in the following format:\n"
                "Let's break down this tweet-like MongoDB entry:\n[Your detailed analysis here]\n"
                "Conclusion:\n[Your conclusion here]"
            )

        # Sentiment and topic run on the selected text (entry or raw prompt).
        sentiment_label, sentiment_confidence = analyze_sentiment(analysis_text)
        topic_label, topic_confidence = extract_topic(analysis_text)

        # Generate the AI response via Gemini.
        model_gen = genai.GenerativeModel("gemini-1.5-pro")
        ai_response = model_gen.generate_content(combined_prompt)
        # Show only the required sections of the generative output.
        filtered_response = filter_ai_response(ai_response.text)
        return filtered_response, sentiment_label, sentiment_confidence, topic_label, topic_confidence
    except Exception as e:
        return f"❌ Error: {e}", None, None, None, None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
db.py
CHANGED
@@ -3,11 +3,13 @@ import requests
|
|
3 |
import io
|
4 |
from pymongo import MongoClient
|
5 |
|
|
|
6 |
def get_mongo_client():
|
7 |
client = MongoClient("mongodb+srv://groupA:[email protected]/?retryWrites=true&w=majority&appName=SentimentCluster")
|
8 |
db = client["sentiment_db"]
|
9 |
return db["tweets"]
|
10 |
|
|
|
11 |
def insert_data_if_empty():
|
12 |
collection = get_mongo_client()
|
13 |
if collection.count_documents({}) == 0:
|
@@ -22,29 +24,12 @@ def insert_data_if_empty():
|
|
22 |
except Exception as e:
|
23 |
print(f"❌ Error loading dataset: {e}")
|
24 |
|
25 |
-
|
|
|
26 |
collection = get_mongo_client()
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
results = list(collection.aggregate(pipeline))
|
31 |
-
mapping = {"0": "Negative", "2": "Neutral", "4": "Positive"}
|
32 |
-
summary_parts = []
|
33 |
-
total = 0
|
34 |
-
for doc in results:
|
35 |
-
target = str(doc["_id"])
|
36 |
-
count = doc["count"]
|
37 |
-
total += count
|
38 |
-
label = mapping.get(target, target)
|
39 |
-
summary_parts.append(f"{label}: {count}")
|
40 |
-
summary = f"Total tweets: {total}. " + ", ".join(summary_parts) + "."
|
41 |
-
return summary
|
42 |
-
|
43 |
-
def get_entry_by_index(index):
|
44 |
-
collection = get_mongo_client()
|
45 |
-
# Sort by _id (assumes insertion order), skip to the requested index, and get one document.
|
46 |
-
document = collection.find({}, {"_id": 0}).sort("_id", 1).skip(index).limit(1)
|
47 |
-
docs = list(document)
|
48 |
if docs:
|
49 |
-
return docs[0]
|
50 |
return None
|
|
|
3 |
import io
|
4 |
from pymongo import MongoClient
|
5 |
|
6 |
+
# Connect to MongoDB and return the tweets collection.
# NOTE(review): credentials are hardcoded in the connection string — move
# them to an environment variable / secret store before shipping.
def get_mongo_client():
    uri = "mongodb+srv://groupA:[email protected]/?retryWrites=true&w=majority&appName=SentimentCluster"
    client = MongoClient(uri)
    return client["sentiment_db"]["tweets"]
|
11 |
|
12 |
+
# Function to insert data if the collection is empty.
|
13 |
def insert_data_if_empty():
|
14 |
collection = get_mongo_client()
|
15 |
if collection.count_documents({}) == 0:
|
|
|
24 |
except Exception as e:
|
25 |
print(f"❌ Error loading dataset: {e}")
|
26 |
|
27 |
+
# Fetch the document at position `index` (0-based) from the tweets collection.
# The _id field is excluded from the projection. Returns None when the index
# is out of range.
def get_entry_by_index(index=0):
    tweets = get_mongo_client()
    # Skip `index` documents, then take at most one.
    cursor = tweets.find({}, {"_id": 0}).skip(index).limit(1)
    for document in cursor:
        return document
    return None
|