Spaces:
Sleeping
Sleeping
KrSharangrav
committed on
Commit
·
8d3fcda
1
Parent(s):
979706a
changes in the logic
Browse files
- app.py +6 -6
- chatbot.py +28 -65
- db.py +0 -4
app.py
CHANGED
@@ -1,16 +1,16 @@
|
|
1 |
import streamlit as st
|
|
|
2 |
from db import insert_data_if_empty, get_mongo_client
|
3 |
from chatbot import chatbot_response
|
4 |
|
5 |
-
#
|
6 |
insert_data_if_empty()
|
7 |
|
8 |
-
#
|
9 |
collection = get_mongo_client()
|
10 |
|
11 |
-
st.subheader("💬 Chatbot
|
12 |
-
st.
|
13 |
-
user_prompt = st.text_area("Your Query:")
|
14 |
|
15 |
if st.button("Get AI Response"):
|
16 |
ai_response, sentiment_label, sentiment_confidence, topic_label, topic_confidence = chatbot_response(user_prompt)
|
@@ -22,4 +22,4 @@ if st.button("Get AI Response"):
|
|
22 |
st.write("### Category Extraction:")
|
23 |
st.write(f"**Detected Category:** {topic_label} ({topic_confidence:.2f} confidence)")
|
24 |
else:
|
25 |
-
st.warning("⚠️ Please enter a
|
|
|
1 |
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
from db import insert_data_if_empty, get_mongo_client
|
4 |
from chatbot import chatbot_response
|
5 |
|
6 |
+
# Insert the dataset into MongoDB if not already present.
|
7 |
insert_data_if_empty()
|
8 |
|
9 |
+
# Connect to MongoDB (useful for potential visualizations)
|
10 |
collection = get_mongo_client()
|
11 |
|
12 |
+
st.subheader("💬 Chatbot: Analyze MongoDB Entries")
|
13 |
+
user_prompt = st.text_area("Ask me something (e.g., 'Provide analysis for the data entry 1 in the dataset'):")
|
|
|
14 |
|
15 |
if st.button("Get AI Response"):
|
16 |
ai_response, sentiment_label, sentiment_confidence, topic_label, topic_confidence = chatbot_response(user_prompt)
|
|
|
22 |
st.write("### Category Extraction:")
|
23 |
st.write(f"**Detected Category:** {topic_label} ({topic_confidence:.2f} confidence)")
|
24 |
else:
|
25 |
+
st.warning("⚠️ Please enter a question or text for analysis.")
|
chatbot.py
CHANGED
@@ -3,7 +3,7 @@ import re
|
|
3 |
import streamlit as st
|
4 |
import google.generativeai as genai
|
5 |
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
|
6 |
-
from db import get_entry_by_index
|
7 |
|
8 |
# Configure Gemini API key
|
9 |
GEMINI_API_KEY = os.getenv("gemini_api")
|
@@ -33,94 +33,57 @@ TOPIC_LABELS = [
|
|
33 |
"Health", "Science", "Education", "Finance", "Travel", "Food"
|
34 |
]
|
35 |
|
36 |
-
# Function to analyze sentiment using the pre-trained model
|
37 |
def analyze_sentiment(text):
|
38 |
try:
|
39 |
-
|
40 |
-
label =
|
41 |
-
score =
|
42 |
-
|
43 |
-
|
44 |
-
"LABEL_1": "Neutral",
|
45 |
-
"LABEL_2": "Positive"
|
46 |
-
}
|
47 |
-
return sentiment_mapping.get(label, "Unknown"), score
|
48 |
except Exception as e:
|
49 |
return f"Error analyzing sentiment: {e}", None
|
50 |
|
51 |
-
# Function to extract topic using zero-shot classification
|
52 |
def extract_topic(text):
|
53 |
try:
|
54 |
-
|
55 |
-
top_topic =
|
56 |
-
confidence =
|
57 |
return top_topic, confidence
|
58 |
except Exception as e:
|
59 |
return f"Error extracting topic: {e}", None
|
60 |
|
61 |
-
#
|
62 |
-
|
63 |
-
|
64 |
-
match = re.search(
|
65 |
if match:
|
66 |
-
# Convert to
|
67 |
-
|
68 |
-
|
|
|
69 |
|
70 |
-
# Helper to filter the generative response.
|
71 |
-
# We expect the response to contain:
|
72 |
-
# "Let's break down this tweet-like MongoDB entry:" followed by text,
|
73 |
-
# then "Conclusion:" followed by text.
|
74 |
-
# We remove any extra parts and remove the header "Conclusion:".
|
75 |
-
def filter_ai_response(ai_text):
|
76 |
-
breakdown_marker = "Let's break down this tweet-like MongoDB entry:"
|
77 |
-
conclusion_marker = "Conclusion:"
|
78 |
-
if breakdown_marker in ai_text and conclusion_marker in ai_text:
|
79 |
-
# Split into two parts.
|
80 |
-
parts = ai_text.split(breakdown_marker, 1)[1]
|
81 |
-
breakdown_part, conclusion_part = parts.split(conclusion_marker, 1)
|
82 |
-
# Rebuild output with the breakdown section and the conclusion content (without the header)
|
83 |
-
filtered = breakdown_marker + "\n" + breakdown_part.strip() + "\n" + conclusion_part.strip()
|
84 |
-
return filtered
|
85 |
-
else:
|
86 |
-
# If the markers aren't found, return the original text.
|
87 |
-
return ai_text
|
88 |
-
|
89 |
-
# Main function to generate AI response along with sentiment and category analysis.
|
90 |
-
# If the prompt asks for a specific entry, fetch its "text" from MongoDB and build a custom prompt.
|
91 |
def chatbot_response(user_prompt):
|
92 |
if not user_prompt:
|
93 |
return None, None, None, None, None
|
94 |
-
|
95 |
try:
|
96 |
-
|
97 |
-
if
|
98 |
-
entry = get_entry_by_index(
|
99 |
if entry is None:
|
100 |
return "❌ No entry found for the requested index.", None, None, None, None
|
101 |
entry_text = entry.get("text", "No text available.")
|
102 |
-
#
|
103 |
-
|
104 |
-
|
105 |
-
f"{entry_text}\n\n"
|
106 |
-
"Please respond in the following format:\n"
|
107 |
-
"Let's break down this tweet-like MongoDB entry:\n[Your detailed analysis here]\n"
|
108 |
-
"Conclusion:\n[Your conclusion here]"
|
109 |
-
)
|
110 |
-
# Run sentiment and topic analysis on the entry's text.
|
111 |
sentiment_label, sentiment_confidence = analyze_sentiment(entry_text)
|
112 |
topic_label, topic_confidence = extract_topic(entry_text)
|
|
|
113 |
else:
|
114 |
-
#
|
115 |
-
|
|
|
116 |
sentiment_label, sentiment_confidence = analyze_sentiment(user_prompt)
|
117 |
topic_label, topic_confidence = extract_topic(user_prompt)
|
118 |
-
|
119 |
-
# Generate AI response using Gemini.
|
120 |
-
model_gen = genai.GenerativeModel("gemini-1.5-pro")
|
121 |
-
ai_response = model_gen.generate_content(combined_prompt)
|
122 |
-
# Filter the generative response to show only the required sections.
|
123 |
-
filtered_response = filter_ai_response(ai_response.text)
|
124 |
-
return filtered_response, sentiment_label, sentiment_confidence, topic_label, topic_confidence
|
125 |
except Exception as e:
|
126 |
return f"❌ Error: {e}", None, None, None, None
|
|
|
3 |
import streamlit as st
|
4 |
import google.generativeai as genai
|
5 |
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
|
6 |
+
from db import get_entry_by_index
|
7 |
|
8 |
# Configure Gemini API key
|
9 |
GEMINI_API_KEY = os.getenv("gemini_api")
|
|
|
33 |
"Health", "Science", "Education", "Finance", "Travel", "Food"
|
34 |
]
|
35 |
|
|
|
36 |
def analyze_sentiment(text):
|
37 |
try:
|
38 |
+
result = sentiment_pipeline(text)[0]
|
39 |
+
label = result['label']
|
40 |
+
score = result['score']
|
41 |
+
mapping = {"LABEL_0": "Negative", "LABEL_1": "Neutral", "LABEL_2": "Positive"}
|
42 |
+
return mapping.get(label, "Unknown"), score
|
|
|
|
|
|
|
|
|
43 |
except Exception as e:
|
44 |
return f"Error analyzing sentiment: {e}", None
|
45 |
|
|
|
46 |
def extract_topic(text):
|
47 |
try:
|
48 |
+
result = topic_pipeline(text, TOPIC_LABELS)
|
49 |
+
top_topic = result["labels"][0]
|
50 |
+
confidence = result["scores"][0]
|
51 |
return top_topic, confidence
|
52 |
except Exception as e:
|
53 |
return f"Error extracting topic: {e}", None
|
54 |
|
55 |
+
# Detect queries like "data entry 1" or "entry 3" (case-insensitive)
|
56 |
+
def is_entry_query(prompt):
|
57 |
+
pattern = r"(?:data entry|entry)\s*(\d+)"
|
58 |
+
match = re.search(pattern, prompt, re.IGNORECASE)
|
59 |
if match:
|
60 |
+
# Convert to index (assuming user numbering starts at 1)
|
61 |
+
index = int(match.group(1)) - 1
|
62 |
+
return True, index
|
63 |
+
return False, None
|
64 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
def chatbot_response(user_prompt):
|
66 |
if not user_prompt:
|
67 |
return None, None, None, None, None
|
|
|
68 |
try:
|
69 |
+
entry_query, index = is_entry_query(user_prompt)
|
70 |
+
if entry_query:
|
71 |
+
entry = get_entry_by_index(index)
|
72 |
if entry is None:
|
73 |
return "❌ No entry found for the requested index.", None, None, None, None
|
74 |
entry_text = entry.get("text", "No text available.")
|
75 |
+
# Fixed AI response for entry queries (as per instructions)
|
76 |
+
ai_response_text = "Let's break down this tweet-like MongoDB entry:"
|
77 |
+
# Analyze the entry's text
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
sentiment_label, sentiment_confidence = analyze_sentiment(entry_text)
|
79 |
topic_label, topic_confidence = extract_topic(entry_text)
|
80 |
+
return ai_response_text, sentiment_label, sentiment_confidence, topic_label, topic_confidence
|
81 |
else:
|
82 |
+
# For non-entry queries, fallback to the generative model as usual.
|
83 |
+
model_gen = genai.GenerativeModel("gemini-1.5-pro")
|
84 |
+
ai_response = model_gen.generate_content(user_prompt)
|
85 |
sentiment_label, sentiment_confidence = analyze_sentiment(user_prompt)
|
86 |
topic_label, topic_confidence = extract_topic(user_prompt)
|
87 |
+
return ai_response.text, sentiment_label, sentiment_confidence, topic_label, topic_confidence
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
except Exception as e:
|
89 |
return f"❌ Error: {e}", None, None, None, None
|
db.py
CHANGED
@@ -3,13 +3,11 @@ import requests
|
|
3 |
import io
|
4 |
from pymongo import MongoClient
|
5 |
|
6 |
-
# Function to connect to MongoDB.
|
7 |
def get_mongo_client():
|
8 |
client = MongoClient("mongodb+srv://groupA:[email protected]/?retryWrites=true&w=majority&appName=SentimentCluster")
|
9 |
db = client["sentiment_db"]
|
10 |
return db["tweets"]
|
11 |
|
12 |
-
# Function to insert data if the collection is empty.
|
13 |
def insert_data_if_empty():
|
14 |
collection = get_mongo_client()
|
15 |
if collection.count_documents({}) == 0:
|
@@ -24,10 +22,8 @@ def insert_data_if_empty():
|
|
24 |
except Exception as e:
|
25 |
print(f"❌ Error loading dataset: {e}")
|
26 |
|
27 |
-
# Function to fetch a specific entry (by index) from the dataset.
|
28 |
def get_entry_by_index(index=0):
|
29 |
collection = get_mongo_client()
|
30 |
-
# Skip 'index' documents and return one document.
|
31 |
doc_cursor = collection.find({}, {"_id": 0}).skip(index).limit(1)
|
32 |
docs = list(doc_cursor)
|
33 |
if docs:
|
|
|
3 |
import io
|
4 |
from pymongo import MongoClient
|
5 |
|
|
|
6 |
def get_mongo_client():
|
7 |
client = MongoClient("mongodb+srv://groupA:[email protected]/?retryWrites=true&w=majority&appName=SentimentCluster")
|
8 |
db = client["sentiment_db"]
|
9 |
return db["tweets"]
|
10 |
|
|
|
11 |
def insert_data_if_empty():
|
12 |
collection = get_mongo_client()
|
13 |
if collection.count_documents({}) == 0:
|
|
|
22 |
except Exception as e:
|
23 |
print(f"❌ Error loading dataset: {e}")
|
24 |
|
|
|
25 |
def get_entry_by_index(index=0):
|
26 |
collection = get_mongo_client()
|
|
|
27 |
doc_cursor = collection.find({}, {"_id": 0}).skip(index).limit(1)
|
28 |
docs = list(doc_cursor)
|
29 |
if docs:
|