Spaces:
Sleeping
Sleeping
KrSharangrav
committed on
Commit
·
8d3fcda
1
Parent(s):
979706a
changes in the logic
Browse files
- app.py +6 -6
- chatbot.py +28 -65
- db.py +0 -4
app.py
CHANGED
|
@@ -1,16 +1,16 @@
|
|
| 1 |
import streamlit as st
|
|
|
|
| 2 |
from db import insert_data_if_empty, get_mongo_client
|
| 3 |
from chatbot import chatbot_response
|
| 4 |
|
| 5 |
-
#
|
| 6 |
insert_data_if_empty()
|
| 7 |
|
| 8 |
-
#
|
| 9 |
collection = get_mongo_client()
|
| 10 |
|
| 11 |
-
st.subheader("💬 Chatbot
|
| 12 |
-
st.
|
| 13 |
-
user_prompt = st.text_area("Your Query:")
|
| 14 |
|
| 15 |
if st.button("Get AI Response"):
|
| 16 |
ai_response, sentiment_label, sentiment_confidence, topic_label, topic_confidence = chatbot_response(user_prompt)
|
|
@@ -22,4 +22,4 @@ if st.button("Get AI Response"):
|
|
| 22 |
st.write("### Category Extraction:")
|
| 23 |
st.write(f"**Detected Category:** {topic_label} ({topic_confidence:.2f} confidence)")
|
| 24 |
else:
|
| 25 |
-
st.warning("⚠️ Please enter a
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
from db import insert_data_if_empty, get_mongo_client
|
| 4 |
from chatbot import chatbot_response
|
| 5 |
|
| 6 |
+
# Insert the dataset into MongoDB if not already present.
|
| 7 |
insert_data_if_empty()
|
| 8 |
|
| 9 |
+
# Connect to MongoDB (useful for potential visualizations)
|
| 10 |
collection = get_mongo_client()
|
| 11 |
|
| 12 |
+
st.subheader("💬 Chatbot: Analyze MongoDB Entries")
|
| 13 |
+
user_prompt = st.text_area("Ask me something (e.g., 'Provide analysis for the data entry 1 in the dataset'):")
|
|
|
|
| 14 |
|
| 15 |
if st.button("Get AI Response"):
|
| 16 |
ai_response, sentiment_label, sentiment_confidence, topic_label, topic_confidence = chatbot_response(user_prompt)
|
|
|
|
| 22 |
st.write("### Category Extraction:")
|
| 23 |
st.write(f"**Detected Category:** {topic_label} ({topic_confidence:.2f} confidence)")
|
| 24 |
else:
|
| 25 |
+
st.warning("⚠️ Please enter a question or text for analysis.")
|
chatbot.py
CHANGED
|
@@ -3,7 +3,7 @@ import re
|
|
| 3 |
import streamlit as st
|
| 4 |
import google.generativeai as genai
|
| 5 |
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
|
| 6 |
-
from db import get_entry_by_index
|
| 7 |
|
| 8 |
# Configure Gemini API key
|
| 9 |
GEMINI_API_KEY = os.getenv("gemini_api")
|
|
@@ -33,94 +33,57 @@ TOPIC_LABELS = [
|
|
| 33 |
"Health", "Science", "Education", "Finance", "Travel", "Food"
|
| 34 |
]
|
| 35 |
|
| 36 |
-
# Function to analyze sentiment using the pre-trained model
|
| 37 |
def analyze_sentiment(text):
|
| 38 |
try:
|
| 39 |
-
|
| 40 |
-
label =
|
| 41 |
-
score =
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
"LABEL_1": "Neutral",
|
| 45 |
-
"LABEL_2": "Positive"
|
| 46 |
-
}
|
| 47 |
-
return sentiment_mapping.get(label, "Unknown"), score
|
| 48 |
except Exception as e:
|
| 49 |
return f"Error analyzing sentiment: {e}", None
|
| 50 |
|
| 51 |
-
# Function to extract topic using zero-shot classification
|
| 52 |
def extract_topic(text):
|
| 53 |
try:
|
| 54 |
-
|
| 55 |
-
top_topic =
|
| 56 |
-
confidence =
|
| 57 |
return top_topic, confidence
|
| 58 |
except Exception as e:
|
| 59 |
return f"Error extracting topic: {e}", None
|
| 60 |
|
| 61 |
-
#
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
match = re.search(
|
| 65 |
if match:
|
| 66 |
-
# Convert to
|
| 67 |
-
|
| 68 |
-
|
|
|
|
| 69 |
|
| 70 |
-
# Helper to filter the generative response.
|
| 71 |
-
# We expect the response to contain:
|
| 72 |
-
# "Let's break down this tweet-like MongoDB entry:" followed by text,
|
| 73 |
-
# then "Conclusion:" followed by text.
|
| 74 |
-
# We remove any extra parts and remove the header "Conclusion:".
|
| 75 |
-
def filter_ai_response(ai_text):
|
| 76 |
-
breakdown_marker = "Let's break down this tweet-like MongoDB entry:"
|
| 77 |
-
conclusion_marker = "Conclusion:"
|
| 78 |
-
if breakdown_marker in ai_text and conclusion_marker in ai_text:
|
| 79 |
-
# Split into two parts.
|
| 80 |
-
parts = ai_text.split(breakdown_marker, 1)[1]
|
| 81 |
-
breakdown_part, conclusion_part = parts.split(conclusion_marker, 1)
|
| 82 |
-
# Rebuild output with the breakdown section and the conclusion content (without the header)
|
| 83 |
-
filtered = breakdown_marker + "\n" + breakdown_part.strip() + "\n" + conclusion_part.strip()
|
| 84 |
-
return filtered
|
| 85 |
-
else:
|
| 86 |
-
# If the markers aren't found, return the original text.
|
| 87 |
-
return ai_text
|
| 88 |
-
|
| 89 |
-
# Main function to generate AI response along with sentiment and category analysis.
|
| 90 |
-
# If the prompt asks for a specific entry, fetch its "text" from MongoDB and build a custom prompt.
|
| 91 |
def chatbot_response(user_prompt):
|
| 92 |
if not user_prompt:
|
| 93 |
return None, None, None, None, None
|
| 94 |
-
|
| 95 |
try:
|
| 96 |
-
|
| 97 |
-
if
|
| 98 |
-
entry = get_entry_by_index(
|
| 99 |
if entry is None:
|
| 100 |
return "❌ No entry found for the requested index.", None, None, None, None
|
| 101 |
entry_text = entry.get("text", "No text available.")
|
| 102 |
-
#
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
f"{entry_text}\n\n"
|
| 106 |
-
"Please respond in the following format:\n"
|
| 107 |
-
"Let's break down this tweet-like MongoDB entry:\n[Your detailed analysis here]\n"
|
| 108 |
-
"Conclusion:\n[Your conclusion here]"
|
| 109 |
-
)
|
| 110 |
-
# Run sentiment and topic analysis on the entry's text.
|
| 111 |
sentiment_label, sentiment_confidence = analyze_sentiment(entry_text)
|
| 112 |
topic_label, topic_confidence = extract_topic(entry_text)
|
|
|
|
| 113 |
else:
|
| 114 |
-
#
|
| 115 |
-
|
|
|
|
| 116 |
sentiment_label, sentiment_confidence = analyze_sentiment(user_prompt)
|
| 117 |
topic_label, topic_confidence = extract_topic(user_prompt)
|
| 118 |
-
|
| 119 |
-
# Generate AI response using Gemini.
|
| 120 |
-
model_gen = genai.GenerativeModel("gemini-1.5-pro")
|
| 121 |
-
ai_response = model_gen.generate_content(combined_prompt)
|
| 122 |
-
# Filter the generative response to show only the required sections.
|
| 123 |
-
filtered_response = filter_ai_response(ai_response.text)
|
| 124 |
-
return filtered_response, sentiment_label, sentiment_confidence, topic_label, topic_confidence
|
| 125 |
except Exception as e:
|
| 126 |
return f"❌ Error: {e}", None, None, None, None
|
|
|
|
| 3 |
import streamlit as st
|
| 4 |
import google.generativeai as genai
|
| 5 |
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
|
| 6 |
+
from db import get_entry_by_index
|
| 7 |
|
| 8 |
# Configure Gemini API key
|
| 9 |
GEMINI_API_KEY = os.getenv("gemini_api")
|
|
|
|
| 33 |
"Health", "Science", "Education", "Finance", "Travel", "Food"
|
| 34 |
]
|
| 35 |
|
|
|
|
| 36 |
def analyze_sentiment(text):
    """Classify the sentiment of ``text`` using the module's sentiment pipeline.

    Args:
        text: The text to classify.

    Returns:
        A ``(label, score)`` pair. ``label`` is "Negative", "Neutral" or
        "Positive" (or "Unknown" when the pipeline emits a raw label other
        than LABEL_0/1/2) and ``score`` is the score reported for the first
        prediction. If anything fails, returns
        ``("Error analyzing sentiment: <details>", None)`` instead of raising.
    """
    try:
        # Only the first prediction returned by the pipeline is used.
        prediction = sentiment_pipeline(text)[0]
        readable_label = {
            "LABEL_0": "Negative",
            "LABEL_1": "Neutral",
            "LABEL_2": "Positive",
        }.get(prediction['label'], "Unknown")
        return readable_label, prediction['score']
    except Exception as e:
        return f"Error analyzing sentiment: {e}", None
|
| 45 |
|
|
|
|
| 46 |
def extract_topic(text):
    """Pick the best-matching topic for ``text`` from ``TOPIC_LABELS``.

    Args:
        text: The text to categorise.

    Returns:
        A ``(topic, confidence)`` pair taken from the first entries of the
        pipeline's ``"labels"`` and ``"scores"`` lists (presumably ordered
        best-first -- confirm against the pipeline in use). If anything
        fails, returns ``("Error extracting topic: <details>", None)``
        instead of raising.
    """
    try:
        ranking = topic_pipeline(text, TOPIC_LABELS)
        best_label, best_score = ranking["labels"][0], ranking["scores"][0]
        return best_label, best_score
    except Exception as e:
        return f"Error extracting topic: {e}", None
|
| 54 |
|
| 55 |
+
# Detect queries like "data entry 1" or "entry 3" (case-insensitive)
|
| 56 |
+
def is_entry_query(prompt):
    """Detect a request for a specific dataset entry, e.g. "data entry 1".

    Matching is case-insensitive and accepts "entry N" or "data entry N",
    with or without whitespace before the number. The word must start at a
    word boundary, so unrelated words such as "reentry 5" are not treated as
    entry requests.

    Args:
        prompt: The user's free-text query. Empty or ``None`` is accepted.

    Returns:
        ``(True, index)`` when the prompt names a positive entry number,
        where ``index`` is zero-based (entry number minus one); otherwise
        ``(False, None)``.
    """
    if not prompt:
        return False, None

    match = re.search(r"\b(?:data\s*)?entry\s*(\d+)", prompt, re.IGNORECASE)
    if match is None:
        return False, None

    entry_number = int(match.group(1))
    if entry_number < 1:
        # User numbering starts at 1; "entry 0" would become index -1, which
        # is not a usable offset, so it is not treated as an entry request.
        return False, None
    return True, entry_number - 1
|
| 64 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
def chatbot_response(user_prompt):
    """Build the chatbot reply plus sentiment and topic analysis for a prompt.

    If the prompt references a dataset entry (as decided by
    ``is_entry_query``), the entry is fetched with ``get_entry_by_index`` and
    the analysis runs on the entry's ``"text"`` field, with a fixed
    introductory reply. Otherwise the prompt itself is sent to the Gemini
    model and analysed.

    Args:
        user_prompt: The text typed by the user.

    Returns:
        A 5-tuple ``(ai_response, sentiment_label, sentiment_confidence,
        topic_label, topic_confidence)``. All five are ``None`` for an empty
        or whitespace-only prompt. On a missing entry or any exception, the
        first element is a user-facing message and the rest are ``None``.
    """
    # Treat blank input (including whitespace-only) as "no prompt" so it is
    # never forwarded to the generative model.
    if not user_prompt or (isinstance(user_prompt, str) and not user_prompt.strip()):
        return None, None, None, None, None

    try:
        entry_query, index = is_entry_query(user_prompt)
        if entry_query:
            entry = get_entry_by_index(index)
            if entry is None:
                return "❌ No entry found for the requested index.", None, None, None, None
            text_to_analyze = entry.get("text", "No text available.")
            # Entry queries intentionally get a fixed reply rather than a
            # generated one.
            ai_response_text = "Let's break down this tweet-like MongoDB entry:"
        else:
            # Non-entry queries fall back to the generative model.
            model_gen = genai.GenerativeModel("gemini-1.5-pro")
            ai_response_text = model_gen.generate_content(user_prompt).text
            text_to_analyze = user_prompt

        sentiment_label, sentiment_confidence = analyze_sentiment(text_to_analyze)
        topic_label, topic_confidence = extract_topic(text_to_analyze)
        return ai_response_text, sentiment_label, sentiment_confidence, topic_label, topic_confidence
    except Exception as e:
        # Top-level boundary for the UI: report the failure instead of
        # crashing the Streamlit script.
        return f"❌ Error: {e}", None, None, None, None
|
db.py
CHANGED
|
@@ -3,13 +3,11 @@ import requests
|
|
| 3 |
import io
|
| 4 |
from pymongo import MongoClient
|
| 5 |
|
| 6 |
-
# Function to connect to MongoDB.
|
| 7 |
def get_mongo_client():
|
| 8 |
client = MongoClient("mongodb+srv://groupA:[email protected]/?retryWrites=true&w=majority&appName=SentimentCluster")
|
| 9 |
db = client["sentiment_db"]
|
| 10 |
return db["tweets"]
|
| 11 |
|
| 12 |
-
# Function to insert data if the collection is empty.
|
| 13 |
def insert_data_if_empty():
|
| 14 |
collection = get_mongo_client()
|
| 15 |
if collection.count_documents({}) == 0:
|
|
@@ -24,10 +22,8 @@ def insert_data_if_empty():
|
|
| 24 |
except Exception as e:
|
| 25 |
print(f"❌ Error loading dataset: {e}")
|
| 26 |
|
| 27 |
-
# Function to fetch a specific entry (by index) from the dataset.
|
| 28 |
def get_entry_by_index(index=0):
|
| 29 |
collection = get_mongo_client()
|
| 30 |
-
# Skip 'index' documents and return one document.
|
| 31 |
doc_cursor = collection.find({}, {"_id": 0}).skip(index).limit(1)
|
| 32 |
docs = list(doc_cursor)
|
| 33 |
if docs:
|
|
|
|
| 3 |
import io
|
| 4 |
from pymongo import MongoClient
|
| 5 |
|
|
|
|
| 6 |
# Process-wide MongoClient, created lazily by get_mongo_client().
_mongo_client = None


def get_mongo_client():
    """Return the ``tweets`` collection of the ``sentiment_db`` database.

    The underlying ``MongoClient`` is created on the first call and reused
    afterwards, so repeated calls do not each open a new client and
    connection pool.

    Returns:
        The ``tweets`` collection object.
    """
    global _mongo_client
    if _mongo_client is None:
        # NOTE(review): the connection string embeds credentials in source;
        # consider loading it from an environment variable or secret store.
        # The credential/host portion also looks redacted in this copy --
        # confirm against the real file.
        _mongo_client = MongoClient("mongodb+srv://groupA:[email protected]/?retryWrites=true&w=majority&appName=SentimentCluster")
    return _mongo_client["sentiment_db"]["tweets"]
|
| 10 |
|
|
|
|
| 11 |
def insert_data_if_empty():
|
| 12 |
collection = get_mongo_client()
|
| 13 |
if collection.count_documents({}) == 0:
|
|
|
|
| 22 |
except Exception as e:
|
| 23 |
print(f"❌ Error loading dataset: {e}")
|
| 24 |
|
|
|
|
| 25 |
def get_entry_by_index(index=0):
|
| 26 |
collection = get_mongo_client()
|
|
|
|
| 27 |
doc_cursor = collection.find({}, {"_id": 0}).skip(index).limit(1)
|
| 28 |
docs = list(doc_cursor)
|
| 29 |
if docs:
|