Spaces:
Sleeping
Sleeping
KrSharangrav
committed on
Commit
·
867c886
1
Parent(s):
be89ae1
changes done post new csv insertion
Browse files
- app.py +2 -2
- chatbot.py +46 -36
app.py
CHANGED
@@ -6,10 +6,10 @@ from chatbot import chatbot_response
|
|
6 |
# Insert historical data into MongoDB if not already present
|
7 |
insert_data_if_empty()
|
8 |
|
9 |
-
# Connect to MongoDB (
|
10 |
collection = get_mongo_client()
|
11 |
|
12 |
-
st.subheader("💬 Chatbot with Sentiment
|
13 |
user_prompt = st.text_area("Ask me something:")
|
14 |
|
15 |
if st.button("Get AI Response"):
|
|
|
6 |
# Insert historical data into MongoDB if not already present
|
7 |
insert_data_if_empty()
|
8 |
|
9 |
+
# Connect to MongoDB (for potential further use)
|
10 |
collection = get_mongo_client()
|
11 |
|
12 |
+
st.subheader("💬 Chatbot with Sentiment & Topic Analysis")
|
13 |
user_prompt = st.text_area("Ask me something:")
|
14 |
|
15 |
if st.button("Get AI Response"):
|
chatbot.py
CHANGED
@@ -2,6 +2,8 @@ import os
|
|
2 |
import streamlit as st
|
3 |
import google.generativeai as genai
|
4 |
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
|
|
|
|
|
5 |
|
6 |
# Configure Gemini API key
|
7 |
GEMINI_API_KEY = os.getenv("gemini_api")
|
@@ -56,45 +58,53 @@ def extract_topic(text):
|
|
56 |
except Exception as e:
|
57 |
return f"Error extracting topic: {e}", None
|
58 |
|
59 |
-
# Function to
|
60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
def chatbot_response(user_prompt):
|
62 |
if not user_prompt:
|
63 |
return None, None, None, None, None
|
64 |
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
ai_response = model_gen.generate_content(user_prompt)
|
69 |
-
|
70 |
-
# Perform sentiment analysis on the user prompt
|
71 |
sentiment_label, sentiment_confidence = analyze_sentiment(user_prompt)
|
72 |
-
|
73 |
-
# Perform topic extraction on the user prompt
|
74 |
topic_label, topic_confidence = extract_topic(user_prompt)
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
stats_str = ""
|
87 |
-
total = 0
|
88 |
-
for r in results:
|
89 |
-
key = sentiment_map.get(r["_id"], r["_id"])
|
90 |
-
count = r["count"]
|
91 |
-
total += count
|
92 |
-
stats_str += f"{key}: {count}\n"
|
93 |
-
stats_str += f"Total records: {total}"
|
94 |
-
ai_response_text = ai_response.text + "\n\nDataset Information:\n" + stats_str
|
95 |
-
else:
|
96 |
-
ai_response_text = ai_response.text
|
97 |
-
|
98 |
-
return ai_response_text, sentiment_label, sentiment_confidence, topic_label, topic_confidence
|
99 |
-
except Exception as e:
|
100 |
-
return f"❌ Error: {e}", None, None, None, None
|
|
|
2 |
import streamlit as st
|
3 |
import google.generativeai as genai
|
4 |
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
|
5 |
+
import pandas as pd
|
6 |
+
from db import get_mongo_client
|
7 |
|
8 |
# Configure Gemini API key
|
9 |
GEMINI_API_KEY = os.getenv("gemini_api")
|
|
|
58 |
except Exception as e:
|
59 |
return f"Error extracting topic: {e}", None
|
60 |
|
61 |
+
# Function to determine if the user's query is about the dataset
|
62 |
+
def is_dataset_query(text):
|
63 |
+
keywords = ["dataset", "data", "historical", "csv", "stored"]
|
64 |
+
text_lower = text.lower()
|
65 |
+
for keyword in keywords:
|
66 |
+
if keyword in text_lower:
|
67 |
+
return True
|
68 |
+
return False
|
69 |
+
|
70 |
+
# Function to retrieve insights from the dataset stored in MongoDB
|
71 |
+
def get_dataset_insights():
|
72 |
+
try:
|
73 |
+
collection = get_mongo_client()
|
74 |
+
data = list(collection.find({}, {"_id": 0}))
|
75 |
+
if not data:
|
76 |
+
return "The dataset in MongoDB is empty."
|
77 |
+
df = pd.DataFrame(data)
|
78 |
+
# Map the sentiment labels from sentiment140.csv: 0 -> Negative, 2 -> Neutral, 4 -> Positive.
|
79 |
+
sentiment_mapping = {0: "Negative", 2: "Neutral", 4: "Positive"}
|
80 |
+
if "target" in df.columns:
|
81 |
+
df['sentiment_label'] = df['target'].apply(lambda x: sentiment_mapping.get(int(x), "Unknown"))
|
82 |
+
summary = df['sentiment_label'].value_counts().to_dict()
|
83 |
+
summary_str = ", ".join([f"{k}: {v}" for k, v in summary.items()])
|
84 |
+
return f"The dataset sentiment distribution is: {summary_str}."
|
85 |
+
else:
|
86 |
+
return "The dataset does not have a 'target' field."
|
87 |
+
except Exception as e:
|
88 |
+
return f"Error retrieving dataset insights: {e}"
|
89 |
+
|
90 |
+
# Function to generate AI response along with sentiment and topic analysis
|
91 |
def chatbot_response(user_prompt):
|
92 |
if not user_prompt:
|
93 |
return None, None, None, None, None
|
94 |
|
95 |
+
# Check if the query is about the dataset
|
96 |
+
if is_dataset_query(user_prompt):
|
97 |
+
dataset_insights = get_dataset_insights()
|
|
|
|
|
|
|
98 |
sentiment_label, sentiment_confidence = analyze_sentiment(user_prompt)
|
|
|
|
|
99 |
topic_label, topic_confidence = extract_topic(user_prompt)
|
100 |
+
return dataset_insights, sentiment_label, sentiment_confidence, topic_label, topic_confidence
|
101 |
+
else:
|
102 |
+
try:
|
103 |
+
# Generate AI response using Gemini
|
104 |
+
model_gen = genai.GenerativeModel("gemini-1.5-pro")
|
105 |
+
ai_response = model_gen.generate_content(user_prompt)
|
106 |
+
sentiment_label, sentiment_confidence = analyze_sentiment(user_prompt)
|
107 |
+
topic_label, topic_confidence = extract_topic(user_prompt)
|
108 |
+
return ai_response.text, sentiment_label, sentiment_confidence, topic_label, topic_confidence
|
109 |
+
except Exception as e:
|
110 |
+
return f"❌ Error: {e}", None, None, None, None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|