Spaces:
Sleeping
Sleeping
KrSharangrav
committed on
Commit
·
867c886
1
Parent(s):
be89ae1
changes done post new csv insertion
Browse files
- app.py +2 -2
- chatbot.py +46 -36
app.py
CHANGED
|
@@ -6,10 +6,10 @@ from chatbot import chatbot_response
|
|
| 6 |
# Insert historical data into MongoDB if not already present
|
| 7 |
insert_data_if_empty()
|
| 8 |
|
| 9 |
-
# Connect to MongoDB (
|
| 10 |
collection = get_mongo_client()
|
| 11 |
|
| 12 |
-
st.subheader("💬 Chatbot with Sentiment
|
| 13 |
user_prompt = st.text_area("Ask me something:")
|
| 14 |
|
| 15 |
if st.button("Get AI Response"):
|
|
|
|
| 6 |
# Insert historical data into MongoDB if not already present
|
| 7 |
insert_data_if_empty()
|
| 8 |
|
| 9 |
+
# Connect to MongoDB (for potential further use)
|
| 10 |
collection = get_mongo_client()
|
| 11 |
|
| 12 |
+
st.subheader("💬 Chatbot with Sentiment & Topic Analysis")
|
| 13 |
user_prompt = st.text_area("Ask me something:")
|
| 14 |
|
| 15 |
if st.button("Get AI Response"):
|
chatbot.py
CHANGED
|
@@ -2,6 +2,8 @@ import os
|
|
| 2 |
import streamlit as st
|
| 3 |
import google.generativeai as genai
|
| 4 |
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
|
|
|
|
|
|
|
| 5 |
|
| 6 |
# Configure Gemini API key
|
| 7 |
GEMINI_API_KEY = os.getenv("gemini_api")
|
|
@@ -56,45 +58,53 @@ def extract_topic(text):
|
|
| 56 |
except Exception as e:
|
| 57 |
return f"Error extracting topic: {e}", None
|
| 58 |
|
| 59 |
-
# Function to
|
| 60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
def chatbot_response(user_prompt):
|
| 62 |
if not user_prompt:
|
| 63 |
return None, None, None, None, None
|
| 64 |
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
ai_response = model_gen.generate_content(user_prompt)
|
| 69 |
-
|
| 70 |
-
# Perform sentiment analysis on the user prompt
|
| 71 |
sentiment_label, sentiment_confidence = analyze_sentiment(user_prompt)
|
| 72 |
-
|
| 73 |
-
# Perform topic extraction on the user prompt
|
| 74 |
topic_label, topic_confidence = extract_topic(user_prompt)
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
stats_str = ""
|
| 87 |
-
total = 0
|
| 88 |
-
for r in results:
|
| 89 |
-
key = sentiment_map.get(r["_id"], r["_id"])
|
| 90 |
-
count = r["count"]
|
| 91 |
-
total += count
|
| 92 |
-
stats_str += f"{key}: {count}\n"
|
| 93 |
-
stats_str += f"Total records: {total}"
|
| 94 |
-
ai_response_text = ai_response.text + "\n\nDataset Information:\n" + stats_str
|
| 95 |
-
else:
|
| 96 |
-
ai_response_text = ai_response.text
|
| 97 |
-
|
| 98 |
-
return ai_response_text, sentiment_label, sentiment_confidence, topic_label, topic_confidence
|
| 99 |
-
except Exception as e:
|
| 100 |
-
return f"❌ Error: {e}", None, None, None, None
|
|
|
|
| 2 |
import streamlit as st
|
| 3 |
import google.generativeai as genai
|
| 4 |
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
|
| 5 |
+
import pandas as pd
|
| 6 |
+
from db import get_mongo_client
|
| 7 |
|
| 8 |
# Configure Gemini API key
|
| 9 |
GEMINI_API_KEY = os.getenv("gemini_api")
|
|
|
|
| 58 |
except Exception as e:
|
| 59 |
return f"Error extracting topic: {e}", None
|
| 60 |
|
| 61 |
+
# Function to determine if the user's query is about the dataset
|
| 62 |
+
def is_dataset_query(text):
    """Return True when *text* appears to ask about the stored dataset.

    The check is a case-insensitive substring search for any of the
    keywords "dataset", "data", "historical", "csv" or "stored". Because it
    is a substring search, words that merely contain a keyword (for
    example "database") also match.

    Args:
        text: The user's prompt. Empty or ``None`` input is treated as not
            being a dataset query.

    Returns:
        bool: True if at least one keyword occurs in the text, else False.
    """
    if not text:
        return False
    keywords = ("dataset", "data", "historical", "csv", "stored")
    text_lower = text.lower()
    return any(keyword in text_lower for keyword in keywords)
|
| 69 |
+
|
| 70 |
+
# Function to retrieve insights from the dataset stored in MongoDB
|
| 71 |
+
def _sentiment_label(value, mapping):
    """Translate one raw ``target`` value into a label, or "Unknown"."""
    try:
        return mapping.get(int(value), "Unknown")
    except (TypeError, ValueError, OverflowError):
        # Missing, NaN, infinite or non-numeric targets must not abort the
        # whole summary; count them as "Unknown" instead.
        return "Unknown"


def get_dataset_insights():
    """Summarise the sentiment distribution of the records stored in MongoDB.

    Every document is read (without its ``_id``) from the object returned
    by ``get_mongo_client()`` and loaded into a DataFrame. The ``target``
    column is mapped to labels using the sentiment140.csv convention
    (0 -> Negative, 2 -> Neutral, 4 -> Positive); any other value is counted
    as "Unknown".

    Returns:
        str: A sentence describing the label counts, or an explanatory
        message when the collection is empty, the ``target`` field is
        absent, or an exception occurred while reading the data.
    """
    try:
        # NOTE(review): get_mongo_client() is used as a collection here
        # (it must expose .find) - confirm against db.py.
        collection = get_mongo_client()
        data = list(collection.find({}, {"_id": 0}))
        if not data:
            return "The dataset in MongoDB is empty."

        df = pd.DataFrame(data)
        if "target" not in df.columns:
            return "The dataset does not have a 'target' field."

        sentiment_mapping = {0: "Negative", 2: "Neutral", 4: "Positive"}
        labels = df["target"].apply(
            lambda value: _sentiment_label(value, sentiment_mapping)
        )
        summary = labels.value_counts().to_dict()
        summary_str = ", ".join(f"{k}: {v}" for k, v in summary.items())
        return f"The dataset sentiment distribution is: {summary_str}."
    except Exception as e:
        # Report the failure as the reply text rather than raising, so the
        # caller always receives a string.
        return f"Error retrieving dataset insights: {e}"
|
| 89 |
+
|
| 90 |
+
# Function to generate AI response along with sentiment and topic analysis
|
| 91 |
def chatbot_response(user_prompt):
    """Answer *user_prompt* and analyse its sentiment and topic.

    Prompts that ``is_dataset_query`` recognises are answered with
    ``get_dataset_insights()``; all other prompts are sent to the
    "gemini-1.5-pro" model. In both cases sentiment and topic analysis are
    run on the prompt itself, not on the reply.

    Args:
        user_prompt: The text entered by the user.

    Returns:
        tuple: ``(response_text, sentiment_label, sentiment_confidence,
        topic_label, topic_confidence)``. All five items are ``None`` when
        the prompt is empty or whitespace-only. If the Gemini branch raises,
        the first item is an error message and the other four are ``None``.
    """
    # Blank prompts (including whitespace-only) carry nothing to answer.
    if not user_prompt or not user_prompt.strip():
        return None, None, None, None, None

    # Dataset questions are answered from MongoDB instead of Gemini.
    if is_dataset_query(user_prompt):
        dataset_insights = get_dataset_insights()
        sentiment_label, sentiment_confidence = analyze_sentiment(user_prompt)
        topic_label, topic_confidence = extract_topic(user_prompt)
        return (
            dataset_insights,
            sentiment_label,
            sentiment_confidence,
            topic_label,
            topic_confidence,
        )

    try:
        # Generate AI response using Gemini
        model_gen = genai.GenerativeModel("gemini-1.5-pro")
        ai_response = model_gen.generate_content(user_prompt)
        sentiment_label, sentiment_confidence = analyze_sentiment(user_prompt)
        topic_label, topic_confidence = extract_topic(user_prompt)
        return (
            ai_response.text,
            sentiment_label,
            sentiment_confidence,
            topic_label,
            topic_confidence,
        )
    except Exception as e:
        # Surface the failure as the reply text so the caller still gets
        # the usual five-item tuple.
        return f"❌ Error: {e}", None, None, None, None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|