KrSharangrav committed on
Commit
979706a
·
1 Parent(s): 6e2dc41

change in the interaction

Browse files
Files changed (3) hide show
  1. app.py +6 -7
  2. chatbot.py +58 -53
  3. db.py +8 -23
app.py CHANGED
@@ -1,17 +1,16 @@
1
  import streamlit as st
2
- import pandas as pd
3
  from db import insert_data_if_empty, get_mongo_client
4
  from chatbot import chatbot_response
5
 
6
- # Insert historical data into MongoDB if not already present.
7
  insert_data_if_empty()
8
 
9
- # Connect to MongoDB (for additional visualizations if needed).
10
  collection = get_mongo_client()
11
 
12
- st.subheader("💬 Chatbot with Sentiment, Topic Analysis, and Dataset Entry Insights")
13
- user_prompt = st.text_area(
14
- "Ask me something (e.g., 'What is the sentiment and category for the first data entry in the dataset'):")
15
 
16
  if st.button("Get AI Response"):
17
  ai_response, sentiment_label, sentiment_confidence, topic_label, topic_confidence = chatbot_response(user_prompt)
@@ -23,4 +22,4 @@ if st.button("Get AI Response"):
23
  st.write("### Category Extraction:")
24
  st.write(f"**Detected Category:** {topic_label} ({topic_confidence:.2f} confidence)")
25
  else:
26
- st.warning("⚠️ Please enter a question or text for analysis.")
 
1
  import streamlit as st
 
2
  from db import insert_data_if_empty, get_mongo_client
3
  from chatbot import chatbot_response
4
 
5
+ # Ensure the historical data is inserted into MongoDB if not already present.
6
  insert_data_if_empty()
7
 
8
+ # (Optional) Connect to MongoDB for further visualization if needed.
9
  collection = get_mongo_client()
10
 
11
+ st.subheader("💬 Chatbot with Analysis for Specific MongoDB Entries")
12
+ st.write("Ask me something (e.g., 'Provide analysis for the data entry 1 in the dataset'): ")
13
+ user_prompt = st.text_area("Your Query:")
14
 
15
  if st.button("Get AI Response"):
16
  ai_response, sentiment_label, sentiment_confidence, topic_label, topic_confidence = chatbot_response(user_prompt)
 
22
  st.write("### Category Extraction:")
23
  st.write(f"**Detected Category:** {topic_label} ({topic_confidence:.2f} confidence)")
24
  else:
25
+ st.warning("⚠️ Please enter a valid query for analysis.")
chatbot.py CHANGED
@@ -1,8 +1,9 @@
1
  import os
 
2
  import streamlit as st
3
  import google.generativeai as genai
4
  from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
5
- from db import get_dataset_summary, get_entry_by_index
6
 
7
  # Configure Gemini API key
8
  GEMINI_API_KEY = os.getenv("gemini_api")
@@ -32,6 +33,7 @@ TOPIC_LABELS = [
32
  "Health", "Science", "Education", "Finance", "Travel", "Food"
33
  ]
34
 
 
35
  def analyze_sentiment(text):
36
  try:
37
  sentiment_result = sentiment_pipeline(text)[0]
@@ -46,6 +48,7 @@ def analyze_sentiment(text):
46
  except Exception as e:
47
  return f"Error analyzing sentiment: {e}", None
48
 
 
49
  def extract_topic(text):
50
  try:
51
  topic_result = topic_pipeline(text, TOPIC_LABELS)
@@ -55,67 +58,69 @@ def extract_topic(text):
55
  except Exception as e:
56
  return f"Error extracting topic: {e}", None
57
 
58
- def is_dataset_query(prompt):
59
- keywords = ["dataset", "data", "csv", "mongodb", "historical"]
60
- return any(keyword in prompt.lower() for keyword in keywords)
61
-
62
- def extract_entry_index(prompt):
63
- # Map ordinal words to indices (0-indexed)
64
- ordinals = {
65
- "first": 0,
66
- "1st": 0,
67
- "second": 1,
68
- "2nd": 1,
69
- "third": 2,
70
- "3rd": 2,
71
- "fourth": 3,
72
- "4th": 3,
73
- "fifth": 4,
74
- "5th": 4,
75
- }
76
- for word, index in ordinals.items():
77
- if word in prompt.lower():
78
- return index
79
  return None
80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  def chatbot_response(user_prompt):
82
  if not user_prompt:
83
  return None, None, None, None, None
84
 
85
- # Check if the query is about a specific dataset entry.
86
- entry_index = extract_entry_index(user_prompt)
87
- if entry_index is not None:
88
- entry_text = get_entry_by_index(entry_index)
89
- if entry_text:
90
- # Create a combined prompt for Gemini to generate detailed insights.
 
 
91
  combined_prompt = (
92
- f"Analyze the following dataset entry from MongoDB:\n\n{entry_text}\n\n"
93
- "Provide detailed insights, including sentiment analysis and category extraction."
 
 
 
94
  )
95
- model_gen = genai.GenerativeModel("gemini-1.5-pro")
96
- ai_response = model_gen.generate_content(combined_prompt)
97
- # Analyze the entry text.
98
  sentiment_label, sentiment_confidence = analyze_sentiment(entry_text)
99
  topic_label, topic_confidence = extract_topic(entry_text)
100
- return ai_response.text, sentiment_label, sentiment_confidence, topic_label, topic_confidence
101
  else:
102
- return f"❌ No entry found for index {entry_index+1}.", None, None, None, None
 
 
 
103
 
104
- # Otherwise, if the query is about the dataset in general.
105
- if is_dataset_query(user_prompt):
106
- dataset_insights = get_dataset_summary()
107
- combined_prompt = (
108
- f"{user_prompt}\n\nDataset Insights:\n{dataset_insights}\n\n"
109
- "Provide a detailed answer that incorporates these dataset insights."
110
- )
111
- else:
112
- combined_prompt = user_prompt
113
-
114
- model_gen = genai.GenerativeModel("gemini-1.5-pro")
115
- ai_response = model_gen.generate_content(combined_prompt)
116
-
117
- # Run sentiment analysis and topic extraction on the original user prompt.
118
- sentiment_label, sentiment_confidence = analyze_sentiment(user_prompt)
119
- topic_label, topic_confidence = extract_topic(user_prompt)
120
-
121
- return ai_response.text, sentiment_label, sentiment_confidence, topic_label, topic_confidence
 
1
  import os
2
+ import re
3
  import streamlit as st
4
  import google.generativeai as genai
5
  from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
6
+ from db import get_entry_by_index # Helper to fetch a document by index
7
 
8
  # Configure Gemini API key
9
  GEMINI_API_KEY = os.getenv("gemini_api")
 
33
  "Health", "Science", "Education", "Finance", "Travel", "Food"
34
  ]
35
 
36
+ # Function to analyze sentiment using the pre-trained model
37
  def analyze_sentiment(text):
38
  try:
39
  sentiment_result = sentiment_pipeline(text)[0]
 
48
  except Exception as e:
49
  return f"Error analyzing sentiment: {e}", None
50
 
51
+ # Function to extract topic using zero-shot classification
52
  def extract_topic(text):
53
  try:
54
  topic_result = topic_pipeline(text, TOPIC_LABELS)
 
58
  except Exception as e:
59
  return f"Error extracting topic: {e}", None
60
 
61
# Helper to detect whether the user is asking about a specific entry.
# Matches patterns such as "data entry 1" or "entry 2" (case-insensitive).
def get_entry_index(prompt):
    """Return the 0-based entry index requested in *prompt*, or None if absent."""
    found = re.search(r'(?:data entry|entry)\s*(\d+)', prompt.lower())
    if found is None:
        return None
    # The user counts entries from 1; internal indexing starts at 0.
    return int(found.group(1)) - 1
69
 
70
# Helper to filter the generative response.
# We expect the response to contain:
# "Let's break down this tweet-like MongoDB entry:" followed by text,
# then "Conclusion:" followed by text.
# We remove any extra parts and remove the header "Conclusion:".
def filter_ai_response(ai_text):
    """Trim a Gemini response to the breakdown section plus the conclusion body.

    Returns *ai_text* unchanged when the expected markers are not present in
    the expected order.
    """
    breakdown_marker = "Let's break down this tweet-like MongoDB entry:"
    conclusion_marker = "Conclusion:"
    if breakdown_marker not in ai_text:
        return ai_text
    after_breakdown = ai_text.split(breakdown_marker, 1)[1]
    # BUG FIX: the previous check (`conclusion_marker in ai_text`) crashed with
    # a ValueError when "Conclusion:" occurred only *before* the breakdown
    # marker, because the subsequent two-name unpack got a single-element
    # split. Check the slice that is actually split instead.
    if conclusion_marker not in after_breakdown:
        return ai_text
    breakdown_part, conclusion_part = after_breakdown.split(conclusion_marker, 1)
    # Rebuild output: breakdown section plus conclusion content (header removed).
    return breakdown_marker + "\n" + breakdown_part.strip() + "\n" + conclusion_part.strip()
88
+
89
# Main chatbot entry point. Returns a 5-tuple:
# (ai_text, sentiment_label, sentiment_confidence, topic_label, topic_confidence).
# When the prompt references a specific MongoDB entry ("entry N"), the
# analysis targets that document's "text" field; otherwise the raw user
# prompt itself is analyzed and forwarded to Gemini.
def chatbot_response(user_prompt):
    if not user_prompt:
        return None, None, None, None, None

    try:
        index = get_entry_index(user_prompt)
        if index is None:
            # General query: analyze the prompt and send it to Gemini verbatim.
            analysis_text = user_prompt
            gemini_prompt = user_prompt
        else:
            document = get_entry_by_index(index)
            if document is None:
                return "❌ No entry found for the requested index.", None, None, None, None
            analysis_text = document.get("text", "No text available.")
            # Ask Gemini for a structured answer so filter_ai_response can
            # trim it down to the breakdown and conclusion sections.
            gemini_prompt = (
                f"Provide analysis for the following MongoDB entry:\n\n"
                f"{analysis_text}\n\n"
                "Please respond in the following format:\n"
                "Let's break down this tweet-like MongoDB entry:\n[Your detailed analysis here]\n"
                "Conclusion:\n[Your conclusion here]"
            )

        # Local sentiment / topic analysis on whichever text was selected.
        sentiment_label, sentiment_confidence = analyze_sentiment(analysis_text)
        topic_label, topic_confidence = extract_topic(analysis_text)

        # Generate the AI response with Gemini, then keep only the sections we want.
        generated = genai.GenerativeModel("gemini-1.5-pro").generate_content(gemini_prompt)
        return (
            filter_ai_response(generated.text),
            sentiment_label,
            sentiment_confidence,
            topic_label,
            topic_confidence,
        )
    except Exception as e:
        # Surface any failure (DB, model, parsing) as a user-visible message.
        return f"❌ Error: {e}", None, None, None, None
 
 
 
 
 
 
 
 
 
 
db.py CHANGED
@@ -3,11 +3,13 @@ import requests
3
  import io
4
  from pymongo import MongoClient
5
 
 
6
  def get_mongo_client():
7
  client = MongoClient("mongodb+srv://groupA:[email protected]/?retryWrites=true&w=majority&appName=SentimentCluster")
8
  db = client["sentiment_db"]
9
  return db["tweets"]
10
 
 
11
  def insert_data_if_empty():
12
  collection = get_mongo_client()
13
  if collection.count_documents({}) == 0:
@@ -22,29 +24,12 @@ def insert_data_if_empty():
22
  except Exception as e:
23
  print(f"❌ Error loading dataset: {e}")
24
 
25
- def get_dataset_summary():
 
26
  collection = get_mongo_client()
27
- pipeline = [
28
- {"$group": {"_id": "$target", "count": {"$sum": 1}}}
29
- ]
30
- results = list(collection.aggregate(pipeline))
31
- mapping = {"0": "Negative", "2": "Neutral", "4": "Positive"}
32
- summary_parts = []
33
- total = 0
34
- for doc in results:
35
- target = str(doc["_id"])
36
- count = doc["count"]
37
- total += count
38
- label = mapping.get(target, target)
39
- summary_parts.append(f"{label}: {count}")
40
- summary = f"Total tweets: {total}. " + ", ".join(summary_parts) + "."
41
- return summary
42
-
43
- def get_entry_by_index(index):
44
- collection = get_mongo_client()
45
- # Sort by _id (assumes insertion order), skip to the requested index, and get one document.
46
- document = collection.find({}, {"_id": 0}).sort("_id", 1).skip(index).limit(1)
47
- docs = list(document)
48
  if docs:
49
- return docs[0].get("text", None)
50
  return None
 
3
  import io
4
  from pymongo import MongoClient
5
 
6
# Connect to MongoDB Atlas and return the tweets collection handle.
# NOTE(review): credentials are hard-coded in the connection URI; consider
# loading them from an environment variable or a secrets store.
def get_mongo_client():
    uri = "mongodb+srv://groupA:[email protected]/?retryWrites=true&w=majority&appName=SentimentCluster"
    return MongoClient(uri)["sentiment_db"]["tweets"]
11
 
12
+ # Function to insert data if the collection is empty.
13
  def insert_data_if_empty():
14
  collection = get_mongo_client()
15
  if collection.count_documents({}) == 0:
 
24
  except Exception as e:
25
  print(f"❌ Error loading dataset: {e}")
26
 
27
# Fetch the document at position `index` (0-based) from the tweets collection,
# with the Mongo `_id` field excluded from the projection.
def get_entry_by_index(index=0):
    """Return the document at *index*, or None when the index is out of range."""
    tweets = get_mongo_client()
    # Skip `index` documents and take at most one.
    cursor = tweets.find({}, {"_id": 0}).skip(index).limit(1)
    for document in cursor:
        return document
    return None