Spaces:

sharangrav24
/

SentimentAnalysis

Running

App Files Community

KrSharangrav committed on Mar 15

Commit

f16063a

1 Parent(s): 0df2e14

more changes to all 3 py files

Browse files

Files changed (3) hide show

app.py +6 -22
chatbot.py +28 -21
db.py +4 -8

app.py CHANGED Viewed

@@ -1,29 +1,15 @@
 import streamlit as st
 import pandas as pd
 from db import insert_data_if_empty, get_mongo_client
-from chatbot import chatbot_response  # Updated chatbot functionality using the fine-tuned model
-# 1. Ensure historical data is loaded into MongoDB
 insert_data_if_empty()
-# 2. Connect to MongoDB collection (for potential historical data display)
 collection = get_mongo_client()
-# Optional: Display historical data from the dataset (uncomment if needed)
-# st.title("📊 Historical Data and Chatbot Analysis")
-# st.subheader("Historical Data from MongoDB")
-# data = list(collection.find({}, {"_id": 0}).limit(5))
-# if data:
-#     st.write(pd.DataFrame(data))
-# else:
-#     st.warning("No data found in MongoDB. Please try refreshing.")
-#
-# if st.button("Show Complete Data"):
-#     all_data = list(collection.find({}, {"_id": 0}))
-#     st.write(pd.DataFrame(all_data))
-# 3. Chatbot interface
-st.subheader("💬 Chatbot with Fine-Tuned Sentiment & Topic Analysis")
 user_prompt = st.text_area("Ask me something:")
 if st.button("Get AI Response"):
@@ -31,11 +17,9 @@ if st.button("Get AI Response"):
     if ai_response:
         st.write("### AI Response:")
         st.write(ai_response)
         st.write("### Sentiment Analysis:")
         st.write(f"**Sentiment:** {sentiment_label} ({sentiment_confidence:.2f} confidence)")
-        st.write("### Topic Extraction:")
         st.write(f"**Detected Category:** {topic_label} ({topic_confidence:.2f} confidence)")
     else:
-        st.warning("Please enter some text for analysis.")

 import streamlit as st
 import pandas as pd
 from db import insert_data_if_empty, get_mongo_client
+from chatbot import chatbot_response  # Import the updated chatbot functionality
+# Ensure that historical data is in the database
 insert_data_if_empty()
+# Connect to MongoDB
 collection = get_mongo_client()
+st.subheader("💬 Chatbot with Sentiment & Topic Analysis")
 user_prompt = st.text_area("Ask me something:")
 if st.button("Get AI Response"):
     if ai_response:
         st.write("### AI Response:")
         st.write(ai_response)
         st.write("### Sentiment Analysis:")
         st.write(f"**Sentiment:** {sentiment_label} ({sentiment_confidence:.2f} confidence)")
+        st.write("### Category Extraction:")
         st.write(f"**Detected Category:** {topic_label} ({topic_confidence:.2f} confidence)")
     else:
+        st.warning("⚠️ Please enter a question or text for analysis.")

chatbot.py CHANGED Viewed

@@ -4,22 +4,37 @@ import google.generativeai as genai
 from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments
 from datasets import load_dataset
-# 🔑 Fetch API key from Hugging Face Secrets
 # Read the Gemini key from the "gemini_api" environment variable; configure
 # the client when it is present, otherwise surface the problem via st.error.
 GEMINI_API_KEY = os.environ.get("gemini_api")
 if not GEMINI_API_KEY:
     st.error("⚠️ Google API key is missing! Set it in Hugging Face Secrets.")
 else:
     genai.configure(api_key=GEMINI_API_KEY)
-# Define path for the fine-tuned model
 FINE_TUNED_MODEL_DIR = "fine-tuned-sentiment-model"
-# Function to fine-tune sentiment analysis model using sentiment140.csv
 def fine_tune_model():
     st.info("Fine-tuning sentiment model. This may take a while...")
     # Load the dataset from the local CSV file.
-    # Ensure that 'sentiment140.csv' is in your working directory.
     try:
         dataset = load_dataset('csv', data_files={'train': 'sentiment140.csv'}, encoding='ISO-8859-1')
     except Exception as e:
@@ -27,7 +42,6 @@ def fine_tune_model():
         return None, None
     # Convert sentiment labels: sentiment140 labels are 0 (Negative), 2 (Neutral), 4 (Positive).
-    # We map them to 0,1,2 respectively.
     def convert_labels(example):
         mapping = {0: 0, 2: 1, 4: 2}
         example["label"] = mapping[int(example["target"])]
@@ -35,10 +49,7 @@ def fine_tune_model():
     dataset = dataset.map(convert_labels)
-    # Base model name
     base_model_name = "cardiffnlp/twitter-roberta-base-sentiment"
-    # Initialize tokenizer and model
     tokenizer = AutoTokenizer.from_pretrained(base_model_name)
     model = AutoModelForSequenceClassification.from_pretrained(base_model_name, num_labels=3)
@@ -47,10 +58,9 @@ def fine_tune_model():
         return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=128)
     tokenized_dataset = dataset.map(tokenize_function, batched=True)
-    # Set training arguments (for demo purposes, we use 1 epoch; adjust as needed)
     training_args = TrainingArguments(
         output_dir="./results",
-        num_train_epochs=1,
         per_device_train_batch_size=8,
         logging_steps=10,
         save_steps=50,
@@ -69,13 +79,12 @@ def fine_tune_model():
     trainer.train()
-    # Save the fine-tuned model and tokenizer
     model.save_pretrained(FINE_TUNED_MODEL_DIR)
     tokenizer.save_pretrained(FINE_TUNED_MODEL_DIR)
     st.success("✅ Fine-tuning complete and model saved.")
     return model, tokenizer
-# Load (or fine-tune) the sentiment analysis model and tokenizer
 if not os.path.exists(FINE_TUNED_MODEL_DIR):
     model, tokenizer = fine_tune_model()
     if model is None or tokenizer is None:
@@ -84,7 +93,7 @@ else:
     tokenizer = AutoTokenizer.from_pretrained(FINE_TUNED_MODEL_DIR)
     model = AutoModelForSequenceClassification.from_pretrained(FINE_TUNED_MODEL_DIR)
-# Initialize sentiment analysis pipeline using the fine-tuned model
 try:
     sentiment_pipeline = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
 except Exception as e:
@@ -106,10 +115,8 @@ TOPIC_LABELS = [
 def analyze_sentiment(text):
     try:
         sentiment_result = sentiment_pipeline(text)[0]
-        label = sentiment_result['label']  # e.g., "LABEL_0", "LABEL_1", "LABEL_2"
-        score = sentiment_result['score']  # Confidence score
-        # Map model labels to human-readable format
         sentiment_mapping = {
             "LABEL_0": "Negative",
             "LABEL_1": "Neutral",
@@ -123,13 +130,13 @@ def analyze_sentiment(text):
 # Run topic_pipeline on `text` with TOPIC_LABELS and return a (label, score)
 # pair taken from index 0 of the result's "labels" and "scores" lists.
 # On any exception, returns (error message, None) instead of raising.
 def extract_topic(text):
     try:
         topic_result = topic_pipeline(text, TOPIC_LABELS)
-        top_topic = topic_result["labels"][0]  # Highest confidence topic
         confidence = topic_result["scores"][0]
         return top_topic, confidence
     except Exception as e:
         # NOTE(review): the score slot is None on failure; a caller that
         # formats it with ":.2f" would raise TypeError -- confirm callers
         # guard against this.
         return f"Error extracting topic: {e}", None
-# Function to generate AI response, sentiment, and topic
 def chatbot_response(user_prompt):
     if not user_prompt:
         return None, None, None, None, None
@@ -139,10 +146,10 @@ def chatbot_response(user_prompt):
         model_gen = genai.GenerativeModel("gemini-1.5-pro")
         ai_response = model_gen.generate_content(user_prompt)
-        # Run Sentiment Analysis
         sentiment_label, sentiment_confidence = analyze_sentiment(user_prompt)
-        # Run Topic Extraction
         topic_label, topic_confidence = extract_topic(user_prompt)
         return ai_response.text, sentiment_label, sentiment_confidence, topic_label, topic_confidence

 from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments
 from datasets import load_dataset
+# --- Monkey Patch for Accelerator ---
 # Import-time compatibility shim: when the installed Accelerator.__init__
 # has no "dispatch_batches" parameter, it is replaced with a wrapper that
 # drops that keyword before delegating to the original initializer.
 # NOTE(review): presumably needed because the Trainer imported above passes
 # dispatch_batches to Accelerator -- confirm against the pinned versions.
+try:
+    import accelerate
+    from accelerate import Accelerator
+    import inspect
+    # If the Accelerator.__init__ does not accept "dispatch_batches", remove it from kwargs.
+    if 'dispatch_batches' not in inspect.signature(Accelerator.__init__).parameters:
         # Keep a reference to the unpatched initializer so the wrapper can call it.
+        old_init = Accelerator.__init__
+        def new_init(self, *args, **kwargs):
+            if 'dispatch_batches' in kwargs:
+                kwargs.pop('dispatch_batches')
+            old_init(self, *args, **kwargs)
+        Accelerator.__init__ = new_init
+except Exception as e:
     # Any failure here (including accelerate not being importable) is
     # reported through st.error rather than raised.
+    st.error(f"Error patching Accelerator: {e}")
+# --- Configure Gemini API ---
 # Read the Gemini key from the "gemini_api" environment variable; configure
 # the client when it is present, otherwise surface the problem via st.error.
 GEMINI_API_KEY = os.environ.get("gemini_api")
 if not GEMINI_API_KEY:
     st.error("⚠️ Google API key is missing! Set it in Hugging Face Secrets.")
 else:
     genai.configure(api_key=GEMINI_API_KEY)
+# Path to save/load the fine-tuned model
 FINE_TUNED_MODEL_DIR = "fine-tuned-sentiment-model"
+# --- Fine-tune the Sentiment Model ---
 def fine_tune_model():
     st.info("Fine-tuning sentiment model. This may take a while...")
     # Load the dataset from the local CSV file.
     try:
         dataset = load_dataset('csv', data_files={'train': 'sentiment140.csv'}, encoding='ISO-8859-1')
     except Exception as e:
         return None, None
     # Convert sentiment labels: sentiment140 labels are 0 (Negative), 2 (Neutral), 4 (Positive).
     def convert_labels(example):
         mapping = {0: 0, 2: 1, 4: 2}
         example["label"] = mapping[int(example["target"])]
     dataset = dataset.map(convert_labels)
     base_model_name = "cardiffnlp/twitter-roberta-base-sentiment"
     tokenizer = AutoTokenizer.from_pretrained(base_model_name)
     model = AutoModelForSequenceClassification.from_pretrained(base_model_name, num_labels=3)
         return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=128)
     tokenized_dataset = dataset.map(tokenize_function, batched=True)
     training_args = TrainingArguments(
         output_dir="./results",
+        num_train_epochs=1,                  # For demonstration, we train for 1 epoch.
         per_device_train_batch_size=8,
         logging_steps=10,
         save_steps=50,
     trainer.train()
     model.save_pretrained(FINE_TUNED_MODEL_DIR)
     tokenizer.save_pretrained(FINE_TUNED_MODEL_DIR)
     st.success("✅ Fine-tuning complete and model saved.")
     return model, tokenizer
+# Load or fine-tune the sentiment model
 if not os.path.exists(FINE_TUNED_MODEL_DIR):
     model, tokenizer = fine_tune_model()
     if model is None or tokenizer is None:
     tokenizer = AutoTokenizer.from_pretrained(FINE_TUNED_MODEL_DIR)
     model = AutoModelForSequenceClassification.from_pretrained(FINE_TUNED_MODEL_DIR)
+# Create sentiment analysis pipeline from the fine-tuned model
 try:
     sentiment_pipeline = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
 except Exception as e:
 def analyze_sentiment(text):
     try:
         sentiment_result = sentiment_pipeline(text)[0]
+        label = sentiment_result['label']
+        score = sentiment_result['score']
         sentiment_mapping = {
             "LABEL_0": "Negative",
             "LABEL_1": "Neutral",
 # Run topic_pipeline on `text` with TOPIC_LABELS and return a (label, score)
 # pair taken from index 0 of the result's "labels" and "scores" lists.
 # On any exception, returns (error message, None) instead of raising.
 def extract_topic(text):
     try:
         topic_result = topic_pipeline(text, TOPIC_LABELS)
         # Presumably the best-scoring label comes first -- TODO confirm
         # against the pipeline's output ordering.
+        top_topic = topic_result["labels"][0]
         confidence = topic_result["scores"][0]
         return top_topic, confidence
     except Exception as e:
         # NOTE(review): the score slot is None on failure; a caller that
         # formats it with ":.2f" would raise TypeError -- confirm callers
         # guard against this.
         return f"Error extracting topic: {e}", None
+# Function to generate AI response along with sentiment and topic analysis
 def chatbot_response(user_prompt):
     if not user_prompt:
         return None, None, None, None, None
         model_gen = genai.GenerativeModel("gemini-1.5-pro")
         ai_response = model_gen.generate_content(user_prompt)
+        # Sentiment Analysis
         sentiment_label, sentiment_confidence = analyze_sentiment(user_prompt)
+        # Topic Extraction
         topic_label, topic_confidence = extract_topic(user_prompt)
         return ai_response.text, sentiment_label, sentiment_confidence, topic_label, topic_confidence

db.py CHANGED Viewed

@@ -3,26 +3,22 @@ import requests
 import io
 from pymongo import MongoClient
-# Function to establish MongoDB connection and return the collection
 def get_mongo_client():
     """Return the ``tweets`` collection of the ``sentiment_db`` database.

     The ``MongoClient`` is created on the first call and cached on the
     function object, so later calls reuse the same client (and its
     connection pool) instead of constructing a new one every time.

     The connection string is taken from the ``MONGO_URI`` environment
     variable when it is set; otherwise the original hard-coded URI is used,
     so existing deployments behave as before.

     Returns:
         The ``tweets`` collection handle from ``sentiment_db``.
     """
     import os

     client = getattr(get_mongo_client, "_client", None)
     if client is None:
         # NOTE(review): the fallback URI embeds credentials in source; prefer
         # supplying MONGO_URI as a secret and rotating this password. The
         # literal is kept unchanged for backward compatibility -- verify it
         # against the deployed file, since it may have been mangled in transit.
         uri = os.getenv(
             "MONGO_URI",
             "mongodb+srv://groupA:[email protected]/?retryWrites=true&w=majority&appName=SentimentCluster",
         )
         client = MongoClient(uri)
         get_mongo_client._client = client
     return client["sentiment_db"]["tweets"]
-# Function to insert the dataset into MongoDB if the collection is empty
 def insert_data_if_empty():
     collection = get_mongo_client()
     if collection.count_documents({}) == 0:
-        print("🟢 No data found in MongoDB. Inserting dataset...")
         csv_url = "https://huggingface.co/spaces/sharangrav24/SentimentAnalysis/resolve/main/sentiment140.csv"
         try:
             response = requests.get(csv_url)
-            response.raise_for_status()  # Ensure the request was successful
             df = pd.read_csv(io.StringIO(response.text), encoding="ISO-8859-1")
-            # Insert dataset records into MongoDB
             collection.insert_many(df.to_dict("records"))
             print("✅ Data Inserted into MongoDB!")
         except Exception as e:

 import io
 from pymongo import MongoClient
+# Function to connect to MongoDB
 def get_mongo_client():
     """Return the ``tweets`` collection of the ``sentiment_db`` database.

     The ``MongoClient`` is created on the first call and cached on the
     function object, so later calls reuse the same client (and its
     connection pool) instead of constructing a new one every time.

     The connection string is taken from the ``MONGO_URI`` environment
     variable when it is set; otherwise the original hard-coded URI is used,
     so existing deployments behave as before.

     Returns:
         The ``tweets`` collection handle from ``sentiment_db``.
     """
     import os

     client = getattr(get_mongo_client, "_client", None)
     if client is None:
         # NOTE(review): the fallback URI embeds credentials in source; prefer
         # supplying MONGO_URI as a secret and rotating this password. The
         # literal is kept unchanged for backward compatibility -- verify it
         # against the deployed file, since it may have been mangled in transit.
         uri = os.getenv(
             "MONGO_URI",
             "mongodb+srv://groupA:[email protected]/?retryWrites=true&w=majority&appName=SentimentCluster",
         )
         client = MongoClient(uri)
         get_mongo_client._client = client
     return client["sentiment_db"]["tweets"]
+# Function to insert data if the collection is empty
 def insert_data_if_empty():
     collection = get_mongo_client()
     if collection.count_documents({}) == 0:
+        print("🟢 No data found. Inserting dataset...")
         csv_url = "https://huggingface.co/spaces/sharangrav24/SentimentAnalysis/resolve/main/sentiment140.csv"
         try:
             response = requests.get(csv_url)
+            response.raise_for_status()
             df = pd.read_csv(io.StringIO(response.text), encoding="ISO-8859-1")
             collection.insert_many(df.to_dict("records"))
             print("✅ Data Inserted into MongoDB!")
         except Exception as e: