# SentimentAnalysis / chatbot.py
# KrSharangrav: "more changes to all 3 py files" (f16063a), 5.73 kB
import os
import streamlit as st
import google.generativeai as genai
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments
from datasets import load_dataset
# --- Monkey Patch for Accelerator ---
# If the installed Accelerator.__init__ does not accept "dispatch_batches",
# wrap it so that keyword is silently dropped before the real __init__ runs.
try:
    import accelerate
    from accelerate import Accelerator
    import inspect

    # The wrapper below takes **kwargs, so the signature test alone cannot
    # tell an already-patched __init__ from an unpatched one. The marker
    # attribute keeps the patch from stacking a new wrapper each time this
    # module is executed again in the same process.
    if (
        'dispatch_batches' not in inspect.signature(Accelerator.__init__).parameters
        and not getattr(Accelerator.__init__, "_drops_dispatch_batches", False)
    ):
        old_init = Accelerator.__init__

        def new_init(self, *args, **kwargs):
            """Call the original ``Accelerator.__init__`` without ``dispatch_batches``."""
            kwargs.pop('dispatch_batches', None)
            old_init(self, *args, **kwargs)

        new_init._drops_dispatch_batches = True
        Accelerator.__init__ = new_init
except Exception as e:
    # Best effort: report the failure in the UI and carry on unpatched.
    st.error(f"Error patching Accelerator: {e}")
# --- Configure Gemini API ---
# The key is read from the "gemini_api" environment variable.
GEMINI_API_KEY = os.getenv("gemini_api")
if GEMINI_API_KEY:
    genai.configure(api_key=GEMINI_API_KEY)
else:
    # The warning emoji is spelled with \N{...} escapes so the source stays
    # pure ASCII and cannot be corrupted by a wrong file encoding.
    st.error(
        "\N{WARNING SIGN}\N{VARIATION SELECTOR-16} Google API key is missing! "
        "Set it in Hugging Face Secrets."
    )

# Path to save/load the fine-tuned model
FINE_TUNED_MODEL_DIR = "fine-tuned-sentiment-model"
# --- Fine-tune the Sentiment Model ---
def fine_tune_model():
    """Fine-tune the base sentiment model on ``sentiment140.csv`` and save it.

    Loads the CSV as the ``train`` split, writes a ``label`` column derived
    from the ``target`` column, tokenizes the ``text`` column, trains
    ``cardiffnlp/twitter-roberta-base-sentiment`` for one epoch and saves the
    model and tokenizer to ``FINE_TUNED_MODEL_DIR``. Progress and errors are
    reported through Streamlit.

    Returns:
        ``(model, tokenizer)`` on success, or ``(None, None)`` when the
        dataset cannot be loaded. Exceptions raised after the dataset has
        been loaded (label conversion, model download, training, saving) are
        not caught here and propagate to the caller.
    """
    st.info("Fine-tuning sentiment model. This may take a while...")

    # Load the dataset from the local CSV file.
    try:
        dataset = load_dataset(
            'csv',
            data_files={'train': 'sentiment140.csv'},
            encoding='ISO-8859-1',
        )
    except Exception as e:
        # Emoji written as an escape so the source stays ASCII-safe.
        st.error(f"\N{CROSS MARK} Error loading dataset: {e}")
        return None, None

    # Convert sentiment labels: sentiment140 labels are 0 (Negative),
    # 2 (Neutral), 4 (Positive); they are remapped to the ids 0/1/2.
    # The table is built once here instead of once per row.
    label_mapping = {0: 0, 2: 1, 4: 2}

    def convert_labels(example):
        """Add the remapped class id of one row as ``example["label"]``."""
        example["label"] = label_mapping[int(example["target"])]
        return example

    dataset = dataset.map(convert_labels)

    base_model_name = "cardiffnlp/twitter-roberta-base-sentiment"
    tokenizer = AutoTokenizer.from_pretrained(base_model_name)
    model = AutoModelForSequenceClassification.from_pretrained(base_model_name, num_labels=3)

    # Tokenize the dataset; assuming the CSV has a column named "text"
    def tokenize_function(examples):
        """Tokenize a batch of rows, padded/truncated to 128 tokens."""
        return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=128)

    tokenized_dataset = dataset.map(tokenize_function, batched=True)

    training_args = TrainingArguments(
        output_dir="./results",
        num_train_epochs=1,  # For demonstration, we train for 1 epoch.
        per_device_train_batch_size=8,
        logging_steps=10,
        save_steps=50,
        evaluation_strategy="no",
        learning_rate=2e-5,
        weight_decay=0.01,
        logging_dir='./logs',
        disable_tqdm=False,
    )
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset["train"],
    )
    trainer.train()

    # Persist both artifacts so later runs can load them instead of retraining.
    model.save_pretrained(FINE_TUNED_MODEL_DIR)
    tokenizer.save_pretrained(FINE_TUNED_MODEL_DIR)
    st.success("\N{WHITE HEAVY CHECK MARK} Fine-tuning complete and model saved.")
    return model, tokenizer
# Load or fine-tune the sentiment model
if not os.path.exists(FINE_TUNED_MODEL_DIR):
    model, tokenizer = fine_tune_model()
    if model is None or tokenizer is None:
        st.error("\N{CROSS MARK} Failed to fine-tune the sentiment analysis model.")
else:
    tokenizer = AutoTokenizer.from_pretrained(FINE_TUNED_MODEL_DIR)
    model = AutoModelForSequenceClassification.from_pretrained(FINE_TUNED_MODEL_DIR)

# Create sentiment analysis pipeline from the fine-tuned model.
# The name is always bound so later lookups never hit an undefined global.
# The pipeline is only built when a model is actually available: calling
# pipeline() with model=None would load the task's default checkpoint
# instead of the fine-tuned one.
sentiment_pipeline = None
if model is not None and tokenizer is not None:
    try:
        sentiment_pipeline = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
    except Exception as e:
        st.error(f"\N{CROSS MARK} Error loading sentiment pipeline: {e}")
# Load Topic Extraction Model
# Bound up front so the name exists even when loading fails.
topic_pipeline = None
try:
    topic_pipeline = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
except Exception as e:
    st.error(f"\N{CROSS MARK} Error loading topic extraction model: {e}")

# Predefined topic labels for classification
TOPIC_LABELS = [
    "Technology", "Politics", "Business", "Sports", "Entertainment",
    "Health", "Science", "Education", "Finance", "Travel", "Food",
]
# Function to analyze sentiment
def analyze_sentiment(text):
    """Classify the sentiment of *text* with the module's sentiment pipeline.

    Args:
        text: The text to classify.

    Returns:
        ``(sentiment, score)`` where *sentiment* is ``"Negative"``,
        ``"Neutral"`` or ``"Positive"`` (``"Unknown"`` for any other pipeline
        label) and *score* is the pipeline's score for that label. If
        anything raises, returns ``("Error analyzing sentiment: <exc>", None)``.
    """
    sentiment_mapping = {
        "LABEL_0": "Negative",
        "LABEL_1": "Neutral",
        "LABEL_2": "Positive",
    }
    try:
        # Truncate over-long input instead of letting the model fail on it.
        # max_length is given explicitly because the tokenizer may not carry
        # a usable default; 512 is assumed to be the base model's input
        # limit - TODO confirm if the base checkpoint changes.
        sentiment_result = sentiment_pipeline(text, truncation=True, max_length=512)[0]
        label = sentiment_result['label']
        score = sentiment_result['score']
        return sentiment_mapping.get(label, "Unknown"), score
    except Exception as e:
        return f"Error analyzing sentiment: {e}", None
# Function to extract topic
def extract_topic(text):
    """Pick the best-matching entry of ``TOPIC_LABELS`` for *text*.

    Args:
        text: The text to classify.

    Returns:
        ``(topic, confidence)`` taken from the first entries of the
        pipeline's ``"labels"`` and ``"scores"`` lists. If anything raises,
        returns ``("Error extracting topic: <exc>", None)``.
    """
    try:
        ranked = topic_pipeline(text, TOPIC_LABELS)
        # The first label/score pair is the one reported to the caller.
        return ranked["labels"][0], ranked["scores"][0]
    except Exception as err:
        return f"Error extracting topic: {err}", None
# Function to generate AI response along with sentiment and topic analysis
def chatbot_response(user_prompt):
    """Generate a Gemini reply for *user_prompt* and analyse the prompt.

    Sentiment and topic are computed on the user's prompt, not on the
    generated reply.

    Args:
        user_prompt: The user's message.

    Returns:
        A 5-tuple ``(ai_response, sentiment_label, sentiment_confidence,
        topic_label, topic_confidence)``. All five are ``None`` when
        *user_prompt* is empty or ``None``. If any step raises, the first
        element is an error message and the other four are ``None``.
    """
    if not user_prompt:
        return None, None, None, None, None
    try:
        # Generate AI Response using Gemini
        model_gen = genai.GenerativeModel("gemini-1.5-pro")
        ai_response = model_gen.generate_content(user_prompt)
        # Sentiment Analysis
        sentiment_label, sentiment_confidence = analyze_sentiment(user_prompt)
        # Topic Extraction
        topic_label, topic_confidence = extract_topic(user_prompt)
        return ai_response.text, sentiment_label, sentiment_confidence, topic_label, topic_confidence
    except Exception as e:
        # Emoji written as an escape so the source stays ASCII-safe.
        return f"\N{CROSS MARK} Error: {e}", None, None, None, None