Spaces:

sharangrav24
/

SentimentAnalysis

Sleeping

SentimentAnalysis / chatbot.py

KrSharangrav

more changes to all 3 py files

f16063a 4 months ago

5.73 kB

	import os
	import streamlit as st
	import google.generativeai as genai
	from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments
	from datasets import load_dataset

	# --- Monkey Patch for Accelerator ---
	# If the installed Accelerator.__init__ does not declare a "dispatch_batches"
	# parameter, wrap it so that keyword is dropped before delegating.
	# NOTE(review): presumably the keyword is passed by transformers' Trainer on
	# some version combinations -- confirm against the pinned versions.
	try:
	    import accelerate
	    from accelerate import Accelerator
	    import inspect

	    if 'dispatch_batches' not in inspect.signature(Accelerator.__init__).parameters:
	        old_init = Accelerator.__init__

	        def new_init(self, *args, **kwargs):
	            """Call the original ``Accelerator.__init__`` without ``dispatch_batches``."""
	            # kwargs must be a real keyword dict (``**kwargs``) for the
	            # keyword to be visible here and removable.
	            kwargs.pop('dispatch_batches', None)
	            old_init(self, *args, **kwargs)

	        Accelerator.__init__ = new_init
	except Exception as e:
	    # Best effort: report the problem in the UI but keep loading the module.
	    st.error(f"Error patching Accelerator: {e}")

	# --- Configure Gemini API ---
	# The key is read from the "gemini_api" environment variable; when it is
	# absent or empty the client is left unconfigured and the UI shows an error.
	GEMINI_API_KEY = os.environ.get("gemini_api")
	if not GEMINI_API_KEY:
	    st.error("⚠️ Google API key is missing! Set it in Hugging Face Secrets.")
	else:
	    genai.configure(api_key=GEMINI_API_KEY)

	# Directory the fine-tuned model and tokenizer are written to and read from.
	FINE_TUNED_MODEL_DIR = "fine-tuned-sentiment-model"

	# --- Fine-tune the Sentiment Model ---
	def fine_tune_model():
	    """Fine-tune the base sentiment classifier on the local CSV and save it.

	    Loads ``sentiment140.csv`` as the ``train`` split, derives a ``label``
	    column from ``target`` (0 -> 0, 2 -> 1, 4 -> 2), tokenizes the ``text``
	    column, trains for one epoch and writes the model and tokenizer to
	    ``FINE_TUNED_MODEL_DIR``.

	    Returns:
	        ``(model, tokenizer)`` after training, or ``(None, None)`` when the
	        dataset cannot be loaded. Errors raised after loading propagate.
	    """
	    st.info("Fine-tuning sentiment model. This may take a while...")

	    # Load the dataset from the local CSV file.
	    try:
	        raw_dataset = load_dataset(
	            'csv',
	            data_files={'train': 'sentiment140.csv'},
	            encoding='ISO-8859-1',
	        )
	    except Exception as e:
	        st.error(f"❌ Error loading dataset: {e}")
	        return None, None

	    # sentiment140 targets: 0 (Negative), 2 (Neutral), 4 (Positive).
	    target_to_label = {0: 0, 2: 1, 4: 2}

	    def add_label(row):
	        row["label"] = target_to_label[int(row["target"])]
	        return row

	    labeled_dataset = raw_dataset.map(add_label)

	    base_model_name = "cardiffnlp/twitter-roberta-base-sentiment"
	    tokenizer = AutoTokenizer.from_pretrained(base_model_name)
	    model = AutoModelForSequenceClassification.from_pretrained(
	        base_model_name,
	        num_labels=3,
	    )

	    # Assumes the CSV exposes a column named "text".
	    def encode_batch(batch):
	        return tokenizer(
	            batch["text"],
	            padding="max_length",
	            truncation=True,
	            max_length=128,
	        )

	    encoded_dataset = labeled_dataset.map(encode_batch, batched=True)

	    trainer = Trainer(
	        model=model,
	        args=TrainingArguments(
	            output_dir="./results",
	            num_train_epochs=1,  # single epoch, for demonstration
	            per_device_train_batch_size=8,
	            logging_steps=10,
	            save_steps=50,
	            evaluation_strategy="no",
	            learning_rate=2e-5,
	            weight_decay=0.01,
	            logging_dir='./logs',
	            disable_tqdm=False,
	        ),
	        train_dataset=encoded_dataset["train"],
	    )
	    trainer.train()

	    # Persist the model first, then the tokenizer, into the same directory.
	    for artifact in (model, tokenizer):
	        artifact.save_pretrained(FINE_TUNED_MODEL_DIR)

	    st.success("✅ Fine-tuning complete and model saved.")
	    return model, tokenizer

	# Load or fine-tune the sentiment model
	if not os.path.exists(FINE_TUNED_MODEL_DIR):
	    model, tokenizer = fine_tune_model()
	    if model is None or tokenizer is None:
	        st.error("❌ Failed to fine-tune the sentiment analysis model.")
	else:
	    tokenizer = AutoTokenizer.from_pretrained(FINE_TUNED_MODEL_DIR)
	    model = AutoModelForSequenceClassification.from_pretrained(FINE_TUNED_MODEL_DIR)

	# Create sentiment analysis pipeline from the fine-tuned model.
	# The name is always bound: it stays None when no model is available or the
	# pipeline cannot be built, so later lookups never hit an undefined name.
	sentiment_pipeline = None
	if model is not None and tokenizer is not None:
	    try:
	        sentiment_pipeline = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
	    except Exception as e:
	        st.error(f"❌ Error loading sentiment pipeline: {e}")
	# When fine-tuning failed we deliberately do NOT call pipeline(): with
	# model=None it would load the task's default checkpoint, whose label names
	# differ from the LABEL_0/1/2 scheme this module expects.

	# Load Topic Extraction Model
	# Bind the name up front so it exists (as None) even when loading fails.
	topic_pipeline = None
	try:
	    topic_pipeline = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
	except Exception as e:
	    st.error(f"❌ Error loading topic extraction model: {e}")

	# Candidate labels handed to the zero-shot topic classifier.
	TOPIC_LABELS = [
	    "Technology",
	    "Politics",
	    "Business",
	    "Sports",
	    "Entertainment",
	    "Health",
	    "Science",
	    "Education",
	    "Finance",
	    "Travel",
	    "Food",
	]

	# Function to analyze sentiment
	def analyze_sentiment(text):
	    """Classify *text* with the module-level sentiment pipeline.

	    Args:
	        text: The text to classify.

	    Returns:
	        ``(sentiment, score)`` where ``sentiment`` is "Negative", "Neutral"
	        or "Positive" ("Unknown" for an unrecognised label) and ``score`` is
	        the pipeline's score for that label. This function never raises: on
	        any failure it returns ``("Error analyzing sentiment: <reason>", None)``.
	    """
	    sentiment_mapping = {
	        "LABEL_0": "Negative",
	        "LABEL_1": "Neutral",
	        "LABEL_2": "Positive",
	    }
	    try:
	        # Truncate over-long input, as is done during fine-tuning, so a long
	        # message is classified instead of failing inside the model.
	        sentiment_result = sentiment_pipeline(text, truncation=True)[0]
	        label = sentiment_result['label']
	        score = sentiment_result['score']
	        return sentiment_mapping.get(label, "Unknown"), score
	    except Exception as e:
	        return f"Error analyzing sentiment: {e}", None

	# Function to extract topic
	def extract_topic(text):
	    """Pick the best-matching entry of ``TOPIC_LABELS`` for *text*.

	    Returns:
	        ``(topic, confidence)`` taken from the first entries of the
	        classifier's ``labels`` and ``scores``. On any failure returns
	        ``("Error extracting topic: <reason>", None)`` instead of raising.
	    """
	    try:
	        ranked = topic_pipeline(text, TOPIC_LABELS)
	        best_label, best_score = ranked["labels"][0], ranked["scores"][0]
	    except Exception as e:
	        return f"Error extracting topic: {e}", None
	    return best_label, best_score

	# Function to generate AI response along with sentiment and topic analysis
	def chatbot_response(user_prompt):
	    """Answer *user_prompt* with Gemini and analyse the prompt itself.

	    Args:
	        user_prompt: The user's message.

	    Returns:
	        A 5-tuple ``(ai_text, sentiment_label, sentiment_confidence,
	        topic_label, topic_confidence)``.

	        * Empty, ``None`` or whitespace-only prompt: five ``None`` values.
	        * Any exception while generating or analysing:
	          ``("❌ Error: <reason>", None, None, None, None)``.
	    """
	    # A whitespace-only string is truthy but carries no content; treat it
	    # like an empty prompt rather than sending it to the model.
	    if not user_prompt or (isinstance(user_prompt, str) and not user_prompt.strip()):
	        return None, None, None, None, None

	    try:
	        # Generate AI Response using Gemini
	        model_gen = genai.GenerativeModel("gemini-1.5-pro")
	        ai_response = model_gen.generate_content(user_prompt)

	        # Sentiment Analysis
	        sentiment_label, sentiment_confidence = analyze_sentiment(user_prompt)

	        # Topic Extraction
	        topic_label, topic_confidence = extract_topic(user_prompt)

	        return ai_response.text, sentiment_label, sentiment_confidence, topic_label, topic_confidence
	    except Exception as e:
	        return f"❌ Error: {e}", None, None, None, None