Spaces:
Sleeping
Sleeping
File size: 3,635 Bytes
7268351 be89ae1 e332fa0 b83a640 be89ae1 7268351 be89ae1 7268351 be89ae1 f5b718b 7268351 be89ae1 7268351 5a94c8e f763dd0 be89ae1 7268351 f16063a 7268351 be89ae1 5a94c8e f763dd0 5a94c8e f16063a f5b718b 5a94c8e f763dd0 5a94c8e f763dd0 e332fa0 867c886 7268351 5a94c8e 7268351 e332fa0 5a94c8e e332fa0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
import os
import streamlit as st
import google.generativeai as genai
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
from db import get_dataset_summary # Import the dataset summary function
# Configure the Gemini client from the "gemini_api" environment variable.
GEMINI_API_KEY = os.environ.get("gemini_api")
if not GEMINI_API_KEY:
    # Key unset or empty: report it in the Streamlit UI and leave the
    # Gemini client unconfigured.
    st.error("⚠️ Google API key is missing! Set it in Hugging Face Secrets.")
else:
    genai.configure(api_key=GEMINI_API_KEY)
# Load pre-trained sentiment analysis model
MODEL_NAME = "cardiffnlp/twitter-roberta-base-sentiment"
# Bind the names up front so they always exist at module level: if loading
# fails they stay None instead of being undefined, which would otherwise
# surface later as an opaque NameError.
tokenizer = None
model = None
sentiment_pipeline = None
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
    sentiment_pipeline = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
except Exception as e:
    # Report the failure in the Streamlit UI; the module stays importable.
    st.error(f"❌ Error loading sentiment model: {e}")
# Load Topic Extraction Model
# Bind the name up front so it always exists at module level: if loading
# fails it stays None instead of being undefined.
topic_pipeline = None
try:
    topic_pipeline = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
except Exception as e:
    # Report the failure in the Streamlit UI; the module stays importable.
    st.error(f"❌ Error loading topic extraction model: {e}")
# Candidate labels handed to the zero-shot classifier in extract_topic().
TOPIC_LABELS = [
    "Technology",
    "Politics",
    "Business",
    "Sports",
    "Entertainment",
    "Health",
    "Science",
    "Education",
    "Finance",
    "Travel",
    "Food",
]
# Function to analyze sentiment using the pre-trained model
def analyze_sentiment(text):
    """Classify the sentiment of ``text`` with the module-level pipeline.

    Args:
        text: The text to classify.

    Returns:
        A ``(label, score)`` tuple. ``label`` is "Negative", "Neutral" or
        "Positive" ("Unknown" when the pipeline returns a label outside the
        mapping below) and ``score`` is the pipeline's score for that label.
        If anything raises, the tuple is
        ``("Error analyzing sentiment: <reason>", None)``.
    """
    # Raw pipeline labels mapped to human-readable names.
    sentiment_mapping = {
        "LABEL_0": "Negative",
        "LABEL_1": "Neutral",
        "LABEL_2": "Positive",
    }
    try:
        # Truncate explicitly: the RoBERTa-base encoder accepts at most 512
        # tokens, and without truncation a longer prompt fails inside the
        # model instead of being classified.
        sentiment_result = sentiment_pipeline(text, truncation=True, max_length=512)[0]
        label = sentiment_result['label']
        score = sentiment_result['score']
        return sentiment_mapping.get(label, "Unknown"), score
    except Exception as e:
        # Best-effort contract: callers get an error string, not an exception.
        return f"Error analyzing sentiment: {e}", None
# Function to extract topic using zero-shot classification
def extract_topic(text):
    """Pick a topic for ``text`` from ``TOPIC_LABELS``.

    Runs the module-level zero-shot pipeline and returns the first entry of
    its ``labels`` and ``scores`` lists as ``(topic, confidence)``. If
    anything raises, returns ``("Error extracting topic: <reason>", None)``.
    """
    try:
        ranked = topic_pipeline(text, TOPIC_LABELS)
        return ranked["labels"][0], ranked["scores"][0]
    except Exception as err:
        return f"Error extracting topic: {err}", None
# Helper to check if the user query is about the dataset
def is_dataset_query(prompt) -> bool:
    """Return True if ``prompt`` mentions any dataset-related keyword.

    Matching is a case-insensitive substring test, so "database" and
    "metadata" match through "data". An empty or ``None`` prompt is never a
    dataset query.
    """
    if not prompt:
        return False
    keywords = ("dataset", "data", "csv", "mongodb", "historical")
    # Lowercase once instead of once per keyword.
    lowered = prompt.lower()
    return any(keyword in lowered for keyword in keywords)
# Function to generate AI response along with sentiment and topic analysis
def chatbot_response(user_prompt):
    """Answer ``user_prompt`` with Gemini and analyse the prompt itself.

    Args:
        user_prompt: The user's message. Empty, ``None`` or whitespace-only
            prompts are treated as "nothing to do".

    Returns:
        A 5-tuple ``(ai_text, sentiment_label, sentiment_confidence,
        topic_label, topic_confidence)``.

        * All five are ``None`` when there is no usable prompt.
        * If anything raises, the tuple is
          ``("❌ Error: <reason>", None, None, None, None)``.
    """
    # A whitespace-only prompt carries no content, so skip the Gemini call
    # and both model inferences just as for an empty prompt.
    if not user_prompt or (isinstance(user_prompt, str) and not user_prompt.strip()):
        return None, None, None, None, None
    try:
        combined_prompt = user_prompt
        # If the query seems related to the dataset, append summary insights.
        if is_dataset_query(user_prompt):
            dataset_insights = get_dataset_summary()
            combined_prompt = (
                f"{user_prompt}\n\nDataset Insights:\n{dataset_insights}\n\n"
                "Provide a detailed answer that incorporates these dataset insights."
            )
        # Generate AI response using Gemini with the (possibly augmented) prompt.
        model_gen = genai.GenerativeModel("gemini-1.5-pro")
        ai_response = model_gen.generate_content(combined_prompt)
        # Sentiment and topic are computed on the original user prompt, not
        # the augmented one, so the dataset insights do not skew them.
        sentiment_label, sentiment_confidence = analyze_sentiment(user_prompt)
        topic_label, topic_confidence = extract_topic(user_prompt)
        return ai_response.text, sentiment_label, sentiment_confidence, topic_label, topic_confidence
    except Exception as e:
        # Boundary handler: any failure is returned as an error string.
        return f"❌ Error: {e}", None, None, None, None
|