Spaces:
Sleeping
Sleeping
File size: 4,600 Bytes
7268351 be89ae1 6e2dc41 b83a640 be89ae1 7268351 be89ae1 7268351 be89ae1 f5b718b 7268351 be89ae1 7268351 5a94c8e f763dd0 7268351 f16063a 7268351 5a94c8e f763dd0 5a94c8e f16063a f5b718b 5a94c8e f763dd0 5a94c8e f763dd0 e332fa0 867c886 6e2dc41 7268351 5a94c8e 7268351 6e2dc41 e332fa0 6e2dc41 e332fa0 6e2dc41 e332fa0 6e2dc41 e332fa0 6e2dc41 e332fa0 6e2dc41 e332fa0 6e2dc41 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 |
import os
import re

import google.generativeai as genai
import streamlit as st
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer

from db import get_dataset_summary, get_entry_by_index
# Configure Gemini API key
GEMINI_API_KEY = os.getenv("gemini_api")
if GEMINI_API_KEY:
genai.configure(api_key=GEMINI_API_KEY)
else:
st.error("⚠️ Google API key is missing! Set it in Hugging Face Secrets.")
# Load pre-trained sentiment analysis model
MODEL_NAME = "cardiffnlp/twitter-roberta-base-sentiment"
try:
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
sentiment_pipeline = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
except Exception as e:
st.error(f"❌ Error loading sentiment model: {e}")
# Load Topic Extraction Model
try:
topic_pipeline = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
except Exception as e:
st.error(f"❌ Error loading topic extraction model: {e}")
# Predefined topic labels for classification
TOPIC_LABELS = [
"Technology", "Politics", "Business", "Sports", "Entertainment",
"Health", "Science", "Education", "Finance", "Travel", "Food"
]
def analyze_sentiment(text):
try:
sentiment_result = sentiment_pipeline(text)[0]
label = sentiment_result['label']
score = sentiment_result['score']
sentiment_mapping = {
"LABEL_0": "Negative",
"LABEL_1": "Neutral",
"LABEL_2": "Positive"
}
return sentiment_mapping.get(label, "Unknown"), score
except Exception as e:
return f"Error analyzing sentiment: {e}", None
def extract_topic(text):
try:
topic_result = topic_pipeline(text, TOPIC_LABELS)
top_topic = topic_result["labels"][0]
confidence = topic_result["scores"][0]
return top_topic, confidence
except Exception as e:
return f"Error extracting topic: {e}", None
def is_dataset_query(prompt):
keywords = ["dataset", "data", "csv", "mongodb", "historical"]
return any(keyword in prompt.lower() for keyword in keywords)
def extract_entry_index(prompt):
# Map ordinal words to indices (0-indexed)
ordinals = {
"first": 0,
"1st": 0,
"second": 1,
"2nd": 1,
"third": 2,
"3rd": 2,
"fourth": 3,
"4th": 3,
"fifth": 4,
"5th": 4,
}
for word, index in ordinals.items():
if word in prompt.lower():
return index
return None
def chatbot_response(user_prompt):
if not user_prompt:
return None, None, None, None, None
# Check if the query is about a specific dataset entry.
entry_index = extract_entry_index(user_prompt)
if entry_index is not None:
entry_text = get_entry_by_index(entry_index)
if entry_text:
# Create a combined prompt for Gemini to generate detailed insights.
combined_prompt = (
f"Analyze the following dataset entry from MongoDB:\n\n{entry_text}\n\n"
"Provide detailed insights, including sentiment analysis and category extraction."
)
model_gen = genai.GenerativeModel("gemini-1.5-pro")
ai_response = model_gen.generate_content(combined_prompt)
# Analyze the entry text.
sentiment_label, sentiment_confidence = analyze_sentiment(entry_text)
topic_label, topic_confidence = extract_topic(entry_text)
return ai_response.text, sentiment_label, sentiment_confidence, topic_label, topic_confidence
else:
return f"❌ No entry found for index {entry_index+1}.", None, None, None, None
# Otherwise, if the query is about the dataset in general.
if is_dataset_query(user_prompt):
dataset_insights = get_dataset_summary()
combined_prompt = (
f"{user_prompt}\n\nDataset Insights:\n{dataset_insights}\n\n"
"Provide a detailed answer that incorporates these dataset insights."
)
else:
combined_prompt = user_prompt
model_gen = genai.GenerativeModel("gemini-1.5-pro")
ai_response = model_gen.generate_content(combined_prompt)
# Run sentiment analysis and topic extraction on the original user prompt.
sentiment_label, sentiment_confidence = analyze_sentiment(user_prompt)
topic_label, topic_confidence = extract_topic(user_prompt)
return ai_response.text, sentiment_label, sentiment_confidence, topic_label, topic_confidence
|