Spaces:
Sleeping
Sleeping
File size: 5,560 Bytes
7268351 979706a 7268351 be89ae1 979706a b83a640 be89ae1 7268351 be89ae1 7268351 be89ae1 f5b718b 7268351 be89ae1 7268351 5a94c8e f763dd0 979706a 7268351 f16063a 7268351 979706a 5a94c8e f763dd0 5a94c8e f16063a f5b718b 5a94c8e f763dd0 5a94c8e f763dd0 979706a 6e2dc41 979706a 7268351 5a94c8e 7268351 979706a e332fa0 979706a e332fa0 979706a 6e2dc41 e332fa0 979706a e332fa0 979706a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 |
import os
import re

import streamlit as st
import google.generativeai as genai
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer

from db import get_entry_by_index  # Helper to fetch a document by index

# Configure the Gemini API key from the environment (Hugging Face secret).
GEMINI_API_KEY = os.getenv("gemini_api")
if GEMINI_API_KEY:
    genai.configure(api_key=GEMINI_API_KEY)
else:
    st.error("⚠️ Google API key is missing! Set it in Hugging Face Secrets.")

# Pre-trained 3-way (negative/neutral/positive) sentiment model.
MODEL_NAME = "cardiffnlp/twitter-roberta-base-sentiment"

# Pre-bind both pipelines to None so a failed model download leaves the
# names defined: callers then get a clean error tuple from
# analyze_sentiment/extract_topic instead of a confusing NameError.
sentiment_pipeline = None
topic_pipeline = None

# Load the pre-trained sentiment analysis model.
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
    sentiment_pipeline = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
except Exception as e:
    st.error(f"❌ Error loading sentiment model: {e}")

# Load the zero-shot topic extraction model.
try:
    topic_pipeline = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
except Exception as e:
    st.error(f"❌ Error loading topic extraction model: {e}")

# Candidate labels offered to the zero-shot classifier.
TOPIC_LABELS = [
    "Technology", "Politics", "Business", "Sports", "Entertainment",
    "Health", "Science", "Education", "Finance", "Travel", "Food",
]
def analyze_sentiment(text):
    """Classify the sentiment of *text* with the pre-trained RoBERTa pipeline.

    Returns a ``(label, confidence)`` pair where *label* is one of
    "Negative"/"Neutral"/"Positive" ("Unknown" for an unexpected raw
    label).  On any failure the pair is ``(error-message string, None)``.
    """
    # Map the model's raw LABEL_n outputs to human-readable names.
    label_names = {
        "LABEL_0": "Negative",
        "LABEL_1": "Neutral",
        "LABEL_2": "Positive",
    }
    try:
        prediction = sentiment_pipeline(text)[0]
        return label_names.get(prediction["label"], "Unknown"), prediction["score"]
    except Exception as e:
        # Surface the problem to the caller rather than crashing the app.
        return f"Error analyzing sentiment: {e}", None
def extract_topic(text):
    """Pick the best-matching topic for *text* via zero-shot classification.

    Returns ``(topic, confidence)`` using the highest-ranked label from
    ``TOPIC_LABELS``; on any failure returns ``(error-message string, None)``.
    """
    try:
        ranked = topic_pipeline(text, TOPIC_LABELS)
        # The pipeline sorts labels by score, so index 0 is the winner.
        return ranked["labels"][0], ranked["scores"][0]
    except Exception as e:
        # Report the failure to the caller instead of raising.
        return f"Error extracting topic: {e}", None
def get_entry_index(prompt):
    """Return the zero-based entry index requested in *prompt*, or None.

    Detects phrases such as "data entry 1" or "entry 2" (case-insensitive)
    and converts the user's 1-based number to a 0-based storage index.
    """
    found = re.search(r'(?:data entry|entry)\s*(\d+)', prompt.lower())
    if found is None:
        return None
    # Users count entries from 1; the database is indexed from 0.
    return int(found.group(1)) - 1
def filter_ai_response(ai_text):
    """Trim a Gemini reply to its breakdown and conclusion sections.

    The model is prompted to answer with a breakdown introduced by
    "Let's break down this tweet-like MongoDB entry:" followed by a
    "Conclusion:" section.  This keeps only those two parts, dropping
    anything before the breakdown marker and removing the "Conclusion:"
    header itself.  If either marker is absent, the text is returned
    unchanged.
    """
    breakdown_marker = "Let's break down this tweet-like MongoDB entry:"
    conclusion_marker = "Conclusion:"
    if breakdown_marker not in ai_text or conclusion_marker not in ai_text:
        # Nothing to filter — pass the response through untouched.
        return ai_text
    _, _, after_breakdown = ai_text.partition(breakdown_marker)
    breakdown_part, _, conclusion_part = after_breakdown.partition(conclusion_marker)
    # Reassemble: keep the breakdown header, drop the "Conclusion:" header.
    return "\n".join([breakdown_marker, breakdown_part.strip(), conclusion_part.strip()])
def chatbot_response(user_prompt):
    """Generate a Gemini reply plus sentiment and topic analysis.

    If *user_prompt* references a stored document ("entry N"), that
    document's text is fetched from MongoDB, analyzed, and sent to Gemini
    with a structured-format instruction; otherwise the prompt itself is
    analyzed and forwarded verbatim.

    Returns a 5-tuple ``(response_text, sentiment_label,
    sentiment_confidence, topic_label, topic_confidence)``.  All five are
    None for an empty prompt; failures yield an error string followed by
    four Nones.
    """
    if not user_prompt:
        return None, None, None, None, None
    try:
        index = get_entry_index(user_prompt)
        if index is None:
            # Free-form question: analyze and forward the prompt as-is.
            analysis_text = user_prompt
            combined_prompt = user_prompt
        else:
            entry = get_entry_by_index(index)
            if entry is None:
                return "❌ No entry found for the requested index.", None, None, None, None
            analysis_text = entry.get("text", "No text available.")
            # Instruct Gemini to answer in the structure that
            # filter_ai_response expects to trim afterwards.
            combined_prompt = (
                f"Provide analysis for the following MongoDB entry:\n\n"
                f"{analysis_text}\n\n"
                "Please respond in the following format:\n"
                "Let's break down this tweet-like MongoDB entry:\n[Your detailed analysis here]\n"
                "Conclusion:\n[Your conclusion here]"
            )
        # Local sentiment/topic analysis on whichever text was chosen above.
        sentiment_label, sentiment_confidence = analyze_sentiment(analysis_text)
        topic_label, topic_confidence = extract_topic(analysis_text)
        # Generate the AI response with Gemini, then trim it to the
        # breakdown/conclusion sections.
        gemini = genai.GenerativeModel("gemini-1.5-pro")
        reply = gemini.generate_content(combined_prompt)
        return (
            filter_ai_response(reply.text),
            sentiment_label,
            sentiment_confidence,
            topic_label,
            topic_confidence,
        )
    except Exception as e:
        return f"❌ Error: {e}", None, None, None, None
|