Spaces:
Sleeping
Sleeping
File size: 13,787 Bytes
01a3727 6e2368e 01a3727 b1b6f63 01a3727 b1b6f63 e667020 b1b6f63 e667020 b1b6f63 e667020 a6565d8 e667020 a6565d8 e667020 b1b6f63 01a3727 6e2368e 01a3727 6e2368e 01a3727 6e2368e b1b6f63 e667020 b1b6f63 e667020 b1b6f63 01a3727 6e2368e 01a3727 6e2368e 01a3727 6e2368e b1b6f63 e360e01 b1b6f63 6f89f62 b1b6f63 6f89f62 b1b6f63 6f89f62 b1b6f63 6f89f62 b1b6f63 6f89f62 b1b6f63 6f89f62 b1b6f63 6e2368e 01a3727 6e2368e 01a3727 6e2368e 01a3727 6e2368e 01a3727 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 |
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import json
import os
import requests
import re
# Function to extract text from HTML (from shopping_assistant.py)
def extract_text_from_html(html):
    """
    Extract plain text from an HTML fragment without using BeautifulSoup.

    Tags are replaced by spaces, HTML entities are decoded, and every run of
    whitespace (including non-breaking spaces) is collapsed to a single space.

    Args:
        html: HTML markup as a string. ``None`` or an empty string yields "".

    Returns:
        The text content with leading and trailing whitespace removed.
    """
    # The parameter shadows the name of the stdlib ``html`` module, so the
    # decoder is imported under its own local name.
    from html import unescape as decode_entities

    if not html:
        return ""
    # Remove HTML tags
    text = re.sub(r'<[^>]+>', ' ', html)
    # Decode all HTML entities (named and numeric), not just a handful
    text = decode_entities(text)
    # Collapse whitespace last so that whitespace produced by decoded
    # entities (a non-breaking space becomes U+00A0) is normalised as well
    text = re.sub(r'\s+', ' ', text)
    return text.strip()
# Function to fetch deals from DealsFinders.com (from shopping_assistant.py)
def fetch_deals_data(url="https://www.dealsfinders.com/wp-json/wp/v2/posts", num_pages=2, per_page=100, timeout=10):
    """
    Fetch deals data exclusively from the DealsFinders API.

    Pages are requested one after another, starting at page 1. Fetching stops
    at the first failed request, the first non-200 response, the first
    response that is not a JSON list, or the first page holding fewer than
    ``per_page`` items. Whatever was collected up to that point is returned.

    Args:
        url: Endpoint that returns a JSON list of posts.
        num_pages: Maximum number of pages to request.
        per_page: Number of items requested per page.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list with the decoded JSON items of all pages that were fetched.
    """
    all_deals = []
    # Add a user agent to avoid being blocked
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36'
    }
    # Fetch from the DealsFinders API
    for page in range(1, num_pages + 1):
        try:
            # Let requests build the query string so a URL that already has
            # query parameters stays valid; the timeout keeps a stalled
            # connection from blocking the caller forever.
            response = requests.get(
                url,
                params={"page": page, "per_page": per_page},
                headers=headers,
                timeout=timeout,
            )
        except requests.RequestException as e:
            print(f"Error fetching page {page} from DealsFinders API: {str(e)}")
            break
        if response.status_code != 200:
            print(f"Failed to fetch page {page} from DealsFinders API: {response.status_code}")
            break
        try:
            deals = response.json()
        except ValueError as e:
            # Body was not valid JSON
            print(f"Error fetching page {page} from DealsFinders API: {str(e)}")
            break
        if not isinstance(deals, list):
            # Extending the result with a dict would add its keys as "deals"
            print(f"Error fetching page {page} from DealsFinders API: unexpected response format")
            break
        all_deals.extend(deals)
        print(f"Fetched page {page} with {len(deals)} deals from DealsFinders API")
        # If we get fewer deals than requested, we've reached the end
        if len(deals) < per_page:
            print(f"Reached the end of available deals at page {page}")
            break
    return all_deals
# Function to process deals data (from shopping_assistant.py)
def process_deals_data(deals_data):
    """
    Turn raw deal records into flat dictionaries with plain-text fields.

    For every record the rendered ``content`` and ``excerpt`` markup is run
    through ``extract_text_from_html``; ``id``, ``title``, ``link`` and
    ``date`` are copied over. A record that raises while being handled is
    reported with ``print`` and left out of the result.
    """
    def rendered(record, field):
        # Each of these fields is read as {'rendered': <markup>}
        return record.get(field, {}).get('rendered', '')

    structured = []
    for raw in deals_data:
        try:
            # Extract relevant information using our HTML text extractor
            body_markup = rendered(raw, 'content')
            summary_markup = rendered(raw, 'excerpt')
            body_text = extract_text_from_html(body_markup)
            summary_text = extract_text_from_html(summary_markup)
            structured.append({
                'id': raw.get('id'),
                'title': rendered(raw, 'title'),
                'link': raw.get('link', ''),
                'date': raw.get('date', ''),
                'content': body_text,
                'excerpt': summary_text,
            })
        except Exception as e:
            print(f"Error processing deal: {str(e)}")
    return structured
# Define product categories
# Maps each product category label to the sentence that describes it.
# Insertion order matters: it fixes the order of ``categories`` below.
category_descriptions = dict(
    electronics="Electronic devices like headphones, speakers, TVs, smartphones, and gadgets",
    computers="Laptops, desktops, computer parts, monitors, and computing accessories",
    mobile="Mobile phones, smartphones, phone cases, screen protectors, and chargers",
    audio="Headphones, earbuds, speakers, microphones, and audio equipment",
    clothing="Clothes, shirts, pants, dresses, and fashion items",
    footwear="Shoes, boots, sandals, slippers, and all types of footwear",
    home="Home decor, furniture, bedding, and household items",
    kitchen="Kitchen appliances, cookware, utensils, and kitchen gadgets",
    toys="Toys, games, and children's entertainment items",
    sports="Sports equipment, fitness gear, and outdoor recreation items",
    beauty="Beauty products, makeup, skincare, and personal care items",
    books="Books, e-books, audiobooks, and reading materials",
)
# Category labels, in the order they were declared above
categories = list(category_descriptions)
# Try to load the recommended models
# Model setup, executed once when the module is imported.
#
# Preferred path (the ``try`` body): a zero-shot classification pipeline plus a
# sentence-transformers model. It defines ``classifier``, ``sentence_model``,
# ``util``, ``category_texts`` and ``category_embeddings`` and sets
# ``using_recommended_models`` to True.
#
# Fallback path (the ``except`` body): taken when anything in the ``try`` body
# raises, including the two imports. It loads a tokenizer and a sequence
# classification model from the directory containing this file, replaces
# ``categories``, and sets ``using_recommended_models`` to False.
try:
    # 1. Load BART model for zero-shot classification
    from transformers import pipeline
    # Initialize the zero-shot classification pipeline
    classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
    print("Using facebook/bart-large-mnli for classification")
    # 2. Load MPNet model for semantic search
    from sentence_transformers import SentenceTransformer, util
    # Load the sentence transformer model
    sentence_model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
    print("Using sentence-transformers/all-mpnet-base-v2 for semantic search")
    # Pre-compute embeddings for category descriptions
    # NOTE(review): category_embeddings is not read anywhere in the code
    # visible here - confirm whether it is still needed.
    category_texts = list(category_descriptions.values())
    category_embeddings = sentence_model.encode(category_texts, convert_to_tensor=True)
    # Using recommended models
    using_recommended_models = True
except Exception as e:
    # Fall back to local model if recommended models fail to load
    print(f"Error loading recommended models: {str(e)}")
    print("Falling back to local model")
    # The local model files are expected in the same directory as this file
    model_path = os.path.dirname(os.path.abspath(__file__))
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForSequenceClassification.from_pretrained(model_path)
    # Load the local categories
    try:
        with open(os.path.join(model_path, "categories.json"), "r") as f:
            categories = json.load(f)
    except Exception as e:
        # categories.json missing or unreadable: use a built-in default list
        print(f"Error loading categories: {str(e)}")
        categories = ["electronics", "clothing", "home", "kitchen", "toys", "other"]
    # Not using recommended models
    using_recommended_models = False
# Global variable to store deals data
# None means "not fetched yet"; classify_text fills it on first use.
deals_cache = None
# Sentence embeddings for the deals held in ``deals_cache``. They are computed
# on the first semantic search and reused afterwards, because the deal texts
# do not change between queries and re-encoding them repeats identical work.
_deal_embeddings_cache = None


def _top_categories_for(text):
    """Return ``(category, score)`` pairs for ``text``, best match first."""
    if using_recommended_models:
        # Using BART for zero-shot classification
        prediction = classifier(text, categories, multi_label=True)
        # Only the first three entries are considered (the pipeline is
        # documented to return labels ordered by likelihood); of those, keep
        # the ones above a low threshold suited to zero-shot classification.
        leading = list(zip(prediction['labels'], prediction['scores']))[:3]
        return [(category, score) for category, score in leading if score > 0.1]

    # Using the original classification model
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
        predictions = torch.sigmoid(outputs.logits)
    # zip() stops at the shorter sequence, so a model with more outputs than
    # known category names cannot cause an IndexError.
    top_categories = [
        (category, score.item())
        for category, score in zip(categories, predictions[0])
        if score > 0.5  # Threshold for multi-label classification
    ]
    top_categories.sort(key=lambda x: x[1], reverse=True)
    return top_categories


def _semantic_deal_search(text, deals):
    """Return up to five deals whose title and excerpt are closest to ``text``."""
    global _deal_embeddings_cache
    if _deal_embeddings_cache is None or len(_deal_embeddings_cache) != len(deals):
        # Combine title and excerpt for better matching
        deal_texts = [f"{deal['title']} {deal['excerpt']}" for deal in deals]
        _deal_embeddings_cache = sentence_model.encode(deal_texts, convert_to_tensor=True)
    query_embedding = sentence_model.encode(text, convert_to_tensor=True)
    # Calculate semantic similarity
    similarities = util.cos_sim(query_embedding, _deal_embeddings_cache)[0]
    # Get top 5 most similar deals
    top_indices = torch.topk(similarities, k=min(5, len(deals))).indices
    # Convert to plain ints before indexing the Python list
    return [deals[idx] for idx in top_indices.tolist()]


def _keyword_deal_search(text, deals):
    """Return up to five deals ranked by keyword overlap with ``text``."""
    lowered = text.lower()
    query_terms = lowered.split()
    # Related terms for the first group whose trigger occurs in the query
    related_groups = [
        (['headphone', 'headphones'],
         ['earbuds', 'earphones', 'earpods', 'airpods', 'audio', 'bluetooth', 'wireless']),
        (['laptop', 'computer'], ['notebook', 'macbook', 'chromebook', 'pc']),
        (['tv', 'television'], ['smart tv', 'roku', 'streaming']),
        (['kitchen', 'appliance'], ['mixer', 'blender', 'toaster', 'microwave', 'oven']),
    ]
    related_terms = []
    for triggers, extras in related_groups:
        if any(trigger in lowered for trigger in triggers):
            related_terms = extras
            break
    # Terms typed by the user are scored separately, with a higher weight
    extra_terms = [term for term in related_terms if term not in query_terms]

    def weighted_hits(terms, title, content, excerpt, title_weight, body_weight):
        total = 0
        for term in terms:
            if term in title:
                total += title_weight
            if term in content:
                total += body_weight
            if term in excerpt:
                total += body_weight
        return total

    scored_deals = []
    for deal in deals:
        title = deal['title'].lower()
        content = deal['content'].lower()
        excerpt = deal['excerpt'].lower()
        # Original query terms (higher weight) plus expanded terms (lower weight)
        score = (weighted_hits(query_terms, title, content, excerpt, 10, 3)
                 + weighted_hits(extra_terms, title, content, excerpt, 5, 1))
        # Add to scored deals if it has any relevance
        if score > 0:
            scored_deals.append((deal, score))
    # Sort by score (descending)
    scored_deals.sort(key=lambda x: x[1], reverse=True)
    return [deal for deal, _ in scored_deals[:5]]


def classify_text(text, fetch_deals=True):
    """
    Classify the text using the model and fetch relevant deals.

    Args:
        text: The shopping query entered by the user.
        fetch_deals: When true, a list of matching deals is appended to the
            classification summary.

    Returns:
        A Markdown string with the matching categories and, if requested, up
        to five deals. An empty or whitespace-only query returns a short
        prompt without running any model. Errors while fetching or ranking
        deals are reported inside the returned text instead of being raised.
    """
    global deals_cache, _deal_embeddings_cache
    # Nothing to classify: answer right away instead of feeding an empty
    # sequence to the classifier.
    if not text or not text.strip():
        return "Please enter a shopping query.\n\n"

    # Get the top categories based on the model type
    top_categories = _top_categories_for(text)

    # Format the classification results
    if top_categories:
        result = f"Top categories for '{text}':\n\n"
        for category, score in top_categories:
            result += f"- {category}: {score:.4f}\n"
        result += f"\nBased on your query, I would recommend looking for deals in the **{top_categories[0][0]}** category.\n\n"
    else:
        result = f"No categories found for '{text}'. Please try a different query.\n\n"

    # Fetch and display deals if requested
    if fetch_deals:
        result += "## Relevant Deals from DealsFinders.com\n\n"
        try:
            # Fetch deals data if not already cached. An empty result is not
            # cached, so a failed fetch is retried on the next query.
            if not deals_cache:
                deals_data = fetch_deals_data(num_pages=2)  # Limit to 2 pages for faster response
                deals_cache = process_deals_data(deals_data) or None
                _deal_embeddings_cache = None
            deals = deals_cache or []

            if not deals:
                relevant_deals = []
            elif using_recommended_models:
                # Using MPNet for semantic search
                relevant_deals = _semantic_deal_search(text, deals)
            else:
                # Fallback to keyword-based search
                relevant_deals = _keyword_deal_search(text, deals)

            if relevant_deals:
                for i, deal in enumerate(relevant_deals, 1):
                    result += f"{i}. [{deal['title']}]({deal['link']})\n\n"
            else:
                result += "No specific deals found for your query. Try a different search term or browse the recommended category.\n\n"
        except Exception as e:
            result += f"Error fetching deals: {str(e)}\n\n"
    return result
# Create the Gradio interface
# Text shown below the title of the demo page
_DEMO_DESCRIPTION = """
This demo shows how to use the Shopping Assistant model to classify shopping queries into categories and find relevant deals.
Enter a shopping query below to see which categories it belongs to and find deals from DealsFinders.com.
Examples:
- "I'm looking for headphones"
- "Do you have any kitchen appliance deals?"
- "Show me the best laptop deals"
- "I need a new smart TV"
"""

# Sample queries offered as clickable examples; each one also ticks "Fetch Deals"
_DEMO_QUERIES = (
    "I'm looking for headphones",
    "Do you have any kitchen appliance deals?",
    "Show me the best laptop deals",
    "I need a new smart TV",
    "headphone deals",
)

# Create the Gradio interface: a query box and a checkbox feed classify_text,
# whose Markdown result is rendered as the output.
demo = gr.Interface(
    fn=classify_text,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter your shopping query here...", label="Shopping Query"),
        gr.Checkbox(label="Fetch Deals", value=True, info="Check to fetch and display deals from DealsFinders.com"),
    ],
    outputs=gr.Markdown(label="Results"),
    title="Shopping Assistant",
    description=_DEMO_DESCRIPTION,
    examples=[[query, True] for query in _DEMO_QUERIES],
    theme=gr.themes.Soft(),
)

# Launch the app only when this file is run as a script
if __name__ == "__main__":
    demo.launch()
|