import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import json
import requests
import re
# Function to extract text from HTML (from shopping_assistant.py)
def extract_text_from_html(html):
    """
    Extract text from HTML without using BeautifulSoup
    """
    # Remove HTML tags
    text = re.sub(r'<[^>]+>', ' ', html)
    # Remove extra whitespace
    text = re.sub(r'\s+', ' ', text)
    # Decode the named HTML entities WordPress typically emits
    # (decode &amp; last so double-encoded entities aren't decoded twice)
    text = text.replace('&nbsp;', ' ').replace('&lt;', '<').replace('&gt;', '>').replace('&amp;', '&')
    return text.strip()
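
# Illustrative example of the extractor above:
#   extract_text_from_html('<p>50% off &amp; free shipping</p>')
#   -> '50% off & free shipping'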
# Function to fetch deals from DealsFinders.com (from shopping_assistant.py)
def fetch_deals_data(url="https://www.dealsfinders.com/wp-json/wp/v2/posts", num_pages=2, per_page=100):
    """
    Fetch deals data exclusively from the DealsFinders API
    """
    all_deals = []

    # Fetch from the DealsFinders API
    for page in range(1, num_pages + 1):
        try:
            # Add a user agent to avoid being blocked
            headers = {
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36'
            }
            # A timeout keeps a slow or unresponsive API from hanging the app
            response = requests.get(f"{url}?page={page}&per_page={per_page}", headers=headers, timeout=10)

            if response.status_code == 200:
                deals = response.json()
                all_deals.extend(deals)
                print(f"Fetched page {page} with {len(deals)} deals from DealsFinders API")

                # If we get fewer deals than requested, we've reached the end
                if len(deals) < per_page:
                    print(f"Reached the end of available deals at page {page}")
                    break
            else:
                print(f"Failed to fetch page {page} from DealsFinders API: {response.status_code}")
                break
        except Exception as e:
            print(f"Error fetching page {page} from DealsFinders API: {str(e)}")
            break

    return all_deals
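
# The WordPress REST API returns each post as a dict; the fields this app
# relies on look roughly like (values illustrative):
#   {'id': 123, 'date': '2024-01-01T00:00:00', 'link': 'https://...',
#    'title': {'rendered': '...'}, 'content': {'rendered': '<p>...</p>'},
#    'excerpt': {'rendered': '<p>...</p>'}}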
# Function to process deals data (from shopping_assistant.py)
def process_deals_data(deals_data):
    """
    Process the deals data into a structured format
    """
    processed_deals = []

    for deal in deals_data:
        try:
            # Extract relevant information using our HTML text extractor
            content_html = deal.get('content', {}).get('rendered', '')
            excerpt_html = deal.get('excerpt', {}).get('rendered', '')

            clean_content = extract_text_from_html(content_html)
            clean_excerpt = extract_text_from_html(excerpt_html)

            processed_deal = {
                'id': deal.get('id'),
                'title': deal.get('title', {}).get('rendered', ''),
                'link': deal.get('link', ''),
                'date': deal.get('date', ''),
                'content': clean_content,
                'excerpt': clean_excerpt
            }
            processed_deals.append(processed_deal)
        except Exception as e:
            print(f"Error processing deal: {str(e)}")

    return processed_deals
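
# Each processed deal is a flat dict of plain strings, e.g. (illustrative):
#   {'id': 123, 'title': 'AirPods Pro $189', 'link': 'https://...',
#    'date': '2024-01-01T00:00:00', 'content': '...', 'excerpt': '...'}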
# Load the e-commerce specific model and tokenizer
try:
    # Try to load the e-commerce BERT model
    tokenizer = AutoTokenizer.from_pretrained("prithivida/ecommerce-bert-base-uncased")
    model = AutoModelForSequenceClassification.from_pretrained("prithivida/ecommerce-bert-base-uncased")

    # E-commerce BERT categories
    categories = [
        "electronics", "computers", "mobile_phones", "accessories",
        "clothing", "footwear", "watches", "jewelry",
        "home", "kitchen", "furniture", "decor",
        "beauty", "personal_care", "health", "wellness",
        "toys", "games", "sports", "outdoors",
        "books", "stationery", "music", "movies"
    ]
    print("Using e-commerce BERT model")
except Exception as e:
    # Fall back to local model if e-commerce BERT fails to load
    print(f"Error loading e-commerce BERT model: {str(e)}")
    print("Falling back to local model")
    model_id = "selvaonline/shopping-assistant"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForSequenceClassification.from_pretrained(model_id)

    # Load the local categories
    try:
        from huggingface_hub import hf_hub_download
        categories_path = hf_hub_download(repo_id=model_id, filename="categories.json")
        with open(categories_path, "r") as f:
            categories = json.load(f)
    except Exception as e:
        print(f"Error loading categories: {str(e)}")
        categories = ["electronics", "clothing", "home", "kitchen", "toys", "other"]
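
# Note: classify_text below compares the model's output width against
# len(categories); if the loaded checkpoint's classification head has a
# different number of labels, it falls back to single-label top-k scoring.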
# Global variable to store deals data
deals_cache = None
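
# The cache is per-process and never expires; for a long-running Space one
# could store a fetch timestamp alongside it and refetch after a TTL
# (a possible refinement, not part of the original design).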
def classify_text(text, fetch_deals=True):
    """
    Classify the text using the model and fetch relevant deals
    """
    global deals_cache

    # Prepare the input for classification
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)

    # Get the model prediction
    with torch.no_grad():
        outputs = model(**inputs)

    # Handle different model output formats
    if hasattr(outputs, 'logits'):
        # For models that return logits
        if outputs.logits.shape[1] == len(categories):
            # Multi-label classification
            predictions = torch.sigmoid(outputs.logits)

            # Get the top categories
            top_categories = []
            for i, score in enumerate(predictions[0]):
                if score > 0.3:  # Lower threshold for e-commerce model
                    top_categories.append((categories[i], score.item()))
        else:
            # Single-label classification; guard against heads with fewer than 3 labels
            probabilities = torch.softmax(outputs.logits, dim=1)
            values, indices = torch.topk(probabilities, min(3, probabilities.shape[1]))

            top_categories = []
            for i, idx in enumerate(indices[0]):
                if idx < len(categories):
                    top_categories.append((categories[idx.item()], values[0][i].item()))
    else:
        # Fallback for other model formats
        predictions = torch.sigmoid(outputs[0])

        # Get the top categories
        top_categories = []
        for i, score in enumerate(predictions[0]):
            if score > 0.5:
                top_categories.append((categories[i], score.item()))

    # Sort by score
    top_categories.sort(key=lambda x: x[1], reverse=True)
    # Format the classification results
    if top_categories:
        result = f"Top categories for '{text}':\n\n"
        for category, score in top_categories:
            result += f"- {category}: {score:.4f}\n"
        result += f"\nBased on your query, I would recommend looking for deals in the **{top_categories[0][0]}** category.\n\n"
    else:
        result = f"No categories found for '{text}'. Please try a different query.\n\n"
    # Fetch and display deals if requested
    if fetch_deals:
        result += "## Relevant Deals from DealsFinders.com\n\n"
        try:
            # Fetch deals data if not already cached
            if deals_cache is None:
                deals_data = fetch_deals_data(num_pages=2)  # Limit to 2 pages for faster response
                deals_cache = process_deals_data(deals_data)

            # Extract query terms and expand with related terms
            query_terms = text.lower().split()
            expanded_terms = list(query_terms)

            # Add related terms based on the query
            if any(term in text.lower() for term in ['headphone', 'headphones']):
                expanded_terms.extend(['earbuds', 'earphones', 'earpods', 'airpods', 'audio', 'bluetooth', 'wireless'])
            elif any(term in text.lower() for term in ['laptop', 'computer']):
                expanded_terms.extend(['notebook', 'macbook', 'chromebook', 'pc'])
            elif any(term in text.lower() for term in ['tv', 'television']):
                expanded_terms.extend(['smart tv', 'roku', 'streaming'])
            elif any(term in text.lower() for term in ['kitchen', 'appliance']):
                expanded_terms.extend(['mixer', 'blender', 'toaster', 'microwave', 'oven'])

            # Score deals based on relevance to the query
            scored_deals = []
            for deal in deals_cache:
                title = deal['title'].lower()
                content = deal['content'].lower()
                excerpt = deal['excerpt'].lower()
                score = 0

                # Check original query terms (higher weight)
                for term in query_terms:
                    if term in title:
                        score += 10
                    if term in content:
                        score += 3
                    if term in excerpt:
                        score += 3

                # Check expanded terms (lower weight)
                for term in expanded_terms:
                    if term not in query_terms:  # Skip original terms
                        if term in title:
                            score += 5
                        if term in content:
                            score += 1
                        if term in excerpt:
                            score += 1

                # Add to scored deals if it has any relevance
                if score > 0:
                    scored_deals.append((deal, score))

            # Sort by score (descending)
            scored_deals.sort(key=lambda x: x[1], reverse=True)

            # Extract the deals from the scored list
            relevant_deals = [deal for deal, _ in scored_deals[:5]]

            if relevant_deals:
                for i, deal in enumerate(relevant_deals, 1):
                    result += f"{i}. [{deal['title']}]({deal['link']})\n\n"
            else:
                result += "No specific deals found for your query. Try a different search term or browse the recommended category.\n\n"
        except Exception as e:
            result += f"Error fetching deals: {str(e)}\n\n"

    return result
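
# Quick sanity check without the UI (illustrative; with fetch_deals=True this
# hits the live API, so keep it commented out in the deployed Space):
#   print(classify_text("headphone deals", fetch_deals=False))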
# Create the Gradio interface
demo = gr.Interface(
    fn=classify_text,
    inputs=[
        gr.Textbox(
            lines=2,
            placeholder="Enter your shopping query here...",
            label="Shopping Query"
        ),
        gr.Checkbox(
            label="Fetch Deals",
            value=True,
            info="Check to fetch and display deals from DealsFinders.com"
        )
    ],
    outputs=gr.Markdown(label="Results"),
    title="Shopping Assistant",
    description="""
    This demo shows how to use the Shopping Assistant model to classify shopping queries into categories and find relevant deals.
    Enter a shopping query below to see which categories it belongs to and find deals from DealsFinders.com.

    Examples:
    - "I'm looking for headphones"
    - "Do you have any kitchen appliance deals?"
    - "Show me the best laptop deals"
    - "I need a new smart TV"
    """,
    examples=[
        ["I'm looking for headphones", True],
        ["Do you have any kitchen appliance deals?", True],
        ["Show me the best laptop deals", True],
        ["I need a new smart TV", True],
        ["headphone deals", True]
    ],
    theme=gr.themes.Soft()
)
# Launch the app
if __name__ == "__main__":
    demo.launch()
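
# When running locally, Gradio can also expose a temporary public link via
# demo.launch(share=True); on Hugging Face Spaces, hosting is automatic.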