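"""
Shopping Assistant demo: classify a shopping query into product categories
and surface matching deals fetched from DealsFinders.com.
"""
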
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import json
import os
import requests
import re


# Function to extract text from HTML (from shopping_assistant.py)
def extract_text_from_html(html):
    """
    Extract text from HTML without using BeautifulSoup
    """
    # Remove HTML tags
    text = re.sub(r'<[^>]+>', ' ', html)
    # Remove extra whitespace
    text = re.sub(r'\s+', ' ', text)
    # Decode common HTML entities
    text = text.replace('&nbsp;', ' ').replace('&amp;', '&').replace('&lt;', '<').replace('&gt;', '>')
    return text.strip()
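
# Example of the function above:
#   extract_text_from_html('<p>50% off &amp; free shipping</p>')
#   -> '50% off & free shipping'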


# Function to fetch deals from DealsFinders.com (from shopping_assistant.py)
def fetch_deals_data(url="https://www.dealsfinders.com/wp-json/wp/v2/posts", num_pages=2, per_page=100):
    """
    Fetch deals data exclusively from the DealsFinders API
    """
    all_deals = []

    # Fetch from the DealsFinders API
    for page in range(1, num_pages + 1):
        try:
            # Add a user agent to avoid being blocked
            headers = {
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36'
            }
            # The timeout (added here) keeps a stalled request from hanging the app
            response = requests.get(f"{url}?page={page}&per_page={per_page}", headers=headers, timeout=10)

            if response.status_code == 200:
                deals = response.json()
                all_deals.extend(deals)
                print(f"Fetched page {page} with {len(deals)} deals from DealsFinders API")

                # If we get fewer deals than requested, we've reached the end
                if len(deals) < per_page:
                    print(f"Reached the end of available deals at page {page}")
                    break
            else:
                print(f"Failed to fetch page {page} from DealsFinders API: {response.status_code}")
                break
        except Exception as e:
            print(f"Error fetching page {page} from DealsFinders API: {str(e)}")
            break

    return all_deals
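
# For reference, the request issued for page 1 with the defaults above is:
#   https://www.dealsfinders.com/wp-json/wp/v2/posts?page=1&per_page=100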


# Function to process deals data (from shopping_assistant.py)
def process_deals_data(deals_data):
    """
    Process the deals data into a structured format
    """
    processed_deals = []

    for deal in deals_data:
        try:
            # Extract relevant information using our HTML text extractor
            content_html = deal.get('content', {}).get('rendered', '')
            excerpt_html = deal.get('excerpt', {}).get('rendered', '')
            clean_content = extract_text_from_html(content_html)
            clean_excerpt = extract_text_from_html(excerpt_html)

            processed_deal = {
                'id': deal.get('id'),
                'title': deal.get('title', {}).get('rendered', ''),
                'link': deal.get('link', ''),
                'date': deal.get('date', ''),
                'content': clean_content,
                'excerpt': clean_excerpt
            }
            processed_deals.append(processed_deal)
        except Exception as e:
            print(f"Error processing deal: {str(e)}")

    return processed_deals
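
# For reference, each raw post from the WordPress REST API is shaped roughly
# like the following (only the fields read above are shown):
#   {'id': 123, 'date': '2024-01-01T00:00:00', 'link': 'https://...',
#    'title': {'rendered': '...'}, 'content': {'rendered': '...'},
#    'excerpt': {'rendered': '...'}}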


# Define product categories
category_descriptions = {
    "electronics": "Electronic devices like headphones, speakers, TVs, smartphones, and gadgets",
    "computers": "Laptops, desktops, computer parts, monitors, and computing accessories",
    "mobile": "Mobile phones, smartphones, phone cases, screen protectors, and chargers",
    "audio": "Headphones, earbuds, speakers, microphones, and audio equipment",
    "clothing": "Clothes, shirts, pants, dresses, and fashion items",
    "footwear": "Shoes, boots, sandals, slippers, and all types of footwear",
    "home": "Home decor, furniture, bedding, and household items",
    "kitchen": "Kitchen appliances, cookware, utensils, and kitchen gadgets",
    "toys": "Toys, games, and children's entertainment items",
    "sports": "Sports equipment, fitness gear, and outdoor recreation items",
    "beauty": "Beauty products, makeup, skincare, and personal care items",
    "books": "Books, e-books, audiobooks, and reading materials"
}

# List of categories
categories = list(category_descriptions.keys())
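
# The keys double as candidate labels for zero-shot classification below,
# while the full descriptions are embedded by the sentence-transformer model.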

# Try to load the recommended models
try:
    # 1. Load BART model for zero-shot classification
    from transformers import pipeline

    # Initialize the zero-shot classification pipeline
    classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
    print("Using facebook/bart-large-mnli for classification")

    # 2. Load MPNet model for semantic search
    from sentence_transformers import SentenceTransformer, util

    # Load the sentence transformer model
    sentence_model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
    print("Using sentence-transformers/all-mpnet-base-v2 for semantic search")

    # Pre-compute embeddings for category descriptions
    category_texts = list(category_descriptions.values())
    category_embeddings = sentence_model.encode(category_texts, convert_to_tensor=True)

    # Using recommended models
    using_recommended_models = True
except Exception as e:
    # Fall back to local model if recommended models fail to load
    print(f"Error loading recommended models: {str(e)}")
    print("Falling back to local model")
    model_path = os.path.dirname(os.path.abspath(__file__))
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForSequenceClassification.from_pretrained(model_path)

    # Load the local categories
    try:
        with open(os.path.join(model_path, "categories.json"), "r") as f:
            categories = json.load(f)
    except Exception as e:
        print(f"Error loading categories: {str(e)}")
        categories = ["electronics", "clothing", "home", "kitchen", "toys", "other"]

    # Not using recommended models
    using_recommended_models = False

# Global variable to store deals data
deals_cache = None


def classify_text(text, fetch_deals=True):
    """
    Classify the text using the model and fetch relevant deals
    """
    global deals_cache

    # Get the top categories based on the model type
    if using_recommended_models:
        # Using BART for zero-shot classification
        result = classifier(text, categories, multi_label=True)

        # Extract categories and scores
        top_categories = []
        for i, (category, score) in enumerate(zip(result['labels'], result['scores'])):
            if score > 0.1:  # Lower threshold for zero-shot classification
                top_categories.append((category, score))
            # Limit to top 3 categories
            if i >= 2:
                break
    else:
        # Using the original classification model
        inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)

        # Get the model prediction
        with torch.no_grad():
            outputs = model(**inputs)
            predictions = torch.sigmoid(outputs.logits)

        # Get the top categories
        top_categories = []
        for i, score in enumerate(predictions[0]):
            if score > 0.5:  # Threshold for multi-label classification
                top_categories.append((categories[i], score.item()))

    # Sort by score
    top_categories.sort(key=lambda x: x[1], reverse=True)
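
    # Either way, top_categories now holds (category, score) pairs in
    # descending order of confidence; the zero-shot pipeline already returns
    # labels sorted by score, so the sort above is a no-op for that branch.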

    # Format the classification results
    if top_categories:
        result = f"Top categories for '{text}':\n\n"
        for category, score in top_categories:
            result += f"- {category}: {score:.4f}\n"
        result += f"\nBased on your query, I would recommend looking for deals in the **{top_categories[0][0]}** category.\n\n"
    else:
        result = f"No categories found for '{text}'. Please try a different query.\n\n"

    # Fetch and display deals if requested
    if fetch_deals:
        result += "## Relevant Deals from DealsFinders.com\n\n"
        try:
            # Fetch deals data if not already cached
            if deals_cache is None:
                deals_data = fetch_deals_data(num_pages=2)  # Limit to 2 pages for faster response
                deals_cache = process_deals_data(deals_data)

            # Use MPNet for semantic search if available
            if using_recommended_models:
                # Create deal texts for semantic search
                deal_texts = []
                for deal in deals_cache:
                    # Combine title and excerpt for better matching
                    deal_text = f"{deal['title']} {deal['excerpt']}"
                    deal_texts.append(deal_text)

                # Encode the query and deals
                query_embedding = sentence_model.encode(text, convert_to_tensor=True)
                deal_embeddings = sentence_model.encode(deal_texts, convert_to_tensor=True)

                # Calculate semantic similarity
                similarities = util.cos_sim(query_embedding, deal_embeddings)[0]

                # Get the top 5 most similar deals
                top_indices = torch.topk(similarities, k=min(5, len(deals_cache))).indices

                # Extract the relevant deals
                relevant_deals = [deals_cache[idx] for idx in top_indices]
            else:
                # Fall back to keyword-based search
                query_terms = text.lower().split()
                expanded_terms = list(query_terms)

                # Add related terms based on the query
                if any(term in text.lower() for term in ['headphone', 'headphones']):
                    expanded_terms.extend(['earbuds', 'earphones', 'earpods', 'airpods', 'audio', 'bluetooth', 'wireless'])
                elif any(term in text.lower() for term in ['laptop', 'computer']):
                    expanded_terms.extend(['notebook', 'macbook', 'chromebook', 'pc'])
                elif any(term in text.lower() for term in ['tv', 'television']):
                    expanded_terms.extend(['smart tv', 'roku', 'streaming'])
                elif any(term in text.lower() for term in ['kitchen', 'appliance']):
                    expanded_terms.extend(['mixer', 'blender', 'toaster', 'microwave', 'oven'])

                # Score deals based on relevance to the query
                scored_deals = []
                for deal in deals_cache:
                    title = deal['title'].lower()
                    content = deal['content'].lower()
                    excerpt = deal['excerpt'].lower()
                    score = 0

                    # Check original query terms (higher weight)
                    for term in query_terms:
                        if term in title:
                            score += 10
                        if term in content:
                            score += 3
                        if term in excerpt:
                            score += 3

                    # Check expanded terms (lower weight)
                    for term in expanded_terms:
                        if term not in query_terms:  # Skip original terms
                            if term in title:
                                score += 5
                            if term in content:
                                score += 1
                            if term in excerpt:
                                score += 1

                    # Add to scored deals if it has any relevance
                    if score > 0:
                        scored_deals.append((deal, score))

                # Sort by score (descending)
                scored_deals.sort(key=lambda x: x[1], reverse=True)

                # Extract the deals from the scored list
                relevant_deals = [deal for deal, _ in scored_deals[:5]]

            if relevant_deals:
                for i, deal in enumerate(relevant_deals, 1):
                    result += f"{i}. [{deal['title']}]({deal['link']})\n\n"
            else:
                result += "No specific deals found for your query. Try a different search term or browse the recommended category.\n\n"
        except Exception as e:
            result += f"Error fetching deals: {str(e)}\n\n"

    return result
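
# Example (fetching deals requires network access):
#   print(classify_text("wireless earbuds", fetch_deals=False))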


# Create the Gradio interface
demo = gr.Interface(
    fn=classify_text,
    inputs=[
        gr.Textbox(
            lines=2,
            placeholder="Enter your shopping query here...",
            label="Shopping Query"
        ),
        gr.Checkbox(
            label="Fetch Deals",
            value=True,
            info="Check to fetch and display deals from DealsFinders.com"
        )
    ],
    outputs=gr.Markdown(label="Results"),
    title="Shopping Assistant",
    description="""
    This demo shows how to use the Shopping Assistant model to classify shopping queries into categories and find relevant deals.

    Enter a shopping query below to see which categories it belongs to and find deals from DealsFinders.com.

    Examples:
    - "I'm looking for headphones"
    - "Do you have any kitchen appliance deals?"
    - "Show me the best laptop deals"
    - "I need a new smart TV"
    """,
    examples=[
        ["I'm looking for headphones", True],
        ["Do you have any kitchen appliance deals?", True],
        ["Show me the best laptop deals", True],
        ["I need a new smart TV", True],
        ["headphone deals", True]
    ],
    theme=gr.themes.Soft()
)

# Launch the app
if __name__ == "__main__":
    demo.launch()
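    # When running locally rather than on Spaces, a temporary public URL can
    # be requested with demo.launch(share=True).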
