import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import json
import os
import requests
import re

def extract_text_from_html(html):
    """
    Extract text from HTML without using BeautifulSoup
    """
    # Remove HTML tags
    text = re.sub(r'<[^>]+>', ' ', html)
    # Collapse runs of whitespace into single spaces
    text = re.sub(r'\s+', ' ', text)
    # Decode the common HTML entities that appear in the API's rendered fields
    text = text.replace('&nbsp;', ' ').replace('&amp;', '&').replace('&lt;', '<').replace('&gt;', '>')
    return text.strip()
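
# For instance, with the entity decoding above:
#   extract_text_from_html('<p>Up to 50% off &amp; free shipping</p>')
#   -> 'Up to 50% off & free shipping'
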

def fetch_deals_data(url="https://www.dealsfinders.com/wp-json/wp/v2/posts", num_pages=2, per_page=100):
    """
    Fetch deals data exclusively from the DealsFinders API
    """
    all_deals = []

    for page in range(1, num_pages + 1):
        try:
            # A browser-like User-Agent avoids being blocked by some WordPress hosts
            headers = {
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36'
            }
            response = requests.get(f"{url}?page={page}&per_page={per_page}", headers=headers, timeout=10)

            if response.status_code == 200:
                deals = response.json()
                all_deals.extend(deals)
                print(f"Fetched page {page} with {len(deals)} deals from DealsFinders API")

                # A short page means there are no more posts to fetch
                if len(deals) < per_page:
                    print(f"Reached the end of available deals at page {page}")
                    break
            else:
                print(f"Failed to fetch page {page} from DealsFinders API: {response.status_code}")
                break
        except Exception as e:
            print(f"Error fetching page {page} from DealsFinders API: {str(e)}")
            break

    return all_deals
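
# For reference, each element of the returned list is a WordPress REST API
# post object; the fields used below look roughly like:
#   {'id': 123, 'date': '2025-01-01T00:00:00', 'link': 'https://...',
#    'title': {'rendered': '...'}, 'content': {'rendered': '...'},
#    'excerpt': {'rendered': '...'}}
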

def process_deals_data(deals_data):
    """
    Process the deals data into a structured format
    """
    processed_deals = []

    for deal in deals_data:
        try:
            # Title, content, and excerpt arrive as {'rendered': <html>} objects
            content_html = deal.get('content', {}).get('rendered', '')
            excerpt_html = deal.get('excerpt', {}).get('rendered', '')

            # Strip the markup so deals can be keyword-searched as plain text
            clean_content = extract_text_from_html(content_html)
            clean_excerpt = extract_text_from_html(excerpt_html)

            processed_deal = {
                'id': deal.get('id'),
                'title': deal.get('title', {}).get('rendered', ''),
                'link': deal.get('link', ''),
                'date': deal.get('date', ''),
                'content': clean_content,
                'excerpt': clean_excerpt
            }
            processed_deals.append(processed_deal)
        except Exception as e:
            print(f"Error processing deal: {str(e)}")

    return processed_deals
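
# The flattened result is what the search in classify_text() operates on, e.g.:
#   {'id': 123, 'title': 'Deal title', 'link': 'https://www.dealsfinders.com/...',
#    'date': '...', 'content': 'plain text', 'excerpt': 'plain text'}
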

# Load the model and tokenizer from the Hugging Face Hub
model_id = "selvaonline/shopping-assistant"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSequenceClassification.from_pretrained(model_id)

# Load the category names stored alongside the model, with a fallback list
try:
    from huggingface_hub import hf_hub_download
    categories_path = hf_hub_download(repo_id=model_id, filename="categories.json")
    with open(categories_path, "r") as f:
        categories = json.load(f)
except Exception as e:
    print(f"Error loading categories: {str(e)}")
    categories = ["electronics", "clothing", "home", "kitchen", "toys", "other"]

# Cache the processed deals so the DealsFinders API is only hit once per session
deals_cache = None


def classify_text(text, fetch_deals=True):
    """
    Classify the text using the model and fetch relevant deals
    """
    global deals_cache

    # Tokenize the query for the model
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)

    # Multi-label classification: sigmoid scores each category independently
    with torch.no_grad():
        outputs = model(**inputs)
        predictions = torch.sigmoid(outputs.logits)

    # Keep every category whose score clears the 0.5 threshold
    top_categories = []
    for i, score in enumerate(predictions[0]):
        if score > 0.5:
            top_categories.append((categories[i], score.item()))

    # Highest-scoring category first
    top_categories.sort(key=lambda x: x[1], reverse=True)

    if top_categories:
        result = f"Top categories for '{text}':\n\n"
        for category, score in top_categories:
            result += f"- {category}: {score:.4f}\n"
        result += f"\nBased on your query, I would recommend looking for deals in the **{top_categories[0][0]}** category.\n\n"
    else:
        result = f"No categories found for '{text}'. Please try a different query.\n\n"

    if fetch_deals:
        result += "## Relevant Deals from DealsFinders.com\n\n"
        try:
            # Fetch and process the deals once, then reuse the cache
            if deals_cache is None:
                deals_data = fetch_deals_data(num_pages=2)
                deals_cache = process_deals_data(deals_data)

            # Simple keyword matching: keep deals whose text contains any query term
            query_terms = text.lower().split()
            relevant_deals = []
            for deal in deals_cache:
                title = deal['title'].lower()
                content = deal['content'].lower()
                excerpt = deal['excerpt'].lower()

                if any(term in title or term in content or term in excerpt for term in query_terms):
                    relevant_deals.append(deal)

            # Show at most the first five matches
            relevant_deals = relevant_deals[:5]

            if relevant_deals:
                for i, deal in enumerate(relevant_deals, 1):
                    result += f"{i}. [{deal['title']}]({deal['link']})\n\n"
            else:
                result += "No specific deals found for your query. Try a different search term or browse the recommended category.\n\n"
        except Exception as e:
            result += f"Error fetching deals: {str(e)}\n\n"

    return result
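
# A minimal sanity check from a Python shell, skipping the network call:
#   print(classify_text("I'm looking for headphones", fetch_deals=False))
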

demo = gr.Interface(
    fn=classify_text,
    inputs=[
        gr.Textbox(
            lines=2,
            placeholder="Enter your shopping query here...",
            label="Shopping Query"
        ),
        gr.Checkbox(
            label="Fetch Deals",
            value=True,
            info="Check to fetch and display deals from DealsFinders.com"
        )
    ],
    outputs=gr.Markdown(label="Results"),
    title="Shopping Assistant",
    description="""
    This demo shows how to use the Shopping Assistant model to classify shopping queries into categories and find relevant deals.
    Enter a shopping query below to see which categories it belongs to and find deals from DealsFinders.com.

    Examples:
    - "I'm looking for headphones"
    - "Do you have any kitchen appliance deals?"
    - "Show me the best laptop deals"
    - "I need a new smart TV"
    """,
    examples=[
        ["I'm looking for headphones", True],
        ["Do you have any kitchen appliance deals?", True],
        ["Show me the best laptop deals", True],
        ["I need a new smart TV", True],
        ["headphone deals", True]
    ],
    theme=gr.themes.Soft()
)


if __name__ == "__main__":
    demo.launch()
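
# Note: when running locally outside Hugging Face Spaces, demo.launch(share=True)
# will additionally create a temporary public Gradio link.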