Upload app.py with huggingface_hub
app.py CHANGED
@@ -1,8 +1,87 @@
-
 import gradio as gr
 import torch
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import json
+import os
+import requests
+import re
+
+# Function to extract text from HTML (from shopping_assistant.py)
+def extract_text_from_html(html):
+    """
+    Extract text from HTML without using BeautifulSoup
+    """
+    # Remove HTML tags
+    text = re.sub(r'<[^>]+>', ' ', html)
+    # Remove extra whitespace
+    text = re.sub(r'\s+', ' ', text)
+    # Decode HTML entities
+    text = text.replace('&nbsp;', ' ').replace('&amp;', '&').replace('&lt;', '<').replace('&gt;', '>')
+    return text.strip()
+
+# Function to fetch deals from DealsFinders.com (from shopping_assistant.py)
+def fetch_deals_data(url="https://www.dealsfinders.com/wp-json/wp/v2/posts", num_pages=2, per_page=100):
+    """
+    Fetch deals data exclusively from the DealsFinders API
+    """
+    all_deals = []
+
+    # Fetch from the DealsFinders API
+    for page in range(1, num_pages + 1):
+        try:
+            # Add a user agent to avoid being blocked
+            headers = {
+                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36'
+            }
+            response = requests.get(f"{url}?page={page}&per_page={per_page}", headers=headers)
+
+            if response.status_code == 200:
+                deals = response.json()
+                all_deals.extend(deals)
+                print(f"Fetched page {page} with {len(deals)} deals from DealsFinders API")
+
+                # If we get fewer deals than requested, we've reached the end
+                if len(deals) < per_page:
+                    print(f"Reached the end of available deals at page {page}")
+                    break
+            else:
+                print(f"Failed to fetch page {page} from DealsFinders API: {response.status_code}")
+                break
+        except Exception as e:
+            print(f"Error fetching page {page} from DealsFinders API: {str(e)}")
+            break
+
+    return all_deals
+
+# Function to process deals data (from shopping_assistant.py)
+def process_deals_data(deals_data):
+    """
+    Process the deals data into a structured format
+    """
+    processed_deals = []
+
+    for deal in deals_data:
+        try:
+            # Extract relevant information using our HTML text extractor
+            content_html = deal.get('content', {}).get('rendered', '')
+            excerpt_html = deal.get('excerpt', {}).get('rendered', '')
+
+            clean_content = extract_text_from_html(content_html)
+            clean_excerpt = extract_text_from_html(excerpt_html)
+
+            processed_deal = {
+                'id': deal.get('id'),
+                'title': deal.get('title', {}).get('rendered', ''),
+                'link': deal.get('link', ''),
+                'date': deal.get('date', ''),
+                'content': clean_content,
+                'excerpt': clean_excerpt
+            }
+            processed_deals.append(processed_deal)
+        except Exception as e:
+            print(f"Error processing deal: {str(e)}")
+
+    return processed_deals
 
 # Load the model and tokenizer
 model_id = "selvaonline/shopping-assistant"
@@ -19,11 +98,16 @@ except Exception as e:
     print(f"Error loading categories: {str(e)}")
     categories = ["electronics", "clothing", "home", "kitchen", "toys", "other"]
 
-def classify_text(text):
+# Global variable to store deals data
+deals_cache = None
+
+def classify_text(text, fetch_deals=True):
     """
-    Classify the text using the model
+    Classify the text using the model and fetch relevant deals
     """
-
+    global deals_cache
+
+    # Prepare the input for classification
     inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
 
     # Get the model prediction
@@ -40,31 +124,73 @@ def classify_text(text):
     # Sort by score
     top_categories.sort(key=lambda x: x[1], reverse=True)
 
-    # Format the results
+    # Format the classification results
     if top_categories:
         result = f"Top categories for '{text}':\n\n"
         for category, score in top_categories:
             result += f"- {category}: {score:.4f}\n"
 
-        result += f"\nBased on your query, I would recommend looking for deals in the **{top_categories[0][0]}** category."
+        result += f"\nBased on your query, I would recommend looking for deals in the **{top_categories[0][0]}** category.\n\n"
     else:
-        result = f"No categories found for '{text}'. Please try a different query."
+        result = f"No categories found for '{text}'. Please try a different query.\n\n"
+
+    # Fetch and display deals if requested
+    if fetch_deals:
+        result += "## Relevant Deals from DealsFinders.com\n\n"
+
+        try:
+            # Fetch deals data if not already cached
+            if deals_cache is None:
+                deals_data = fetch_deals_data(num_pages=2)  # Limit to 2 pages for faster response
+                deals_cache = process_deals_data(deals_data)
+
+            # Search for relevant deals
+            query_terms = text.lower().split()
+            relevant_deals = []
+
+            for deal in deals_cache:
+                title = deal['title'].lower()
+                content = deal['content'].lower()
+                excerpt = deal['excerpt'].lower()
+
+                # Check if any query term is in the deal information
+                if any(term in title or term in content or term in excerpt for term in query_terms):
+                    relevant_deals.append(deal)
+
+            # Limit to top 5 most relevant deals
+            relevant_deals = relevant_deals[:5]
+
+            if relevant_deals:
+                for i, deal in enumerate(relevant_deals, 1):
+                    result += f"{i}. [{deal['title']}]({deal['link']})\n\n"
+            else:
+                result += "No specific deals found for your query. Try a different search term or browse the recommended category.\n\n"
+
+        except Exception as e:
+            result += f"Error fetching deals: {str(e)}\n\n"
 
     return result
 
 # Create the Gradio interface
 demo = gr.Interface(
     fn=classify_text,
-    inputs=gr.Textbox(
-        lines=2,
-        placeholder="Enter your shopping query here...",
-        label="Shopping Query"
-    ),
+    inputs=[
+        gr.Textbox(
+            lines=2,
+            placeholder="Enter your shopping query here...",
+            label="Shopping Query"
+        ),
+        gr.Checkbox(
+            label="Fetch Deals",
+            value=True,
+            info="Check to fetch and display deals from DealsFinders.com"
+        )
+    ],
     outputs=gr.Markdown(label="Results"),
     title="Shopping Assistant",
     description="""
-    This demo shows how to use the Shopping Assistant model to classify shopping queries into categories.
-    Enter a shopping query below to see which categories it belongs to.
+    This demo shows how to use the Shopping Assistant model to classify shopping queries into categories and find relevant deals.
+    Enter a shopping query below to see which categories it belongs to and find deals from DealsFinders.com.
 
     Examples:
     - "I'm looking for headphones"
@@ -73,10 +199,11 @@ demo = gr.Interface(
     - "I need a new smart TV"
     """,
     examples=[
-        ["I'm looking for headphones"],
-        ["Do you have any kitchen appliance deals?"],
-        ["Show me the best laptop deals"],
-        ["I need a new smart TV"]
+        ["I'm looking for headphones", True],
+        ["Do you have any kitchen appliance deals?", True],
+        ["Show me the best laptop deals", True],
+        ["I need a new smart TV", True],
+        ["headphone deals", True]
     ],
     theme=gr.themes.Soft()
 )
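The new fetch path relies on the standard WordPress REST API exposed by DealsFinders.com. As a quick sanity check of the response shape the code consumes (`id`, `link`, and rendered `title`/`content`/`excerpt`), a one-page probe might look like the sketch below; the endpoint's availability and exact fields are assumptions inferred from the code above, not guaranteed by the site:

import requests

# Hypothetical one-page probe of the endpoint queried by fetch_deals_data.
# Mirrors the request app.py makes, but asks for only five posts.
url = "https://www.dealsfinders.com/wp-json/wp/v2/posts"
headers = {"User-Agent": "Mozilla/5.0"}
response = requests.get(url, params={"page": 1, "per_page": 5}, headers=headers)
response.raise_for_status()

for post in response.json():
    # WordPress nests display text under a "rendered" key, which is why
    # process_deals_data calls deal.get('title', {}).get('rendered', '').
    print(post["id"], post["title"]["rendered"], post["link"])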
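For completeness, the commit message at the top of this page is the default that huggingface_hub generates when pushing a single file. A minimal sketch of how such a commit is made, assuming the Space id matches the `selvaonline/shopping-assistant` model id used in app.py and that a write token is available from HF_TOKEN or a prior `huggingface-cli login`:

from huggingface_hub import HfApi

# Minimal sketch: push the updated app.py to the Space.
# The Space id is an assumption; adjust repo_id for the actual Space.
api = HfApi()
api.upload_file(
    path_or_fileobj="app.py",
    path_in_repo="app.py",
    repo_id="selvaonline/shopping-assistant",
    repo_type="space",
)  # default commit message: "Upload app.py with huggingface_hub"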