File size: 11,656 Bytes
01a3727
 
 
 
6e2368e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
01a3727
e667020
01a3727
e667020
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
01a3727
 
e667020
 
 
 
01a3727
6e2368e
 
 
 
01a3727
6e2368e
01a3727
6e2368e
 
 
01a3727
 
 
 
 
e667020
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
01a3727
 
 
 
6e2368e
01a3727
 
 
 
 
6e2368e
01a3727
6e2368e
 
 
 
 
 
 
 
 
 
 
 
6f89f62
 
 
6e2368e
6f89f62
 
 
 
 
 
 
 
 
e360e01
6f89f62
 
6e2368e
 
 
6f89f62
e360e01
6f89f62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e360e01
 
6f89f62
08f5082
6f89f62
 
6e2368e
 
 
 
 
 
 
 
 
01a3727
 
 
 
 
 
6e2368e
 
 
 
 
 
 
 
 
 
 
 
01a3727
 
 
6e2368e
 
01a3727
 
 
 
 
 
 
 
6e2368e
 
 
 
 
01a3727
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import json
import os
import requests
import re

# Function to extract text from HTML (from shopping_assistant.py)
def extract_text_from_html(html):
    """
    Extract the visible text from an HTML fragment without using BeautifulSoup.

    Tags are replaced by spaces, character references are decoded, and runs of
    whitespace are collapsed to a single space.

    Args:
        html: HTML markup as a string. ``None`` and ``""`` are accepted.

    Returns:
        The plain text with leading/trailing whitespace removed, or ``""``
        when there is nothing to extract.
    """
    # Imported locally because the ``html`` parameter shadows the stdlib module.
    from html import unescape

    if not html:
        return ""

    # Strip tags first so escaped markup such as "&lt;b&gt;" survives as
    # literal text instead of being removed as a tag after decoding.
    text = re.sub(r'<[^>]+>', ' ', html)
    # Decode all named and numeric character references in one pass. Chained
    # str.replace calls handled only four entities and turned "&amp;lt;" into "<".
    text = unescape(text)
    # Collapse whitespace last: "&nbsp;" decodes to U+00A0, which \s matches.
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

# Function to fetch deals from DealsFinders.com (from shopping_assistant.py)
def fetch_deals_data(url="https://www.dealsfinders.com/wp-json/wp/v2/posts", num_pages=2, per_page=100, timeout=10):
    """
    Fetch deals data exclusively from the DealsFinders API.

    Pages are requested one after another, starting at page 1. Fetching stops
    at the first failure (network error, non-200 status, undecodable or
    unexpected payload) or as soon as a page holds fewer than ``per_page``
    items; whatever was collected up to that point is returned.

    Args:
        url: Endpoint to query.
        num_pages: Maximum number of pages to request.
        per_page: Number of items requested per page.
        timeout: Seconds to wait for each HTTP request before giving up, so a
            stalled connection cannot block the caller indefinitely.

    Returns:
        A list with the decoded JSON items of all fetched pages (possibly empty).
    """
    # Browser-like user agent to avoid being blocked; identical for every page.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36'
    }
    all_deals = []

    for page in range(1, num_pages + 1):
        try:
            # Let requests build the query string so values are encoded properly
            # and a URL that already carries a query keeps working.
            response = requests.get(
                url,
                params={"page": page, "per_page": per_page},
                headers=headers,
                timeout=timeout,
            )
        except requests.RequestException as e:
            print(f"Error fetching page {page} from DealsFinders API: {str(e)}")
            break

        if response.status_code != 200:
            print(f"Failed to fetch page {page} from DealsFinders API: {response.status_code}")
            break

        try:
            deals = response.json()
        except ValueError as e:  # body was not valid JSON
            print(f"Error fetching page {page} from DealsFinders API: {str(e)}")
            break

        # A JSON object (e.g. an error document) would otherwise be "extended"
        # into the result key by key.
        if not isinstance(deals, list):
            print(f"Unexpected response for page {page} from DealsFinders API: expected a list of deals")
            break

        all_deals.extend(deals)
        print(f"Fetched page {page} with {len(deals)} deals from DealsFinders API")

        # If we get fewer deals than requested, we've reached the end
        if len(deals) < per_page:
            print(f"Reached the end of available deals at page {page}")
            break

    return all_deals

# Function to process deals data (from shopping_assistant.py)
def process_deals_data(deals_data):
    """
    Process the raw deals data into a structured, plain-text format.

    Each input item is expected to be a dict whose ``title``, ``content`` and
    ``excerpt`` entries are themselves dicts with a ``rendered`` HTML string.
    Items that do not have that shape are reported and skipped.

    Args:
        deals_data: Iterable of raw deal dicts; ``None`` is treated as empty.

    Returns:
        A list of dicts with the keys ``id``, ``title``, ``link``, ``date``,
        ``content`` and ``excerpt``. The three text fields are passed through
        ``extract_text_from_html``.
    """
    def rendered(deal, field):
        # Tolerate a missing field as well as an explicit null at either level.
        return (deal.get(field) or {}).get('rendered') or ''

    processed_deals = []

    for deal in deals_data or []:
        try:
            processed_deals.append({
                'id': deal.get('id'),
                # The title is rendered HTML too, so it gets the same clean-up
                # as the body text before it is searched and displayed.
                'title': extract_text_from_html(rendered(deal, 'title')),
                'link': deal.get('link') or '',
                'date': deal.get('date') or '',
                'content': extract_text_from_html(rendered(deal, 'content')),
                'excerpt': extract_text_from_html(rendered(deal, 'excerpt')),
            })
        except (AttributeError, TypeError) as e:
            # Best effort: one malformed entry must not discard the others.
            print(f"Error processing deal: {str(e)}")

    return processed_deals

# Load the e-commerce specific model and tokenizer
try:
    # Try to load the e-commerce BERT model
    tokenizer = AutoTokenizer.from_pretrained("prithivida/ecommerce-bert-base-uncased")
    model = AutoModelForSequenceClassification.from_pretrained("prithivida/ecommerce-bert-base-uncased")

    # E-commerce BERT categories
    # NOTE(review): classify_text maps model outputs to these names by index;
    # confirm the order matches the model's label ids.
    categories = [
        "electronics", "computers", "mobile_phones", "accessories",
        "clothing", "footwear", "watches", "jewelry",
        "home", "kitchen", "furniture", "decor",
        "beauty", "personal_care", "health", "wellness",
        "toys", "games", "sports", "outdoors",
        "books", "stationery", "music", "movies"
    ]
    print("Using e-commerce BERT model")
except Exception as e:
    # Fall back to the shopping-assistant model if e-commerce BERT fails to load
    print(f"Error loading e-commerce BERT model: {str(e)}")
    print("Falling back to local model")

    model_id = "selvaonline/shopping-assistant"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForSequenceClassification.from_pretrained(model_id)

    # Load the categories shipped with the fallback model. Every statement must
    # sit inside this try block: a download or parse failure falls through to
    # the built-in default list below.
    try:
        from huggingface_hub import hf_hub_download
        categories_path = hf_hub_download(repo_id=model_id, filename="categories.json")
        # presumably a JSON list of category names - verify against the repo
        with open(categories_path, "r", encoding="utf-8") as f:
            categories = json.load(f)
    except Exception as categories_error:
        print(f"Error loading categories: {str(categories_error)}")
        categories = ["electronics", "clothing", "home", "kitchen", "toys", "other"]

# Global variable to store deals data (filled lazily by classify_text)
deals_cache = None

# Function words that carry no product meaning. Matched as substrings they
# ("a", "do", "you", ...) would give almost every deal a non-zero score.
_STOP_WORDS = frozenset({
    'a', 'an', 'and', 'any', 'are', 'at', 'can', 'do', 'for', 'have', 'i',
    'in', 'is', 'it', 'me', 'my', 'of', 'on', 'or', 'the', 'to', 'with', 'you',
})

# (trigger keywords, related terms) pairs. The first pair with a trigger that
# occurs in the query contributes its related terms at a lower weight.
_RELATED_TERMS = (
    (('headphone', 'headphones'),
     ('earbuds', 'earphones', 'earpods', 'airpods', 'audio', 'bluetooth', 'wireless')),
    (('laptop', 'computer'),
     ('notebook', 'macbook', 'chromebook', 'pc')),
    (('tv', 'television'),
     ('smart tv', 'roku', 'streaming')),
    (('kitchen', 'appliance'),
     ('mixer', 'blender', 'toaster', 'microwave', 'oven')),
)


def _predict_categories(text):
    """Classify *text* and return ``(category, score)`` pairs, best score first."""
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)

    with torch.no_grad():
        outputs = model(**inputs)

        if hasattr(outputs, 'logits'):
            logits = outputs.logits
            if logits.shape[1] == len(categories):
                # Multi-label classification: keep every category above the
                # (lower, e-commerce specific) threshold.
                scores = torch.sigmoid(logits)[0].tolist()
                top_categories = [
                    (categories[i], score)
                    for i, score in enumerate(scores)
                    if score > 0.3
                ]
            else:
                # Single-label classification: take the best three classes.
                probabilities = torch.softmax(logits, dim=1)
                # topk raises when k exceeds the number of classes.
                k = min(3, probabilities.shape[1])
                values, indices = torch.topk(probabilities, k)
                top_categories = [
                    (categories[idx], value)
                    for value, idx in zip(values[0].tolist(), indices[0].tolist())
                    if idx < len(categories)
                ]
        else:
            # Fallback for models that return a plain tuple of tensors.
            scores = torch.sigmoid(outputs[0])[0].tolist()
            top_categories = [
                (categories[i], score)
                for i, score in enumerate(scores)
                # Guard the index: the output may be wider than `categories`.
                if i < len(categories) and score > 0.5
            ]

    top_categories.sort(key=lambda pair: pair[1], reverse=True)
    return top_categories


def _extract_query_terms(text):
    """Split *text* into meaningful search terms plus lower-weight related terms."""
    lowered = text.lower()
    # \w+ drops punctuation ("deals?" -> "deals"); dict.fromkeys de-duplicates
    # while keeping the original order.
    words = dict.fromkeys(re.findall(r"\w+", lowered))
    query_terms = [w for w in words if len(w) > 1 and w not in _STOP_WORDS]

    related_terms = []
    for triggers, extras in _RELATED_TERMS:
        if any(trigger in lowered for trigger in triggers):
            related_terms = [term for term in extras if term not in query_terms]
            break
    return query_terms, related_terms


def _score_deal(deal, query_terms, related_terms):
    """Return the relevance score of one processed deal for the given terms."""
    title = deal['title'].lower()
    content = deal['content'].lower()
    excerpt = deal['excerpt'].lower()

    score = 0
    # Original query terms (higher weight)
    for term in query_terms:
        if term in title:
            score += 10
        if term in content:
            score += 3
        if term in excerpt:
            score += 3
    # Related terms (lower weight)
    for term in related_terms:
        if term in title:
            score += 5
        if term in content:
            score += 1
        if term in excerpt:
            score += 1
    return score


def classify_text(text, fetch_deals=True):
    """
    Classify the text using the model and fetch relevant deals.

    Args:
        text: The shopping query entered by the user.
        fetch_deals: When true, append up to five matching deals (fetched once
            and kept in the module-level ``deals_cache``) to the result.

    Returns:
        A Markdown string with the predicted categories and, if requested,
        a numbered list of deal links or a message explaining why none are
        shown.
    """
    global deals_cache

    if not text or not text.strip():
        return "Please enter a shopping query.\n\n"

    top_categories = _predict_categories(text)

    # Collect the output pieces and join once at the end.
    parts = []
    if top_categories:
        parts.append(f"Top categories for '{text}':\n\n")
        parts.extend(f"- {category}: {score:.4f}\n" for category, score in top_categories)
        parts.append(f"\nBased on your query, I would recommend looking for deals in the **{top_categories[0][0]}** category.\n\n")
    else:
        parts.append(f"No categories found for '{text}'. Please try a different query.\n\n")

    # Fetch and display deals if requested
    if fetch_deals:
        parts.append("## Relevant Deals from DealsFinders.com\n\n")

        try:
            if not deals_cache:
                # Limit to 2 pages for faster response. Only a non-empty result
                # is cached, so a failed fetch is retried on the next call
                # instead of leaving the cache empty for good.
                deals_cache = process_deals_data(fetch_deals_data(num_pages=2)) or None

            query_terms, related_terms = _extract_query_terms(text)

            # Score deals and keep those with any relevance to the query
            scored_deals = []
            for deal in deals_cache or []:
                score = _score_deal(deal, query_terms, related_terms)
                if score > 0:
                    scored_deals.append((deal, score))

            # Sort by score (descending); the sort is stable, so ties keep
            # the order in which the deals were fetched.
            scored_deals.sort(key=lambda pair: pair[1], reverse=True)

            if scored_deals:
                for i, (deal, _) in enumerate(scored_deals[:5], 1):
                    parts.append(f"{i}. [{deal['title']}]({deal['link']})\n\n")
            else:
                parts.append("No specific deals found for your query. Try a different search term or browse the recommended category.\n\n")

        except Exception as e:
            # UI boundary: report the problem in the output instead of crashing.
            parts.append(f"Error fetching deals: {str(e)}\n\n")

    return "".join(parts)

# Create the Gradio interface
# Input components, in the positional order classify_text receives them
query_box = gr.Textbox(
    label="Shopping Query",
    placeholder="Enter your shopping query here...",
    lines=2,
)
deals_toggle = gr.Checkbox(
    value=True,
    label="Fetch Deals",
    info="Check to fetch and display deals from DealsFinders.com",
)

# Sample queries offered in the UI; each one runs with deal fetching enabled
sample_queries = (
    "I'm looking for headphones",
    "Do you have any kitchen appliance deals?",
    "Show me the best laptop deals",
    "I need a new smart TV",
    "headphone deals",
)

# Assemble the Gradio interface around classify_text
demo = gr.Interface(
    fn=classify_text,
    inputs=[query_box, deals_toggle],
    outputs=gr.Markdown(label="Results"),
    title="Shopping Assistant",
    description="""
    This demo shows how to use the Shopping Assistant model to classify shopping queries into categories and find relevant deals.
    Enter a shopping query below to see which categories it belongs to and find deals from DealsFinders.com.
    
    Examples:
    - "I'm looking for headphones"
    - "Do you have any kitchen appliance deals?"
    - "Show me the best laptop deals"
    - "I need a new smart TV"
    """,
    examples=[[query, True] for query in sample_queries],
    theme=gr.themes.Soft()
)

# Start the web app only when this file is executed as a script
if __name__ == "__main__":
    demo.launch()