File size: 26,081 Bytes
01a3727
 
 
 
6e2368e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
02ae7c1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6e2368e
02ae7c1
6e2368e
02ae7c1
6e2368e
02ae7c1
 
 
 
 
6e2368e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
02ae7c1
 
6e2368e
 
02ae7c1
 
 
 
 
 
 
6e2368e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
01a3727
b1b6f63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
01a3727
b1b6f63
 
 
 
 
 
 
 
 
e667020
b1b6f63
 
 
 
 
 
 
 
 
 
e667020
b1b6f63
 
e667020
 
a6565d8
 
 
e667020
 
 
a6565d8
e667020
 
 
 
b1b6f63
 
 
01a3727
02ae7c1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6e2368e
 
 
1ba7ad7
02ae7c1
1ba7ad7
02ae7c1
 
1ba7ad7
02ae7c1
1ba7ad7
 
02ae7c1
 
 
 
1ba7ad7
 
 
02ae7c1
6e2368e
01a3727
6e2368e
01a3727
6e2368e
 
b1b6f63
 
 
 
e667020
b1b6f63
 
 
 
 
e667020
b1b6f63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
01a3727
6e2368e
01a3727
 
 
 
 
6e2368e
01a3727
6e2368e
 
 
 
 
 
 
 
 
02ae7c1
 
6e2368e
 
b1b6f63
 
 
 
 
 
 
 
 
 
 
 
e360e01
b1b6f63
 
6f89f62
b1b6f63
 
6f89f62
b1b6f63
 
 
02ae7c1
b1b6f63
 
 
02ae7c1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b1b6f63
 
 
 
 
 
 
 
 
 
 
 
6f89f62
b1b6f63
6f89f62
b1b6f63
6f89f62
b1b6f63
 
 
 
 
 
 
 
 
 
 
 
02ae7c1
 
 
 
 
 
 
 
 
b1b6f63
 
 
6f89f62
b1b6f63
 
 
 
 
6e2368e
 
 
 
 
 
 
 
 
01a3727
 
 
 
 
 
6e2368e
 
 
 
 
 
 
 
 
 
 
 
01a3727
 
 
6e2368e
 
01a3727
 
 
 
 
 
 
 
6e2368e
 
 
 
 
01a3727
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import json
import os
import requests
import re

# Function to extract text from HTML (from shopping_assistant.py)
def extract_text_from_html(html):
    """
    Extract plain text from an HTML fragment without using BeautifulSoup.

    Tags are replaced by spaces, character references are decoded exactly
    once, and every run of whitespace is collapsed to a single space.

    Args:
        html: HTML markup as a string. ``None`` or an empty string yields ``''``.

    Returns:
        The text content with leading and trailing whitespace removed.
    """
    # Imported by name because the ``html`` parameter shadows the stdlib module.
    from html import unescape

    if not html:
        return ''

    # Strip tags first so escaped markup such as "&lt;b&gt;" survives as
    # literal text instead of being mistaken for a tag after decoding.
    text = re.sub(r'<[^>]+>', ' ', html)
    # Decode all character references in a single pass. Chained str.replace
    # calls would decode "&amp;lt;" twice (to "<") and miss numeric references.
    text = unescape(text)
    # Collapse whitespace last: "&nbsp;" decodes to U+00A0, which \s matches,
    # so non-breaking spaces are normalised together with ordinary whitespace.
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

# Sample deals data to use as fallback.
# Each entry has "id", "link" and "date" keys plus "title", "content" and
# "excerpt" objects whose "rendered" value holds the text (HTML for content
# and excerpt). These are the keys process_deals_data() reads from a deal.
# fetch_deals_data() returns this list when sample data is requested or when
# the API cannot be reached.
SAMPLE_DEALS = [
  {
    "id": 1,
    "title": {
      "rendered": "Apple AirPods Pro (2nd Generation) - 20% Off"
    },
    "link": "https://www.example.com/deals/airpods-pro",
    "date": "2025-02-25T10:00:00",
    "content": {
      "rendered": "<p>Get the latest Apple AirPods Pro (2nd Generation) for 20% off the regular price. These wireless earbuds feature active noise cancellation, transparency mode, and spatial audio with dynamic head tracking.</p><p>Regular price: $249.99</p><p>Deal price: $199.99</p><p>You save: $50.00</p>"
    },
    "excerpt": {
      "rendered": "<p>Apple AirPods Pro (2nd Generation) with active noise cancellation and transparency mode. Now 20% off - only $199.99!</p>"
    }
  },
  {
    "id": 2,
    "title": {
      "rendered": "Samsung 65\" QLED 4K Smart TV - $300 Off"
    },
    "link": "https://www.example.com/deals/samsung-qled-tv",
    "date": "2025-02-26T09:30:00",
    "content": {
      "rendered": "<p>Upgrade your home entertainment with this Samsung 65\" QLED 4K Smart TV. Features Quantum HDR, Motion Xcelerator Turbo+, and Object Tracking Sound for an immersive viewing experience.</p><p>Regular price: $1,299.99</p><p>Deal price: $999.99</p><p>You save: $300.00</p>"
    },
    "excerpt": {
      "rendered": "<p>Samsung 65\" QLED 4K Smart TV with Quantum HDR and Object Tracking Sound. Save $300 - now only $999.99!</p>"
    }
  },
  {
    "id": 3,
    "title": {
      "rendered": "Sony WH-1000XM5 Wireless Headphones - 25% Off"
    },
    "link": "https://www.example.com/deals/sony-wh1000xm5",
    "date": "2025-02-26T14:15:00",
    "content": {
      "rendered": "<p>Experience industry-leading noise cancellation with the Sony WH-1000XM5 wireless headphones. Features 30-hour battery life, quick charging, and exceptional sound quality with the new Integrated Processor V1.</p><p>Regular price: $399.99</p><p>Deal price: $299.99</p><p>You save: $100.00</p>"
    },
    "excerpt": {
      "rendered": "<p>Sony WH-1000XM5 wireless headphones with industry-leading noise cancellation and 30-hour battery life. Now 25% off at $299.99!</p>"
    }
  },
  {
    "id": 4,
    "title": {
      "rendered": "Bose QuietComfort Ultra Headphones - 20% Off"
    },
    "link": "https://www.example.com/deals/bose-quietcomfort-ultra",
    "date": "2025-02-25T15:30:00",
    "content": {
      "rendered": "<p>Experience the ultimate in noise cancellation with Bose QuietComfort Ultra headphones. Features spatial audio, custom EQ, and up to 24 hours of battery life.</p><p>Regular price: $429.99</p><p>Deal price: $343.99</p><p>You save: $86.00</p>"
    },
    "excerpt": {
      "rendered": "<p>Bose QuietComfort Ultra headphones with advanced noise cancellation and spatial audio. Now 20% off at $343.99!</p>"
    }
  },
  {
    "id": 5,
    "title": {
      "rendered": "Beats Studio Pro Wireless Headphones - 40% Off"
    },
    "link": "https://www.example.com/deals/beats-studio-pro",
    "date": "2025-02-26T16:30:00",
    "content": {
      "rendered": "<p>The Beats Studio Pro wireless headphones deliver premium sound with active noise cancellation, transparency mode, and up to 40 hours of battery life.</p><p>Regular price: $349.99</p><p>Deal price: $209.99</p><p>You save: $140.00</p>"
    },
    "excerpt": {
      "rendered": "<p>Beats Studio Pro wireless headphones with active noise cancellation and 40-hour battery life. Now 40% off at $209.99!</p>"
    }
  },
  {
    "id": 6,
    "title": {
      "rendered": "Dyson V12 Detect Slim Cordless Vacuum - $150 Off"
    },
    "link": "https://www.example.com/deals/dyson-v12",
    "date": "2025-02-27T08:45:00",
    "content": {
      "rendered": "<p>The Dyson V12 Detect Slim cordless vacuum features a laser that reveals microscopic dust, an LCD screen that displays particle counts, and powerful suction for deep cleaning.</p><p>Regular price: $649.99</p><p>Deal price: $499.99</p><p>You save: $150.00</p>"
    },
    "excerpt": {
      "rendered": "<p>Dyson V12 Detect Slim cordless vacuum with laser dust detection and powerful suction. Save $150 - now only $499.99!</p>"
    }
  },
  {
    "id": 7,
    "title": {
      "rendered": "Nintendo Switch OLED Model - Bundle Deal"
    },
    "link": "https://www.example.com/deals/nintendo-switch-oled",
    "date": "2025-02-27T11:20:00",
    "content": {
      "rendered": "<p>Get the Nintendo Switch OLED Model with a vibrant 7-inch OLED screen, plus two games and a carrying case. The perfect gaming package for home or on-the-go play.</p><p>Regular price: $439.99</p><p>Deal price: $379.99</p><p>You save: $60.00</p>"
    },
    "excerpt": {
      "rendered": "<p>Nintendo Switch OLED Model bundle with two games and carrying case. Special bundle price of $379.99!</p>"
    }
  },
  {
    "id": 8,
    "title": {
      "rendered": "MacBook Air M3 - $200 Off"
    },
    "link": "https://www.example.com/deals/macbook-air-m3",
    "date": "2025-02-26T10:45:00",
    "content": {
      "rendered": "<p>The latest MacBook Air with M3 chip offers incredible performance and battery life in an ultra-thin design. Features a 13.6-inch Liquid Retina display, 8GB RAM, and 256GB SSD storage.</p><p>Regular price: $1,099.99</p><p>Deal price: $899.99</p><p>You save: $200.00</p>"
    },
    "excerpt": {
      "rendered": "<p>MacBook Air with M3 chip, 13.6-inch Liquid Retina display, and all-day battery life. Save $200 - now only $899.99!</p>"
    }
  },
  {
    "id": 9,
    "title": {
      "rendered": "Kindle Paperwhite Signature Edition - 30% Off"
    },
    "link": "https://www.example.com/deals/kindle-paperwhite",
    "date": "2025-02-27T09:15:00",
    "content": {
      "rendered": "<p>The Kindle Paperwhite Signature Edition features a 6.8-inch display, wireless charging, auto-adjusting front light, and 32GB storage. Perfect for reading anywhere, anytime.</p><p>Regular price: $189.99</p><p>Deal price: $132.99</p><p>You save: $57.00</p>"
    },
    "excerpt": {
      "rendered": "<p>Kindle Paperwhite Signature Edition with 6.8-inch display, wireless charging, and 32GB storage. Now 30% off at $132.99!</p>"
    }
  },
  {
    "id": 10,
    "title": {
      "rendered": "LG C3 65\" OLED 4K Smart TV - $500 Off"
    },
    "link": "https://www.example.com/deals/lg-c3-oled",
    "date": "2025-02-25T13:00:00",
    "content": {
      "rendered": "<p>Experience stunning picture quality with the LG C3 65\" OLED 4K Smart TV. Features self-lit OLED pixels, Dolby Vision, Dolby Atmos, and NVIDIA G-SYNC for gaming.</p><p>Regular price: $1,799.99</p><p>Deal price: $1,299.99</p><p>You save: $500.00</p>"
    },
    "excerpt": {
      "rendered": "<p>LG C3 65\" OLED 4K Smart TV with self-lit pixels and Dolby Vision. Save $500 - now only $1,299.99!</p>"
    }
  }
]

# Function to fetch deals from DealsFinders.com (from shopping_assistant.py)
def fetch_deals_data(url="https://www.dealsfinders.com/wp-json/wp/v2/posts", num_pages=2, per_page=100, use_sample_data=False, timeout=15):
    """
    Fetch deals data exclusively from the DealsFinders API or use sample data.

    Args:
        url: Posts endpoint to query.
        num_pages: Maximum number of pages to request, starting at page 1.
        per_page: Number of posts requested per page.
        use_sample_data: When True, skip the network and return SAMPLE_DEALS.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list of raw deal dicts as decoded from the API response. If a page
        fails after earlier pages succeeded, the deals fetched so far are
        returned. SAMPLE_DEALS is returned when sample data is requested or
        when nothing could be fetched.
    """
    if use_sample_data:
        print("Using sample deals data")
        return SAMPLE_DEALS

    # Add a user agent to avoid being blocked; it is the same for every page.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36'
    }

    all_deals = []

    for page in range(1, num_pages + 1):
        try:
            # ``params`` lets requests build the query string, so ``url`` may
            # already carry its own query parameters. Without a timeout a
            # stalled connection would block the caller indefinitely.
            response = requests.get(
                url,
                params={"page": page, "per_page": per_page},
                headers=headers,
                timeout=timeout,
            )
            if response.status_code != 200:
                print(f"Failed to fetch page {page} from DealsFinders API: {response.status_code}")
                break
            deals = response.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers a body that is not valid JSON.
            print(f"Error fetching page {page} from DealsFinders API: {str(e)}")
            break

        if not isinstance(deals, list):
            print(f"Unexpected response for page {page} from DealsFinders API")
            break

        all_deals.extend(deals)
        print(f"Fetched page {page} with {len(deals)} deals from DealsFinders API")

        # If we get fewer deals than requested, we've reached the end
        if len(deals) < per_page:
            print(f"Reached the end of available deals at page {page}")
            break

    # Only fall back to the sample data when nothing at all was fetched, so a
    # failure on a later page does not throw away pages already retrieved.
    if not all_deals:
        print("No deals fetched from API. Using sample deals data")
        return SAMPLE_DEALS

    return all_deals

# Function to process deals data (from shopping_assistant.py)
def process_deals_data(deals_data):
    """
    Flatten raw deal records into plain dictionaries with text-only fields.

    For each record the 'content' and 'excerpt' HTML is converted to text with
    extract_text_from_html(); 'id', 'title', 'link' and 'date' are copied over.
    A record that raises while being processed is reported and skipped.

    Args:
        deals_data: Iterable of raw deal dicts (same layout as SAMPLE_DEALS).

    Returns:
        A list of dicts with the keys 'id', 'title', 'link', 'date',
        'content' and 'excerpt', in input order.
    """
    def rendered(record, field):
        # Missing fields fall back to an empty string.
        return record.get(field, {}).get('rendered', '')

    cleaned = []

    for raw in deals_data:
        try:
            body_html = rendered(raw, 'content')
            summary_html = rendered(raw, 'excerpt')

            body_text = extract_text_from_html(body_html)
            summary_text = extract_text_from_html(summary_html)

            cleaned.append({
                'id': raw.get('id'),
                'title': rendered(raw, 'title'),
                'link': raw.get('link', ''),
                'date': raw.get('date', ''),
                'content': body_text,
                'excerpt': summary_text,
            })
        except Exception as err:
            print(f"Error processing deal: {str(err)}")

    return cleaned

# Product categories, each mapped to a one-sentence description.
# The descriptions are embedded for semantic search when the recommended
# models load; the keys double as the classifier's candidate labels.
category_descriptions = {
    "electronics": "Electronic devices like headphones, speakers, TVs, smartphones, and gadgets",
    "computers": "Laptops, desktops, computer parts, monitors, and computing accessories",
    "mobile": "Mobile phones, smartphones, phone cases, screen protectors, and chargers",
    "audio": "Headphones, earbuds, speakers, microphones, and audio equipment",
    "clothing": "Clothes, shirts, pants, dresses, and fashion items",
    "footwear": "Shoes, boots, sandals, slippers, and all types of footwear",
    "home": "Home decor, furniture, bedding, and household items",
    "kitchen": "Kitchen appliances, cookware, utensils, and kitchen gadgets",
    "toys": "Toys, games, and children's entertainment items",
    "sports": "Sports equipment, fitness gear, and outdoor recreation items",
    "beauty": "Beauty products, makeup, skincare, and personal care items",
    "books": "Books, e-books, audiobooks, and reading materials",
}

# Category names in declaration order (iterating a dict yields its keys).
categories = list(category_descriptions)

# Try to load the recommended models.
# On success this defines `classifier`, `sentence_model`, `util`,
# `category_texts` and `category_embeddings`, and sets
# `using_recommended_models = True`. If anything in the block raises
# (including the imports), the except branch loads a local model instead and
# sets the flag to False. classify_text() branches on that flag.
try:
    # 1. Load BART model for zero-shot classification
    from transformers import pipeline
    
    # Initialize the zero-shot classification pipeline
    classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
    print("Using facebook/bart-large-mnli for classification")
    
    # 2. Load MPNet model for semantic search
    from sentence_transformers import SentenceTransformer, util
    
    # Load the sentence transformer model
    sentence_model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
    print("Using sentence-transformers/all-mpnet-base-v2 for semantic search")
    
    # Pre-compute embeddings for category descriptions
    # NOTE(review): category_embeddings is not referenced anywhere else in the
    # visible part of this file - confirm whether it is still needed.
    category_texts = list(category_descriptions.values())
    category_embeddings = sentence_model.encode(category_texts, convert_to_tensor=True)
    
    # Using recommended models
    using_recommended_models = True
except Exception as e:
    # Fall back to local model if recommended models fail to load
    print(f"Error loading recommended models: {str(e)}")
    print("Falling back to local model")
    
    # The local tokenizer and model are loaded from the directory that
    # contains this file.
    model_path = os.path.dirname(os.path.abspath(__file__))
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForSequenceClassification.from_pretrained(model_path)
    
    # Load the local categories; this replaces the module-level `categories`
    # list that was derived from category_descriptions.
    try:
        with open(os.path.join(model_path, "categories.json"), "r") as f:
            categories = json.load(f)
    except Exception as e:
        print(f"Error loading categories: {str(e)}")
        # Hard-coded default used when categories.json cannot be read
        categories = ["electronics", "clothing", "home", "kitchen", "toys", "other"]
    
    # Not using recommended models
    using_recommended_models = False

# File path for storing deals data locally.
# This is a relative path, so it is resolved against the process's current
# working directory. fetch_and_save_deals() writes it and
# load_deals_from_file() reads it.
DEALS_DATA_PATH = "deals_data.json"

# Function to fetch and save a large number of deals
def fetch_and_save_deals(max_deals=10000, per_page=100):
    """
    Fetch a large number of deals and save them to a local file.

    Pages are requested from the DealsFinders posts endpoint until the API
    runs out of deals, ``max_deals`` is reached, a request fails, or the
    100-page cap is hit. The deals are passed through process_deals_data()
    and written as JSON to DEALS_DATA_PATH.

    Args:
        max_deals: Upper bound on the number of deals kept.
        per_page: Number of posts requested per page.

    Returns:
        The list of processed deals (at most ``max_deals`` raw deals are
        processed). The list is returned even if writing the file fails.
        When nothing was fetched the file is left untouched, so a transient
        failure does not replace an existing cache with an empty one.
    """
    print(f"Fetching up to {max_deals} deals...")

    # Ceiling division gives the pages needed for max_deals; limit to 100 pages max.
    num_pages = min((max_deals + per_page - 1) // per_page, 100)

    endpoint = "https://www.dealsfinders.com/wp-json/wp/v2/posts"
    # Add a user agent to avoid being blocked; it is the same for every page.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36'
    }

    all_deals = []

    for page in range(1, num_pages + 1):
        try:
            # Without a timeout a stalled connection would hang the crawl.
            response = requests.get(
                endpoint,
                params={"page": page, "per_page": per_page},
                headers=headers,
                timeout=15,
            )
            if response.status_code != 200:
                print(f"Failed to fetch page {page} from DealsFinders API: {response.status_code}")
                break
            deals = response.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers a body that is not valid JSON.
            print(f"Error fetching page {page} from DealsFinders API: {str(e)}")
            break

        if not isinstance(deals, list):
            print(f"Unexpected response for page {page} from DealsFinders API")
            break

        all_deals.extend(deals)
        print(f"Fetched page {page} with {len(deals)} deals from DealsFinders API")

        # If we've reached the maximum number of deals, stop
        if len(all_deals) >= max_deals:
            print(f"Reached the maximum number of deals ({max_deals})")
            break

        # If we get fewer deals than requested, we've reached the end
        if len(deals) < per_page:
            print(f"Reached the end of available deals at page {page}")
            break

    # Trim on every exit path: a short final page can push the total past
    # max_deals even though the loop stopped for a different reason.
    all_deals = all_deals[:max_deals]

    # Process the deals
    processed_deals = process_deals_data(all_deals)

    if not processed_deals:
        print(f"No deals fetched; leaving {DEALS_DATA_PATH} unchanged")
        return processed_deals

    # Save the deals to a local file
    try:
        with open(DEALS_DATA_PATH, "w", encoding="utf-8") as f:
            json.dump(processed_deals, f)
        print(f"Saved {len(processed_deals)} deals to {DEALS_DATA_PATH}")
    except (OSError, TypeError, ValueError) as e:
        # Best effort: the caller still receives the in-memory result.
        print(f"Error saving deals to file: {str(e)}")

    return processed_deals

# Function to load deals from the local file
def load_deals_from_file():
    """
    Read previously saved deals from DEALS_DATA_PATH.

    Returns:
        The decoded JSON content of the file, or None when the file does not
        exist or cannot be read or parsed. Every outcome is reported with a
        print().
    """
    try:
        # Guard clause: nothing to load if the file has not been created yet.
        if not os.path.exists(DEALS_DATA_PATH):
            print(f"Deals file {DEALS_DATA_PATH} does not exist")
            return None

        with open(DEALS_DATA_PATH, "r") as handle:
            loaded = json.load(handle)
        print(f"Loaded {len(loaded)} deals from {DEALS_DATA_PATH}")
        return loaded
    except Exception as err:
        print(f"Error loading deals from file: {str(err)}")
        return None

# Global variable to store deals data.
# Holds the processed deals (the output format of process_deals_data) that
# classify_text() searches; classify_text() also assigns it if it is None.
deals_cache = None

# Load deals from file on startup. This runs at import time.
try:
    # Try to load from file
    deals_cache = load_deals_from_file()
    
    # If file doesn't exist or is empty, use sample data
    if deals_cache is None or len(deals_cache) == 0:
        print("No deals found in local file. Using sample data...")
        deals_cache = process_deals_data(SAMPLE_DEALS)
        
    print(f"Initialized with {len(deals_cache) if deals_cache else 0} deals")
except Exception as e:
    # Reached, for example, if len() fails on whatever the file contained.
    print(f"Error initializing deals cache: {str(e)}")
    # Fall back to sample data
    deals_cache = process_deals_data(SAMPLE_DEALS)
    print(f"Initialized with {len(deals_cache)} sample deals")

# Extra search terms for the keyword fallback, keyed by predicted category.
_CATEGORY_SEARCH_TERMS = {
    "electronics": ['electronic', 'device', 'gadget', 'tech', 'technology'],
    "kitchen": ['appliance', 'cookware', 'utensil', 'blender', 'mixer', 'toaster', 'microwave', 'oven'],
    "home": ['furniture', 'decor', 'decoration', 'bedding', 'household'],
    "clothing": ['clothes', 'shirt', 'pants', 'dress', 'fashion', 'wear', 'apparel'],
    "toys": ['game', 'play', 'children', 'kid', 'kids', 'fun'],
}

# Electronics sub-topics as (trigger substrings, extra terms); first match wins.
_ELECTRONICS_SUBTOPICS = (
    (('headphone', 'headphones'),
     ['earbuds', 'earphones', 'earpods', 'airpods', 'audio', 'bluetooth', 'wireless']),
    (('laptop', 'computer'), ['notebook', 'macbook', 'chromebook', 'pc']),
    (('tv', 'television'), ['smart tv', 'roku', 'streaming']),
)


def _rank_categories(text):
    """Return (category, score) pairs predicted for ``text``, best first."""
    if using_recommended_models:
        # Using BART for zero-shot classification
        prediction = classifier(text, categories, multi_label=True)
        # Only the first three labels are considered. This relies on the
        # pipeline returning labels ordered by descending score, as the
        # Transformers documentation describes.
        leading = list(zip(prediction['labels'], prediction['scores']))[:3]
        # Lower threshold for zero-shot classification
        return [(label, score) for label, score in leading if score > 0.1]

    # Using the original classification model
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
        predictions = torch.sigmoid(outputs.logits)

    # Threshold for multi-label classification
    ranked = [
        (categories[i], score.item())
        for i, score in enumerate(predictions[0])
        if score > 0.5
    ]
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked


def _semantic_deal_search(text, deals, limit=5):
    """Return up to ``limit`` deals ranked by embedding similarity to ``text``."""
    if not deals:
        # Nothing to rank; avoids encoding an empty batch and topk with k=0.
        return []

    # Combine title and excerpt for better matching
    deal_texts = [f"{deal['title']} {deal['excerpt']}" for deal in deals]

    # NOTE(review): deal embeddings are recomputed on every query; caching
    # them alongside deals_cache would help with large deal sets.
    query_embedding = sentence_model.encode(text, convert_to_tensor=True)
    deal_embeddings = sentence_model.encode(deal_texts, convert_to_tensor=True)

    similarities = util.cos_sim(query_embedding, deal_embeddings)[0]
    top_indices = torch.topk(similarities, k=min(limit, len(deals))).indices
    return [deals[idx] for idx in top_indices.tolist()]


def _weighted_hits(terms, weighted_fields):
    """Sum the weight of every (term, field) pair where term occurs in field."""
    return sum(
        weight
        for term in terms
        for field, weight in weighted_fields
        if term in field
    )


def _keyword_deal_search(text, deals, top_category, limit=5):
    """Return up to ``limit`` deals ranked by weighted substring matches."""
    lowered = text.lower()
    query_terms = lowered.split()

    # Add category-specific terms
    extra_terms = list(_CATEGORY_SEARCH_TERMS.get(top_category, ()))
    if top_category == "electronics":
        for triggers, related in _ELECTRONICS_SUBTOPICS:
            if any(trigger in lowered for trigger in triggers):
                extra_terms.extend(related)
                break
    # Terms the user typed are already counted at the higher weight.
    extra_terms = [term for term in extra_terms if term not in query_terms]
    category_terms = [top_category.lower()] if top_category else []

    scored_deals = []
    for deal in deals:
        title = deal['title'].lower()
        content = deal['content'].lower()
        excerpt = deal['excerpt'].lower()

        score = (
            # Original query terms (higher weight)
            _weighted_hits(query_terms, ((title, 10), (content, 3), (excerpt, 3)))
            # Expanded terms (lower weight)
            + _weighted_hits(extra_terms, ((title, 5), (content, 1), (excerpt, 1)))
            # Boost for deals that mention the top category by name
            + _weighted_hits(category_terms, ((title, 15), (content, 5), (excerpt, 5)))
        )

        # Keep the deal only if it has any relevance
        if score > 0:
            scored_deals.append((deal, score))

    # Sort by score (descending); the sort is stable, so ties keep input order
    scored_deals.sort(key=lambda pair: pair[1], reverse=True)
    return [deal for deal, _ in scored_deals[:limit]]


def classify_text(text, fetch_deals=True):
    """
    Classify the text using the model and fetch relevant deals.

    Args:
        text: The shopping query entered by the user.
        fetch_deals: When True, append a list of matching deals to the result.

    Returns:
        A Markdown string with the predicted categories and, if requested,
        up to five matching deals. A blank or missing query returns a short
        prompt asking for input instead of being sent to the models.
    """
    global deals_cache

    # A blank query carries no signal, so do not classify or embed it.
    if text is None or not text.strip():
        return "Please enter a shopping query.\n\n"

    top_categories = _rank_categories(text)

    # Format the classification results
    if top_categories:
        parts = [f"Top categories for '{text}':\n\n"]
        parts.extend(f"- {category}: {score:.4f}\n" for category, score in top_categories)
        parts.append(
            f"\nBased on your query, I would recommend looking for deals in the **{top_categories[0][0]}** category.\n\n"
        )
    else:
        parts = [f"No categories found for '{text}'. Please try a different query.\n\n"]

    # Fetch and display deals if requested
    if fetch_deals:
        parts.append("## Relevant Deals from DealsFinders.com\n\n")

        try:
            # Fetch deals data if not already cached
            if deals_cache is None:
                # Use sample data for reliability
                deals_data = fetch_deals_data(num_pages=2, use_sample_data=True)
                deals_cache = process_deals_data(deals_data)

            if using_recommended_models:
                # Using MPNet for semantic search
                relevant_deals = _semantic_deal_search(text, deals_cache)
            else:
                # Keyword-based search with category awareness
                top_category = top_categories[0][0] if top_categories else None
                relevant_deals = _keyword_deal_search(text, deals_cache, top_category)

            if relevant_deals:
                for i, deal in enumerate(relevant_deals, 1):
                    parts.append(f"{i}. [{deal['title']}]({deal['link']})\n\n")
            else:
                parts.append(
                    "No specific deals found for your query. Try a different search term or browse the recommended category.\n\n"
                )

        except Exception as e:
            # UI boundary: report the problem in the output rather than
            # failing the whole request.
            parts.append(f"Error fetching deals: {str(e)}\n\n")

    return "".join(parts)

# Create the Gradio interface.
# The two inputs are passed positionally to classify_text(text, fetch_deals),
# and its return value is rendered by the Markdown output component.
demo = gr.Interface(
    fn=classify_text,
    inputs=[
        # First input -> `text`
        gr.Textbox(
            lines=2, 
            placeholder="Enter your shopping query here...",
            label="Shopping Query"
        ),
        # Second input -> `fetch_deals`
        gr.Checkbox(
            label="Fetch Deals",
            value=True,
            info="Check to fetch and display deals from DealsFinders.com"
        )
    ],
    outputs=gr.Markdown(label="Results"),
    title="Shopping Assistant",
    description="""
    This demo shows how to use the Shopping Assistant model to classify shopping queries into categories and find relevant deals.
    Enter a shopping query below to see which categories it belongs to and find deals from DealsFinders.com.
    
    Examples:
    - "I'm looking for headphones"
    - "Do you have any kitchen appliance deals?"
    - "Show me the best laptop deals"
    - "I need a new smart TV"
    """,
    # Each example row supplies one value per input: [query, fetch_deals]
    examples=[
        ["I'm looking for headphones", True],
        ["Do you have any kitchen appliance deals?", True],
        ["Show me the best laptop deals", True],
        ["I need a new smart TV", True],
        ["headphone deals", True]
    ],
    theme=gr.themes.Soft()
)

# Launch the app only when this file is run as a script, not when imported
if __name__ == "__main__":
    demo.launch()