noddysnots committed on
Commit
0f58622
·
verified ·
1 Parent(s): f2c6b92

Update product_recommender.py

Browse files
Files changed (1) hide show
  1. product_recommender.py +173 -43
product_recommender.py CHANGED
@@ -1,22 +1,32 @@
1
  from typing import Dict, List
2
- import requests
3
- from bs4 import BeautifulSoup
4
  import aiohttp
5
  import asyncio
6
- import json
 
7
  from sentence_transformers import SentenceTransformer
8
  import numpy as np
9
- import re
10
 
11
  class DynamicRecommender:
12
  def __init__(self):
13
  self.headers = {
14
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
 
 
 
 
15
  }
 
16
  self.model = SentenceTransformer('all-mpnet-base-v2')
17
-
 
 
 
18
  async def search_amazon(self, query: str) -> List[Dict]:
19
- """Search Amazon for products"""
 
 
 
 
20
  search_url = f"https://www.amazon.in/s?k={query}"
21
  async with aiohttp.ClientSession() as session:
22
  async with session.get(search_url, headers=self.headers) as response:
@@ -24,9 +34,46 @@ class DynamicRecommender:
24
  html = await response.text()
25
  return self._parse_amazon_results(html)
26
  return []
27
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  async def search_flipkart(self, query: str) -> List[Dict]:
29
- """Search Flipkart for products"""
 
 
 
30
  search_url = f"https://www.flipkart.com/search?q={query}"
31
  async with aiohttp.ClientSession() as session:
32
  async with session.get(search_url, headers=self.headers) as response:
@@ -35,78 +82,161 @@ class DynamicRecommender:
35
  return self._parse_flipkart_results(html)
36
  return []
37
 
38
- def _parse_amazon_results(self, html: str) -> List[Dict]:
39
  soup = BeautifulSoup(html, 'html.parser')
40
  products = []
41
- for item in soup.select('.s-result-item'):
 
 
 
 
42
  try:
43
- name = item.select_one('.a-text-normal')
44
- price = item.select_one('.a-price-whole')
45
- if name and price:
 
 
 
 
 
 
46
  products.append({
47
- 'name': name.text.strip(),
48
- 'price': price.text.strip(),
49
- 'source': 'Amazon',
50
- 'url': 'https://amazon.in' + item.select_one('a')['href']
 
51
  })
52
  except Exception:
53
  continue
 
 
54
  return products[:5]
55
 
56
- def _parse_flipkart_results(self, html: str) -> List[Dict]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  soup = BeautifulSoup(html, 'html.parser')
58
  products = []
59
- for item in soup.select('._1AtVbE'):
 
 
 
 
 
60
  try:
61
- name = item.select_one('._4rR01T')
62
- price = item.select_one('._30jeq3')
63
- if name and price:
 
 
 
 
 
 
64
  products.append({
65
- 'name': name.text.strip(),
66
- 'price': price.text.strip(),
67
- 'source': 'Flipkart',
68
- 'url': 'https://flipkart.com' + item.select_one('a')['href']
 
69
  })
70
  except Exception:
71
  continue
 
 
72
  return products[:5]
73
 
 
 
 
74
  def _extract_keywords(self, text: str) -> List[str]:
75
- """Extract relevant search keywords from input"""
76
- age_match = re.search(r'age\s+(\d+)', text.lower())
 
 
 
 
 
 
77
  age = age_match.group(1) if age_match else None
78
 
79
  interests = []
80
- if 'software' in text.lower() or 'engineer' in text.lower():
 
 
81
  interests.extend(['programming books', 'tech gadgets'])
82
- if 'books' in text.lower():
83
  interests.append('books')
84
- if 'successful' in text.lower():
 
 
85
  interests.extend(['self help books', 'business books'])
86
 
87
- return [f"{interest} for {age} year old" if age else interest for interest in interests]
 
 
 
 
 
 
 
 
 
 
88
 
89
- async def get_recommendations(self, text: str) -> Dict:
90
- """Get personalized recommendations"""
 
 
 
 
 
91
  try:
 
92
  keywords = self._extract_keywords(text)
 
 
93
  all_products = []
94
-
95
  for keyword in keywords:
96
  amazon_products = await self.search_amazon(keyword)
97
  flipkart_products = await self.search_flipkart(keyword)
98
- all_products.extend(amazon_products + flipkart_products)
99
-
100
- # Remove duplicates and sort by relevance
 
 
 
 
101
  seen = set()
102
  unique_products = []
103
  for product in all_products:
104
  if product['name'] not in seen:
105
  seen.add(product['name'])
106
  unique_products.append(product)
107
-
108
- return unique_products[:5]
109
-
 
 
 
 
 
110
  except Exception as e:
111
  print(f"Error in recommendations: {str(e)}")
112
- return []
 
import asyncio
import re
from typing import Dict, List
from urllib.parse import quote, quote_plus

import aiohttp
import numpy as np
from bs4 import BeautifulSoup
from sentence_transformers import SentenceTransformer
 
8
 
class DynamicRecommender:
    """Builds product recommendations by scraping Amazon, Flipkart and IGP."""

    def __init__(self):
        # Browser-like User-Agent so the store frontends serve regular HTML
        # instead of a bot-detection page.
        browser_ua = (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/100.0.4896.75 Safari/537.36'
        )
        self.headers = {'User-Agent': browser_ua}
        # Load your model if you need it for further logic
        self.model = SentenceTransformer('all-mpnet-base-v2')
20
+
21
+ # ------------------------------------------------------------------
22
+ # Amazon search
23
+ # ------------------------------------------------------------------
24
  async def search_amazon(self, query: str) -> List[Dict]:
25
+ """
26
+ Search Amazon for products by building the search URL
27
+ and parsing the resulting HTML.
28
+ """
29
+ print(f"Searching Amazon for: {query}")
30
  search_url = f"https://www.amazon.in/s?k={query}"
31
  async with aiohttp.ClientSession() as session:
32
  async with session.get(search_url, headers=self.headers) as response:
 
34
  html = await response.text()
35
  return self._parse_amazon_results(html)
36
  return []
37
+
38
+ def _parse_amazon_results(self, html: str) -> List[Dict]:
39
+ soup = BeautifulSoup(html, 'html.parser')
40
+ products = []
41
+
42
+ # These selectors may need updating if Amazon changes their HTML
43
+ search_items = soup.select('.s-result-item')
44
+
45
+ for item in search_items:
46
+ try:
47
+ name_elem = item.select_one('.a-text-normal')
48
+ price_elem = item.select_one('.a-price-whole')
49
+ link_elem = item.select_one('a.a-link-normal')
50
+
51
+ if name_elem and price_elem and link_elem:
52
+ product_name = name_elem.get_text(strip=True)
53
+ product_price = price_elem.get_text(strip=True)
54
+ product_url = link_elem.get('href')
55
+
56
+ products.append({
57
+ 'name': product_name,
58
+ 'price': product_price,
59
+ 'source': 'Amazon',
60
+ 'url': 'https://www.amazon.in' + product_url,
61
+ 'description': 'Leadership/novel recommendation from Amazon'
62
+ })
63
+ except Exception:
64
+ continue
65
+
66
+ print(f"Found {len(products)} Amazon products.")
67
+ return products[:5]
68
+
69
+ # ------------------------------------------------------------------
70
+ # Flipkart search
71
+ # ------------------------------------------------------------------
72
  async def search_flipkart(self, query: str) -> List[Dict]:
73
+ """
74
+ Search Flipkart for products.
75
+ """
76
+ print(f"Searching Flipkart for: {query}")
77
  search_url = f"https://www.flipkart.com/search?q={query}"
78
  async with aiohttp.ClientSession() as session:
79
  async with session.get(search_url, headers=self.headers) as response:
 
82
  return self._parse_flipkart_results(html)
83
  return []
84
 
85
+ def _parse_flipkart_results(self, html: str) -> List[Dict]:
86
  soup = BeautifulSoup(html, 'html.parser')
87
  products = []
88
+
89
+ # These selectors may need updating if Flipkart changes their HTML
90
+ item_cards = soup.select('._1AtVbE')
91
+
92
+ for item in item_cards:
93
  try:
94
+ name_elem = item.select_one('._4rR01T')
95
+ price_elem = item.select_one('._30jeq3')
96
+ link_elem = item.select_one('a')
97
+
98
+ if name_elem and price_elem and link_elem:
99
+ product_name = name_elem.get_text(strip=True)
100
+ product_price = price_elem.get_text(strip=True)
101
+ product_url = link_elem.get('href')
102
+
103
  products.append({
104
+ 'name': product_name,
105
+ 'price': product_price,
106
+ 'source': 'Flipkart',
107
+ 'url': 'https://www.flipkart.com' + product_url,
108
+ 'description': 'Leadership/novel recommendation from Flipkart'
109
  })
110
  except Exception:
111
  continue
112
+
113
+ print(f"Found {len(products)} Flipkart products.")
114
  return products[:5]
115
 
116
+ # ------------------------------------------------------------------
117
+ # IGP search (example approach; may need updating)
118
+ # ------------------------------------------------------------------
119
+ async def search_igp(self, query: str) -> List[Dict]:
120
+ """
121
+ Search IGP for products (gift store).
122
+ Adjust the selectors or approach as needed.
123
+ """
124
+ print(f"Searching IGP for: {query}")
125
+ search_url = f"https://www.igp.com/search/{query}"
126
+ async with aiohttp.ClientSession() as session:
127
+ async with session.get(search_url, headers=self.headers) as response:
128
+ if response.status == 200:
129
+ html = await response.text()
130
+ return self._parse_igp_results(html)
131
+ return []
132
+
133
+ def _parse_igp_results(self, html: str) -> List[Dict]:
134
  soup = BeautifulSoup(html, 'html.parser')
135
  products = []
136
+
137
+ # You must figure out correct selectors for IGP
138
+ # This is just an *example*; may not match actual IGP HTML
139
+ item_cards = soup.select('.product-item')
140
+
141
+ for item in item_cards:
142
  try:
143
+ name_elem = item.select_one('.product-title')
144
+ price_elem = item.select_one('.product-price')
145
+ link_elem = item.select_one('a')
146
+
147
+ if name_elem and price_elem and link_elem:
148
+ product_name = name_elem.get_text(strip=True)
149
+ product_price = price_elem.get_text(strip=True)
150
+ product_url = link_elem.get('href')
151
+
152
  products.append({
153
+ 'name': product_name,
154
+ 'price': product_price,
155
+ 'source': 'IGP',
156
+ 'url': 'https://www.igp.com' + product_url,
157
+ 'description': 'Gift idea from IGP'
158
  })
159
  except Exception:
160
  continue
161
+
162
+ print(f"Found {len(products)} IGP products.")
163
  return products[:5]
164
 
165
+ # ------------------------------------------------------------------
166
+ # Extract keywords / fallback
167
+ # ------------------------------------------------------------------
168
  def _extract_keywords(self, text: str) -> List[str]:
169
+ """
170
+ Extract relevant search keywords from input text.
171
+ You can expand these rules or use the entire text as fallback.
172
+ """
173
+ text_lower = text.lower()
174
+
175
+ # Try to find age
176
+ age_match = re.search(r'age\s+(\d+)', text_lower)
177
  age = age_match.group(1) if age_match else None
178
 
179
  interests = []
180
+
181
+ # Some sample rules
182
+ if 'software' in text_lower or 'engineer' in text_lower:
183
  interests.extend(['programming books', 'tech gadgets'])
184
+ if 'books' in text_lower:
185
  interests.append('books')
186
+ if 'novel' in text_lower or 'leader' in text_lower or 'leadership' in text_lower:
187
+ interests.append('leadership novels')
188
+ if 'successful' in text_lower:
189
  interests.extend(['self help books', 'business books'])
190
 
191
+ # If we found no interests at all, fallback to using the entire text
192
+ if not interests:
193
+ interests.append(text)
194
+
195
+ # Optionally add "for 25 year old" context if age is found
196
+ if age:
197
+ # You can decide how exactly you want to incorporate age
198
+ interests = [f"{interest} for {age} year old" for interest in interests]
199
+
200
+ print("Extracted keywords:", interests)
201
+ return interests
202
 
203
+ # ------------------------------------------------------------------
204
+ # Main recommendations
205
+ # ------------------------------------------------------------------
206
+ async def get_recommendations(self, text: str) -> List[Dict]:
207
+ """
208
+ Get personalized recommendations from Amazon, Flipkart, and IGP.
209
+ """
210
  try:
211
+ # Step 1: Extract keywords from user input
212
  keywords = self._extract_keywords(text)
213
+
214
+ # Step 2: Search across multiple sources
215
  all_products = []
 
216
  for keyword in keywords:
217
  amazon_products = await self.search_amazon(keyword)
218
  flipkart_products = await self.search_flipkart(keyword)
219
+ igp_products = await self.search_igp(keyword) # new
220
+
221
+ all_products.extend(amazon_products)
222
+ all_products.extend(flipkart_products)
223
+ all_products.extend(igp_products)
224
+
225
+ # Step 3: De-duplicate by product name
226
  seen = set()
227
  unique_products = []
228
  for product in all_products:
229
  if product['name'] not in seen:
230
  seen.add(product['name'])
231
  unique_products.append(product)
232
+
233
+ # Step 4: Optionally, sort by "relevance" if desired
234
+ # For now, we just slice the first five
235
+ final_results = unique_products[:5]
236
+
237
+ print(f"Returning {len(final_results)} products.")
238
+ return final_results
239
+
240
  except Exception as e:
241
  print(f"Error in recommendations: {str(e)}")
242
+ return []