Spaces:
Sleeping
Sleeping
Update product_recommender.py
Browse files- product_recommender.py +173 -43
product_recommender.py
CHANGED
@@ -1,22 +1,32 @@
|
|
1 |
from typing import Dict, List
|
2 |
-
import requests
|
3 |
-
from bs4 import BeautifulSoup
|
4 |
import aiohttp
|
5 |
import asyncio
|
6 |
-
import
|
|
|
7 |
from sentence_transformers import SentenceTransformer
|
8 |
import numpy as np
|
9 |
-
import re
|
10 |
|
11 |
class DynamicRecommender:
|
12 |
def __init__(self):
|
13 |
self.headers = {
|
14 |
-
'User-Agent':
|
|
|
|
|
|
|
|
|
15 |
}
|
|
|
16 |
self.model = SentenceTransformer('all-mpnet-base-v2')
|
17 |
-
|
|
|
|
|
|
|
18 |
async def search_amazon(self, query: str) -> List[Dict]:
|
19 |
-
"""
|
|
|
|
|
|
|
|
|
20 |
search_url = f"https://www.amazon.in/s?k={query}"
|
21 |
async with aiohttp.ClientSession() as session:
|
22 |
async with session.get(search_url, headers=self.headers) as response:
|
@@ -24,9 +34,46 @@ class DynamicRecommender:
|
|
24 |
html = await response.text()
|
25 |
return self._parse_amazon_results(html)
|
26 |
return []
|
27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
async def search_flipkart(self, query: str) -> List[Dict]:
|
29 |
-
"""
|
|
|
|
|
|
|
30 |
search_url = f"https://www.flipkart.com/search?q={query}"
|
31 |
async with aiohttp.ClientSession() as session:
|
32 |
async with session.get(search_url, headers=self.headers) as response:
|
@@ -35,78 +82,161 @@ class DynamicRecommender:
|
|
35 |
return self._parse_flipkart_results(html)
|
36 |
return []
|
37 |
|
38 |
-
def
|
39 |
soup = BeautifulSoup(html, 'html.parser')
|
40 |
products = []
|
41 |
-
|
|
|
|
|
|
|
|
|
42 |
try:
|
43 |
-
|
44 |
-
|
45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
products.append({
|
47 |
-
'name':
|
48 |
-
'price':
|
49 |
-
'source': '
|
50 |
-
'url': 'https://
|
|
|
51 |
})
|
52 |
except Exception:
|
53 |
continue
|
|
|
|
|
54 |
return products[:5]
|
55 |
|
56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
soup = BeautifulSoup(html, 'html.parser')
|
58 |
products = []
|
59 |
-
|
|
|
|
|
|
|
|
|
|
|
60 |
try:
|
61 |
-
|
62 |
-
|
63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
products.append({
|
65 |
-
'name':
|
66 |
-
'price':
|
67 |
-
'source': '
|
68 |
-
'url': 'https://
|
|
|
69 |
})
|
70 |
except Exception:
|
71 |
continue
|
|
|
|
|
72 |
return products[:5]
|
73 |
|
|
|
|
|
|
|
74 |
def _extract_keywords(self, text: str) -> List[str]:
|
75 |
-
"""
|
76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
age = age_match.group(1) if age_match else None
|
78 |
|
79 |
interests = []
|
80 |
-
|
|
|
|
|
81 |
interests.extend(['programming books', 'tech gadgets'])
|
82 |
-
if 'books' in
|
83 |
interests.append('books')
|
84 |
-
if '
|
|
|
|
|
85 |
interests.extend(['self help books', 'business books'])
|
86 |
|
87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
|
89 |
-
|
90 |
-
|
|
|
|
|
|
|
|
|
|
|
91 |
try:
|
|
|
92 |
keywords = self._extract_keywords(text)
|
|
|
|
|
93 |
all_products = []
|
94 |
-
|
95 |
for keyword in keywords:
|
96 |
amazon_products = await self.search_amazon(keyword)
|
97 |
flipkart_products = await self.search_flipkart(keyword)
|
98 |
-
|
99 |
-
|
100 |
-
|
|
|
|
|
|
|
|
|
101 |
seen = set()
|
102 |
unique_products = []
|
103 |
for product in all_products:
|
104 |
if product['name'] not in seen:
|
105 |
seen.add(product['name'])
|
106 |
unique_products.append(product)
|
107 |
-
|
108 |
-
|
109 |
-
|
|
|
|
|
|
|
|
|
|
|
110 |
except Exception as e:
|
111 |
print(f"Error in recommendations: {str(e)}")
|
112 |
-
return []
|
|
|
import asyncio
import re
from typing import Dict, List
from urllib.parse import quote, quote_plus, urljoin

import aiohttp
import numpy as np
from bs4 import BeautifulSoup
from sentence_transformers import SentenceTransformer
|
|
8 |
|
9 |
class DynamicRecommender:
|
10 |
def __init__(self):
|
11 |
self.headers = {
|
12 |
+
'User-Agent': (
|
13 |
+
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
|
14 |
+
'AppleWebKit/537.36 (KHTML, like Gecko) '
|
15 |
+
'Chrome/100.0.4896.75 Safari/537.36'
|
16 |
+
)
|
17 |
}
|
18 |
+
# Load your model if you need it for further logic
|
19 |
self.model = SentenceTransformer('all-mpnet-base-v2')
|
20 |
+
|
21 |
+
# ------------------------------------------------------------------
|
22 |
+
# Amazon search
|
23 |
+
# ------------------------------------------------------------------
|
24 |
async def search_amazon(self, query: str) -> List[Dict]:
|
25 |
+
"""
|
26 |
+
Search Amazon for products by building the search URL
|
27 |
+
and parsing the resulting HTML.
|
28 |
+
"""
|
29 |
+
print(f"Searching Amazon for: {query}")
|
30 |
search_url = f"https://www.amazon.in/s?k={query}"
|
31 |
async with aiohttp.ClientSession() as session:
|
32 |
async with session.get(search_url, headers=self.headers) as response:
|
|
|
34 |
html = await response.text()
|
35 |
return self._parse_amazon_results(html)
|
36 |
return []
|
37 |
+
|
38 |
+
def _parse_amazon_results(self, html: str) -> List[Dict]:
|
39 |
+
soup = BeautifulSoup(html, 'html.parser')
|
40 |
+
products = []
|
41 |
+
|
42 |
+
# These selectors may need updating if Amazon changes their HTML
|
43 |
+
search_items = soup.select('.s-result-item')
|
44 |
+
|
45 |
+
for item in search_items:
|
46 |
+
try:
|
47 |
+
name_elem = item.select_one('.a-text-normal')
|
48 |
+
price_elem = item.select_one('.a-price-whole')
|
49 |
+
link_elem = item.select_one('a.a-link-normal')
|
50 |
+
|
51 |
+
if name_elem and price_elem and link_elem:
|
52 |
+
product_name = name_elem.get_text(strip=True)
|
53 |
+
product_price = price_elem.get_text(strip=True)
|
54 |
+
product_url = link_elem.get('href')
|
55 |
+
|
56 |
+
products.append({
|
57 |
+
'name': product_name,
|
58 |
+
'price': product_price,
|
59 |
+
'source': 'Amazon',
|
60 |
+
'url': 'https://www.amazon.in' + product_url,
|
61 |
+
'description': 'Leadership/novel recommendation from Amazon'
|
62 |
+
})
|
63 |
+
except Exception:
|
64 |
+
continue
|
65 |
+
|
66 |
+
print(f"Found {len(products)} Amazon products.")
|
67 |
+
return products[:5]
|
68 |
+
|
69 |
+
# ------------------------------------------------------------------
|
70 |
+
# Flipkart search
|
71 |
+
# ------------------------------------------------------------------
|
72 |
async def search_flipkart(self, query: str) -> List[Dict]:
|
73 |
+
"""
|
74 |
+
Search Flipkart for products.
|
75 |
+
"""
|
76 |
+
print(f"Searching Flipkart for: {query}")
|
77 |
search_url = f"https://www.flipkart.com/search?q={query}"
|
78 |
async with aiohttp.ClientSession() as session:
|
79 |
async with session.get(search_url, headers=self.headers) as response:
|
|
|
82 |
return self._parse_flipkart_results(html)
|
83 |
return []
|
84 |
|
85 |
+
def _parse_flipkart_results(self, html: str) -> List[Dict]:
|
86 |
soup = BeautifulSoup(html, 'html.parser')
|
87 |
products = []
|
88 |
+
|
89 |
+
# These selectors may need updating if Flipkart changes their HTML
|
90 |
+
item_cards = soup.select('._1AtVbE')
|
91 |
+
|
92 |
+
for item in item_cards:
|
93 |
try:
|
94 |
+
name_elem = item.select_one('._4rR01T')
|
95 |
+
price_elem = item.select_one('._30jeq3')
|
96 |
+
link_elem = item.select_one('a')
|
97 |
+
|
98 |
+
if name_elem and price_elem and link_elem:
|
99 |
+
product_name = name_elem.get_text(strip=True)
|
100 |
+
product_price = price_elem.get_text(strip=True)
|
101 |
+
product_url = link_elem.get('href')
|
102 |
+
|
103 |
products.append({
|
104 |
+
'name': product_name,
|
105 |
+
'price': product_price,
|
106 |
+
'source': 'Flipkart',
|
107 |
+
'url': 'https://www.flipkart.com' + product_url,
|
108 |
+
'description': 'Leadership/novel recommendation from Flipkart'
|
109 |
})
|
110 |
except Exception:
|
111 |
continue
|
112 |
+
|
113 |
+
print(f"Found {len(products)} Flipkart products.")
|
114 |
return products[:5]
|
115 |
|
116 |
+
# ------------------------------------------------------------------
|
117 |
+
# IGP search (example approach; may need updating)
|
118 |
+
# ------------------------------------------------------------------
|
119 |
+
async def search_igp(self, query: str) -> List[Dict]:
|
120 |
+
"""
|
121 |
+
Search IGP for products (gift store).
|
122 |
+
Adjust the selectors or approach as needed.
|
123 |
+
"""
|
124 |
+
print(f"Searching IGP for: {query}")
|
125 |
+
search_url = f"https://www.igp.com/search/{query}"
|
126 |
+
async with aiohttp.ClientSession() as session:
|
127 |
+
async with session.get(search_url, headers=self.headers) as response:
|
128 |
+
if response.status == 200:
|
129 |
+
html = await response.text()
|
130 |
+
return self._parse_igp_results(html)
|
131 |
+
return []
|
132 |
+
|
133 |
+
def _parse_igp_results(self, html: str) -> List[Dict]:
|
134 |
soup = BeautifulSoup(html, 'html.parser')
|
135 |
products = []
|
136 |
+
|
137 |
+
# You must figure out correct selectors for IGP
|
138 |
+
# This is just an *example*; may not match actual IGP HTML
|
139 |
+
item_cards = soup.select('.product-item')
|
140 |
+
|
141 |
+
for item in item_cards:
|
142 |
try:
|
143 |
+
name_elem = item.select_one('.product-title')
|
144 |
+
price_elem = item.select_one('.product-price')
|
145 |
+
link_elem = item.select_one('a')
|
146 |
+
|
147 |
+
if name_elem and price_elem and link_elem:
|
148 |
+
product_name = name_elem.get_text(strip=True)
|
149 |
+
product_price = price_elem.get_text(strip=True)
|
150 |
+
product_url = link_elem.get('href')
|
151 |
+
|
152 |
products.append({
|
153 |
+
'name': product_name,
|
154 |
+
'price': product_price,
|
155 |
+
'source': 'IGP',
|
156 |
+
'url': 'https://www.igp.com' + product_url,
|
157 |
+
'description': 'Gift idea from IGP'
|
158 |
})
|
159 |
except Exception:
|
160 |
continue
|
161 |
+
|
162 |
+
print(f"Found {len(products)} IGP products.")
|
163 |
return products[:5]
|
164 |
|
165 |
+
# ------------------------------------------------------------------
|
166 |
+
# Extract keywords / fallback
|
167 |
+
# ------------------------------------------------------------------
|
168 |
def _extract_keywords(self, text: str) -> List[str]:
|
169 |
+
"""
|
170 |
+
Extract relevant search keywords from input text.
|
171 |
+
You can expand these rules or use the entire text as fallback.
|
172 |
+
"""
|
173 |
+
text_lower = text.lower()
|
174 |
+
|
175 |
+
# Try to find age
|
176 |
+
age_match = re.search(r'age\s+(\d+)', text_lower)
|
177 |
age = age_match.group(1) if age_match else None
|
178 |
|
179 |
interests = []
|
180 |
+
|
181 |
+
# Some sample rules
|
182 |
+
if 'software' in text_lower or 'engineer' in text_lower:
|
183 |
interests.extend(['programming books', 'tech gadgets'])
|
184 |
+
if 'books' in text_lower:
|
185 |
interests.append('books')
|
186 |
+
if 'novel' in text_lower or 'leader' in text_lower or 'leadership' in text_lower:
|
187 |
+
interests.append('leadership novels')
|
188 |
+
if 'successful' in text_lower:
|
189 |
interests.extend(['self help books', 'business books'])
|
190 |
|
191 |
+
# If we found no interests at all, fallback to using the entire text
|
192 |
+
if not interests:
|
193 |
+
interests.append(text)
|
194 |
+
|
195 |
+
# Optionally add "for 25 year old" context if age is found
|
196 |
+
if age:
|
197 |
+
# You can decide how exactly you want to incorporate age
|
198 |
+
interests = [f"{interest} for {age} year old" for interest in interests]
|
199 |
+
|
200 |
+
print("Extracted keywords:", interests)
|
201 |
+
return interests
|
202 |
|
203 |
+
# ------------------------------------------------------------------
|
204 |
+
# Main recommendations
|
205 |
+
# ------------------------------------------------------------------
|
206 |
+
async def get_recommendations(self, text: str) -> List[Dict]:
|
207 |
+
"""
|
208 |
+
Get personalized recommendations from Amazon, Flipkart, and IGP.
|
209 |
+
"""
|
210 |
try:
|
211 |
+
# Step 1: Extract keywords from user input
|
212 |
keywords = self._extract_keywords(text)
|
213 |
+
|
214 |
+
# Step 2: Search across multiple sources
|
215 |
all_products = []
|
|
|
216 |
for keyword in keywords:
|
217 |
amazon_products = await self.search_amazon(keyword)
|
218 |
flipkart_products = await self.search_flipkart(keyword)
|
219 |
+
igp_products = await self.search_igp(keyword) # new
|
220 |
+
|
221 |
+
all_products.extend(amazon_products)
|
222 |
+
all_products.extend(flipkart_products)
|
223 |
+
all_products.extend(igp_products)
|
224 |
+
|
225 |
+
# Step 3: De-duplicate by product name
|
226 |
seen = set()
|
227 |
unique_products = []
|
228 |
for product in all_products:
|
229 |
if product['name'] not in seen:
|
230 |
seen.add(product['name'])
|
231 |
unique_products.append(product)
|
232 |
+
|
233 |
+
# Step 4: Optionally, sort by "relevance" if desired
|
234 |
+
# For now, we just slice the first five
|
235 |
+
final_results = unique_products[:5]
|
236 |
+
|
237 |
+
print(f"Returning {len(final_results)} products.")
|
238 |
+
return final_results
|
239 |
+
|
240 |
except Exception as e:
|
241 |
print(f"Error in recommendations: {str(e)}")
|
242 |
+
return []
|