Spaces:
Sleeping
Sleeping
Update product_recommender.py
Browse files- product_recommender.py +173 -43
product_recommender.py
CHANGED
@@ -1,22 +1,32 @@
|
|
1 |
from typing import Dict, List
|
2 |
-
import requests
|
3 |
-
from bs4 import BeautifulSoup
|
4 |
import aiohttp
|
5 |
import asyncio
|
6 |
-
import
|
|
|
7 |
from sentence_transformers import SentenceTransformer
|
8 |
import numpy as np
|
9 |
-
import re
|
10 |
|
11 |
class DynamicRecommender:
|
12 |
def __init__(self):
|
13 |
self.headers = {
|
14 |
-
'User-Agent':
|
|
|
|
|
|
|
|
|
15 |
}
|
|
|
16 |
self.model = SentenceTransformer('all-mpnet-base-v2')
|
17 |
-
|
|
|
|
|
|
|
18 |
async def search_amazon(self, query: str) -> List[Dict]:
|
19 |
-
"""
|
|
|
|
|
|
|
|
|
20 |
search_url = f"https://www.amazon.in/s?k={query}"
|
21 |
async with aiohttp.ClientSession() as session:
|
22 |
async with session.get(search_url, headers=self.headers) as response:
|
@@ -24,9 +34,46 @@ class DynamicRecommender:
|
|
24 |
html = await response.text()
|
25 |
return self._parse_amazon_results(html)
|
26 |
return []
|
27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
async def search_flipkart(self, query: str) -> List[Dict]:
|
29 |
-
"""
|
|
|
|
|
|
|
30 |
search_url = f"https://www.flipkart.com/search?q={query}"
|
31 |
async with aiohttp.ClientSession() as session:
|
32 |
async with session.get(search_url, headers=self.headers) as response:
|
@@ -35,78 +82,161 @@ class DynamicRecommender:
|
|
35 |
return self._parse_flipkart_results(html)
|
36 |
return []
|
37 |
|
38 |
-
def
|
39 |
soup = BeautifulSoup(html, 'html.parser')
|
40 |
products = []
|
41 |
-
|
|
|
|
|
|
|
|
|
42 |
try:
|
43 |
-
|
44 |
-
|
45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
products.append({
|
47 |
-
'name':
|
48 |
-
'price':
|
49 |
-
'source': '
|
50 |
-
'url': 'https://
|
|
|
51 |
})
|
52 |
except Exception:
|
53 |
continue
|
|
|
|
|
54 |
return products[:5]
|
55 |
|
56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
soup = BeautifulSoup(html, 'html.parser')
|
58 |
products = []
|
59 |
-
|
|
|
|
|
|
|
|
|
|
|
60 |
try:
|
61 |
-
|
62 |
-
|
63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
products.append({
|
65 |
-
'name':
|
66 |
-
'price':
|
67 |
-
'source': '
|
68 |
-
'url': 'https://
|
|
|
69 |
})
|
70 |
except Exception:
|
71 |
continue
|
|
|
|
|
72 |
return products[:5]
|
73 |
|
|
|
|
|
|
|
74 |
def _extract_keywords(self, text: str) -> List[str]:
|
75 |
-
"""
|
76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
age = age_match.group(1) if age_match else None
|
78 |
|
79 |
interests = []
|
80 |
-
|
|
|
|
|
81 |
interests.extend(['programming books', 'tech gadgets'])
|
82 |
-
if 'books' in
|
83 |
interests.append('books')
|
84 |
-
if '
|
|
|
|
|
85 |
interests.extend(['self help books', 'business books'])
|
86 |
|
87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
|
89 |
-
|
90 |
-
|
|
|
|
|
|
|
|
|
|
|
91 |
try:
|
|
|
92 |
keywords = self._extract_keywords(text)
|
|
|
|
|
93 |
all_products = []
|
94 |
-
|
95 |
for keyword in keywords:
|
96 |
amazon_products = await self.search_amazon(keyword)
|
97 |
flipkart_products = await self.search_flipkart(keyword)
|
98 |
-
|
99 |
-
|
100 |
-
|
|
|
|
|
|
|
|
|
101 |
seen = set()
|
102 |
unique_products = []
|
103 |
for product in all_products:
|
104 |
if product['name'] not in seen:
|
105 |
seen.add(product['name'])
|
106 |
unique_products.append(product)
|
107 |
-
|
108 |
-
|
109 |
-
|
|
|
|
|
|
|
|
|
|
|
110 |
except Exception as e:
|
111 |
print(f"Error in recommendations: {str(e)}")
|
112 |
-
return []
|
|
|
import asyncio
import re
from typing import Dict, List
from urllib.parse import quote, quote_plus, urljoin

import aiohttp
import numpy as np
from bs4 import BeautifulSoup
from sentence_transformers import SentenceTransformer
|
|
8 |
|
9 |
class DynamicRecommender:
|
10 |
def __init__(self):
|
11 |
self.headers = {
|
12 |
+
'User-Agent': (
|
13 |
+
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
|
14 |
+
'AppleWebKit/537.36 (KHTML, like Gecko) '
|
15 |
+
'Chrome/100.0.4896.75 Safari/537.36'
|
16 |
+
)
|
17 |
}
|
18 |
+
# Load your model if you need it for further logic
|
19 |
self.model = SentenceTransformer('all-mpnet-base-v2')
|
20 |
+
|
21 |
+
# ------------------------------------------------------------------
|
22 |
+
# Amazon search
|
23 |
+
# ------------------------------------------------------------------
|
24 |
async def search_amazon(self, query: str) -> List[Dict]:
|
25 |
+
"""
|
26 |
+
Search Amazon for products by building the search URL
|
27 |
+
and parsing the resulting HTML.
|
28 |
+
"""
|
29 |
+
print(f"Searching Amazon for: {query}")
|
30 |
search_url = f"https://www.amazon.in/s?k={query}"
|
31 |
async with aiohttp.ClientSession() as session:
|
32 |
async with session.get(search_url, headers=self.headers) as response:
|
|
|
34 |
html = await response.text()
|
35 |
return self._parse_amazon_results(html)
|
36 |
return []
|
37 |
+
|
38 |
+
def _parse_amazon_results(self, html: str) -> List[Dict]:
|
39 |
+
soup = BeautifulSoup(html, 'html.parser')
|
40 |
+
products = []
|
41 |
+
|
42 |
+
# These selectors may need updating if Amazon changes their HTML
|
43 |
+
search_items = soup.select('.s-result-item')
|
44 |
+
|
45 |
+
for item in search_items:
|
46 |
+
try:
|
47 |
+
name_elem = item.select_one('.a-text-normal')
|
48 |
+
price_elem = item.select_one('.a-price-whole')
|
49 |
+
link_elem = item.select_one('a.a-link-normal')
|
50 |
+
|
51 |
+
if name_elem and price_elem and link_elem:
|
52 |
+
product_name = name_elem.get_text(strip=True)
|
53 |
+
product_price = price_elem.get_text(strip=True)
|
54 |
+
product_url = link_elem.get('href')
|
55 |
+
|
56 |
+
products.append({
|
57 |
+
'name': product_name,
|
58 |
+
'price': product_price,
|
59 |
+
'source': 'Amazon',
|
60 |
+
'url': 'https://www.amazon.in' + product_url,
|
61 |
+
'description': 'Leadership/novel recommendation from Amazon'
|
62 |
+
})
|
63 |
+
except Exception:
|
64 |
+
continue
|
65 |
+
|
66 |
+
print(f"Found {len(products)} Amazon products.")
|
67 |
+
return products[:5]
|
68 |
+
|
69 |
+
# ------------------------------------------------------------------
|
70 |
+
# Flipkart search
|
71 |
+
# ------------------------------------------------------------------
|
72 |
async def search_flipkart(self, query: str) -> List[Dict]:
|
73 |
+
"""
|
74 |
+
Search Flipkart for products.
|
75 |
+
"""
|
76 |
+
print(f"Searching Flipkart for: {query}")
|
77 |
search_url = f"https://www.flipkart.com/search?q={query}"
|
78 |
async with aiohttp.ClientSession() as session:
|
79 |
async with session.get(search_url, headers=self.headers) as response:
|
|
|
82 |
return self._parse_flipkart_results(html)
|
83 |
return []
|
84 |
|
85 |
+
def _parse_flipkart_results(self, html: str) -> List[Dict]:
|
86 |
soup = BeautifulSoup(html, 'html.parser')
|
87 |
products = []
|
88 |
+
|
89 |
+
# These selectors may need updating if Flipkart changes their HTML
|
90 |
+
item_cards = soup.select('._1AtVbE')
|
91 |
+
|
92 |
+
for item in item_cards:
|
93 |
try:
|
94 |
+
name_elem = item.select_one('._4rR01T')
|
95 |
+
price_elem = item.select_one('._30jeq3')
|
96 |
+
link_elem = item.select_one('a')
|
97 |
+
|
98 |
+
if name_elem and price_elem and link_elem:
|
99 |
+
product_name = name_elem.get_text(strip=True)
|
100 |
+
product_price = price_elem.get_text(strip=True)
|
101 |
+
product_url = link_elem.get('href')
|
102 |
+
|
103 |
products.append({
|
104 |
+
'name': product_name,
|
105 |
+
'price': product_price,
|
106 |
+
'source': 'Flipkart',
|
107 |
+
'url': 'https://www.flipkart.com' + product_url,
|
108 |
+
'description': 'Leadership/novel recommendation from Flipkart'
|
109 |
})
|
110 |
except Exception:
|
111 |
continue
|
112 |
+
|
113 |
+
print(f"Found {len(products)} Flipkart products.")
|
114 |
return products[:5]
|
115 |
|
116 |
+
# ------------------------------------------------------------------
|
117 |
+
# IGP search (example approach; may need updating)
|
118 |
+
# ------------------------------------------------------------------
|
119 |
+
async def search_igp(self, query: str) -> List[Dict]:
|
120 |
+
"""
|
121 |
+
Search IGP for products (gift store).
|
122 |
+
Adjust the selectors or approach as needed.
|
123 |
+
"""
|
124 |
+
print(f"Searching IGP for: {query}")
|
125 |
+
search_url = f"https://www.igp.com/search/{query}"
|
126 |
+
async with aiohttp.ClientSession() as session:
|
127 |
+
async with session.get(search_url, headers=self.headers) as response:
|
128 |
+
if response.status == 200:
|
129 |
+
html = await response.text()
|
130 |
+
return self._parse_igp_results(html)
|
131 |
+
return []
|
132 |
+
|
133 |
+
def _parse_igp_results(self, html: str) -> List[Dict]:
|
134 |
soup = BeautifulSoup(html, 'html.parser')
|
135 |
products = []
|
136 |
+
|
137 |
+
# You must figure out correct selectors for IGP
|
138 |
+
# This is just an *example*; may not match actual IGP HTML
|
139 |
+
item_cards = soup.select('.product-item')
|
140 |
+
|
141 |
+
for item in item_cards:
|
142 |
try:
|
143 |
+
name_elem = item.select_one('.product-title')
|
144 |
+
price_elem = item.select_one('.product-price')
|
145 |
+
link_elem = item.select_one('a')
|
146 |
+
|
147 |
+
if name_elem and price_elem and link_elem:
|
148 |
+
product_name = name_elem.get_text(strip=True)
|
149 |
+
product_price = price_elem.get_text(strip=True)
|
150 |
+
product_url = link_elem.get('href')
|
151 |
+
|
152 |
products.append({
|
153 |
+
'name': product_name,
|
154 |
+
'price': product_price,
|
155 |
+
'source': 'IGP',
|
156 |
+
'url': 'https://www.igp.com' + product_url,
|
157 |
+
'description': 'Gift idea from IGP'
|
158 |
})
|
159 |
except Exception:
|
160 |
continue
|
161 |
+
|
162 |
+
print(f"Found {len(products)} IGP products.")
|
163 |
return products[:5]
|
164 |
|
165 |
+
# ------------------------------------------------------------------
|
166 |
+
# Extract keywords / fallback
|
167 |
+
# ------------------------------------------------------------------
|
168 |
def _extract_keywords(self, text: str) -> List[str]:
|
169 |
+
"""
|
170 |
+
Extract relevant search keywords from input text.
|
171 |
+
You can expand these rules or use the entire text as fallback.
|
172 |
+
"""
|
173 |
+
text_lower = text.lower()
|
174 |
+
|
175 |
+
# Try to find age
|
176 |
+
age_match = re.search(r'age\s+(\d+)', text_lower)
|
177 |
age = age_match.group(1) if age_match else None
|
178 |
|
179 |
interests = []
|
180 |
+
|
181 |
+
# Some sample rules
|
182 |
+
if 'software' in text_lower or 'engineer' in text_lower:
|
183 |
interests.extend(['programming books', 'tech gadgets'])
|
184 |
+
if 'books' in text_lower:
|
185 |
interests.append('books')
|
186 |
+
if 'novel' in text_lower or 'leader' in text_lower or 'leadership' in text_lower:
|
187 |
+
interests.append('leadership novels')
|
188 |
+
if 'successful' in text_lower:
|
189 |
interests.extend(['self help books', 'business books'])
|
190 |
|
191 |
+
# If we found no interests at all, fallback to using the entire text
|
192 |
+
if not interests:
|
193 |
+
interests.append(text)
|
194 |
+
|
195 |
+
# Optionally add "for 25 year old" context if age is found
|
196 |
+
if age:
|
197 |
+
# You can decide how exactly you want to incorporate age
|
198 |
+
interests = [f"{interest} for {age} year old" for interest in interests]
|
199 |
+
|
200 |
+
print("Extracted keywords:", interests)
|
201 |
+
return interests
|
202 |
|
203 |
+
# ------------------------------------------------------------------
|
204 |
+
# Main recommendations
|
205 |
+
# ------------------------------------------------------------------
|
206 |
+
async def get_recommendations(self, text: str) -> List[Dict]:
|
207 |
+
"""
|
208 |
+
Get personalized recommendations from Amazon, Flipkart, and IGP.
|
209 |
+
"""
|
210 |
try:
|
211 |
+
# Step 1: Extract keywords from user input
|
212 |
keywords = self._extract_keywords(text)
|
213 |
+
|
214 |
+
# Step 2: Search across multiple sources
|
215 |
all_products = []
|
|
|
216 |
for keyword in keywords:
|
217 |
amazon_products = await self.search_amazon(keyword)
|
218 |
flipkart_products = await self.search_flipkart(keyword)
|
219 |
+
igp_products = await self.search_igp(keyword) # new
|
220 |
+
|
221 |
+
all_products.extend(amazon_products)
|
222 |
+
all_products.extend(flipkart_products)
|
223 |
+
all_products.extend(igp_products)
|
224 |
+
|
225 |
+
# Step 3: De-duplicate by product name
|
226 |
seen = set()
|
227 |
unique_products = []
|
228 |
for product in all_products:
|
229 |
if product['name'] not in seen:
|
230 |
seen.add(product['name'])
|
231 |
unique_products.append(product)
|
232 |
+
|
233 |
+
# Step 4: Optionally, sort by "relevance" if desired
|
234 |
+
# For now, we just slice the first five
|
235 |
+
final_results = unique_products[:5]
|
236 |
+
|
237 |
+
print(f"Returning {len(final_results)} products.")
|
238 |
+
return final_results
|
239 |
+
|
240 |
except Exception as e:
|
241 |
print(f"Error in recommendations: {str(e)}")
|
242 |
+
return []
|