noddysnots committed on
Commit
e7b9fde
·
verified ·
1 Parent(s): 5e6436a

Update app.py

Files changed (1)
  1. app.py +182 -224
app.py CHANGED
@@ -1,249 +1,207 @@
  from typing import Dict, List, Optional
- import aiohttp
- import asyncio
- from bs4 import BeautifulSoup
- from transformers import pipeline
- import pandas as pd
- from datetime import datetime
  import json
- import sqlite3
- import re
- import urllib.parse
-
- class ProductKnowledgeBase:
-     def __init__(self, db_path: str = "product_knowledge.db"):
-         self.db_path = db_path
-         self.setup_database()
-
-     def setup_database(self):
-         """Initialize the SQLite database with required tables"""
-         conn = sqlite3.connect(self.db_path)
-         cursor = conn.cursor()
-
-         # Create products table
-         cursor.execute("""
-             CREATE TABLE IF NOT EXISTS products (
-                 id INTEGER PRIMARY KEY,
-                 name TEXT NOT NULL,
-                 category TEXT NOT NULL,
-                 subcategory TEXT,
-                 features TEXT,
-                 target_audience TEXT,
-                 price_range TEXT,
-                 created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
-             )
-         """)
-
-         # Create price history table
-         cursor.execute("""
-             CREATE TABLE IF NOT EXISTS price_history (
-                 id INTEGER PRIMARY KEY,
-                 product_id INTEGER,
-                 platform TEXT NOT NULL,
-                 price REAL NOT NULL,
-                 timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
-                 FOREIGN KEY (product_id) REFERENCES products (id)
              )
-         """)
-
-         # Create recommendations table for feedback
-         cursor.execute("""
-             CREATE TABLE IF NOT EXISTS recommendations (
-                 id INTEGER PRIMARY KEY,
-                 user_input TEXT NOT NULL,
-                 product_id INTEGER,
-                 success_rating INTEGER,
-                 timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
-                 FOREIGN KEY (product_id) REFERENCES products (id)
             )
-         """)
-
-         conn.commit()
-         conn.close()
-
- class PriceFetcher:
-     def __init__(self):
-         self.headers = {
-             'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
-         }
-
-     async def fetch_price(self, url: str) -> Optional[float]:
-         """Fetch price from a given URL"""
-         try:
-             async with aiohttp.ClientSession() as session:
-                 async with session.get(url, headers=self.headers) as response:
-                     if response.status == 200:
-                         html = await response.text()
-                         return self._extract_price(html)
-                     return None
-         except Exception as e:
-             print(f"Error fetching price: {str(e)}")
-             return None
-
-     def _extract_price(self, html: str) -> Optional[float]:
-         """Extract price from HTML content"""
-         soup = BeautifulSoup(html, 'html.parser')
-         # Add platform-specific price extraction logic here
-         return None
-
- class RecommendationEngine:
-     def __init__(self, knowledge_base: ProductKnowledgeBase, price_fetcher: PriceFetcher):
-         self.kb = knowledge_base
-         self.price_fetcher = price_fetcher
-         self.nlp = pipeline("text-generation", model="gpt2", device_map="auto")
-
-     def analyze_user_input(self, text: str) -> Dict:
-         """Analyze user input for context and requirements"""
-         # Extract age if mentioned
-         age_match = re.search(r'age\s+(?:is\s+)?(\d+)', text.lower())
-         age = age_match.group(1) if age_match else None
-
-         # Extract budget if mentioned
-         budget_match = re.search(r'(?:budget|cost|price)\s*(?:is|:)?\s*(?:rs|₹)?\s*(\d+)', text.lower())
-         budget = budget_match.group(1) if budget_match else None
-
-         # Generate categories and context
-         prompt = f"Extract gift categories and context from: {text}\nFormat: category1, category2 | context"
-         response = self.nlp(prompt, max_new_tokens=50)[0]['generated_text']
-
-         categories, context = response.split('|')
-         categories = [c.strip() for c in categories.split(',')]
-
          return {
-             "age": age,
-             "budget": budget,
-             "categories": categories,
-             "context": context.strip()
          }
-
-     def find_matching_products(self, analysis: Dict) -> List[Dict]:
-         """Find products matching the analysis"""
-         conn = sqlite3.connect(self.kb.db_path)
-         cursor = conn.cursor()
-
-         query = """
-             SELECT p.*, GROUP_CONCAT(DISTINCT ph.price) as prices
-             FROM products p
-             LEFT JOIN price_history ph ON p.id = ph.product_id
-             WHERE p.category IN ({})
-             GROUP BY p.id
-         """.format(','.join('?' * len(analysis['categories'])))
-
-         cursor.execute(query, analysis['categories'])
-         products = cursor.fetchall()
-
-         conn.close()
-
-         return [self._format_product(p, analysis) for p in products]
-
-     def _format_product(self, product_data: tuple, analysis: Dict) -> Dict:
-         """Format product data with explanation"""
-         return {
-             "name": product_data[1],
-             "category": product_data[2],
-             "features": json.loads(product_data[4]),
-             "why_recommended": self._generate_explanation(product_data, analysis),
-             "price_info": self._process_price_info(product_data[-1]),
-             "target_audience": json.loads(product_data[5])
-         }
-
-     def _generate_explanation(self, product_data: tuple, analysis: Dict) -> str:
-         """Generate personalized explanation for recommendation"""
-         prompt = f"""
-         Product: {product_data[1]}
-         Category: {product_data[2]}
-         User Context: {analysis['context']}
-
-         Generate a brief explanation why this product is recommended:
-         """
-
-         response = self.nlp(prompt, max_new_tokens=100)[0]['generated_text']
-         return response.split('Generate a brief explanation why this product is recommended:')[-1].strip()
-
-     def _process_price_info(self, prices: str) -> Dict:
-         """Process and format price information"""
-         if not prices:
-             return {"min": None, "max": None, "average": None}
-
-         price_list = [float(p) for p in prices.split(',')]
-         return {
-             "min": min(price_list),
-             "max": max(price_list),
-             "average": sum(price_list) / len(price_list)
-         }
-
- class GiftRecommenderAPI:
      def __init__(self):
-         self.kb = ProductKnowledgeBase()
-         self.price_fetcher = PriceFetcher()
-         self.engine = RecommendationEngine(self.kb, self.price_fetcher)
-
-     async def get_recommendations(self, text: str) -> Dict:
-         """Main method to get gift recommendations"""
-         try:
-             # Analyze user input
-             analysis = self.engine.analyze_user_input(text)
-
-             # Find matching products
-             recommendations = self.engine.find_matching_products(analysis)
-
-             # Fetch current prices
-             for rec in recommendations:
-                 current_prices = await self._fetch_current_prices(rec['name'])
-                 rec['current_prices'] = current_prices
-
-             return {
-                 "analysis": analysis,
-                 "recommendations": recommendations
-             }
-
-         except Exception as e:
-             return {"error": f"An error occurred: {str(e)}"}
-
-     async def _fetch_current_prices(self, product_name: str) -> Dict:
-         """Fetch current prices from various platforms"""
-         encoded_name = urllib.parse.quote(product_name)
-         urls = {
-             "amazon": f"https://www.amazon.in/s?k={encoded_name}",
-             "flipkart": f"https://www.flipkart.com/search?q={encoded_name}",
-             "igp": f"https://www.igp.com/search?q={encoded_name}"
-         }
-
-         prices = {}
-         for platform, url in urls.items():
-             price = await self.price_fetcher.fetch_price(url)
-             if price:
-                 prices[platform] = price
-
-         return prices
-
- # Create Gradio interface
- import gradio as gr
-
- def create_gradio_interface():
-     recommender = GiftRecommenderAPI()
-
-     def recommend(text: str) -> Dict:
-         return asyncio.run(recommender.get_recommendations(text))
-
-     demo = gr.Interface(
-         fn=recommend,
-         inputs=gr.Textbox(
-             lines=3,
-             placeholder="Describe who you're buying a gift for (age, interests, occasion, etc.)"
-         ),
-         outputs=gr.JSON(),
-         title="🎁 Smart Gift Recommender",
-         description="Get personalized gift suggestions with real-time prices and explanations!",
-         examples=[
-             ["need a fifa latest game of EA"],
-             ["a small kid of age 3 want him to have something like toy that teaches alphabets"],
-             ["Looking for a gift for my mom who enjoys gardening and cooking"]
-         ]
-     )
-
-     return demo
-
- if __name__ == "__main__":
-     demo = create_gradio_interface()
-     demo.launch()
+ from transformers import (
+     AutoModelForSequenceClassification,
+     AutoTokenizer,
+     AutoModelForTokenClassification,
+     TrainingArguments,
+     Trainer
+ )
+ from sentence_transformers import SentenceTransformer
+ from datasets import Dataset
+ import torch
+ import numpy as np
  from typing import Dict, List, Optional
  import json
+
+ class MultiModelAnalyzer:
+     def __init__(self):
+         # Initialize different models for different tasks
+
+         # 1. Category Understanding Model
+         self.category_model = AutoModelForSequenceClassification.from_pretrained(
+             "EMBEDDIA/sloberta-commerce"
          )
+         self.category_tokenizer = AutoTokenizer.from_pretrained(
+             "EMBEDDIA/sloberta-commerce"
          )
+
+         # 2. Semantic Understanding Model
+         self.semantic_model = SentenceTransformer('all-mpnet-base-v2')
+
+         # 3. Feature Extraction Model
+         self.feature_model = AutoModelForTokenClassification.from_pretrained(
+             "bert-base-multilingual-uncased"
+         )
+         self.feature_tokenizer = AutoTokenizer.from_pretrained(
+             "bert-base-multilingual-uncased"
+         )
+
+     def analyze_text(self, text: str) -> Dict:
+         """Combine analysis from all models"""
+         # Get category prediction
+         category = self._predict_category(text)
+
+         # Get semantic embedding
+         embedding = self._get_semantic_embedding(text)
+
+         # Extract features
+         features = self._extract_features(text)
+
          return {
+             "category": category,
+             "embedding": embedding,
+             "features": features
          }
+
+     def _predict_category(self, text: str) -> int:
+         """Predict product category (returns the predicted label id)"""
+         inputs = self.category_tokenizer(
+             text,
+             return_tensors="pt",
+             truncation=True,
+             max_length=512
+         )
+         outputs = self.category_model(**inputs)
+         predictions = torch.nn.functional.softmax(outputs.logits, dim=1)
+         return predictions.argmax().item()
+
+     def _get_semantic_embedding(self, text: str) -> np.ndarray:
+         """Get semantic embedding of text"""
+         return self.semantic_model.encode(text)
+
+     def _extract_features(self, text: str) -> List[str]:
+         """Extract relevant features from text"""
+         inputs = self.feature_tokenizer(
+             text,
+             return_tensors="pt",
+             truncation=True,
+             max_length=512
+         )
+         outputs = self.feature_model(**inputs)
+         predictions = outputs.logits.argmax(dim=2)
+         return self._convert_predictions_to_features(predictions, inputs)
+
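+     def _convert_predictions_to_features(self, predictions, inputs) -> List[str]:
+         # Hypothetical helper, not defined in this commit: the call above needs
+         # it, so this is a minimal sketch that keeps the tokens whose predicted
+         # label id is non-zero (assuming id 0 is the background/"O" class).
+         tokens = self.feature_tokenizer.convert_ids_to_tokens(
+             inputs["input_ids"][0].tolist()
+         )
+         labels = predictions[0].tolist()
+         return [tok for tok, lab in zip(tokens, labels) if lab != 0]
+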
+ class ModelTrainer:
+     def __init__(self, model_analyzer: MultiModelAnalyzer):
+         self.analyzer = model_analyzer
+
+     def prepare_training_data(self, product_data: List[Dict]) -> Dataset:
+         """Prepare data for fine-tuning"""
+         training_data = []
+
+         for product in product_data:
+             # Format data for training
+             item = {
+                 "text": product["description"],
+                 "category": product["category"],
+                 "features": product["features"],
+                 "price": product["price"]
+             }
+             training_data.append(item)
+
+         return Dataset.from_list(training_data)
+
+     def fine_tune_category_model(self, training_data: Dataset):
+         """Fine-tune the category prediction model"""
+         # Note: Trainer expects tokenized inputs and a "labels" column, so the
+         # raw dataset from prepare_training_data must be mapped first (see the
+         # sketch below fine_tune_feature_model).
+         training_args = TrainingArguments(
+             output_dir="./results",
+             num_train_epochs=3,
+             per_device_train_batch_size=8,
+             per_device_eval_batch_size=8,
+             warmup_steps=500,
+             weight_decay=0.01,
+             logging_dir="./logs",
+             logging_steps=10,
+         )
+
+         trainer = Trainer(
+             model=self.analyzer.category_model,
+             args=training_args,
+             train_dataset=training_data,
+             tokenizer=self.analyzer.category_tokenizer
+         )
+
+         trainer.train()
+
+     def fine_tune_feature_model(self, training_data: Dataset):
+         """Fine-tune the feature extraction model"""
+         training_args = TrainingArguments(
+             output_dir="./results_feature",
+             num_train_epochs=3,
+             per_device_train_batch_size=8,
+             per_device_eval_batch_size=8,
+             warmup_steps=500,
+             weight_decay=0.01,
+             logging_dir="./logs_feature",
+             logging_steps=10,
+         )
+
+         trainer = Trainer(
+             model=self.analyzer.feature_model,
+             args=training_args,
+             train_dataset=training_data,
+             tokenizer=self.analyzer.feature_tokenizer
+         )
+
+         trainer.train()
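+
+     def _tokenize_for_category_training(self, dataset: Dataset) -> Dataset:
+         # Hypothetical helper, not in this commit: sketches the mapping that
+         # would have to run before fine_tune_category_model, since Trainer
+         # cannot consume the raw "text"/"category" columns directly.
+         label_ids = {c: i for i, c in enumerate(sorted(set(dataset["category"])))}
+
+         def encode(batch):
+             enc = self.analyzer.category_tokenizer(
+                 batch["text"], truncation=True, max_length=512
+             )
+             enc["labels"] = [label_ids[c] for c in batch["category"]]
+             return enc
+
+         return dataset.map(encode, batched=True, remove_columns=dataset.column_names)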
 
 
 
+
+ class ProductRecommender:
      def __init__(self):
+         self.model_analyzer = MultiModelAnalyzer()
+         self.trainer = ModelTrainer(self.model_analyzer)
+
+     def train_on_product_data(self, product_data: List[Dict]):
+         """Train models on product data"""
+         # Prepare training data
+         training_dataset = self.trainer.prepare_training_data(product_data)
+
+         # Fine-tune models
+         self.trainer.fine_tune_category_model(training_dataset)
+         self.trainer.fine_tune_feature_model(training_dataset)
+
+     def get_recommendations(self, query: str, product_database: List[Dict]) -> List[Dict]:
+         """Get product recommendations"""
+         # Analyze query
+         query_analysis = self.model_analyzer.analyze_text(query)
+
+         # Find matching products
+         matches = []
+         for product in product_database:
+             product_analysis = self.model_analyzer.analyze_text(product['description'])
+
+             # Calculate similarity score
+             similarity = self._calculate_similarity(
+                 query_analysis,
+                 product_analysis
+             )
+
+             matches.append({
+                 "product": product,
+                 "similarity": similarity
+             })
+
+         # Sort by similarity
+         matches.sort(key=lambda x: x['similarity'], reverse=True)
+
+         # Return top 5 matches
+         return [match['product'] for match in matches[:5]]
+
+     def _calculate_similarity(self, query_analysis: Dict, product_analysis: Dict) -> float:
+         """Calculate similarity between query and product"""
+         # Combine multiple similarity factors
+         category_match = query_analysis['category'] == product_analysis['category']
+         # Normalize the dot product (cosine similarity) so the embedding term
+         # stays in [-1, 1] regardless of vector magnitude
+         embedding_similarity = np.dot(
+             query_analysis['embedding'],
+             product_analysis['embedding']
+         ) / (
+             np.linalg.norm(query_analysis['embedding']) *
+             np.linalg.norm(product_analysis['embedding'])
+         )
+         feature_overlap = len(
+             set(query_analysis['features']) & set(product_analysis['features'])
+         )
+
+         # Weight and combine scores
+         total_score = (
+             0.4 * category_match +
+             0.4 * embedding_similarity +
+             0.2 * feature_overlap
+         )
+
+         return total_score
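
For reference, a minimal driver for the new ProductRecommender (not part of the commit; the catalogue entries and query below are illustrative assumptions):

if __name__ == "__main__":
    recommender = ProductRecommender()

    # Hypothetical catalogue in the shape prepare_training_data expects
    product_database = [
        {
            "description": "Alphabet learning toy for toddlers",
            "category": "toys",
            "features": ["educational", "alphabets"],
            "price": 499,
        },
    ]

    # Optionally fine-tune on the catalogue first:
    # recommender.train_on_product_data(product_database)

    matches = recommender.get_recommendations(
        "a toy that teaches alphabets to a 3-year-old", product_database
    )
    for product in matches:
        print(product["description"])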