noddysnots committed
Commit d47196a · verified · 1 parent: f9a368d

Update app.py

Files changed (1)
1. app.py +21 -200
app.py CHANGED
@@ -1,207 +1,28 @@
-from transformers import (
-    AutoModelForSequenceClassification,
-    AutoTokenizer,
-    AutoModelForTokenClassification,
-    TrainingArguments,
-    Trainer
-)
-from sentence_transformers import SentenceTransformer
-from datasets import Dataset
-import torch
-import numpy as np
-from typing import Dict, List, Optional
-import json
+import gradio as gr
+from product_recommender import ProductRecommender
 
-class MultiModelAnalyzer:
-    def __init__(self):
-        # Initialize different models for different tasks
-
-        # 1. Category Understanding Model
-        self.category_model = AutoModelForSequenceClassification.from_pretrained(
-            "EMBEDDIA/sloberta-commerce"
-        )
-        self.category_tokenizer = AutoTokenizer.from_pretrained(
-            "EMBEDDIA/sloberta-commerce"
-        )
-
-        # 2. Semantic Understanding Model
-        self.semantic_model = SentenceTransformer('all-mpnet-base-v2')
-
-        # 3. Feature Extraction Model
-        self.feature_model = AutoModelForTokenClassification.from_pretrained(
-            "bert-base-multilingual-uncased"
-        )
-        self.feature_tokenizer = AutoTokenizer.from_pretrained(
-            "bert-base-multilingual-uncased"
-        )
+recommender = ProductRecommender()
 
-    def analyze_text(self, text: str) -> Dict:
-        """Combine analysis from all models"""
-        # Get category prediction
-        category = self._predict_category(text)
-
-        # Get semantic embedding
-        embedding = self._get_semantic_embedding(text)
-
-        # Extract features
-        features = self._extract_features(text)
+def get_gift_recommendations(text: str) -> dict:
+    try:
+        recommendations = recommender.get_recommendations(text, [])  # Empty list as placeholder
 
         return {
-            "category": category,
-            "embedding": embedding,
-            "features": features
+            "recommendations": recommendations,
+            "status": "success"
         }
-
-    def _predict_category(self, text: str) -> str:
-        """Predict product category"""
-        inputs = self.category_tokenizer(
-            text,
-            return_tensors="pt",
-            truncation=True,
-            max_length=512
-        )
-        outputs = self.category_model(**inputs)
-        predictions = torch.nn.functional.softmax(outputs.logits, dim=1)
-        return predictions.argmax().item()
-
-    def _get_semantic_embedding(self, text: str) -> np.ndarray:
-        """Get semantic embedding of text"""
-        return self.semantic_model.encode(text)
-
-    def _extract_features(self, text: str) -> List[str]:
-        """Extract relevant features from text"""
-        inputs = self.feature_tokenizer(
-            text,
-            return_tensors="pt",
-            truncation=True,
-            max_length=512
-        )
-        outputs = self.feature_model(**inputs)
-        predictions = outputs.logits.argmax(dim=2)
-        return self._convert_predictions_to_features(predictions, inputs)
+    except Exception as e:
+        return {"error": str(e)}
 
-class ModelTrainer:
-    def __init__(self, model_analyzer: MultiModelAnalyzer):
-        self.analyzer = model_analyzer
-
-    def prepare_training_data(self, product_data: List[Dict]) -> Dataset:
-        """Prepare data for fine-tuning"""
-        training_data = []
-
-        for product in product_data:
-            # Format data for training
-            item = {
-                "text": product["description"],
-                "category": product["category"],
-                "features": product["features"],
-                "price": product["price"]
-            }
-            training_data.append(item)
-
-        return Dataset.from_list(training_data)
-
-    def fine_tune_category_model(self, training_data: Dataset):
-        """Fine-tune the category prediction model"""
-        training_args = TrainingArguments(
-            output_dir="./results",
-            num_train_epochs=3,
-            per_device_train_batch_size=8,
-            per_device_eval_batch_size=8,
-            warmup_steps=500,
-            weight_decay=0.01,
-            logging_dir="./logs",
-            logging_steps=10,
-        )
-
-        trainer = Trainer(
-            model=self.analyzer.category_model,
-            args=training_args,
-            train_dataset=training_data,
-            tokenizer=self.analyzer.category_tokenizer
-        )
-
-        trainer.train()
-
-    def fine_tune_feature_model(self, training_data: Dataset):
-        """Fine-tune the feature extraction model"""
-        training_args = TrainingArguments(
-            output_dir="./results_feature",
-            num_train_epochs=3,
-            per_device_train_batch_size=8,
-            per_device_eval_batch_size=8,
-            warmup_steps=500,
-            weight_decay=0.01,
-            logging_dir="./logs_feature",
-            logging_steps=10,
-        )
-
-        trainer = Trainer(
-            model=self.analyzer.feature_model,
-            args=training_args,
-            train_dataset=training_data,
-            tokenizer=self.analyzer.feature_tokenizer
-        )
-
-        trainer.train()
+demo = gr.Interface(
+    fn=get_gift_recommendations,
+    inputs=gr.Textbox(lines=3),
+    outputs=gr.JSON(),
+    title="🎁 Smart Gift Recommender",
+    description="Get personalized gift suggestions!"
+)
 
-class ProductRecommender:
-    def __init__(self):
-        self.model_analyzer = MultiModelAnalyzer()
-        self.trainer = ModelTrainer(self.model_analyzer)
-
-    def train_on_product_data(self, product_data: List[Dict]):
-        """Train models on product data"""
-        # Prepare training data
-        training_dataset = self.trainer.prepare_training_data(product_data)
-
-        # Fine-tune models
-        self.trainer.fine_tune_category_model(training_dataset)
-        self.trainer.fine_tune_feature_model(training_dataset)
-
-    def get_recommendations(self, query: str, product_database: List[Dict]) -> List[Dict]:
-        """Get product recommendations"""
-        # Analyze query
-        query_analysis = self.model_analyzer.analyze_text(query)
-
-        # Find matching products
-        matches = []
-        for product in product_database:
-            product_analysis = self.model_analyzer.analyze_text(product['description'])
-
-            # Calculate similarity score
-            similarity = self._calculate_similarity(
-                query_analysis,
-                product_analysis
-            )
-
-            matches.append({
-                "product": product,
-                "similarity": similarity
-            })
-
-        # Sort by similarity
-        matches.sort(key=lambda x: x['similarity'], reverse=True)
-
-        # Return top 5 matches
-        return [match['product'] for match in matches[:5]]
-
-    def _calculate_similarity(self, query_analysis: Dict, product_analysis: Dict) -> float:
-        """Calculate similarity between query and product"""
-        # Combine multiple similarity factors
-        category_match = query_analysis['category'] == product_analysis['category']
-        embedding_similarity = np.dot(
-            query_analysis['embedding'],
-            product_analysis['embedding']
-        )
-        feature_overlap = len(
-            set(query_analysis['features']) & set(product_analysis['features'])
-        )
-
-        # Weight and combine scores
-        total_score = (
-            0.4 * category_match +
-            0.4 * embedding_similarity +
-            0.2 * feature_overlap
-        )
-
-        return total_score
+if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860)
+else:
+    app = demo.app
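
The new app.py imports ProductRecommender from a product_recommender module that is not included in this commit; presumably the classes removed above were moved into that file. As a minimal sketch of the interface app.py now relies on, here is a hypothetical stand-in that keeps only the sentence-embedding path of the removed _calculate_similarity (the module's actual contents are not shown in this diff, so everything below is illustrative):

# product_recommender.py -- hypothetical sketch, NOT part of this commit.
# app.py now does `from product_recommender import ProductRecommender`, so a
# module like this must exist in the Space. This stand-in keeps only the
# sentence-embedding similarity from the removed code; the category and
# feature models are omitted for brevity.
from typing import Dict, List

import numpy as np
from sentence_transformers import SentenceTransformer


class ProductRecommender:
    def __init__(self):
        # Same embedding model the removed MultiModelAnalyzer used.
        self.semantic_model = SentenceTransformer('all-mpnet-base-v2')

    def get_recommendations(self, query: str, product_database: List[Dict]) -> List[Dict]:
        """Rank products by embedding similarity to the query and return the top 5."""
        if not product_database:
            return []  # app.py currently passes [] as a placeholder
        query_emb = self.semantic_model.encode(query)
        matches = []
        for product in product_database:
            product_emb = self.semantic_model.encode(product["description"])
            matches.append((float(np.dot(query_emb, product_emb)), product))
        matches.sort(key=lambda m: m[0], reverse=True)
        return [product for _, product in matches[:5]]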
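
Because get_gift_recommendations passes an empty list as the product-database placeholder, the recommendation loop has nothing to rank, so the endpoint currently returns an empty recommendations list. A quick way to verify the wiring once the app is launched, assuming the gradio_client package is installed and the server is reachable on localhost:7860 (gr.Interface exposes its function at the default "/predict" route):

# Smoke test for the running app -- endpoint name and URL are assumptions
# based on the launch() call above, not part of this commit.
from gradio_client import Client

client = Client("http://localhost:7860")
result = client.predict("birthday gift for a coffee-loving colleague", api_name="/predict")
# While app.py passes [] as the product database, this should print
# {"recommendations": [], "status": "success"}.
print(result)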