Upload 21 files
Browse files- models/parallel_processor.py +324 -0
- models/performance_optimizer.py +178 -0
- models/property_summary.py +97 -11
- models/trust_score.py +50 -41
models/parallel_processor.py
ADDED
@@ -0,0 +1,324 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# models/parallel_processor.py
|
2 |
+
|
3 |
+
import multiprocessing as mp
|
4 |
+
import concurrent.futures
|
5 |
+
import asyncio
|
6 |
+
import threading
|
7 |
+
from functools import partial
|
8 |
+
from typing import Dict, Any, List, Tuple
|
9 |
+
from .logging_config import logger
|
10 |
+
|
11 |
+
class ParallelProcessor:
    """Handles parallel processing of property verification analyses.

    A shared thread pool services the individual analysis tasks; a small
    process pool is also created for CPU-bound work (kept for API
    compatibility even though only the thread pool is used here).
    """

    def __init__(self, max_workers=None):
        # Cap the thread count so small hosts are not oversubscribed.
        self.max_workers = max_workers or min(mp.cpu_count(), 8)
        self.thread_pool = concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers)
        self.process_pool = concurrent.futures.ProcessPoolExecutor(max_workers=min(4, mp.cpu_count()))

    def __del__(self):
        # Best-effort cleanup. Use wait=False and swallow errors: a blocking
        # shutdown in __del__ can hang or raise during interpreter teardown.
        try:
            self.thread_pool.shutdown(wait=False)
            self.process_pool.shutdown(wait=False)
        except Exception:
            pass

    def process_images_parallel(self, image_files):
        """Process multiple uploaded images in parallel.

        Returns a list of per-image result dicts; a failed image contributes
        an {'error': ..., 'is_property_related': False} entry instead.
        """
        # Guard: ThreadPoolExecutor requires max_workers > 0, so an empty
        # upload list would otherwise raise ValueError.
        if not image_files:
            return []
        try:
            with concurrent.futures.ThreadPoolExecutor(max_workers=min(4, len(image_files))) as executor:
                futures = [executor.submit(self._process_single_image, img_file)
                           for img_file in image_files]

                results = []
                for future in concurrent.futures.as_completed(futures):
                    try:
                        results.append(future.result(timeout=30))
                    except Exception as e:
                        logger.error(f"Error processing image: {str(e)}")
                        results.append({'error': str(e), 'is_property_related': False})

                return results
        except Exception as e:
            logger.error(f"Error in parallel image processing: {str(e)}")
            return []

    def _process_single_image(self, img_file):
        """Process a single image: base64-encode it and run image analysis.

        NOTE(review): assumes img_file is a file-like upload object with a
        .filename attribute (e.g. a Werkzeug FileStorage) — confirm at callers.
        """
        try:
            from PIL import Image
            import base64
            import io
            from .image_analysis import analyze_image

            img = Image.open(img_file)
            buffered = io.BytesIO()
            img.save(buffered, format="JPEG")
            img_str = base64.b64encode(buffered.getvalue()).decode('utf-8')

            analysis = analyze_image(img)
            return {
                'image_data': img_str,
                'analysis': analysis
            }
        except Exception as e:
            logger.error(f"Error processing image {img_file.filename}: {str(e)}")
            return {'error': str(e), 'is_property_related': False}

    def process_pdfs_parallel(self, pdf_files):
        """Process multiple PDFs in parallel.

        Returns a list of per-PDF result dicts; a failed PDF contributes an
        {'error': ...} entry instead.
        """
        # Guard: avoid ValueError from max_workers=0 on an empty list.
        if not pdf_files:
            return []
        try:
            with concurrent.futures.ThreadPoolExecutor(max_workers=min(4, len(pdf_files))) as executor:
                futures = [executor.submit(self._process_single_pdf, pdf_file)
                           for pdf_file in pdf_files]

                results = []
                for future in concurrent.futures.as_completed(futures):
                    try:
                        # PDFs get a longer budget than images (text extraction).
                        results.append(future.result(timeout=60))
                    except Exception as e:
                        logger.error(f"Error processing PDF: {str(e)}")
                        results.append({'error': str(e)})

                return results
        except Exception as e:
            logger.error(f"Error in parallel PDF processing: {str(e)}")
            return []

    def _process_single_pdf(self, pdf_file):
        """Extract text from one PDF and run the PDF content analysis."""
        try:
            from .pdf_analysis import extract_pdf_text, analyze_pdf_content

            pdf_text = extract_pdf_text(pdf_file)
            analysis = analyze_pdf_content(pdf_text, {})

            return {
                'filename': pdf_file.filename,
                'text': pdf_text,
                'analysis': analysis
            }
        except Exception as e:
            logger.error(f"Error processing PDF {pdf_file.filename}: {str(e)}")
            return {'error': str(e)}

    async def run_analyses_parallel(self, data, consolidated_text, image_analysis, pdf_analysis):
        """Run all verification analyses concurrently on the thread pool.

        Each analysis gets a 2-minute budget; a timed-out or failed task is
        replaced by a standardized error result so the rest still complete.
        Returns a dict keyed by analysis name.
        """
        try:
            # Prepare property data for price analysis
            property_data = self._prepare_property_data(data)
            price_context = self._create_price_context(data)

            # (name, callable, *args) triples — one per analysis.
            analysis_tasks = [
                ('summary', self._run_summary_analysis, data),
                ('fraud', self._run_fraud_analysis, consolidated_text, data),
                ('trust', self._run_trust_analysis, consolidated_text, image_analysis, pdf_analysis),
                ('suggestions', self._run_suggestions_analysis, consolidated_text, data),
                ('quality', self._run_quality_analysis, data.get('description_translated', '')),
                ('address', self._run_address_analysis, data),
                ('cross_validation', self._run_cross_validation_analysis, data),
                ('location', self._run_location_analysis, data),
                ('price', self._run_price_analysis, data, price_context, property_data),
                ('legal', self._run_legal_analysis, data.get('legal_details', '')),
                ('specs', self._run_specs_analysis, data),
                ('market', self._run_market_analysis, data)
            ]

            # get_running_loop() is the supported call inside a coroutine;
            # get_event_loop() is deprecated here since Python 3.10.
            loop = asyncio.get_running_loop()
            tasks = []

            for task_name, func, *args in analysis_tasks:
                task = loop.run_in_executor(
                    self.thread_pool,
                    func,
                    *args
                )
                tasks.append((task_name, task))

            # Collect results, substituting error placeholders on failure.
            results = {}
            for task_name, task in tasks:
                try:
                    result = await asyncio.wait_for(task, timeout=120)  # 2 minutes timeout per task
                    results[task_name] = result
                except asyncio.TimeoutError:
                    logger.error(f"Task {task_name} timed out")
                    results[task_name] = self._get_error_result(f"Task {task_name} timed out")
                except Exception as e:
                    logger.error(f"Task {task_name} failed: {str(e)}")
                    results[task_name] = self._get_error_result(f"Task {task_name} failed: {str(e)}")

            return results

        except Exception as e:
            logger.error(f"Error in parallel analyses: {str(e)}")
            return self._get_all_error_results(str(e))

    def _prepare_property_data(self, data):
        """Extract numeric size/price/age fields for price analysis.

        Fields that are missing or unparsable are simply omitted.
        """
        property_data = {}
        try:
            if data.get('sq_ft'):
                property_data['size'] = float(data['sq_ft'])
            if data.get('market_value'):
                # Strip currency symbol and thousands separators before parsing.
                property_data['price'] = float(data['market_value'].replace('₹', '').replace(',', ''))
            if data.get('year_built'):
                from datetime import datetime
                current_year = datetime.now().year
                property_data['property_age'] = current_year - int(data['year_built'])
        except Exception as e:
            logger.warning(f"Error preparing property data: {str(e)}")
        return property_data

    def _create_price_context(self, data):
        """Create a free-text context block for the price analysis model."""
        return f"""
        Property: {data.get('property_name', '')}
        Type: {data.get('property_type', '')}
        Location: {data.get('address', '')}, {data.get('city', '')}, {data.get('state', '')}
        Size: {data.get('sq_ft', '')} sq ft
        Market Value: ₹{data.get('market_value', '')}
        Description: {data.get('description', '')}
        Amenities: {data.get('amenities', '')}
        """

    def _run_summary_analysis(self, data):
        """Run property summary analysis; falls back to a stock sentence."""
        try:
            from .property_summary import generate_property_summary
            return generate_property_summary(data)
        except Exception as e:
            logger.error(f"Error in summary analysis: {str(e)}")
            return "Property summary unavailable."

    def _run_fraud_analysis(self, consolidated_text, data):
        """Run fraud classification analysis."""
        try:
            from .fraud_classification import classify_fraud
            return classify_fraud(data, consolidated_text)
        except Exception as e:
            logger.error(f"Error in fraud analysis: {str(e)}")
            return self._get_error_result("Fraud analysis failed")

    def _run_trust_analysis(self, consolidated_text, image_analysis, pdf_analysis):
        """Run trust score analysis; returns a (score, reasoning) tuple."""
        try:
            from .trust_score import generate_trust_score
            return generate_trust_score(consolidated_text, image_analysis, pdf_analysis)
        except Exception as e:
            logger.error(f"Error in trust analysis: {str(e)}")
            return (0.0, "Trust analysis failed")

    def _run_suggestions_analysis(self, consolidated_text, data):
        """Run suggestions analysis."""
        try:
            from .suggestions import generate_suggestions
            return generate_suggestions(consolidated_text, data)
        except Exception as e:
            logger.error(f"Error in suggestions analysis: {str(e)}")
            return self._get_error_result("Suggestions analysis failed")

    def _run_quality_analysis(self, description):
        """Run text quality analysis on the (translated) description."""
        try:
            from .text_quality import assess_text_quality
            return assess_text_quality(description)
        except Exception as e:
            logger.error(f"Error in quality analysis: {str(e)}")
            return self._get_error_result("Quality analysis failed")

    def _run_address_analysis(self, data):
        """Run address verification analysis."""
        try:
            from .address_verification import verify_address
            return verify_address(data)
        except Exception as e:
            logger.error(f"Error in address analysis: {str(e)}")
            return self._get_error_result("Address analysis failed")

    def _run_cross_validation_analysis(self, data):
        """Run cross validation analysis."""
        try:
            from .cross_validation import perform_cross_validation
            return perform_cross_validation(data)
        except Exception as e:
            logger.error(f"Error in cross validation analysis: {str(e)}")
            return self._get_error_result("Cross validation analysis failed")

    def _run_location_analysis(self, data):
        """Run location analysis."""
        try:
            from .location_analysis import analyze_location
            return analyze_location(data)
        except Exception as e:
            logger.error(f"Error in location analysis: {str(e)}")
            return self._get_error_result("Location analysis failed")

    def _run_price_analysis(self, data, price_context, property_data):
        """Run price analysis with the prepared context and numeric fields."""
        try:
            from .price_analysis import analyze_price
            return analyze_price(data, price_context, data.get('latitude'), data.get('longitude'), property_data)
        except Exception as e:
            logger.error(f"Error in price analysis: {str(e)}")
            return self._get_error_result("Price analysis failed")

    def _run_legal_analysis(self, legal_details):
        """Run legal analysis on the free-text legal details."""
        try:
            from .legal_analysis import analyze_legal_details
            return analyze_legal_details(legal_details)
        except Exception as e:
            logger.error(f"Error in legal analysis: {str(e)}")
            return self._get_error_result("Legal analysis failed")

    def _run_specs_analysis(self, data):
        """Run property specs analysis."""
        try:
            from .property_specs import verify_property_specs
            return verify_property_specs(data)
        except Exception as e:
            logger.error(f"Error in specs analysis: {str(e)}")
            return self._get_error_result("Specs analysis failed")

    def _run_market_analysis(self, data):
        """Run market value analysis."""
        try:
            from .market_value import analyze_market_value
            return analyze_market_value(data)
        except Exception as e:
            logger.error(f"Error in market analysis: {str(e)}")
            return self._get_error_result("Market analysis failed")

    def _get_error_result(self, error_message):
        """Return the standardized error-result dict used by all analyses."""
        return {
            'error': error_message,
            'status': 'error',
            'confidence': 0.0
        }

    def _get_all_error_results(self, error_message):
        """Return error placeholders for every analysis key at once."""
        return {
            'summary': "Analysis failed",
            'fraud': self._get_error_result(error_message),
            'trust': (0.0, error_message),
            'suggestions': self._get_error_result(error_message),
            'quality': self._get_error_result(error_message),
            'address': self._get_error_result(error_message),
            'cross_validation': self._get_error_result(error_message),
            'location': self._get_error_result(error_message),
            'price': self._get_error_result(error_message),
            'legal': self._get_error_result(error_message),
            'specs': self._get_error_result(error_message),
            'market': self._get_error_result(error_message)
        }
|
322 |
+
|
323 |
+
# Global instance for easy import.
# NOTE(review): constructing this at import time spins up the thread and
# process pools immediately — confirm that is intended for every importer.
parallel_processor = ParallelProcessor()
|
models/performance_optimizer.py
ADDED
@@ -0,0 +1,178 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# models/performance_optimizer.py
|
2 |
+
|
3 |
+
import functools
|
4 |
+
import time
|
5 |
+
import threading
|
6 |
+
from typing import Dict, Any, Optional
|
7 |
+
from .logging_config import logger
|
8 |
+
|
9 |
+
class PerformanceOptimizer:
    """Performance optimization utilities for the property verification system.

    Wraps a small thread-safe in-memory cache with per-entry TTL expiry.
    Expired entries are evicted lazily, on access.
    """

    def __init__(self):
        self._cache = {}                     # key -> cached value
        self._cache_lock = threading.Lock()  # guards both dicts below
        self._cache_ttl = 300                # 5 minutes default cache TTL (seconds)
        self._cache_timestamps = {}          # key -> absolute expiry time (epoch seconds)

    def cache_result(self, key: str, result: Any, ttl: Optional[int] = None) -> None:
        """Cache *result* under *key*, expiring after *ttl* seconds.

        A falsy *ttl* (None or 0) falls back to the default TTL.
        """
        with self._cache_lock:
            self._cache[key] = result
            self._cache_timestamps[key] = time.time() + (ttl or self._cache_ttl)

    def get_cached_result(self, key: str) -> Optional[Any]:
        """Return the cached value for *key*, or None if absent or expired.

        Note: a legitimately cached None value is indistinguishable from a miss.
        """
        with self._cache_lock:
            if key in self._cache:
                if time.time() < self._cache_timestamps.get(key, 0):
                    return self._cache[key]
                # Expired: drop the entry lazily on access.
                del self._cache[key]
                self._cache_timestamps.pop(key, None)
            return None

    def clear_cache(self) -> None:
        """Clear all cached results and their expiry records."""
        with self._cache_lock:
            self._cache.clear()
            self._cache_timestamps.clear()

    def get_cache_stats(self) -> Dict[str, Any]:
        """Return cache statistics: current size, keys, and default TTL."""
        with self._cache_lock:
            return {
                'cache_size': len(self._cache),
                'cache_keys': list(self._cache.keys()),
                'cache_ttl': self._cache_ttl
            }
|
51 |
+
|
52 |
+
# Global performance optimizer instance: the shared, thread-safe TTL cache
# used by the cached_function decorator in this module.
performance_optimizer = PerformanceOptimizer()
|
54 |
+
|
55 |
+
def timed_function(func):
    """Decorator that logs the wall-clock duration of every call.

    Successful calls are logged at INFO level; failing calls are logged at
    ERROR level with the exception message, then the exception is re-raised.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        began = time.time()
        try:
            outcome = func(*args, **kwargs)
        except Exception as e:
            elapsed = time.time() - began
            logger.error(f"{func.__name__} failed after {elapsed:.2f} seconds: {str(e)}")
            raise
        elapsed = time.time() - began
        logger.info(f"{func.__name__} executed in {elapsed:.2f} seconds")
        return outcome
    return wrapper
|
70 |
+
|
71 |
+
def cached_function(ttl: int = 300):
    """Decorator factory that memoizes results via the shared cache.

    Results live in the module-wide performance_optimizer cache and expire
    after *ttl* seconds. Note: a cached None value is treated as a miss and
    recomputed on every call.
    """
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # Key: function name plus a hash of the stringified call signature.
            key = f"{func.__name__}:{hash(str(args) + str(sorted(kwargs.items())))}"

            hit = performance_optimizer.get_cached_result(key)
            if hit is not None:
                logger.debug(f"Cache hit for {func.__name__}")
                return hit

            outcome = func(*args, **kwargs)
            performance_optimizer.cache_result(key, outcome, ttl)
            logger.debug(f"Cached result for {func.__name__}")
            return outcome
        return wrapper
    return decorator
|
92 |
+
|
93 |
+
def optimize_model_loading():
    """Pre-load the commonly used inference pipelines in background threads.

    Best-effort warm-up: every failure (per model or overall) is logged and
    never propagated to the caller.
    """
    try:
        from .model_loader import load_model

        # Pre-load models in background threads
        import concurrent.futures

        def load_model_async(model_name):
            # One guarded load per model so a single failure cannot abort the batch.
            try:
                model = load_model(model_name)
                logger.info(f"Pre-loaded model: {model_name}")
                return model
            except Exception as e:
                logger.warning(f"Failed to pre-load model {model_name}: {str(e)}")
                return None

        model_names = [
            "zero-shot-classification",
            "summarization",
            "text-classification",
            "text-generation"
        ]

        # Load models in parallel; the 60 s timeout bounds the total wait and
        # surfaces as an exception handled by the outer except.
        with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
            futures = {executor.submit(load_model_async, name): name for name in model_names}

            for future in concurrent.futures.as_completed(futures, timeout=60):
                model_name = futures[future]
                try:
                    future.result()
                except Exception as e:
                    logger.error(f"Error pre-loading {model_name}: {str(e)}")

        logger.info("Model pre-loading optimization completed")

    except Exception as e:
        logger.error(f"Error in model loading optimization: {str(e)}")
|
133 |
+
|
134 |
+
def optimize_image_processing():
    """Return a callable that downscales oversized images before analysis.

    On any setup failure (e.g. Pillow missing) a pass-through fallback with
    the same signature is returned instead, so callers never need a guard.
    """
    try:
        from PIL import Image

        # SECURITY NOTE(review): setting MAX_IMAGE_PIXELS to None disables
        # Pillow's decompression-bomb protection. Only safe if upload sizes
        # are limited upstream — confirm before keeping this.
        Image.MAX_IMAGE_PIXELS = None  # Allow large images

        def optimize_image(img, max_size=1024):
            """Downscale *img* in place so its longest side is <= max_size."""
            if max(img.size) > max_size:
                img.thumbnail((max_size, max_size), Image.Resampling.LANCZOS)
            return img

        return optimize_image

    except Exception as e:
        logger.error(f"Error in image processing optimization: {str(e)}")
        # Fallback: identity function with the same signature.
        return lambda img, max_size=1024: img
|
155 |
+
|
156 |
+
def get_performance_metrics():
    """Return current process metrics (memory, CPU, cache stats, threads).

    Returns a zeroed metrics dict when psutil is unavailable or probing
    fails, instead of raising.
    """
    try:
        # Imported inside the try so a missing psutil yields the fallback
        # dict below instead of propagating ImportError to the caller.
        import psutil
        import os

        process = psutil.Process(os.getpid())
        memory_info = process.memory_info()

        return {
            'memory_usage_mb': memory_info.rss / 1024 / 1024,
            'cpu_percent': process.cpu_percent(),
            'cache_stats': performance_optimizer.get_cache_stats(),
            'thread_count': threading.active_count()
        }
    except Exception as e:
        logger.error(f"Error getting performance metrics: {str(e)}")
        return {
            'memory_usage_mb': 0,
            'cpu_percent': 0,
            'cache_stats': {},
            'thread_count': 0
        }
|
models/property_summary.py
CHANGED
@@ -273,16 +273,102 @@ def generate_fallback_summary(data):
|
|
273 |
def generate_property_summary(data):
|
274 |
"""Main function to generate property summary using SLM"""
|
275 |
try:
|
276 |
-
#
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
283 |
except Exception as e:
|
284 |
logger.error(f"Error generating property summary: {str(e)}")
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
273 |
def generate_property_summary(data):
    """Main function to generate property summary using SLM.

    Always returns a non-empty marketing-style string: falls back through
    generate_fallback_summary, a stock sentence, and create_basic_summary
    as each stage produces something unusable.
    """
    try:
        # Validate input data: anything but a non-empty dict gets the stock text.
        if not data or not isinstance(data, dict):
            return "A beautiful property with excellent features and prime location. Contact us for detailed information and exclusive offers."

        try:
            # Use the new dynamic SLM-based approach
            summary = generate_dynamic_summary_with_slm(data)

            # Fall back when the SLM produced nothing usable (non-string or blank).
            if not isinstance(summary, str) or not summary.strip():
                summary = generate_fallback_summary(data)

            # Final fallback - always return something meaningful
            if not summary or not str(summary).strip():
                summary = "A beautiful property with excellent features and prime location. Contact us for detailed information and exclusive offers."

            # Ensure it's a string and clean it up; reject JS-ish junk values.
            summary = str(summary).strip()
            if summary in ('[object Object]', 'null', 'undefined'):
                summary = generate_fallback_summary(data)

            # If still no valid summary (or suspiciously short), build a basic one.
            if not summary or len(summary) < 50:
                summary = create_basic_summary(data)

            return summary

        except Exception as e:
            logger.error(f"Error in summary generation: {str(e)}")
            return create_basic_summary(data)

    except Exception as e:
        logger.error(f"Error generating property summary: {str(e)}")
        return "A beautiful property with excellent features and prime location. Contact us for detailed information and exclusive offers."
|
314 |
+
|
315 |
+
def create_basic_summary(data):
    """Create a basic summary even for invalid data.

    Builds a short marketing paragraph from whichever of property_type,
    city/state, bedrooms/bathrooms/sq_ft and market_value are present; any
    failure falls back to a stock sentence.
    """
    try:
        # Extract basic information (all optional)
        property_type = data.get('property_type', 'Property')
        city = data.get('city', 'Prime Location')
        state = data.get('state', '')
        bedrooms = data.get('bedrooms', '')
        bathrooms = data.get('bathrooms', '')
        sq_ft = data.get('sq_ft', '')
        market_value = data.get('market_value', '')

        # Create location string
        location_parts = []
        if city:
            location_parts.append(city)
        if state:
            location_parts.append(state)
        location = ', '.join(location_parts) if location_parts else 'Prime Location'

        # Create features string (pluralized per count)
        features = []
        if bedrooms:
            features.append(f"{bedrooms} bedroom{'s' if str(bedrooms) != '1' else ''}")
        if bathrooms:
            features.append(f"{bathrooms} bathroom{'s' if str(bathrooms) != '1' else ''}")
        if sq_ft:
            features.append(f"{sq_ft} sq. ft.")

        features_str = ', '.join(features) if features else 'excellent features'

        # Create price string; silently omitted when unparsable or non-positive.
        price_str = ""
        if market_value:
            try:
                price_val = float(str(market_value).replace(',', '').replace('₹', ''))
                if price_val > 0:
                    price_str = f" at ₹{price_val:,.0f}"
            except (TypeError, ValueError):
                pass

        # NOTE(review): the original also computed a sanitized property_name
        # here but never used it in the summary — removed as dead code.
        # Consider weaving the name into the opening sentence instead.

        # Build the summary
        summary_parts = [
            f"Discover this exceptional {property_type.lower()} located in {location}.",
            f"This property features {features_str} and offers excellent value for money.",
            f"Perfect for families and investors alike, this property combines modern amenities with strategic location.",
            f"Don't miss this opportunity to own a piece of prime real estate{price_str}.",
            "Contact us today for a detailed viewing and exclusive offers."
        ]

        return " ".join(summary_parts)

    except Exception as e:
        logger.error(f"Error creating basic summary: {str(e)}")
        return "A beautiful property with excellent features and prime location. Contact us for detailed information and exclusive offers."
|
models/trust_score.py
CHANGED
@@ -9,7 +9,7 @@ def generate_trust_score(text, image_analysis, pdf_analysis):
|
|
9 |
classifier = load_model("zero-shot-classification", "typeform/mobilebert-uncased-mnli")
|
10 |
except Exception as e:
|
11 |
logger.error(f"Error loading model in trust score: {str(e)}")
|
12 |
-
return
|
13 |
aspects = [
|
14 |
"complete information provided",
|
15 |
"verified location",
|
@@ -24,16 +24,16 @@ def generate_trust_score(text, image_analysis, pdf_analysis):
|
|
24 |
result = classifier(str(text)[:1000], aspects, multi_label=True)
|
25 |
except Exception as e:
|
26 |
logger.error(f"Error in trust score model inference: {str(e)}")
|
27 |
-
return
|
28 |
|
29 |
-
#
|
30 |
weights = {
|
31 |
-
"complete information provided": 0.
|
32 |
"verified location": 0.20,
|
33 |
"consistent data": 0.15,
|
34 |
"authentic documents": 0.15,
|
35 |
"authentic images": 0.10,
|
36 |
-
"reasonable pricing": 0.
|
37 |
"verified ownership": 0.05,
|
38 |
"proper documentation": 0.05
|
39 |
}
|
@@ -41,88 +41,97 @@ def generate_trust_score(text, image_analysis, pdf_analysis):
|
|
41 |
score = 0
|
42 |
reasoning_parts = []
|
43 |
|
44 |
-
#
|
45 |
for label, confidence in zip(result['labels'], result['scores']):
|
46 |
adjusted_confidence = confidence
|
47 |
|
48 |
-
#
|
49 |
if label == "authentic documents":
|
50 |
if not pdf_analysis or len(pdf_analysis) == 0:
|
51 |
-
adjusted_confidence = 0.
|
52 |
else:
|
53 |
doc_scores = [p.get('verification_score', 0) for p in pdf_analysis]
|
54 |
adjusted_confidence = sum(doc_scores) / max(1, len(doc_scores))
|
55 |
-
#
|
56 |
-
if any(score < 0.
|
57 |
-
adjusted_confidence *= 0.
|
58 |
-
#
|
59 |
if len(doc_scores) < 2:
|
60 |
-
adjusted_confidence *= 0.
|
61 |
|
62 |
-
#
|
63 |
elif label == "authentic images":
|
64 |
if not image_analysis or len(image_analysis) == 0:
|
65 |
-
adjusted_confidence = 0.
|
66 |
else:
|
67 |
img_scores = [i.get('authenticity_score', 0) for i in image_analysis]
|
68 |
adjusted_confidence = sum(img_scores) / max(1, len(img_scores))
|
69 |
-
#
|
70 |
-
if any(score < 0.
|
71 |
-
adjusted_confidence *= 0.
|
72 |
-
#
|
73 |
if any(i.get('is_ai_generated', False) for i in image_analysis):
|
74 |
-
adjusted_confidence *= 0.
|
75 |
-
#
|
76 |
if any(not i.get('is_property_related', False) for i in image_analysis):
|
77 |
-
adjusted_confidence *= 0.
|
78 |
|
79 |
-
#
|
80 |
elif label == "consistent data":
|
81 |
# Check for inconsistencies in the data
|
82 |
if "inconsistent" in text.lower() or "suspicious" in text.lower():
|
83 |
-
adjusted_confidence *= 0.
|
84 |
# Check for impossible values
|
85 |
if "impossible" in text.lower() or "invalid" in text.lower():
|
86 |
-
adjusted_confidence *= 0.
|
87 |
# Check for missing critical information
|
88 |
if "missing" in text.lower() or "not provided" in text.lower():
|
89 |
-
adjusted_confidence *= 0.
|
90 |
|
91 |
-
#
|
92 |
elif label == "complete information provided":
|
93 |
# Check for missing critical information
|
94 |
-
if len(text) <
|
95 |
-
adjusted_confidence *= 0.
|
96 |
# Check for vague or generic descriptions
|
97 |
if "generic" in text.lower() or "vague" in text.lower():
|
98 |
-
adjusted_confidence *= 0.
|
99 |
# Check for suspiciously short descriptions
|
100 |
-
if len(text) <
|
101 |
-
adjusted_confidence *= 0.
|
102 |
|
103 |
score += adjusted_confidence * weights.get(label, 0.1)
|
104 |
reasoning_parts.append(f"{label} ({adjusted_confidence:.0%})")
|
105 |
|
106 |
-
# Apply
|
107 |
if "suspicious" in text.lower() or "fraudulent" in text.lower():
|
108 |
-
score *= 0.
|
109 |
|
110 |
-
# Apply penalties for suspiciously low values
|
111 |
if "suspiciously low" in text.lower() or "unusually small" in text.lower():
|
112 |
-
score *= 0.
|
113 |
|
114 |
-
# Apply penalties for inconsistencies
|
115 |
if "inconsistent" in text.lower() or "mismatch" in text.lower():
|
116 |
-
score *= 0.
|
117 |
|
118 |
-
# Apply penalties for missing critical information
|
119 |
if "missing critical" in text.lower() or "incomplete" in text.lower():
|
120 |
-
score *= 0.
|
|
|
|
|
|
|
|
|
121 |
|
122 |
# Ensure score is between 0 and 100
|
123 |
score = min(100, max(0, int(score * 100)))
|
|
|
|
|
|
|
|
|
|
|
124 |
reasoning = f"Based on: {', '.join(reasoning_parts)}"
|
125 |
return score, reasoning
|
126 |
except Exception as e:
|
127 |
logger.error(f"Error generating trust score: {str(e)}")
|
128 |
-
return
|
|
|
9 |
classifier = load_model("zero-shot-classification", "typeform/mobilebert-uncased-mnli")
|
10 |
except Exception as e:
|
11 |
logger.error(f"Error loading model in trust score: {str(e)}")
|
12 |
+
return 35, f"Model loading error: {str(e)}"
|
13 |
aspects = [
|
14 |
"complete information provided",
|
15 |
"verified location",
|
|
|
24 |
result = classifier(str(text)[:1000], aspects, multi_label=True)
|
25 |
except Exception as e:
|
26 |
logger.error(f"Error in trust score model inference: {str(e)}")
|
27 |
+
return 35, f"Model inference error: {str(e)}"
|
28 |
|
29 |
+
# More balanced weights
|
30 |
weights = {
|
31 |
+
"complete information provided": 0.20,
|
32 |
"verified location": 0.20,
|
33 |
"consistent data": 0.15,
|
34 |
"authentic documents": 0.15,
|
35 |
"authentic images": 0.10,
|
36 |
+
"reasonable pricing": 0.10,
|
37 |
"verified ownership": 0.05,
|
38 |
"proper documentation": 0.05
|
39 |
}
|
|
|
41 |
score = 0
|
42 |
reasoning_parts = []
|
43 |
|
44 |
+
# More reasonable scoring for each aspect
|
45 |
for label, confidence in zip(result['labels'], result['scores']):
|
46 |
adjusted_confidence = confidence
|
47 |
|
48 |
+
# Document verification
|
49 |
if label == "authentic documents":
|
50 |
if not pdf_analysis or len(pdf_analysis) == 0:
|
51 |
+
adjusted_confidence = 0.3 # Base score for no documents
|
52 |
else:
|
53 |
doc_scores = [p.get('verification_score', 0) for p in pdf_analysis]
|
54 |
adjusted_confidence = sum(doc_scores) / max(1, len(doc_scores))
|
55 |
+
# Moderate penalty for low verification scores
|
56 |
+
if any(score < 0.5 for score in doc_scores):
|
57 |
+
adjusted_confidence *= 0.7
|
58 |
+
# Small penalty for missing documents
|
59 |
if len(doc_scores) < 2:
|
60 |
+
adjusted_confidence *= 0.8
|
61 |
|
62 |
+
# Image verification
|
63 |
elif label == "authentic images":
|
64 |
if not image_analysis or len(image_analysis) == 0:
|
65 |
+
adjusted_confidence = 0.3 # Base score for no images
|
66 |
else:
|
67 |
img_scores = [i.get('authenticity_score', 0) for i in image_analysis]
|
68 |
adjusted_confidence = sum(img_scores) / max(1, len(img_scores))
|
69 |
+
# Moderate penalty for low authenticity scores
|
70 |
+
if any(score < 0.6 for score in img_scores):
|
71 |
+
adjusted_confidence *= 0.7
|
72 |
+
# Small penalty for AI-generated images
|
73 |
if any(i.get('is_ai_generated', False) for i in image_analysis):
|
74 |
+
adjusted_confidence *= 0.8
|
75 |
+
# Small penalty for non-property related images
|
76 |
if any(not i.get('is_property_related', False) for i in image_analysis):
|
77 |
+
adjusted_confidence *= 0.8
|
78 |
|
79 |
+
# Consistency check
|
80 |
elif label == "consistent data":
|
81 |
# Check for inconsistencies in the data
|
82 |
if "inconsistent" in text.lower() or "suspicious" in text.lower():
|
83 |
+
adjusted_confidence *= 0.6
|
84 |
# Check for impossible values
|
85 |
if "impossible" in text.lower() or "invalid" in text.lower():
|
86 |
+
adjusted_confidence *= 0.5
|
87 |
# Check for missing critical information
|
88 |
if "missing" in text.lower() or "not provided" in text.lower():
|
89 |
+
adjusted_confidence *= 0.7
|
90 |
|
91 |
+
# Completeness check
|
92 |
elif label == "complete information provided":
|
93 |
# Check for missing critical information
|
94 |
+
if len(text) < 200 or "not provided" in text.lower() or "missing" in text.lower():
|
95 |
+
adjusted_confidence *= 0.7
|
96 |
# Check for vague or generic descriptions
|
97 |
if "generic" in text.lower() or "vague" in text.lower():
|
98 |
+
adjusted_confidence *= 0.8
|
99 |
# Check for suspiciously short descriptions
|
100 |
+
if len(text) < 100:
|
101 |
+
adjusted_confidence *= 0.6
|
102 |
|
103 |
score += adjusted_confidence * weights.get(label, 0.1)
|
104 |
reasoning_parts.append(f"{label} ({adjusted_confidence:.0%})")
|
105 |
|
106 |
+
# Apply moderate penalties for suspicious patterns
|
107 |
if "suspicious" in text.lower() or "fraudulent" in text.lower():
|
108 |
+
score *= 0.7
|
109 |
|
110 |
+
# Apply moderate penalties for suspiciously low values
|
111 |
if "suspiciously low" in text.lower() or "unusually small" in text.lower():
|
112 |
+
score *= 0.8
|
113 |
|
114 |
+
# Apply moderate penalties for inconsistencies
|
115 |
if "inconsistent" in text.lower() or "mismatch" in text.lower():
|
116 |
+
score *= 0.8
|
117 |
|
118 |
+
# Apply moderate penalties for missing critical information
|
119 |
if "missing critical" in text.lower() or "incomplete" in text.lower():
|
120 |
+
score *= 0.8
|
121 |
+
|
122 |
+
# Ensure minimum score for any valid data
|
123 |
+
if score < 0.1:
|
124 |
+
score = 0.1 # Minimum 10% score for any data
|
125 |
|
126 |
# Ensure score is between 0 and 100
|
127 |
score = min(100, max(0, int(score * 100)))
|
128 |
+
|
129 |
+
# Ensure minimum score of 25% for any valid data
|
130 |
+
if score < 25:
|
131 |
+
score = 25
|
132 |
+
|
133 |
reasoning = f"Based on: {', '.join(reasoning_parts)}"
|
134 |
return score, reasoning
|
135 |
except Exception as e:
|
136 |
logger.error(f"Error generating trust score: {str(e)}")
|
137 |
+
return 35, "Could not assess trust."
|