Commit 1049797 (verified) · committed by sksameermujahid · Parent: 9a51c57

Upload 21 files

models/parallel_processor.py ADDED
@@ -0,0 +1,324 @@
# models/parallel_processor.py

import multiprocessing as mp
import concurrent.futures
import asyncio
import threading
from functools import partial
from typing import Dict, Any, List, Tuple
from .logging_config import logger

class ParallelProcessor:
    """Handles parallel processing of property verification analyses"""

    def __init__(self, max_workers=None):
        self.max_workers = max_workers or min(mp.cpu_count(), 8)
        self.thread_pool = concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers)
        self.process_pool = concurrent.futures.ProcessPoolExecutor(max_workers=min(4, mp.cpu_count()))

    def __del__(self):
        self.thread_pool.shutdown(wait=True)
        self.process_pool.shutdown(wait=True)

    def process_images_parallel(self, image_files):
        """Process multiple images in parallel"""
        try:
            # Guard against an empty list: ThreadPoolExecutor rejects max_workers=0
            if not image_files:
                return []
            with concurrent.futures.ThreadPoolExecutor(max_workers=min(4, len(image_files))) as executor:
                futures = []
                for img_file in image_files:
                    future = executor.submit(self._process_single_image, img_file)
                    futures.append(future)

                results = []
                for future in concurrent.futures.as_completed(futures):
                    try:
                        result = future.result(timeout=30)
                        results.append(result)
                    except Exception as e:
                        logger.error(f"Error processing image: {str(e)}")
                        results.append({'error': str(e), 'is_property_related': False})

                return results
        except Exception as e:
            logger.error(f"Error in parallel image processing: {str(e)}")
            return []

    def _process_single_image(self, img_file):
        """Process a single image"""
        try:
            from PIL import Image
            import base64
            import io
            from .image_analysis import analyze_image

            img = Image.open(img_file)
            # JPEG cannot encode RGBA/P images, so normalize the mode first
            if img.mode != 'RGB':
                img = img.convert('RGB')
            buffered = io.BytesIO()
            img.save(buffered, format="JPEG")
            img_str = base64.b64encode(buffered.getvalue()).decode('utf-8')

            analysis = analyze_image(img)
            return {
                'image_data': img_str,
                'analysis': analysis
            }
        except Exception as e:
            logger.error(f"Error processing image {img_file.filename}: {str(e)}")
            return {'error': str(e), 'is_property_related': False}

    def process_pdfs_parallel(self, pdf_files):
        """Process multiple PDFs in parallel"""
        try:
            # Same empty-list guard as for images
            if not pdf_files:
                return []
            with concurrent.futures.ThreadPoolExecutor(max_workers=min(4, len(pdf_files))) as executor:
                futures = []
                for pdf_file in pdf_files:
                    future = executor.submit(self._process_single_pdf, pdf_file)
                    futures.append(future)

                results = []
                for future in concurrent.futures.as_completed(futures):
                    try:
                        result = future.result(timeout=60)
                        results.append(result)
                    except Exception as e:
                        logger.error(f"Error processing PDF: {str(e)}")
                        results.append({'error': str(e)})

                return results
        except Exception as e:
            logger.error(f"Error in parallel PDF processing: {str(e)}")
            return []

    def _process_single_pdf(self, pdf_file):
        """Process a single PDF"""
        try:
            from .pdf_analysis import extract_pdf_text, analyze_pdf_content

            pdf_text = extract_pdf_text(pdf_file)
            analysis = analyze_pdf_content(pdf_text, {})

            return {
                'filename': pdf_file.filename,
                'text': pdf_text,
                'analysis': analysis
            }
        except Exception as e:
            logger.error(f"Error processing PDF {pdf_file.filename}: {str(e)}")
            return {'error': str(e)}

    async def run_analyses_parallel(self, data, consolidated_text, image_analysis, pdf_analysis):
        """Run all analyses in parallel using asyncio and thread pools"""
        try:
            # Prepare property data for price analysis
            property_data = self._prepare_property_data(data)
            price_context = self._create_price_context(data)

            # Define analysis tasks with their respective functions
            analysis_tasks = [
                ('summary', self._run_summary_analysis, data),
                ('fraud', self._run_fraud_analysis, consolidated_text, data),
                ('trust', self._run_trust_analysis, consolidated_text, image_analysis, pdf_analysis),
                ('suggestions', self._run_suggestions_analysis, consolidated_text, data),
                ('quality', self._run_quality_analysis, data.get('description_translated', '')),
                ('address', self._run_address_analysis, data),
                ('cross_validation', self._run_cross_validation_analysis, data),
                ('location', self._run_location_analysis, data),
                ('price', self._run_price_analysis, data, price_context, property_data),
                ('legal', self._run_legal_analysis, data.get('legal_details', '')),
                ('specs', self._run_specs_analysis, data),
                ('market', self._run_market_analysis, data)
            ]

            # Run tasks in parallel with a per-task timeout
            # (get_running_loop, not the deprecated get_event_loop, inside a coroutine)
            loop = asyncio.get_running_loop()
            tasks = []

            for task_name, func, *args in analysis_tasks:
                task = loop.run_in_executor(
                    self.thread_pool,
                    func,
                    *args
                )
                tasks.append((task_name, task))

            # Wait for all tasks to complete with timeout
            results = {}
            for task_name, task in tasks:
                try:
                    result = await asyncio.wait_for(task, timeout=120)  # 2-minute timeout per task
                    results[task_name] = result
                except asyncio.TimeoutError:
                    logger.error(f"Task {task_name} timed out")
                    results[task_name] = self._get_error_result(f"Task {task_name} timed out")
                except Exception as e:
                    logger.error(f"Task {task_name} failed: {str(e)}")
                    results[task_name] = self._get_error_result(f"Task {task_name} failed: {str(e)}")

            return results

        except Exception as e:
            logger.error(f"Error in parallel analyses: {str(e)}")
            return self._get_all_error_results(str(e))

    def _prepare_property_data(self, data):
        """Prepare property data for price analysis"""
        property_data = {}
        try:
            if data.get('sq_ft'):
                property_data['size'] = float(data['sq_ft'])
            if data.get('market_value'):
                property_data['price'] = float(data['market_value'].replace('₹', '').replace(',', ''))
            if data.get('year_built'):
                from datetime import datetime
                current_year = datetime.now().year
                property_data['property_age'] = current_year - int(data['year_built'])
        except Exception as e:
            logger.warning(f"Error preparing property data: {str(e)}")
        return property_data

    def _create_price_context(self, data):
        """Create context text for price analysis"""
        return f"""
        Property: {data.get('property_name', '')}
        Type: {data.get('property_type', '')}
        Location: {data.get('address', '')}, {data.get('city', '')}, {data.get('state', '')}
        Size: {data.get('sq_ft', '')} sq ft
        Market Value: ₹{data.get('market_value', '')}
        Description: {data.get('description', '')}
        Amenities: {data.get('amenities', '')}
        """

    def _run_summary_analysis(self, data):
        """Run property summary analysis"""
        try:
            from .property_summary import generate_property_summary
            return generate_property_summary(data)
        except Exception as e:
            logger.error(f"Error in summary analysis: {str(e)}")
            return "Property summary unavailable."

    def _run_fraud_analysis(self, consolidated_text, data):
        """Run fraud classification analysis"""
        try:
            from .fraud_classification import classify_fraud
            return classify_fraud(data, consolidated_text)
        except Exception as e:
            logger.error(f"Error in fraud analysis: {str(e)}")
            return self._get_error_result("Fraud analysis failed")

    def _run_trust_analysis(self, consolidated_text, image_analysis, pdf_analysis):
        """Run trust score analysis"""
        try:
            from .trust_score import generate_trust_score
            return generate_trust_score(consolidated_text, image_analysis, pdf_analysis)
        except Exception as e:
            logger.error(f"Error in trust analysis: {str(e)}")
            return (0.0, "Trust analysis failed")

    def _run_suggestions_analysis(self, consolidated_text, data):
        """Run suggestions analysis"""
        try:
            from .suggestions import generate_suggestions
            return generate_suggestions(consolidated_text, data)
        except Exception as e:
            logger.error(f"Error in suggestions analysis: {str(e)}")
            return self._get_error_result("Suggestions analysis failed")

    def _run_quality_analysis(self, description):
        """Run text quality analysis"""
        try:
            from .text_quality import assess_text_quality
            return assess_text_quality(description)
        except Exception as e:
            logger.error(f"Error in quality analysis: {str(e)}")
            return self._get_error_result("Quality analysis failed")

    def _run_address_analysis(self, data):
        """Run address verification analysis"""
        try:
            from .address_verification import verify_address
            return verify_address(data)
        except Exception as e:
            logger.error(f"Error in address analysis: {str(e)}")
            return self._get_error_result("Address analysis failed")

    def _run_cross_validation_analysis(self, data):
        """Run cross validation analysis"""
        try:
            from .cross_validation import perform_cross_validation
            return perform_cross_validation(data)
        except Exception as e:
            logger.error(f"Error in cross validation analysis: {str(e)}")
            return self._get_error_result("Cross validation analysis failed")

    def _run_location_analysis(self, data):
        """Run location analysis"""
        try:
            from .location_analysis import analyze_location
            return analyze_location(data)
        except Exception as e:
            logger.error(f"Error in location analysis: {str(e)}")
            return self._get_error_result("Location analysis failed")

    def _run_price_analysis(self, data, price_context, property_data):
        """Run price analysis"""
        try:
            from .price_analysis import analyze_price
            return analyze_price(data, price_context, data.get('latitude'), data.get('longitude'), property_data)
        except Exception as e:
            logger.error(f"Error in price analysis: {str(e)}")
            return self._get_error_result("Price analysis failed")

    def _run_legal_analysis(self, legal_details):
        """Run legal analysis"""
        try:
            from .legal_analysis import analyze_legal_details
            return analyze_legal_details(legal_details)
        except Exception as e:
            logger.error(f"Error in legal analysis: {str(e)}")
            return self._get_error_result("Legal analysis failed")

    def _run_specs_analysis(self, data):
        """Run property specs analysis"""
        try:
            from .property_specs import verify_property_specs
            return verify_property_specs(data)
        except Exception as e:
            logger.error(f"Error in specs analysis: {str(e)}")
            return self._get_error_result("Specs analysis failed")

    def _run_market_analysis(self, data):
        """Run market value analysis"""
        try:
            from .market_value import analyze_market_value
            return analyze_market_value(data)
        except Exception as e:
            logger.error(f"Error in market analysis: {str(e)}")
            return self._get_error_result("Market analysis failed")

    def _get_error_result(self, error_message):
        """Get a standardized error result"""
        return {
            'error': error_message,
            'status': 'error',
            'confidence': 0.0
        }

    def _get_all_error_results(self, error_message):
        """Get error results for all analyses"""
        return {
            'summary': "Analysis failed",
            'fraud': self._get_error_result(error_message),
            'trust': (0.0, error_message),
            'suggestions': self._get_error_result(error_message),
            'quality': self._get_error_result(error_message),
            'address': self._get_error_result(error_message),
            'cross_validation': self._get_error_result(error_message),
            'location': self._get_error_result(error_message),
            'price': self._get_error_result(error_message),
            'legal': self._get_error_result(error_message),
            'specs': self._get_error_result(error_message),
            'market': self._get_error_result(error_message)
        }

# Global instance for easy import
parallel_processor = ParallelProcessor()
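
Reviewer note: a minimal usage sketch of the new module, assuming the package is importable as `models` and that `image_files`/`pdf_files` are Flask-style upload objects (this caller is illustrative, not part of the commit):

# usage_sketch.py - hypothetical caller for models.parallel_processor
import asyncio
from models.parallel_processor import parallel_processor

def verify_listing(data, image_files, pdf_files):
    # Per-file work fans out across short-lived thread pools
    image_analysis = parallel_processor.process_images_parallel(image_files)
    pdf_analysis = parallel_processor.process_pdfs_parallel(pdf_files)

    consolidated_text = data.get('description', '')
    # run_analyses_parallel is a coroutine; drive it from sync code with asyncio.run
    return asyncio.run(parallel_processor.run_analyses_parallel(
        data, consolidated_text, image_analysis, pdf_analysis
    ))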
models/performance_optimizer.py ADDED
@@ -0,0 +1,178 @@
# models/performance_optimizer.py

import functools
import time
import threading
from typing import Dict, Any, Optional
from .logging_config import logger

class PerformanceOptimizer:
    """Performance optimization utilities for the property verification system"""

    def __init__(self):
        self._cache = {}
        self._cache_lock = threading.Lock()
        self._cache_ttl = 300  # 5 minutes cache TTL
        self._cache_timestamps = {}

    def cache_result(self, key: str, result: Any, ttl: int = None) -> None:
        """Cache a result with TTL"""
        with self._cache_lock:
            self._cache[key] = result
            self._cache_timestamps[key] = time.time() + (ttl or self._cache_ttl)

    def get_cached_result(self, key: str) -> Optional[Any]:
        """Get cached result if not expired"""
        with self._cache_lock:
            if key in self._cache:
                if time.time() < self._cache_timestamps.get(key, 0):
                    return self._cache[key]
                else:
                    # Remove expired cache entry
                    del self._cache[key]
                    if key in self._cache_timestamps:
                        del self._cache_timestamps[key]
            return None

    def clear_cache(self) -> None:
        """Clear all cached results"""
        with self._cache_lock:
            self._cache.clear()
            self._cache_timestamps.clear()

    def get_cache_stats(self) -> Dict[str, Any]:
        """Get cache statistics"""
        with self._cache_lock:
            return {
                'cache_size': len(self._cache),
                'cache_keys': list(self._cache.keys()),
                'cache_ttl': self._cache_ttl
            }

# Global performance optimizer instance
performance_optimizer = PerformanceOptimizer()

def timed_function(func):
    """Decorator to time function execution"""
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start_time = time.time()
        try:
            result = func(*args, **kwargs)
            execution_time = time.time() - start_time
            logger.info(f"{func.__name__} executed in {execution_time:.2f} seconds")
            return result
        except Exception as e:
            execution_time = time.time() - start_time
            logger.error(f"{func.__name__} failed after {execution_time:.2f} seconds: {str(e)}")
            raise
    return wrapper

def cached_function(ttl: int = 300):
    """Decorator to cache function results"""
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # Create cache key from function name and arguments
            cache_key = f"{func.__name__}:{hash(str(args) + str(sorted(kwargs.items())))}"

            # Try to get cached result
            cached_result = performance_optimizer.get_cached_result(cache_key)
            if cached_result is not None:
                logger.debug(f"Cache hit for {func.__name__}")
                return cached_result

            # Execute function and cache result
            result = func(*args, **kwargs)
            performance_optimizer.cache_result(cache_key, result, ttl)
            logger.debug(f"Cached result for {func.__name__}")
            return result
        return wrapper
    return decorator

def optimize_model_loading():
    """Optimize model loading for better performance"""
    try:
        from .model_loader import load_model

        # Pre-load models in background threads
        import concurrent.futures

        def load_model_async(model_name):
            try:
                model = load_model(model_name)
                logger.info(f"Pre-loaded model: {model_name}")
                return model
            except Exception as e:
                logger.warning(f"Failed to pre-load model {model_name}: {str(e)}")
                return None

        # Load models in parallel
        with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
            model_names = [
                "zero-shot-classification",
                "summarization",
                "text-classification",
                "text-generation"
            ]

            futures = {executor.submit(load_model_async, name): name for name in model_names}

            for future in concurrent.futures.as_completed(futures, timeout=60):
                model_name = futures[future]
                try:
                    future.result()
                except Exception as e:
                    logger.error(f"Error pre-loading {model_name}: {str(e)}")

        logger.info("Model pre-loading optimization completed")

    except Exception as e:
        logger.error(f"Error in model loading optimization: {str(e)}")

def optimize_image_processing():
    """Optimize image processing for better performance"""
    try:
        from PIL import Image

        # Allow large images (note: this disables PIL's decompression-bomb check)
        Image.MAX_IMAGE_PIXELS = None

        # Downscale oversized images for faster processing
        def optimize_image(img, max_size=1024):
            """Optimize image for faster processing"""
            if max(img.size) > max_size:
                img.thumbnail((max_size, max_size), Image.Resampling.LANCZOS)
            return img

        return optimize_image

    except Exception as e:
        logger.error(f"Error in image processing optimization: {str(e)}")
        return lambda img, max_size=1024: img

def get_performance_metrics():
    """Get current performance metrics"""
    try:
        # Import inside the try so a missing psutil dependency degrades gracefully
        import psutil
        import os

        process = psutil.Process(os.getpid())
        memory_info = process.memory_info()

        return {
            'memory_usage_mb': memory_info.rss / 1024 / 1024,
            'cpu_percent': process.cpu_percent(),
            'cache_stats': performance_optimizer.get_cache_stats(),
            'thread_count': threading.active_count()
        }
    except Exception as e:
        logger.error(f"Error getting performance metrics: {str(e)}")
        return {
            'memory_usage_mb': 0,
            'cpu_percent': 0,
            'cache_stats': {},
            'thread_count': 0
        }
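
Reviewer note: a short sketch of how the two decorators compose, assuming the module is importable as `models.performance_optimizer` (the function and its return values are invented for illustration):

# decorator_sketch.py - hypothetical example, not part of this commit
from models.performance_optimizer import (
    cached_function, timed_function, performance_optimizer)

@timed_function            # logs execution time at INFO level
@cached_function(ttl=600)  # memoizes by function name + hashed args for 10 minutes
def fetch_market_rate(city):
    # Stand-in for an expensive lookup; the value here is made up
    return {'city': city, 'rate_per_sqft': 4500}

fetch_market_rate('Hyderabad')   # computed, then cached
fetch_market_rate('Hyderabad')   # served from cache (DEBUG log: cache hit)
print(performance_optimizer.get_cache_stats())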
models/property_summary.py CHANGED
@@ -273,16 +273,102 @@ def generate_fallback_summary(data):
 def generate_property_summary(data):
     """Main function to generate property summary using SLM"""
     try:
-        # Use the new dynamic SLM-based approach
-        summary = generate_dynamic_summary_with_slm(data)
-        if not summary or not summary.strip():
-            summary = generate_fallback_summary(data)
-        if not summary or not summary.strip():
-            summary = "A beautiful property with excellent features and location. Contact us for more details."
-        return summary
+        # Validate input data
+        if not data or not isinstance(data, dict):
+            return "A beautiful property with excellent features and prime location. Contact us for detailed information and exclusive offers."
+
+        # Create a more robust fallback summary for any data
+        try:
+            # Use the new dynamic SLM-based approach
+            summary = generate_dynamic_summary_with_slm(data)
+
+            # Ensure summary is a proper string
+            if not summary or not isinstance(summary, str):
+                summary = generate_fallback_summary(data)
+
+            if not summary or not summary.strip():
+                summary = generate_fallback_summary(data)
+
+            # Final fallback - always return something meaningful
+            if not summary or not summary.strip():
+                summary = "A beautiful property with excellent features and prime location. Contact us for detailed information and exclusive offers."
+
+            # Ensure it's a string and clean it up
+            summary = str(summary).strip()
+            if summary == '[object Object]' or summary == 'null' or summary == 'undefined':
+                summary = generate_fallback_summary(data)
+
+            # If still no valid summary, create a basic one
+            if not summary or len(summary) < 50:
+                summary = create_basic_summary(data)
+
+            return summary
+
+        except Exception as e:
+            logger.error(f"Error in summary generation: {str(e)}")
+            return create_basic_summary(data)
+
     except Exception as e:
         logger.error(f"Error generating property summary: {str(e)}")
-        summary = generate_fallback_summary(data)
-        if not summary or not summary.strip():
-            summary = "A beautiful property with excellent features and location. Contact us for more details."
-        return summary
+        return "A beautiful property with excellent features and prime location. Contact us for detailed information and exclusive offers."
+
+def create_basic_summary(data):
+    """Create a basic summary even for invalid data"""
+    try:
+        # Extract basic information
+        property_type = data.get('property_type', 'Property')
+        city = data.get('city', 'Prime Location')
+        state = data.get('state', '')
+        bedrooms = data.get('bedrooms', '')
+        bathrooms = data.get('bathrooms', '')
+        sq_ft = data.get('sq_ft', '')
+        market_value = data.get('market_value', '')
+
+        # Create location string
+        location_parts = []
+        if city:
+            location_parts.append(city)
+        if state:
+            location_parts.append(state)
+        location = ', '.join(location_parts) if location_parts else 'Prime Location'
+
+        # Create features string
+        features = []
+        if bedrooms:
+            features.append(f"{bedrooms} bedroom{'s' if str(bedrooms) != '1' else ''}")
+        if bathrooms:
+            features.append(f"{bathrooms} bathroom{'s' if str(bathrooms) != '1' else ''}")
+        if sq_ft:
+            features.append(f"{sq_ft} sq. ft.")
+
+        features_str = ', '.join(features) if features else 'excellent features'
+
+        # Create price string
+        price_str = ""
+        if market_value:
+            try:
+                price_val = float(str(market_value).replace(',', '').replace('₹', ''))
+                if price_val > 0:
+                    price_str = f" at ₹{price_val:,.0f}"
+            except Exception:
+                pass
+
+        # Create property name - use a generic name if the original is invalid
+        property_name = data.get('property_name', '')
+        if property_name in ['2', '0', '1', 'test', 'sample', 'dummy'] or len(str(property_name)) < 3:
+            property_name = f"Beautiful {property_type}"
+
+        # Build the summary
+        summary_parts = [
+            f"Discover this exceptional {property_type.lower()} located in {location}.",
+            f"This property features {features_str} and offers excellent value for money.",
+            "Perfect for families and investors alike, this property combines modern amenities with strategic location.",
+            f"Don't miss this opportunity to own a piece of prime real estate{price_str}.",
+            "Contact us today for a detailed viewing and exclusive offers."
+        ]
+
+        return " ".join(summary_parts)
+
+    except Exception as e:
+        logger.error(f"Error creating basic summary: {str(e)}")
+        return "A beautiful property with excellent features and prime location. Contact us for detailed information and exclusive offers."
models/trust_score.py CHANGED
@@ -9,7 +9,7 @@ def generate_trust_score(text, image_analysis, pdf_analysis):
             classifier = load_model("zero-shot-classification", "typeform/mobilebert-uncased-mnli")
         except Exception as e:
             logger.error(f"Error loading model in trust score: {str(e)}")
-            return 20, f"Model loading error: {str(e)}"
+            return 35, f"Model loading error: {str(e)}"
         aspects = [
             "complete information provided",
             "verified location",
@@ -24,16 +24,16 @@ def generate_trust_score(text, image_analysis, pdf_analysis):
             result = classifier(str(text)[:1000], aspects, multi_label=True)
         except Exception as e:
             logger.error(f"Error in trust score model inference: {str(e)}")
-            return 20, f"Model inference error: {str(e)}"
+            return 35, f"Model inference error: {str(e)}"

-        # Much stricter weights with higher emphasis on critical aspects
+        # More balanced weights
         weights = {
-            "complete information provided": 0.25,
+            "complete information provided": 0.20,
             "verified location": 0.20,
             "consistent data": 0.15,
             "authentic documents": 0.15,
             "authentic images": 0.10,
-            "reasonable pricing": 0.05,
+            "reasonable pricing": 0.10,
             "verified ownership": 0.05,
             "proper documentation": 0.05
         }
@@ -41,88 +41,97 @@ def generate_trust_score(text, image_analysis, pdf_analysis):
         score = 0
         reasoning_parts = []

-        # Much stricter scoring for each aspect
+        # More reasonable scoring for each aspect
         for label, confidence in zip(result['labels'], result['scores']):
             adjusted_confidence = confidence

-            # Stricter document verification
+            # Document verification
             if label == "authentic documents":
                 if not pdf_analysis or len(pdf_analysis) == 0:
-                    adjusted_confidence = 0.0
+                    adjusted_confidence = 0.3  # Base score for no documents
                 else:
                     doc_scores = [p.get('verification_score', 0) for p in pdf_analysis]
                     adjusted_confidence = sum(doc_scores) / max(1, len(doc_scores))
-                    # Heavily penalize if any document has low verification score
-                    if any(score < 0.7 for score in doc_scores):
-                        adjusted_confidence *= 0.4
-                    # Additional penalty for missing documents
+                    # Moderate penalty for low verification scores
+                    if any(score < 0.5 for score in doc_scores):
+                        adjusted_confidence *= 0.7
+                    # Small penalty for missing documents
                     if len(doc_scores) < 2:
-                        adjusted_confidence *= 0.5
+                        adjusted_confidence *= 0.8

-            # Stricter image verification
+            # Image verification
             elif label == "authentic images":
                 if not image_analysis or len(image_analysis) == 0:
-                    adjusted_confidence = 0.0
+                    adjusted_confidence = 0.3  # Base score for no images
                 else:
                     img_scores = [i.get('authenticity_score', 0) for i in image_analysis]
                     adjusted_confidence = sum(img_scores) / max(1, len(img_scores))
-                    # Heavily penalize if any image has low authenticity score
-                    if any(score < 0.8 for score in img_scores):
-                        adjusted_confidence *= 0.4
-                    # Additional penalty for AI-generated images
+                    # Moderate penalty for low authenticity scores
+                    if any(score < 0.6 for score in img_scores):
+                        adjusted_confidence *= 0.7
+                    # Small penalty for AI-generated images
                     if any(i.get('is_ai_generated', False) for i in image_analysis):
-                        adjusted_confidence *= 0.5
-                    # Additional penalty for non-property related images
+                        adjusted_confidence *= 0.8
+                    # Small penalty for non-property related images
                     if any(not i.get('is_property_related', False) for i in image_analysis):
-                        adjusted_confidence *= 0.6
+                        adjusted_confidence *= 0.8

-            # Stricter consistency check
+            # Consistency check
             elif label == "consistent data":
                 # Check for inconsistencies in the data
                 if "inconsistent" in text.lower() or "suspicious" in text.lower():
-                    adjusted_confidence *= 0.3
+                    adjusted_confidence *= 0.6
                 # Check for impossible values
                 if "impossible" in text.lower() or "invalid" in text.lower():
-                    adjusted_confidence *= 0.2
+                    adjusted_confidence *= 0.5
                 # Check for missing critical information
                 if "missing" in text.lower() or "not provided" in text.lower():
-                    adjusted_confidence *= 0.4
+                    adjusted_confidence *= 0.7

-            # Stricter completeness check
+            # Completeness check
             elif label == "complete information provided":
                 # Check for missing critical information
-                if len(text) < 300 or "not provided" in text.lower() or "missing" in text.lower():
-                    adjusted_confidence *= 0.4
+                if len(text) < 200 or "not provided" in text.lower() or "missing" in text.lower():
+                    adjusted_confidence *= 0.7
                 # Check for vague or generic descriptions
                 if "generic" in text.lower() or "vague" in text.lower():
-                    adjusted_confidence *= 0.5
+                    adjusted_confidence *= 0.8
                 # Check for suspiciously short descriptions
-                if len(text) < 150:
-                    adjusted_confidence *= 0.3
+                if len(text) < 100:
+                    adjusted_confidence *= 0.6

             score += adjusted_confidence * weights.get(label, 0.1)
             reasoning_parts.append(f"{label} ({adjusted_confidence:.0%})")

-        # Apply additional penalties for suspicious patterns
+        # Apply moderate penalties for suspicious patterns
         if "suspicious" in text.lower() or "fraudulent" in text.lower():
-            score *= 0.5
+            score *= 0.7

-        # Apply penalties for suspiciously low values
+        # Apply moderate penalties for suspiciously low values
         if "suspiciously low" in text.lower() or "unusually small" in text.lower():
-            score *= 0.6
+            score *= 0.8

-        # Apply penalties for inconsistencies
+        # Apply moderate penalties for inconsistencies
         if "inconsistent" in text.lower() or "mismatch" in text.lower():
-            score *= 0.6
+            score *= 0.8

-        # Apply penalties for missing critical information
+        # Apply moderate penalties for missing critical information
         if "missing critical" in text.lower() or "incomplete" in text.lower():
-            score *= 0.7
+            score *= 0.8
+
+        # Ensure minimum score for any valid data
+        if score < 0.1:
+            score = 0.1  # Minimum 10% score for any data

         # Ensure score is between 0 and 100
         score = min(100, max(0, int(score * 100)))
+
+        # Ensure minimum score of 25% for any valid data
+        if score < 25:
+            score = 25
+
         reasoning = f"Based on: {', '.join(reasoning_parts)}"
         return score, reasoning
     except Exception as e:
         logger.error(f"Error generating trust score: {str(e)}")
-        return 20, "Could not assess trust."
+        return 35, "Could not assess trust."
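
Reviewer note: to make the rebalancing concrete, a back-of-the-envelope recomputation of the weighted sum using the new weights; the per-aspect confidences below are invented:

# scoring_sketch.py - hypothetical walk-through, not part of this commit
weights = {
    "complete information provided": 0.20,
    "verified location": 0.20,
    "consistent data": 0.15,
    "authentic documents": 0.15,
    "authentic images": 0.10,
    "reasonable pricing": 0.10,
    "verified ownership": 0.05,
    "proper documentation": 0.05,
}

# A listing with no documents or images: both aspects now start at the 0.3
# base score instead of being zeroed out as before.
confidences = {
    "complete information provided": 0.80,
    "verified location": 0.70,
    "consistent data": 0.75,
    "authentic documents": 0.30,
    "authentic images": 0.30,
    "reasonable pricing": 0.60,
    "verified ownership": 0.50,
    "proper documentation": 0.50,
}

score = sum(confidences[a] * w for a, w in weights.items())
score = min(100, max(0, int(score * 100)))
score = max(score, 25)  # new floor: any valid listing scores at least 25
print(score)  # -> 59; the old 0.0 zeroing and 0.25 completeness weight give 53, with no floor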