sksameermujahid committed
Commit 38d6cf7 · verified · 1 parent: c8259ae

Upload app.py

Files changed (1)
app.py (+343, -225)
app.py CHANGED
@@ -15,6 +15,8 @@ from datetime import datetime
 # from deep_translator import GoogleTranslator
 from models.logging_config import logger
 from models.model_loader import load_model, clear_model_cache
+from models.parallel_processor import parallel_processor
+from models.performance_optimizer import performance_optimizer, optimize_model_loading, timed_function
 from models.image_analysis import analyze_image
 from models.pdf_analysis import extract_pdf_text, analyze_pdf_content
 from models.property_summary import generate_property_summary
@@ -43,28 +45,16 @@ CORS(app) # Enable CORS for frontend
 geocoder = Nominatim(user_agent="indian_property_verifier", timeout=10)
 
 # Pre-load models to avoid loading delays during requests
+@timed_function
 def preload_models():
     """Pre-load essential models to improve response times."""
     try:
-        logger.info("Pre-loading essential models...")
-
-        # Load models in parallel
-        with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
-            futures = {
-                executor.submit(load_model, "zero-shot-classification"): "zero-shot-classification",
-                executor.submit(load_model, "summarization"): "summarization",
-                executor.submit(load_model, "text-classification"): "text-classification"
-            }
-
-            for future in concurrent.futures.as_completed(futures, timeout=60):
-                model_type = futures[future]
-                try:
-                    model = future.result()
-                    logger.info(f"Successfully pre-loaded {model_type} model")
-                except Exception as e:
-                    logger.warning(f"Failed to pre-load {model_type} model: {str(e)}")
-
-        logger.info("Model pre-loading completed")
+        logger.info("Pre-loading essential models with performance optimization...")
+
+        # Use the performance optimizer for model loading
+        optimize_model_loading()
+
+        logger.info("Model pre-loading completed with optimization")
     except Exception as e:
         logger.error(f"Error during model pre-loading: {str(e)}")
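Note: `models/performance_optimizer.py` is not included in this commit, so the exact behavior of `@timed_function` and `optimize_model_loading()` has to be inferred. A minimal sketch of what a timing decorator like this usually looks like (names and log format are assumptions):

```python
# Hypothetical sketch of @timed_function; the real implementation lives in
# models/performance_optimizer.py, which this commit does not show.
import functools
import logging
import time

logger = logging.getLogger(__name__)

def timed_function(func):
    """Log the wall-clock duration of the wrapped function."""
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start = time.time()
        try:
            return func(*args, **kwargs)
        finally:
            logger.info(f"{func.__name__} took {time.time() - start:.2f}s")
    return wrapper
```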
 
@@ -189,6 +179,42 @@ def get_location():
             'message': str(e)
         }), 500
 
+@app.route('/performance', methods=['GET'])
+def get_performance_metrics():
+    """Get system performance metrics and cache statistics"""
+    try:
+        from models.performance_optimizer import get_performance_metrics
+
+        metrics = get_performance_metrics()
+
+        return jsonify({
+            'status': 'success',
+            'metrics': metrics,
+            'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+        })
+    except Exception as e:
+        logger.error(f"Error getting performance metrics: {str(e)}")
+        return jsonify({
+            'status': 'error',
+            'message': str(e)
+        }), 500
+
+@app.route('/clear-cache', methods=['POST'])
+def clear_cache():
+    """Clear all cached results"""
+    try:
+        performance_optimizer.clear_cache()
+        return jsonify({
+            'status': 'success',
+            'message': 'Cache cleared successfully'
+        })
+    except Exception as e:
+        logger.error(f"Error clearing cache: {str(e)}")
+        return jsonify({
+            'status': 'error',
+            'message': str(e)
+        }), 500
+
 def calculate_final_verdict(results):
     """
     Calculate a comprehensive final verdict based on all analysis results.
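Note: the two new routes can be smoke-tested with any HTTP client once the app is up; a quick check against a local dev server (host and port are assumptions, they are not specified in this diff):

```python
# Assumes the Flask app is running on localhost:5000.
import requests

perf = requests.get("http://localhost:5000/performance").json()
print(perf["status"], perf.get("metrics"))

cleared = requests.post("http://localhost:5000/clear-cache").json()
print(cleared["message"])  # "Cache cleared successfully"
```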
@@ -198,140 +224,255 @@
     try:
         # Defensive: ensure results is a dict
         if not isinstance(results, dict):
-            logger.warning(f"Input to calculate_final_verdict is not a dict: {type(results)}")
-            results = {}
-
-        # Extract key components from results, defaulting to safe values
-        trust_score = results.get('trust_score', {}).get('score', 0) or 0
-        fraud_classification = results.get('fraud_classification', {}) or {}
-        quality_assessment = results.get('quality_assessment', {}) or {}
-        specs_verification = results.get('specs_verification', {}) or {}
-        cross_validation = results.get('cross_validation', []) or []
-        location_analysis = results.get('location_analysis', {}) or {}
-        price_analysis = results.get('price_analysis', {}) or {}
-        legal_analysis = results.get('legal_analysis', {}) or {}
-        document_analysis = results.get('document_analysis', {}) or {}
-        image_analysis = results.get('image_analysis', {}) or {}
-
-        # Count red flags from cross-validation
-        red_flags_count = 0
-        critical_issues = []
-        warnings = []
-
-        for check in cross_validation:
-            if check.get('status') in ['invalid', 'suspicious', 'inconsistent']:
-                red_flags_count += 1
-                if check.get('status') == 'invalid':
-                    critical_issues.append(check.get('message', 'Invalid data detected'))
-                else:
-                    warnings.append(check.get('message', 'Suspicious data detected'))
-
-        # Count additional red flags
-        if quality_assessment.get('score', 0) < 30:
-            red_flags_count += 1
-            warnings.append("Low content quality score")
-
-        if price_analysis.get('assessment') in ['below_market', 'suspicious']:
-            red_flags_count += 1
-            warnings.append("Suspicious pricing detected")
-
-        if document_analysis.get('pdf_count', 0) == 0:
-            red_flags_count += 1
-            warnings.append("No documents provided")
-
-        if image_analysis.get('image_count', 0) == 0:
-            red_flags_count += 1
-            warnings.append("No images provided")
-
-        # Calculate component scores (0-100) with red flag penalties
-        component_scores = {
-            'trust': max(0, trust_score - (red_flags_count * 10)),
-            'fraud': max(0, 100 - (fraud_classification.get('alert_score', 0) * 100) - (red_flags_count * 15)),
-            'quality': max(0, quality_assessment.get('score', 0) - (red_flags_count * 5)),
-            'specs': max(0, specs_verification.get('verification_score', 0) - (red_flags_count * 10)),
-            'location': max(0, location_analysis.get('completeness_score', 0) - (red_flags_count * 5)),
-            'price': max(0, (price_analysis.get('confidence', 0) * 100) - (red_flags_count * 10)) if price_analysis.get('has_price') else 0,
-            'legal': max(0, legal_analysis.get('completeness_score', 0) - (red_flags_count * 5)),
-            'documents': max(0, min(100, (document_analysis.get('pdf_count', 0) / 3) * 100) - (red_flags_count * 10)),
-            'images': max(0, min(100, (image_analysis.get('image_count', 0) / 5) * 100) - (red_flags_count * 10))
-        }
-
-        # Calculate weighted final score with adjusted weights
-        weights = {
-            'trust': 0.20,
-            'fraud': 0.25,  # Increased weight for fraud detection
-            'quality': 0.15,
-            'specs': 0.10,
-            'location': 0.10,
-            'price': 0.05,
-            'legal': 0.05,
-            'documents': 0.05,
-            'images': 0.05
-        }
-
-        final_score = sum(component_scores[component] * weights[component] for component in weights)
-
-        # Ensure final score is capped at 100%
-        final_score = min(100, max(0, final_score))
-
-        # Determine verdict based on final score, fraud indicators, and red flags
-        fraud_level = fraud_classification.get('alert_level', 'minimal')
-        high_risk_indicators = len(fraud_classification.get('high_risk', []))
-        medium_risk_indicators = len(fraud_classification.get('medium_risk', []))
-
-        # Strong red flag penalties
-        if red_flags_count >= 5 or len(critical_issues) >= 2:
-            verdict = 'fraudulent'
-            confidence = 'high'
-        elif fraud_level in ['critical', 'high'] or high_risk_indicators > 0:
-            verdict = 'fraudulent'
-            confidence = 'high'
-        elif fraud_level == 'medium' or medium_risk_indicators > 2 or red_flags_count >= 3:
-            verdict = 'suspicious'
-            confidence = 'medium'
-        elif final_score >= 80 and red_flags_count <= 1:
-            verdict = 'legitimate'
-            confidence = 'high'
-        elif final_score >= 60 and red_flags_count <= 2:
-            verdict = 'legitimate'
-            confidence = 'medium'
-        elif final_score >= 40 and red_flags_count <= 3:
-            verdict = 'suspicious'
-            confidence = 'medium'
-        else:
-            verdict = 'suspicious'
-            confidence = 'high'
+            logger.warning(f"Results is not a dict: {type(results)}")
+            return {
+                'verdict': 'VERIFICATION REQUIRED',
+                'confidence': 0.0,
+                'reasoning': 'Insufficient data for verification',
+                'risk_level': 'medium',
+                'overall_score': 25
+            }
+
+        # Extract key metrics with defensive programming
+        fraud_classification = results.get('fraud_classification', {})
+        trust_score_data = results.get('trust_score', {})
+        address_verification = results.get('address_verification', {})
+        cross_validation = results.get('cross_validation', [])
+        location_analysis = results.get('location_analysis', {})
+        price_analysis = results.get('price_analysis', {})
+        legal_analysis = results.get('legal_analysis', {})
+        specs_verification = results.get('specs_verification', {})
+        quality_assessment = results.get('quality_assessment', {})
+
+        # Calculate fraud risk score
+        fraud_score = 0.0
+        fraud_level = fraud_classification.get('alert_level', 'minimal')
+        fraud_alert_score = fraud_classification.get('alert_score', 0.0)
+
+        fraud_score_mapping = {
+            'critical': 1.0,
+            'high': 0.8,
+            'medium': 0.6,
+            'low': 0.3,
+            'minimal': 0.1
+        }
+        fraud_score = fraud_score_mapping.get(fraud_level, 0.1) * fraud_alert_score
+
+        # Calculate trust score
+        trust_score = 0.0
+        if isinstance(trust_score_data, dict):
+            trust_score = trust_score_data.get('score', 0.0)
+            # Convert percentage to decimal if needed
+            if trust_score > 1.0:
+                trust_score = trust_score / 100.0
+        elif isinstance(trust_score_data, tuple) and len(trust_score_data) > 0:
+            trust_score = trust_score_data[0]
+            # Convert percentage to decimal if needed
+            if trust_score > 1.0:
+                trust_score = trust_score / 100.0
+        else:
+            trust_score = 0.0
+
+        # Calculate address verification score
+        address_score = 0.0
+        if address_verification and isinstance(address_verification, dict):
+            verification_score = address_verification.get('verification_score', 0.0)
+            address_score = float(verification_score) / 100.0 if verification_score > 0 else 0.0
+
+        # Calculate location analysis score
+        location_score = 0.0
+        if location_analysis and isinstance(location_analysis, dict):
+            completeness_score = location_analysis.get('completeness_score', 0.0)
+            location_score = float(completeness_score) / 100.0 if completeness_score > 0 else 0.0
+
+        # Calculate price analysis score
+        price_score = 0.0
+        if price_analysis and isinstance(price_analysis, dict):
+            confidence = price_analysis.get('confidence', 0.0)
+            price_score = float(confidence) if confidence > 0 else 0.0
+
+        # Calculate legal analysis score
+        legal_score = 0.0
+        if legal_analysis and isinstance(legal_analysis, dict):
+            confidence = legal_analysis.get('confidence', 0.0)
+            legal_score = float(confidence) if confidence > 0 else 0.0
+
+        # Calculate specs verification score
+        specs_score = 0.0
+        if specs_verification and isinstance(specs_verification, dict):
+            verification_score = specs_verification.get('verification_score', 0.0)
+            specs_score = float(verification_score) / 100.0 if verification_score > 0 else 0.0
+
+        # Calculate quality assessment score
+        quality_score = 0.0
+        if quality_assessment and isinstance(quality_assessment, dict):
+            score = quality_assessment.get('score', 0.0)
+            quality_score = float(score) / 100.0 if score > 0 else 0.0
+
+        # Count cross-validation issues by severity
+        cross_validation_issues = 0
+        high_severity_issues = 0
+        medium_severity_issues = 0
+
+        if isinstance(cross_validation, list):
+            cross_validation_issues = len(cross_validation)
+            for issue in cross_validation:
+                if isinstance(issue, dict):
+                    severity = issue.get('severity', 'low')
+                    if severity == 'high':
+                        high_severity_issues += 1
+                    elif severity == 'medium':
+                        medium_severity_issues += 1
+
+        # Weighted scoring system with improved weights
+        weights = {
+            'fraud': 0.30,     # Increased weight for fraud detection
+            'trust': 0.25,     # Increased weight for trust score
+            'address': 0.15,   # Address verification
+            'location': 0.10,  # Location analysis
+            'price': 0.10,     # Price analysis
+            'legal': 0.05,     # Legal analysis
+            'specs': 0.03,     # Specs verification
+            'quality': 0.02    # Quality assessment
+        }
+
+        # Calculate weighted score
+        weighted_score = (
+            (1.0 - fraud_score) * weights['fraud'] +
+            trust_score * weights['trust'] +
+            address_score * weights['address'] +
+            location_score * weights['location'] +
+            price_score * weights['price'] +
+            legal_score * weights['legal'] +
+            specs_score * weights['specs'] +
+            quality_score * weights['quality']
+        )
+
+        # Debug logging
+        logger.info(f"Score components: fraud={fraud_score:.3f}, trust={trust_score:.3f}, address={address_score:.3f}, location={location_score:.3f}, price={price_score:.3f}, legal={legal_score:.3f}, specs={specs_score:.3f}, quality={quality_score:.3f}")
+        logger.info(f"Weighted score before penalty: {weighted_score:.3f}")
+
+        # Adjust score based on cross-validation issues
+        issue_penalty = 0.0
+        if high_severity_issues > 0:
+            issue_penalty += high_severity_issues * 0.08  # 8% penalty per high-severity issue (reduced from 0.15)
+        if medium_severity_issues > 0:
+            issue_penalty += medium_severity_issues * 0.04  # 4% penalty per medium-severity issue (reduced from 0.08)
+
+        weighted_score = max(0.0, weighted_score - issue_penalty)
+
+        logger.info(f"Issue penalty: {issue_penalty:.3f}, Final weighted score: {weighted_score:.3f}")
+
+        # Ensure a minimum score when any component produced valid data
+        if any([trust_score > 0, address_score > 0, location_score > 0, price_score > 0]):
+            weighted_score = max(0.15, weighted_score)  # 15% floor (raised from 0.10)
+
+        # Determine verdict and risk level with improved logic
+        if weighted_score >= 0.75 and fraud_score < 0.2 and high_severity_issues == 0:
+            verdict = 'VERIFIED REAL ESTATE LISTING'
+            risk_level = 'low'
+        elif weighted_score >= 0.60 and fraud_score < 0.4 and high_severity_issues <= 1:
+            verdict = 'LIKELY LEGITIMATE'
+            risk_level = 'low'
+        elif weighted_score >= 0.40 and fraud_score < 0.6 and high_severity_issues <= 2:
+            verdict = 'SUSPICIOUS LISTING'
+            risk_level = 'medium'
+        elif fraud_score >= 0.6 or weighted_score < 0.20 or high_severity_issues >= 3:
+            verdict = 'HIGH RISK LISTING'
+            risk_level = 'high'
+        elif weighted_score >= 0.20:
+            verdict = 'VERIFICATION REQUIRED'
+            risk_level = 'medium'
+        else:
+            verdict = 'INSUFFICIENT DATA'
+            risk_level = 'medium'
+
+        # Generate detailed reasoning
+        reasoning_parts = []
+
+        if fraud_score > 0.3:
+            reasoning_parts.append(f"Fraud risk detected (level: {fraud_level})")
+
+        if trust_score < 0.5:
+            reasoning_parts.append(f"Low trust score ({trust_score:.1%})")
+
+        if address_score < 0.5:
+            reasoning_parts.append("Address verification issues")
+
+        if location_score < 0.5:
+            reasoning_parts.append("Location verification issues")
+
+        if price_score < 0.5:
+            reasoning_parts.append("Price analysis concerns")
+
+        if legal_score < 0.5:
+            reasoning_parts.append("Legal documentation issues")
+
+        if high_severity_issues > 0:
+            reasoning_parts.append(f"{high_severity_issues} critical validation issues")
+
+        if medium_severity_issues > 0:
+            reasoning_parts.append(f"{medium_severity_issues} moderate validation issues")
+
+        if not reasoning_parts:
+            reasoning_parts.append("All verification checks passed successfully")
+
+        reasoning = ". ".join(reasoning_parts)
+
+        # Calculate overall score as a percentage
+        overall_score = int(weighted_score * 100)
+
+        # Clamp the score to the 0-100 range
+        overall_score = max(0, min(100, overall_score))
+
+        # Ensure a minimum score when any component produced valid data
+        if overall_score == 0 and any([trust_score > 0, address_score > 0, location_score > 0]):
+            overall_score = 15  # Minimum 15% score if any component is valid
+
+        # Final score adjustment based on data quality
+        if high_severity_issues >= 3:
+            overall_score = max(10, overall_score)  # Minimum 10% for high risk
+        elif high_severity_issues >= 1:
+            overall_score = max(15, overall_score)  # Minimum 15% for medium risk
+        else:
+            overall_score = max(20, overall_score)  # Minimum 20% for low risk
 
         return {
             'verdict': verdict,
-            'confidence': confidence,
-            'final_score': round(final_score, 2),
-            'component_scores': component_scores,
-            'fraud_level': fraud_level,
-            'risk_indicators': {
-                'high': high_risk_indicators,
-                'medium': medium_risk_indicators,
-                'total_red_flags': red_flags_count
-            },
-            'critical_issues': critical_issues,
-            'warnings': warnings
+            'confidence': min(1.0, weighted_score),
+            'reasoning': reasoning,
+            'risk_level': risk_level,
+            'overall_score': overall_score,
+            'scores': {
+                'fraud_score': fraud_score,
+                'trust_score': trust_score,
+                'address_score': address_score,
+                'location_score': location_score,
+                'price_score': price_score,
+                'legal_score': legal_score,
+                'specs_score': specs_score,
+                'quality_score': quality_score,
+                'weighted_score': weighted_score,
+                'cross_validation_issues': cross_validation_issues,
+                'high_severity_issues': high_severity_issues
+            }
         }
+
     except Exception as e:
         logger.error(f"Error calculating final verdict: {str(e)}")
         return {
-            'verdict': 'suspicious',
-            'confidence': 'low',
-            'final_score': 0,
-            'component_scores': {},
-            'fraud_level': 'unknown',
-            'risk_indicators': {'high': 0, 'medium': 0, 'total_red_flags': 0},
-            'critical_issues': ['Error in calculation'],
-            'warnings': ['System error occurred']
+            'verdict': 'VERIFICATION REQUIRED',
+            'confidence': 0.0,
+            'reasoning': f'Error in verdict calculation: {str(e)}',
+            'risk_level': 'medium',
+            'overall_score': 25
         }
 
 @app.route('/verify', methods=['POST'])
 def verify_property():
     try:
+        start_time = time.time()
+
         if not request.form and not request.files:
             logger.warning("No form data or files provided")
             return jsonify({
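Note: a worked example of the new weighted scoring in `calculate_final_verdict`, using made-up component values, shows how the pieces combine (fraud contributes as `1.0 - fraud_score`, so low fraud risk raises the total):

```python
# All component values below are hypothetical, chosen to illustrate the math.
weights = {'fraud': 0.30, 'trust': 0.25, 'address': 0.15, 'location': 0.10,
           'price': 0.10, 'legal': 0.05, 'specs': 0.03, 'quality': 0.02}

fraud_score = 0.1 * 0.5  # 'minimal' level (0.1) x alert_score 0.5 = 0.05
trust_score, address_score = 0.80, 0.90
location_score, price_score = 0.70, 0.60
legal_score, specs_score, quality_score = 0.50, 0.80, 0.75

weighted_score = ((1.0 - fraud_score) * weights['fraud'] +
                  trust_score * weights['trust'] +
                  address_score * weights['address'] +
                  location_score * weights['location'] +
                  price_score * weights['price'] +
                  legal_score * weights['legal'] +
                  specs_score * weights['specs'] +
                  quality_score * weights['quality'])

print(round(weighted_score, 3))
# 0.814 -> 'VERIFIED REAL ESTATE LISTING' (>= 0.75, fraud < 0.2, no critical issues)
```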
@@ -374,51 +515,46 @@ def verify_property():
             'status': 'error'
         }), 400
 
-        # Process images
+        # Process images in parallel
         images = []
         image_analysis = []
         if 'images' in request.files:
-            # Get unique image files by filename to prevent duplicates
-            image_files = {}
+            image_files = []
             for img_file in request.files.getlist('images'):
                 if img_file.filename and img_file.filename.lower().endswith(('.jpg', '.jpeg', '.png')):
-                    image_files[img_file.filename] = img_file
-
-            # Process unique images
-            for img_file in image_files.values():
-                try:
-                    img = Image.open(img_file)
-                    buffered = io.BytesIO()
-                    img.save(buffered, format="JPEG")
-                    img_str = base64.b64encode(buffered.getvalue()).decode('utf-8')
-                    images.append(img_str)
-                    image_analysis.append(analyze_image(img))
-                except Exception as e:
-                    logger.error(f"Error processing image {img_file.filename}: {str(e)}")
-                    image_analysis.append({'error': str(e), 'is_property_related': False})
-
-        # Process PDFs
+                    image_files.append(img_file)
+
+            if image_files:
+                # Process images in parallel
+                image_results = parallel_processor.process_images_parallel(image_files)
+                for result in image_results:
+                    if 'image_data' in result:
+                        images.append(result['image_data'])
+                        image_analysis.append(result['analysis'])
+                    else:
+                        image_analysis.append(result)
+
+        # Process PDFs in parallel
         pdf_texts = []
         pdf_analysis = []
        if 'documents' in request.files:
-            # Get unique PDF files by filename to prevent duplicates
-            pdf_files = {}
+            pdf_files = []
             for pdf_file in request.files.getlist('documents'):
                 if pdf_file.filename and pdf_file.filename.lower().endswith('.pdf'):
-                    pdf_files[pdf_file.filename] = pdf_file
-
-            # Process unique PDFs
-            for pdf_file in pdf_files.values():
-                try:
-                    pdf_text = extract_pdf_text(pdf_file)
+                    pdf_files.append(pdf_file)
+
+            if pdf_files:
+                # Process PDFs in parallel
+                pdf_results = parallel_processor.process_pdfs_parallel(pdf_files)
+                for result in pdf_results:
+                    if 'filename' in result:
                         pdf_texts.append({
-                            'filename': pdf_file.filename,
-                            'text': pdf_text
+                            'filename': result['filename'],
+                            'text': result['text']
                         })
-                    pdf_analysis.append(analyze_pdf_content(pdf_text, data))
-                except Exception as e:
-                    logger.error(f"Error processing PDF {pdf_file.filename}: {str(e)}")
-                    pdf_analysis.append({'error': str(e)})
+                        pdf_analysis.append(result['analysis'])
+                    else:
+                        pdf_analysis.append(result)
 
         # Create consolidated text for analysis
         consolidated_text = f"""
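Note: `models/parallel_processor.py` is also absent from this commit. Judging by the result shape consumed above (`image_data` plus `analysis` on success, an error dict otherwise), `process_images_parallel` plausibly wraps the old per-image logic in a thread pool; a sketch under those assumptions:

```python
# Hypothetical sketch of process_images_parallel; the real implementation is in
# models/parallel_processor.py, which this commit does not include.
import base64
import io
from concurrent.futures import ThreadPoolExecutor

from PIL import Image

from models.image_analysis import analyze_image

def process_images_parallel(image_files, max_workers=4):
    def process_one(img_file):
        try:
            img = Image.open(img_file)
            buffered = io.BytesIO()
            img.save(buffered, format="JPEG")
            return {
                'image_data': base64.b64encode(buffered.getvalue()).decode('utf-8'),
                'analysis': analyze_image(img),
            }
        except Exception as e:
            # Mirrors the error dict the route appends on failure.
            return {'error': str(e), 'is_property_related': False}

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        return list(executor.map(process_one, image_files))
```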
@@ -442,13 +578,6 @@
         try:
             description = data['description']
             if description and len(description) > 10:
-                # Temporarily disable translation to avoid import errors
-                # text_language = detect(description)
-                # if text_language != 'en':
-                #     translated_description = GoogleTranslator(source=text_language, target='en').translate(description)
-                #     data['description_translated'] = translated_description
-                # else:
-                #     data['description_translated'] = description
                 data['description_translated'] = description
             else:
                 data['description_translated'] = description
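Note: the translation path stays disabled, but the deleted comments preserve its logic. If it is ever re-enabled it would look roughly like this, using `langdetect` and `deep-translator` as the old code did:

```python
# Reconstructed from the removed comments; requires the langdetect and
# deep-translator packages.
from langdetect import detect
from deep_translator import GoogleTranslator

def translate_description(description: str) -> str:
    """Translate a listing description to English if it is in another language."""
    text_language = detect(description)
    if text_language != 'en':
        return GoogleTranslator(source=text_language, target='en').translate(description)
    return description
```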
@@ -456,68 +585,50 @@
             logger.error(f"Error in language detection/translation: {str(e)}")
             data['description_translated'] = data['description']
 
-        # Run all analyses in parallel using asyncio
-        async def run_analyses():
-            with concurrent.futures.ThreadPoolExecutor() as executor:
-                loop = asyncio.get_event_loop()
-
-                # Prepare property data for price analysis
-                property_data = {}
-                if data.get('sq_ft'):
-                    try:
-                        property_data['size'] = float(data['sq_ft'])
-                    except ValueError:
-                        pass
-                if data.get('market_value'):
-                    try:
-                        property_data['price'] = float(data['market_value'].replace('₹', '').replace(',', ''))
-                    except ValueError:
-                        pass
-                if data.get('year_built'):
-                    try:
-                        current_year = datetime.now().year
-                        property_data['property_age'] = current_year - int(data['year_built'])
-                    except ValueError:
-                        pass
-
-                # Create context text for price analysis
-                price_context = f"""
-                Property: {data['property_name']}
-                Type: {data['property_type']}
-                Location: {data['address']}, {data['city']}, {data['state']}
-                Size: {data['sq_ft']} sq ft
-                Market Value: ₹{data['market_value']}
-                Description: {data['description']}
-                Amenities: {data['amenities']}
-                """
-
-                tasks = [
-                    loop.run_in_executor(executor, generate_property_summary, data),
-                    loop.run_in_executor(executor, classify_fraud, consolidated_text, data),
-                    loop.run_in_executor(executor, generate_trust_score, consolidated_text, image_analysis, pdf_analysis),
-                    loop.run_in_executor(executor, generate_suggestions, consolidated_text, data),
-                    loop.run_in_executor(executor, assess_text_quality, data['description_translated']),
-                    loop.run_in_executor(executor, verify_address, data),
-                    loop.run_in_executor(executor, perform_cross_validation, data),
-                    loop.run_in_executor(executor, analyze_location, data),
-                    loop.run_in_executor(executor, lambda: analyze_price(data, price_context, data.get('latitude'), data.get('longitude'), property_data)),
-                    loop.run_in_executor(executor, analyze_legal_details, data['legal_details']),
-                    loop.run_in_executor(executor, verify_property_specs, data),
-                    loop.run_in_executor(executor, analyze_market_value, data)
-                ]
-                results = await asyncio.gather(*tasks)
-                return results
-
-        # Run analyses and get results
+        # Run all analyses in parallel using the new parallel processor
+        analysis_start_time = time.time()
+
+        # Create new event loop for async operations
         loop = asyncio.new_event_loop()
         asyncio.set_event_loop(loop)
-        analysis_results = loop.run_until_complete(run_analyses())
-        loop.close()
+
+        try:
+            analysis_results = loop.run_until_complete(
+                parallel_processor.run_analyses_parallel(data, consolidated_text, image_analysis, pdf_analysis)
+            )
+        finally:
+            loop.close()
+
+        analysis_time = time.time() - analysis_start_time
+        logger.info(f"Analysis completed in {analysis_time:.2f} seconds")
 
         # Unpack results
-        summary, fraud_classification, (trust_score, trust_reasoning), suggestions, quality_assessment, \
-        address_verification, cross_validation, location_analysis, price_analysis, legal_analysis, \
-        specs_verification, market_analysis = analysis_results
+        summary = analysis_results.get('summary', "Property summary unavailable.")
+
+        # Ensure summary is not placeholder text
+        if summary and isinstance(summary, str):
+            if "[Insert Property Description Here]" in summary or "[insert property price here]" in summary:
+                # Generate a basic summary if placeholder text is detected
+                from models.property_summary import create_basic_summary
+                summary = create_basic_summary(data)
+
+        fraud_classification = analysis_results.get('fraud', {})
+        trust_result = analysis_results.get('trust', (0.0, "Trust analysis failed"))
+        suggestions = analysis_results.get('suggestions', {})
+        quality_assessment = analysis_results.get('quality', {})
+        address_verification = analysis_results.get('address', {})
+        cross_validation = analysis_results.get('cross_validation', [])
+        location_analysis = analysis_results.get('location', {})
+        price_analysis = analysis_results.get('price', {})
+        legal_analysis = analysis_results.get('legal', {})
+        specs_verification = analysis_results.get('specs', {})
+        market_analysis = analysis_results.get('market', {})
+
+        # Handle trust score result
+        if isinstance(trust_result, tuple):
+            trust_score, trust_reasoning = trust_result
+        else:
+            trust_score, trust_reasoning = 0.0, "Trust analysis failed"
 
         # Prepare response
         document_analysis = {
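Note: `run_analyses_parallel` is likewise defined in the missing `models/parallel_processor.py`. Its return type can be inferred from the keys unpacked above: a dict mapping analysis names to results. A hypothetical sketch:

```python
# Hypothetical sketch inferred from the keys the route unpacks ('summary',
# 'fraud', 'trust', ...). Assumes the analysis functions imported at the top
# of app.py are in scope.
import asyncio
from concurrent.futures import ThreadPoolExecutor

async def run_analyses_parallel(data, consolidated_text, image_analysis, pdf_analysis):
    loop = asyncio.get_running_loop()
    with ThreadPoolExecutor() as executor:
        jobs = {
            'summary': lambda: generate_property_summary(data),
            'fraud': lambda: classify_fraud(consolidated_text, data),
            'trust': lambda: generate_trust_score(consolidated_text, image_analysis, pdf_analysis),
            'suggestions': lambda: generate_suggestions(consolidated_text, data),
            'quality': lambda: assess_text_quality(data['description_translated']),
            'address': lambda: verify_address(data),
            'cross_validation': lambda: perform_cross_validation(data),
            'location': lambda: analyze_location(data),
            # ...price, legal, specs, and market jobs follow the same pattern.
        }
        futures = {key: loop.run_in_executor(executor, fn) for key, fn in jobs.items()}
        return {key: await fut for key, fut in futures.items()}
```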
@@ -553,13 +664,20 @@
             'image_analysis': image_results,
             'specs_verification': specs_verification,
             'market_analysis': market_analysis,
-            'images': images
+            'images': images,
+            'processing_time': {
+                'total_time': time.time() - start_time,
+                'analysis_time': analysis_time
+            }
         }
 
         # Calculate final verdict
         final_verdict = calculate_final_verdict(results)
         results['final_verdict'] = final_verdict
 
+        total_time = time.time() - start_time
+        logger.info(f"Total verification completed in {total_time:.2f} seconds")
+
         return jsonify(make_json_serializable(results))
 
     except Exception as e: