sksameermujahid committed (verified)
Commit: 0e5c14c
Parent(s): 6e3dbdb

Upload 22 files

Files changed (4):
  app.py  +85 -45
  models/cross_validation.py  +333 -153
  models/fraud_classification.py  +118 -135
  models/trust_score.py  +136 -17
app.py CHANGED
@@ -258,20 +258,48 @@ def calculate_final_verdict(results):
     specs_verification = results.get('specs_verification', {})
     quality_assessment = results.get('quality_assessment', {})

-    # Calculate fraud risk score
+    # CRITICAL: Check for fake data patterns in cross validation
+    fake_data_detected = False
+    fraudulent_issues = 0
+    high_severity_issues = 0
+    medium_severity_issues = 0
+    low_severity_issues = 0
+
+    if isinstance(cross_validation, list):
+        for issue in cross_validation:
+            if isinstance(issue, dict):
+                status = issue.get('status', '')
+                severity = issue.get('severity', 'low')
+
+                if status == 'fraudulent':
+                    fraudulent_issues += 1
+                    fake_data_detected = True
+                elif severity == 'high':
+                    high_severity_issues += 1
+                elif severity == 'medium':
+                    medium_severity_issues += 1
+                elif severity == 'low':
+                    low_severity_issues += 1
+
+    # Calculate fraud risk score - Much stricter
     fraud_score = 0.0
     fraud_level = fraud_classification.get('alert_level', 'minimal')
     fraud_alert_score = fraud_classification.get('alert_score', 0.0)

     fraud_score_mapping = {
-        'critical': 1.0,
-        'high': 0.8,
-        'medium': 0.6,
-        'low': 0.3,
-        'minimal': 0.1
+        'critical': 1.0,  # Increased back to full penalty
+        'high': 0.8,      # Increased back to full penalty
+        'medium': 0.6,    # Increased back to full penalty
+        'low': 0.4,       # Increased penalty
+        'minimal': 0.1    # Increased penalty
     }
     fraud_score = fraud_score_mapping.get(fraud_level, 0.1) * fraud_alert_score

+    # CRITICAL: Heavy penalty for fake data
+    if fake_data_detected:
+        fraud_score = max(fraud_score, 0.8)  # Minimum 80% fraud score for fake data
+        fraud_level = 'high'
+
     # Calculate trust score
     trust_score = 0.0
     if isinstance(trust_score_data, dict):
@@ -287,6 +315,10 @@ def calculate_final_verdict(results):
     else:
         trust_score = 0.0

+    # CRITICAL: Heavy penalty for fake data in trust score
+    if fake_data_detected:
+        trust_score = max(0.0, trust_score - 0.5)  # Reduce trust score by 50% for fake data
+
     # Calculate address verification score
     address_score = 0.0
     if address_verification and isinstance(address_verification, dict):
@@ -323,31 +355,16 @@ def calculate_final_verdict(results):
     score = quality_assessment.get('score', 0.0)
     quality_score = float(score) / 100.0 if score > 0 else 0.0

-    # Calculate cross validation issues
-    cross_validation_issues = 0
-    high_severity_issues = 0
-    medium_severity_issues = 0
-
-    if isinstance(cross_validation, list):
-        cross_validation_issues = len(cross_validation)
-        for issue in cross_validation:
-            if isinstance(issue, dict):
-                severity = issue.get('severity', 'low')
-                if severity == 'high':
-                    high_severity_issues += 1
-                elif severity == 'medium':
-                    medium_severity_issues += 1
-
-    # Weighted scoring system with improved weights
+    # Much stricter weighted scoring system
     weights = {
-        'fraud': 0.30,    # Increased weight for fraud detection
-        'trust': 0.25,    # Increased weight for trust score
+        'fraud': 0.35,    # Increased weight for fraud detection
+        'trust': 0.25,    # Keep trust score important
         'address': 0.15,  # Address verification
         'location': 0.10, # Location analysis
         'price': 0.10,    # Price analysis
-        'legal': 0.05,    # Legal analysis
-        'specs': 0.03,    # Specs verification
-        'quality': 0.02   # Quality assessment
+        'legal': 0.03,    # Legal analysis
+        'specs': 0.01,    # Specs verification
+        'quality': 0.01   # Quality assessment
     }

     # Calculate weighted score
@@ -366,32 +383,41 @@ def calculate_final_verdict(results):
     logger.info(f"Score components: fraud={fraud_score:.3f}, trust={trust_score:.3f}, address={address_score:.3f}, location={location_score:.3f}, price={price_score:.3f}, legal={legal_score:.3f}, specs={specs_score:.3f}, quality={quality_score:.3f}")
     logger.info(f"Weighted score before penalty: {weighted_score:.3f}")

-    # Adjust score based on cross validation issues
+    # Much stricter penalty system
     issue_penalty = 0.0
+    if fraudulent_issues > 0:
+        issue_penalty += fraudulent_issues * 0.15  # 15% penalty per fraudulent issue
     if high_severity_issues > 0:
-        issue_penalty += high_severity_issues * 0.08  # Reduced from 0.15 to 0.08 (8% penalty per high severity issue)
+        issue_penalty += high_severity_issues * 0.10  # 10% penalty per high severity issue
     if medium_severity_issues > 0:
-        issue_penalty += medium_severity_issues * 0.04  # Reduced from 0.08 to 0.04 (4% penalty per medium severity issue)
+        issue_penalty += medium_severity_issues * 0.05  # 5% penalty per medium severity issue
+    if low_severity_issues > 0:
+        issue_penalty += low_severity_issues * 0.02  # 2% penalty per low severity issue

     weighted_score = max(0.0, weighted_score - issue_penalty)

     logger.info(f"Issue penalty: {issue_penalty:.3f}, Final weighted score: {weighted_score:.3f}")

-    # Ensure minimum score for any valid data
-    if any([trust_score > 0, address_score > 0, location_score > 0, price_score > 0]):
-        weighted_score = max(0.15, weighted_score)  # Increased minimum from 0.1 to 0.15 (15% minimum)
+    # CRITICAL: Much stricter minimum score requirements
+    if fake_data_detected:
+        weighted_score = max(0.05, weighted_score)  # Maximum 5% score for fake data
+    elif any([trust_score > 0, address_score > 0, location_score > 0, price_score > 0]):
+        weighted_score = max(0.15, weighted_score)  # Minimum 15% for any valid data

-    # Determine verdict and risk level with improved logic
-    if weighted_score >= 0.75 and fraud_score < 0.2 and high_severity_issues == 0:
+    # Much stricter verdict determination
+    if fake_data_detected or fraudulent_issues > 0:
+        verdict = 'HIGH RISK LISTING'
+        risk_level = 'high'
+    elif weighted_score >= 0.75 and fraud_score < 0.2 and high_severity_issues == 0:
         verdict = 'VERIFIED REAL ESTATE LISTING'
         risk_level = 'low'
-    elif weighted_score >= 0.60 and fraud_score < 0.4 and high_severity_issues <= 1:
+    elif weighted_score >= 0.60 and fraud_score < 0.3 and high_severity_issues <= 1:
         verdict = 'LIKELY LEGITIMATE'
         risk_level = 'low'
-    elif weighted_score >= 0.40 and fraud_score < 0.6 and high_severity_issues <= 2:
+    elif weighted_score >= 0.40 and fraud_score < 0.5 and high_severity_issues <= 2:
         verdict = 'SUSPICIOUS LISTING'
         risk_level = 'medium'
-    elif fraud_score >= 0.6 or weighted_score < 0.20 or high_severity_issues >= 3:
+    elif fraud_score >= 0.5 or weighted_score < 0.20 or high_severity_issues >= 3:
         verdict = 'HIGH RISK LISTING'
         risk_level = 'high'
     elif weighted_score >= 0.20:
@@ -404,10 +430,16 @@ def calculate_final_verdict(results):
     # Generate detailed reasoning
     reasoning_parts = []

+    if fake_data_detected:
+        reasoning_parts.append("Fake data patterns detected")
+
+    if fraudulent_issues > 0:
+        reasoning_parts.append(f"{fraudulent_issues} fraudulent validation issues")
+
     if fraud_score > 0.3:
         reasoning_parts.append(f"Fraud risk detected (level: {fraud_level})")

-    if trust_score < 0.5:
+    if trust_score < 0.3:
         reasoning_parts.append(f"Low trust score ({trust_score:.1%})")

     if address_score < 0.5:
@@ -439,12 +471,16 @@ def calculate_final_verdict(results):
     # Ensure score is between 0 and 100
     overall_score = max(0, min(100, overall_score))

-    # Ensure minimum score for any valid data
-    if overall_score == 0 and any([trust_score > 0, address_score > 0, location_score > 0]):
+    # CRITICAL: Much stricter minimum score for fake data
+    if fake_data_detected:
+        overall_score = max(5, min(15, overall_score))  # 5-15% range for fake data
+    elif overall_score == 0 and any([trust_score > 0, address_score > 0, location_score > 0]):
         overall_score = 15  # Minimum 15% score if any component is valid

-    # Final score adjustment based on data quality
-    if high_severity_issues >= 3:
+    # Final score adjustment based on data quality - Much stricter
+    if fake_data_detected or fraudulent_issues > 0:
+        overall_score = max(5, min(15, overall_score))  # 5-15% for fake/fraudulent data
+    elif high_severity_issues >= 3:
         overall_score = max(10, overall_score)  # Minimum 10% for high risk
     elif high_severity_issues >= 1:
         overall_score = max(15, overall_score)  # Minimum 15% for medium risk
@@ -467,8 +503,12 @@ def calculate_final_verdict(results):
             'specs_score': specs_score,
             'quality_score': quality_score,
             'weighted_score': weighted_score,
-            'cross_validation_issues': cross_validation_issues,
-            'high_severity_issues': high_severity_issues
+            'cross_validation_issues': len(cross_validation) if isinstance(cross_validation, list) else 0,
+            'high_severity_issues': high_severity_issues,
+            'medium_severity_issues': medium_severity_issues,
+            'low_severity_issues': low_severity_issues,
+            'fraudulent_issues': fraudulent_issues,
+            'fake_data_detected': fake_data_detected
         }
     }
 
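To see the stricter penalty and verdict arithmetic above at work, here is a minimal worked example (hypothetical issue counts and pre-penalty score, not values from the repository):

    # Hypothetical counts of the kind perform_cross_validation returns
    fraudulent_issues, high, medium, low = 1, 2, 1, 3
    issue_penalty = (fraudulent_issues * 0.15 + high * 0.10
                     + medium * 0.05 + low * 0.02)   # 0.46
    weighted_score = max(0.0, 0.72 - issue_penalty)  # 0.72 -> 0.26

Even at 0.26, the new verdict logic never reaches the threshold comparisons: any fraudulent issue short-circuits to 'HIGH RISK LISTING', and the final overall score is clamped to the 5-15% band.
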
models/cross_validation.py CHANGED
@@ -69,11 +69,11 @@ def analyze_property_description(description: str, property_data: Dict[str, Any]
         'suspicious_patterns': []
     }

-    # Check room number consistency
+    # Check room number consistency - More lenient matching
     if 'bedroom' in analysis['room_mentions']:
         stated_bedrooms = safe_int_convert(property_data.get('bedrooms', 0))
         mentioned_bedrooms = max(analysis['room_mentions']['bedroom'])
-        if stated_bedrooms != mentioned_bedrooms:
+        if stated_bedrooms != mentioned_bedrooms and abs(stated_bedrooms - mentioned_bedrooms) > 1:
             analysis['inconsistencies'].append({
                 'type': 'bedroom_count',
                 'stated': stated_bedrooms,
@@ -84,7 +84,7 @@ def analyze_property_description(description: str, property_data: Dict[str, Any]
     if 'bathroom' in analysis['room_mentions']:
         stated_bathrooms = safe_float_convert(property_data.get('bathrooms', 0))
         mentioned_bathrooms = max(analysis['room_mentions']['bathroom'])
-        if abs(stated_bathrooms - mentioned_bathrooms) > 0.5:  # Allow for half bathrooms
+        if abs(stated_bathrooms - mentioned_bathrooms) > 1.0:  # More lenient for bathrooms
             analysis['inconsistencies'].append({
                 'type': 'bathroom_count',
                 'stated': stated_bathrooms,
@@ -92,30 +92,47 @@ def analyze_property_description(description: str, property_data: Dict[str, Any]
             'message': f'Description mentions {mentioned_bathrooms} bathrooms but listing states {stated_bathrooms} bathrooms.'
         })

-    # Check property type consistency
+    # Check property type consistency - More flexible matching
     property_type = property_data.get('property_type', '').lower()
-    if property_type and property_type not in description.lower():
-        analysis['inconsistencies'].append({
-            'type': 'property_type',
-            'stated': property_type,
-            'message': f'Property type "{property_type}" not mentioned in description.'
-        })
+    if property_type:
+        # Create flexible property type patterns
+        property_type_patterns = {
+            'apartment': ['apartment', 'flat', 'unit', 'condo'],
+            'house': ['house', 'home', 'villa', 'bungalow', 'townhouse'],
+            'plot': ['plot', 'land', 'site'],
+            'commercial': ['commercial', 'office', 'shop', 'retail']
+        }
+
+        # Check if property type is mentioned in description
+        description_lower = description.lower()
+        type_found = False
+
+        for category, patterns in property_type_patterns.items():
+            if property_type in category or any(pattern in property_type for pattern in patterns):
+                if any(pattern in description_lower for pattern in patterns):
+                    type_found = True
+                    break
+
+        # Only flag if property type is completely missing and description is substantial
+        if not type_found and len(description) > 100:
+            analysis['inconsistencies'].append({
+                'type': 'property_type',
+                'stated': property_type,
+                'message': f'Property type "{property_type}" not mentioned in description.'
+            })

-    # Check for suspicious patterns
-    suspicious_patterns = [
-        (r'too good to be true', 'Unrealistic claims'),
-        (r'guaranteed.*return', 'Suspicious return promises'),
-        (r'no.*verification', 'Avoiding verification'),
-        (r'urgent.*sale', 'Pressure tactics'),
-        (r'below.*market', 'Unrealistic pricing')
+    # Check for suspicious patterns - More lenient
+    suspicious_keywords = [
+        'urgent sale', 'quick sale', 'no documents needed', 'cash only',
+        'below market', 'distress sale', 'owner abroad', 'inheritance'
     ]

-    for pattern, reason in suspicious_patterns:
-        if re.search(pattern, description.lower()):
+    description_lower = description.lower()
+    for keyword in suspicious_keywords:
+        if keyword in description_lower:
             analysis['suspicious_patterns'].append({
-                'pattern': pattern,
-                'reason': reason,
-                'message': f'Suspicious pattern detected: {reason}'
+                'pattern': keyword,
+                'message': f'Description contains potentially suspicious phrase: "{keyword}"'
             })

     return analysis
@@ -425,149 +442,312 @@ def analyze_documents_and_images(data: Dict[str, Any]) -> Dict[str, Any]:
     return analysis

 def perform_cross_validation(data: Dict[str, Any]) -> List[Dict[str, Any]]:
-    """Perform comprehensive cross-validation of property data."""
-    cross_checks = []
-    classifier = None
-
+    """
+    Perform comprehensive cross-validation of property data.
+    """
     try:
-        # Load the tiny model for classification with fallback
-        try:
-            classifier = load_model("zero-shot-classification")
-        except Exception as e:
-            logger.warning(f"Could not load classifier for cross validation: {str(e)}")
-            classifier = None
-
-        # Initialize analysis sections
         analysis_sections = {
             'basic_info': [],
             'location': [],
             'specifications': [],
             'documents': [],
-            'fraud_indicators': []
+            'images': [],
+            'pricing': [],
+            'description': []
         }

-        # Process and validate data
-        processed_data = {}
-
-        # Basic Information Validation
-        property_name = str(data.get('property_name', '')).strip()
-        if not property_name or property_name == '2':
-            analysis_sections['basic_info'].append({
-                'check': 'property_name_validation',
-                'status': 'invalid',
-                'message': 'Invalid property name.',
-                'details': 'Please provide a descriptive name for the property.',
-                'severity': 'high',
-                'recommendation': 'Add a proper name for the property.'
-            })
+        # CRITICAL: Check for obvious fake data patterns first
+        fake_data_detected = False
+        fake_indicators = []
+
+        # Check for numeric-only property names
+        property_name = data.get('property_name', '').strip()
+        if property_name.isdigit() or property_name in ['1', '2', '3', '4', '5']:
+            fake_data_detected = True
+            fake_indicators.append("Property name is just a number")
+            analysis_sections['basic_info'].append({
+                'check': 'property_name',
+                'status': 'fraudulent',
+                'message': 'Property name is just a number (highly suspicious).',
+                'details': f'Property name: {property_name}',
+                'severity': 'high',
+                'recommendation': 'Provide a real property name'
+            })
+
+        # Check for suspiciously low values
+        market_value = safe_float_convert(data.get('market_value', 0))
+        if market_value <= 10:  # Extremely low threshold
+            fake_data_detected = True
+            fake_indicators.append("Suspiciously low market value")
+            analysis_sections['pricing'].append({
+                'check': 'market_value',
+                'status': 'fraudulent',
+                'message': 'Market value is suspiciously low.',
+                'details': f'Market value: ₹{market_value:,.0f}',
+                'severity': 'high',
+                'recommendation': 'Provide realistic market value'
+            })
+
+        # Check for unrealistic property sizes
+        square_feet = safe_float_convert(data.get('sq_ft', 0))
+        if square_feet <= 10:  # Extremely small
+            fake_data_detected = True
+            fake_indicators.append("Unrealistic property size")
+            analysis_sections['specifications'].append({
+                'check': 'square_feet',
+                'status': 'fraudulent',
+                'message': 'Property size is unrealistically small.',
+                'details': f'Square feet: {square_feet}',
+                'severity': 'high',
+                'recommendation': 'Provide realistic property size'
+            })
+
+        # Check for repeated suspicious numbers
+        all_values = [
+            str(data.get('bedrooms', '')),
+            str(data.get('bathrooms', '')),
+            str(data.get('total_rooms', '')),
+            str(data.get('parking', '')),
+            str(data.get('year_built', '')),
+            str(data.get('market_value', '')),
+            str(data.get('sq_ft', ''))
+        ]
+
+        numeric_values = [v for v in all_values if v.isdigit()]
+        if len(numeric_values) >= 3:
+            unique_values = set(numeric_values)
+            if len(unique_values) <= 2:  # Most values are the same
+                fake_data_detected = True
+                fake_indicators.append("Multiple fields have same suspicious values")
+                analysis_sections['basic_info'].append({
+                    'check': 'repeated_values',
+                    'status': 'fraudulent',
+                    'message': 'Multiple fields contain the same suspicious values.',
+                    'details': f'Repeated values: {unique_values}',
+                    'severity': 'high',
+                    'recommendation': 'Provide realistic and varied property details'
+                })
+
+        # Basic information validation - Handle flat data structure
+        if not property_name or len(property_name) < 3:
+            analysis_sections['basic_info'].append({
+                'check': 'property_name',
+                'status': 'missing',
+                'message': 'Property name is required.',
+                'details': 'Please provide a valid property name.',
+                'severity': 'high' if fake_data_detected else 'medium',
+                'recommendation': 'Provide a valid property name (not just numbers)'
+            })

-        property_type = str(data.get('property_type', '')).strip()
+        # Property type validation
+        property_type = data.get('property_type', '').strip()
         if not property_type:
             analysis_sections['basic_info'].append({
-                'check': 'property_type_validation',
+                'check': 'property_type',
                 'status': 'missing',
                 'message': 'Property type is required.',
-                'details': 'Please specify the type of property.',
-                'severity': 'high',
-                'recommendation': 'Select a property type.'
+                'details': 'Please specify the property type.',
+                'severity': 'high' if fake_data_detected else 'medium',
+                'recommendation': 'Specify property type (apartment, house, etc.)'
             })

-        status = str(data.get('status', '')).strip()
+        # Status validation
+        status = data.get('status', '').strip()
         if not status:
             analysis_sections['basic_info'].append({
-                'check': 'status_validation',
+                'check': 'status',
                 'status': 'missing',
                 'message': 'Property status is required.',
-                'details': 'Please specify if the property is for sale or rent.',
-                'severity': 'high',
-                'recommendation': 'Select the property status.'
-            })
-
-        # Market Value Analysis
-        market_value = safe_float_convert(data.get('market_value', 0))
-        if market_value <= 0:
-            analysis_sections['basic_info'].append({
-                'check': 'market_value_validation',
-                'status': 'invalid',
-                'message': 'Invalid market value.',
-                'details': 'The market value must be a realistic amount.',
-                'severity': 'high',
-                'recommendation': 'Please provide a valid market value.'
-            })
-
-        # Location Analysis
-        location_analysis = analyze_location_consistency(data)
-        for inconsistency in location_analysis['inconsistencies']:
-            analysis_sections['location'].append({
-                'check': f'location_{inconsistency["type"]}',
-                'status': 'inconsistent',
-                'message': inconsistency['message'],
-                'details': f'Location data shows inconsistencies: {inconsistency["message"]}',
-                'severity': 'high',
-                'recommendation': 'Please verify the location details.'
-            })
-
-        # Property Specifications Analysis
-        specs_analysis = analyze_property_specifications(data)
-        for inconsistency in specs_analysis['inconsistencies']:
-            analysis_sections['specifications'].append({
-                'check': f'specs_{inconsistency["type"]}',
-                'status': 'inconsistent',
-                'message': inconsistency['message'],
-                'details': f'Property specifications show inconsistencies: {inconsistency["message"]}',
-                'severity': 'high',
-                'recommendation': 'Please verify the property specifications.'
-            })
-
-        for suspicious in specs_analysis['suspicious_values']:
-            analysis_sections['specifications'].append({
-                'check': f'specs_{suspicious["type"]}',
-                'status': 'suspicious',
-                'message': suspicious['message'],
-                'details': f'Unusual property specification: {suspicious["message"]}',
-                'severity': 'medium',
-                'recommendation': 'Please verify this specification is correct.'
-            })
-
-        # Description Analysis
-        description = str(data.get('description', '')).strip()
-        if description:
-            desc_analysis = analyze_property_description(description, data)
-            for inconsistency in desc_analysis['inconsistencies']:
-                analysis_sections['fraud_indicators'].append({
-                    'check': f'desc_{inconsistency["type"]}',
-                    'status': 'inconsistent',
-                    'message': inconsistency['message'],
-                    'details': f'Description shows inconsistencies: {inconsistency["message"]}',
-                    'severity': 'high',
-                    'recommendation': 'Please verify the property description.'
-                })
-
-            for suspicious in desc_analysis['suspicious_patterns']:
-                analysis_sections['fraud_indicators'].append({
-                    'check': f'desc_suspicious_{suspicious["type"]}',
-                    'status': 'suspicious',
-                    'message': suspicious['message'],
-                    'details': f'Suspicious pattern in description: {suspicious["reason"]}',
-                    'severity': 'high',
-                    'recommendation': 'Please review the property description for accuracy.'
-                })
-
-        # Documents & Images Analysis
+                'details': 'Please specify if property is for sale or rent.',
+                'severity': 'high' if fake_data_detected else 'medium',
+                'recommendation': 'Specify property status (for sale, for rent, etc.)'
+            })
+
+        # Location validation - Handle flat data structure
+        address = data.get('address', '').strip()
+        city = data.get('city', '').strip()
+        state = data.get('state', '').strip()
+        postal_code = data.get('postal_code', '').strip()
+
+        if not address:
+            analysis_sections['location'].append({
+                'check': 'address',
+                'status': 'missing',
+                'message': 'Property address is required.',
+                'details': 'Please provide the complete property address.',
+                'severity': 'high',
+                'recommendation': 'Provide complete property address'
+            })
+
+        if not city:
+            analysis_sections['location'].append({
+                'check': 'city',
+                'status': 'missing',
+                'message': 'City is required.',
+                'details': 'Please specify the city.',
+                'severity': 'high',
+                'recommendation': 'Specify the city'
+            })
+
+        if not state:
+            analysis_sections['location'].append({
+                'check': 'state',
+                'status': 'missing',
+                'message': 'State is required.',
+                'details': 'Please specify the state.',
+                'severity': 'high',
+                'recommendation': 'Specify the state'
+            })
+
+        # Postal code validation - more lenient
+        if postal_code:
+            if not postal_code.isdigit() or len(postal_code) < 5:
+                analysis_sections['location'].append({
+                    'check': 'postal_code',
+                    'status': 'invalid',
+                    'message': 'Invalid postal code format.',
+                    'details': f'Postal code: {postal_code}',
+                    'severity': 'low',
+                    'recommendation': 'Provide a valid postal code'
+                })
+
+        # Specifications validation - Handle flat data structure
+        bedrooms = safe_int_convert(data.get('bedrooms', 0))
+        bathrooms = safe_float_convert(data.get('bathrooms', 0))
+        year_built = safe_int_convert(data.get('year_built', 0))
+
+        # Much stricter validation ranges
+        if bedrooms <= 0 or bedrooms > 20:
+            analysis_sections['specifications'].append({
+                'check': 'bedrooms',
+                'status': 'fraudulent' if bedrooms <= 0 else 'suspicious',
+                'message': 'Unrealistic number of bedrooms.',
+                'details': f'Bedrooms: {bedrooms}',
+                'severity': 'high' if bedrooms <= 0 else 'medium',
+                'recommendation': 'Provide realistic bedroom count'
+            })
+
+        if bathrooms <= 0 or bathrooms > 15:
+            analysis_sections['specifications'].append({
+                'check': 'bathrooms',
+                'status': 'fraudulent' if bathrooms <= 0 else 'suspicious',
+                'message': 'Unrealistic number of bathrooms.',
+                'details': f'Bathrooms: {bathrooms}',
+                'severity': 'high' if bathrooms <= 0 else 'medium',
+                'recommendation': 'Provide realistic bathroom count'
+            })
+
+        current_year = datetime.now().year
+        if year_built > current_year or year_built < 1800:
+            analysis_sections['specifications'].append({
+                'check': 'year_built',
+                'status': 'suspicious',
+                'message': 'Unrealistic year built.',
+                'details': f'Year built: {year_built}',
+                'severity': 'medium',
+                'recommendation': 'Provide realistic year built'
+            })
+
+        # Pricing validation - Handle flat data structure
+        if market_value <= 0:
+            analysis_sections['pricing'].append({
+                'check': 'market_value',
+                'status': 'missing',
+                'message': 'Market value is required.',
+                'details': 'Please provide the property market value.',
+                'severity': 'high',
+                'recommendation': 'Provide property market value'
+            })
+        elif market_value < 100000:  # Minimum reasonable price
+            analysis_sections['pricing'].append({
+                'check': 'market_value',
+                'status': 'fraudulent' if market_value < 10000 else 'suspicious',
+                'message': 'Unusually low market value.',
+                'details': f'Market value: ₹{market_value:,.0f}',
+                'severity': 'high' if market_value < 10000 else 'medium',
+                'recommendation': 'Verify market value is accurate'
+            })
+
+        # Description validation
+        description = data.get('description', '').strip()
+        if description:
+            # Check for fake description patterns
+            if description.isdigit() or description in ['1', '2', '3', '4', '5']:
+                fake_data_detected = True
+                fake_indicators.append("Description is just a number")
+                analysis_sections['description'].append({
+                    'check': 'description',
+                    'status': 'fraudulent',
+                    'message': 'Description is just a number (highly suspicious).',
+                    'details': f'Description: {description}',
+                    'severity': 'high',
+                    'recommendation': 'Provide a real property description'
+                })
+            elif len(description) < 50:
+                analysis_sections['description'].append({
+                    'check': 'description',
+                    'status': 'insufficient',
+                    'message': 'Property description is too short.',
+                    'details': f'Description length: {len(description)} characters',
+                    'severity': 'medium',
+                    'recommendation': 'Provide detailed property description'
+                })
+            else:
+                # Create property data dict for description analysis
+                property_data = {
+                    'bedrooms': bedrooms,
+                    'bathrooms': bathrooms,
+                    'property_type': property_type
+                }
+                description_analysis = analyze_property_description(description, property_data)
+
+                for inconsistency in description_analysis['inconsistencies']:
+                    analysis_sections['description'].append({
+                        'check': f"desc_{inconsistency['type']}",
+                        'status': 'inconsistent',
+                        'message': inconsistency['message'],
+                        'details': f"Stated: {inconsistency.get('stated', 'N/A')}, Mentioned: {inconsistency.get('mentioned', 'N/A')}",
+                        'severity': 'low',
+                        'recommendation': 'Review and update property description for consistency'
+                    })
+
+                for pattern in description_analysis['suspicious_patterns']:
+                    analysis_sections['description'].append({
+                        'check': 'desc_suspicious_pattern',
+                        'status': 'suspicious',
+                        'message': pattern['message'],
+                        'details': pattern['pattern'],
+                        'severity': 'medium',
+                        'recommendation': 'Review description for suspicious language'
+                    })
+        else:
+            analysis_sections['description'].append({
+                'check': 'description',
+                'status': 'missing',
+                'message': 'Property description is required.',
+                'details': 'Please provide a detailed property description.',
+                'severity': 'high' if fake_data_detected else 'medium',
+                'recommendation': 'Add more detailed property description'
+            })
+
+        # Media analysis - Handle flat data structure
         media_analysis = analyze_documents_and_images(data)

-        # Helper function to check if files exist in data
         def check_files_exist(files):
+            """Improved file existence check"""
             if not files:
                 return False
             if isinstance(files, str):
                 files = [files]
-            return any(f and isinstance(f, str) and f.strip() and not f.endswith('×') for f in files)
-
-        # Add document analysis results
+            # Check for actual file content, not just names
+            return any(f and isinstance(f, str) and f.strip() and
+                       not f.endswith('×') and
+                       (f.endswith('.pdf') or f.endswith('.jpg') or f.endswith('.jpeg') or f.endswith('.png'))
+                       for f in files)
+
+        # Document analysis - More lenient
+        documents = data.get('documents', [])
         if media_analysis['total_documents'] == 0:
-            # Check if documents were actually provided in the data
-            documents = data.get('documents', [])
             if check_files_exist(documents):
                 # Files exist but couldn't be analyzed
                 analysis_sections['documents'].append({
@@ -575,16 +755,16 @@ def perform_cross_validation(data: Dict[str, Any]) -> List[Dict[str, Any]]:
                     'status': 'error',
                     'message': 'Could not analyze provided documents.',
                     'details': 'Please ensure documents are in PDF format and are accessible.',
-                    'severity': 'high',
+                    'severity': 'medium',
                     'recommendation': 'Please check document format and try again.'
                 })
             else:
                 analysis_sections['documents'].append({
                     'check': 'documents_validation',
                     'status': 'missing',
-                    'message': 'Property documents are required.',
+                    'message': 'Property documents are recommended.',
                     'details': 'Please upload relevant property documents in PDF format.',
-                    'severity': 'high',
+                    'severity': 'medium',
                     'recommendation': 'Upload property documents in PDF format.'
                 })
         else:
@@ -595,7 +775,7 @@ def perform_cross_validation(data: Dict[str, Any]) -> List[Dict[str, Any]]:
                         'status': 'error',
                         'message': f'Error analyzing document: {doc["error"]}',
                         'details': doc['summary'],
-                        'severity': 'high',
+                        'severity': 'medium',
                         'recommendation': 'Please ensure the document is a valid PDF file.'
                     })
                 elif doc['authenticity'] != 'verified':
@@ -604,14 +784,13 @@ def perform_cross_validation(data: Dict[str, Any]) -> List[Dict[str, Any]]:
                         'status': 'unverified',
                         'message': 'Document authenticity could not be verified.',
                         'details': doc['summary'],
-                        'severity': 'medium',
+                        'severity': 'low',
                         'recommendation': 'Please provide clear, legible documents.'
                     })

-        # Add image analysis results
+        # Image analysis - More lenient
+        images = data.get('images', [])
         if media_analysis['total_images'] == 0:
-            # Check if images were actually provided in the data
-            images = data.get('images', [])
             if check_files_exist(images):
                 # Files exist but couldn't be analyzed
                 analysis_sections['documents'].append({
@@ -619,16 +798,16 @@ def perform_cross_validation(data: Dict[str, Any]) -> List[Dict[str, Any]]:
                     'status': 'error',
                     'message': 'Could not analyze provided images.',
                     'details': 'Please ensure images are in JPG or PNG format and are accessible.',
-                    'severity': 'high',
+                    'severity': 'medium',
                     'recommendation': 'Please check image format and try again.'
                 })
             else:
                 analysis_sections['documents'].append({
                     'check': 'images_validation',
                     'status': 'missing',
-                    'message': 'Property images are required.',
+                    'message': 'Property images are recommended.',
                     'details': 'Please upload at least one image of the property.',
-                    'severity': 'high',
+                    'severity': 'medium',
                     'recommendation': 'Upload property images in JPG or PNG format.'
                 })
         else:
@@ -639,7 +818,7 @@ def perform_cross_validation(data: Dict[str, Any]) -> List[Dict[str, Any]]:
                         'status': 'error',
                         'message': f'Error analyzing image: {img["error"]}',
                         'details': img['description'],
-                        'severity': 'high',
+                        'severity': 'medium',
                         'recommendation': 'Please ensure the image is in JPG or PNG format.'
                     })
                 elif not img['is_property_image']:
@@ -648,7 +827,7 @@ def perform_cross_validation(data: Dict[str, Any]) -> List[Dict[str, Any]]:
                         'status': 'unverified',
                         'message': 'Image may not be property-related.',
                        'details': img['description'],
-                        'severity': 'medium',
+                        'severity': 'low',
                         'recommendation': 'Please provide clear property images.'
                     })
@@ -657,17 +836,10 @@ def perform_cross_validation(data: Dict[str, Any]) -> List[Dict[str, Any]]:
         analysis_sections['documents'].append({
             'check': 'media_verification_scores',
             'status': 'valid',
-            'message': 'Media Verification Scores',
-            'details': {
-                'document_verification_score': media_analysis['document_verification_score'],
-                'image_verification_score': media_analysis['image_verification_score'],
-                'total_documents': media_analysis['total_documents'],
-                'total_images': media_analysis['total_images'],
-                'verified_documents': media_analysis['verified_documents'],
-                'verified_images': media_analysis['verified_images']
-            },
+            'message': 'Media verification completed.',
+            'details': f'Documents: {media_analysis["total_documents"]}, Images: {media_analysis["total_images"]}',
             'severity': 'low',
-            'recommendation': 'Review media verification scores for property authenticity.'
+            'recommendation': 'Media verification successful.'
         })

     # Generate Summary
@@ -686,13 +858,16 @@ def perform_cross_validation(data: Dict[str, Any]) -> List[Dict[str, Any]]:
             'inconsistent': 0,
             'missing': 0,
             'error': 0,
-            'unverified': 0
+            'unverified': 0,
+            'fraudulent': 0
         },
         'fraud_risk_level': 'low',
         'media_verification': {
             'document_score': media_analysis['document_verification_score'],
             'image_score': media_analysis['image_verification_score']
-        }
+        },
+        'fake_data_detected': fake_data_detected,
+        'fake_indicators': fake_indicators
     }

     # Calculate statistics
@@ -703,12 +878,16 @@ def perform_cross_validation(data: Dict[str, Any]) -> List[Dict[str, Any]]:
             if check['status'] in summary['status_counts']:
                 summary['status_counts'][check['status']] += 1

-        # Calculate fraud risk level
+        # Calculate fraud risk level - Much stricter
         high_severity_issues = summary['severity_counts']['high']
-        if high_severity_issues > 5:
+        fraudulent_issues = summary['status_counts']['fraudulent']
+
+        if fake_data_detected or fraudulent_issues > 0 or high_severity_issues > 3:
             summary['fraud_risk_level'] = 'high'
-        elif high_severity_issues > 2:
+        elif high_severity_issues > 1:
             summary['fraud_risk_level'] = 'medium'
+        else:
+            summary['fraud_risk_level'] = 'low'

         # Add summary to analysis
         analysis_sections['summary'] = [{
@@ -720,21 +899,22 @@ def perform_cross_validation(data: Dict[str, Any]) -> List[Dict[str, Any]]:
             'recommendation': f'Fraud Risk Level: {summary["fraud_risk_level"].upper()}. Review all findings and address high severity issues first.'
         }]

-        # Convert analysis sections to flat list
+        # Flatten all sections into a single list
+        all_checks = []
         for section_name, checks in analysis_sections.items():
             for check in checks:
-                check['category'] = section_name
-                cross_checks.append(check)
+                check['section'] = section_name
+                all_checks.append(check)

-        return cross_checks
+        return all_checks

     except Exception as e:
-        logger.error(f"Error performing cross validation: {str(e)}")
+        logger.error(f"Error in cross validation: {str(e)}")
         return [{
             'check': 'cross_validation_error',
             'status': 'error',
-            'message': f'Error during validation: {str(e)}',
-            'category': 'System Error',
-            'severity': 'high',
+            'message': f'Cross validation failed: {str(e)}',
+            'details': 'An error occurred during cross validation.',
+            'severity': 'medium',
             'recommendation': 'Please try again or contact support.'
         }]
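
The core of the new fake-data detection is the repeated-values heuristic. A self-contained sketch of just that check (field names assumed to match the flat listing payload handled above):

    def looks_fake(data):
        # Three or more numeric fields collapsing to at most two
        # distinct values is treated as a fabricated listing.
        fields = ['bedrooms', 'bathrooms', 'total_rooms', 'parking',
                  'year_built', 'market_value', 'sq_ft']
        numeric = [str(data.get(f, '')) for f in fields
                   if str(data.get(f, '')).isdigit()]
        return len(numeric) >= 3 and len(set(numeric)) <= 2

    looks_fake({'bedrooms': 2, 'bathrooms': 2, 'sq_ft': 2})     # True
    looks_fake({'bedrooms': 3, 'bathrooms': 2, 'sq_ft': 1200})  # False

Note that the str(...).isdigit() filter means float values such as bathrooms=2.5 never enter the comparison, so the check is biased toward integer-only payloads.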
models/fraud_classification.py CHANGED
@@ -1,178 +1,161 @@
1
  # models/fraud_classification.py
2
 
3
- import re
4
  from .model_loader import load_model
5
  from .logging_config import logger
 
6
 
7
  def classify_fraud(property_details, description):
8
  """
9
- Classify the risk of fraud in a property listing using zero-shot classification.
10
- This function analyzes property details and description to identify potential fraud indicators.
11
  """
12
  try:
13
- # Initialize fraud classification result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  fraud_classification = {
15
  'alert_level': 'minimal',
16
  'alert_score': 0.0,
 
17
  'high_risk': [],
18
  'medium_risk': [],
19
  'low_risk': [],
20
- 'confidence_scores': {}
21
  }
22
-
23
- # Accept property_details as dict or str
24
- if isinstance(property_details, dict):
25
- details_str = '\n'.join(f"{k}: {v}" for k, v in property_details.items())
26
- else:
27
- details_str = str(property_details)
28
- text_to_analyze = f"{details_str}\n{description if description else ''}"
29
-
30
- # Define risk categories for zero-shot classification
31
- risk_categories = [
32
- "fraudulent listing",
33
- "misleading information",
34
- "fake property",
35
- "scam attempt",
36
- "legitimate listing"
37
- ]
38
-
39
- # Perform zero-shot classification with better error handling
40
- try:
41
- classifier = load_model("zero-shot-classification")
42
- if hasattr(classifier, 'task_type') and classifier.task_type == "zero-shot-classification":
43
- # Using fallback classifier
44
- result = classifier(text_to_analyze, risk_categories)
45
- else:
46
- # Using actual model
47
- result = classifier(text_to_analyze, risk_categories, multi_label=True)
48
- except Exception as e:
49
- logger.error(f"Model error in fraud classification: {str(e)}")
50
- # Use simple keyword-based fallback
51
- result = simple_fraud_classification(text_to_analyze, risk_categories)
52
-
53
- # Process classification results
54
  fraud_score = 0.0
55
  if isinstance(result, dict) and 'scores' in result:
56
  for label, score in zip(result.get('labels', []), result.get('scores', [])):
57
  if label != "legitimate listing":
58
  try:
59
  score_val = float(score)
 
 
 
 
 
 
 
60
  except Exception:
61
  score_val = 0.0
62
  fraud_score += score_val
63
  fraud_classification['confidence_scores'][label] = score_val
64
  else:
65
  # Handle fallback result
66
- fraud_score = 0.1 # Default low score for fallback
67
 
68
- # Normalize fraud score to 0-1 range
69
  try:
70
- fraud_score = min(1.0, fraud_score / (len(risk_categories) - 1))
71
  except Exception:
72
  fraud_score = 0.0
73
  fraud_classification['alert_score'] = fraud_score
74
-
75
- # Define fraud indicators to check
76
- fraud_indicators = {
77
- 'high_risk': [
78
- r'urgent|immediate|hurry|limited time|special offer',
79
- r'bank|transfer|wire|payment|money',
80
- r'fake|scam|fraud|illegal|unauthorized',
81
- r'guaranteed|promised|assured|certain',
82
- r'contact.*whatsapp|whatsapp.*contact',
83
- r'price.*negotiable|negotiable.*price',
84
- r'no.*documents|documents.*not.*required',
85
- r'cash.*only|only.*cash',
86
- r'off.*market|market.*off',
87
- r'under.*table|table.*under'
88
- ],
89
- 'medium_risk': [
90
- r'unverified|unconfirmed|unchecked',
91
- r'partial|incomplete|missing',
92
- r'different.*location|location.*different',
93
- r'price.*increased|increased.*price',
94
- r'no.*photos|photos.*not.*available',
95
- r'contact.*email|email.*contact',
96
- r'agent.*not.*available|not.*available.*agent',
97
- r'property.*not.*viewable|not.*viewable.*property',
98
- r'price.*changed|changed.*price',
99
- r'details.*updated|updated.*details'
100
- ],
101
- 'low_risk': [
102
- r'new.*listing|listing.*new',
103
- r'recent.*update|update.*recent',
104
- r'price.*reduced|reduced.*price',
105
- r'contact.*phone|phone.*contact',
106
- r'agent.*available|available.*agent',
107
- r'property.*viewable|viewable.*property',
108
- r'photos.*available|available.*photos',
109
- r'documents.*available|available.*documents',
110
- r'price.*fixed|fixed.*price',
111
- r'details.*complete|complete.*details'
112
- ]
113
- }
114
-
115
- # Check for fraud indicators in text
116
- for risk_level, patterns in fraud_indicators.items():
117
- for pattern in patterns:
118
- try:
119
- matches = re.finditer(pattern, text_to_analyze, re.IGNORECASE)
120
- for match in matches:
121
- indicator = match.group(0)
122
- if indicator not in fraud_classification[risk_level]:
123
- fraud_classification[risk_level].append(indicator)
124
- except Exception as e:
125
- logger.warning(f"Regex error in fraud indicator pattern '{pattern}': {str(e)}")
126
-
127
- # Determine alert level based on fraud score and indicators
128
- try:
129
- if fraud_score > 0.7 or len(fraud_classification['high_risk']) > 0:
130
- fraud_classification['alert_level'] = 'critical'
131
- elif fraud_score > 0.5 or len(fraud_classification['medium_risk']) > 2:
132
- fraud_classification['alert_level'] = 'high'
133
- elif fraud_score > 0.3 or len(fraud_classification['medium_risk']) > 0:
134
- fraud_classification['alert_level'] = 'medium'
135
- elif fraud_score > 0.1 or len(fraud_classification['low_risk']) > 0:
136
- fraud_classification['alert_level'] = 'low'
137
- else:
138
- fraud_classification['alert_level'] = 'minimal'
139
- except Exception as e:
140
- logger.warning(f"Error determining alert level: {str(e)}")
141
  fraud_classification['alert_level'] = 'minimal'
142
-
143
- # Additional checks for common fraud patterns
144
- try:
145
- if re.search(r'price.*too.*good|too.*good.*price', text_to_analyze, re.IGNORECASE):
146
- fraud_classification['high_risk'].append("Unrealistically low price")
147
- if re.search(r'no.*inspection|inspection.*not.*allowed', text_to_analyze, re.IGNORECASE):
148
- fraud_classification['high_risk'].append("No property inspection allowed")
149
- if re.search(r'owner.*abroad|abroad.*owner', text_to_analyze, re.IGNORECASE):
150
- fraud_classification['medium_risk'].append("Owner claims to be abroad")
151
- if re.search(r'agent.*unavailable|unavailable.*agent', text_to_analyze, re.IGNORECASE):
152
- fraud_classification['medium_risk'].append("Agent unavailable for verification")
153
- except Exception as e:
154
- logger.warning(f"Error in additional fraud pattern checks: {str(e)}")
155
-
156
- # Check for inconsistencies in property details
157
- try:
158
- if isinstance(property_details, dict) and 'price' in property_details and 'market_value' in property_details:
159
- price_val = float(str(property_details['price']).replace(',', '').replace('₹', '').strip())
160
- market_value_val = float(str(property_details['market_value']).replace(',', '').replace('₹', '').strip())
161
- if price_val < market_value_val * 0.5:
162
- fraud_classification['high_risk'].append("Price significantly below market value")
163
- except Exception as e:
164
- logger.warning(f"Error checking price/market_value: {str(e)}")
165
-
166
  return fraud_classification
 
167
  except Exception as e:
168
  logger.error(f"Error in fraud classification: {str(e)}")
169
  return {
170
- 'alert_level': 'error',
171
- 'alert_score': 1.0,
172
- 'high_risk': [f"Error in fraud classification: {str(e)}"],
 
173
  'medium_risk': [],
174
  'low_risk': [],
175
- 'confidence_scores': {}
176
  }
177
 
178
  def simple_fraud_classification(text, categories):
 
1
  # models/fraud_classification.py

  from .model_loader import load_model
  from .logging_config import logger
+ import re

  def classify_fraud(property_details, description):
      """
+     Classify the fraud risk of a property listing using AI.
      """
      try:
+         # Combine property details and description for analysis
+         text_to_analyze = f"{property_details} {description}"
+
+         # CRITICAL: Check for obvious fake data patterns first
+         fake_patterns = [
+             r'\b\d+\s*$',  # Numbers at end of lines
+             r'^\d+$',  # Only numbers
+             r'\b\d{1,2}\s*$',  # Single or double digits
+             r'price.*\d{1,3}',  # Very low prices
+             r'size.*\d{1,3}',  # Very small sizes
+             r'bedrooms.*\d{1,2}',  # Very few bedrooms
+             r'bathrooms.*\d{1,2}',  # Very few bathrooms
+         ]
+
+         fake_detected = False
+         for pattern in fake_patterns:
+             if re.search(pattern, text_to_analyze.lower()):
+                 fake_detected = True
+                 break
+
+         # Check for repeated numbers (like "2, 2, 2, 2")
+         numbers = re.findall(r'\b\d+\b', text_to_analyze.lower())
+         if len(numbers) >= 3:
+             unique_numbers = set(numbers)
+             if len(unique_numbers) <= 2:  # If most numbers are the same
+                 fake_detected = True
+
+         # Check for extremely low values
+         if any(word in text_to_analyze.lower() for word in ['₹2', '₹1', '₹3', '₹4', '₹5']):
+             fake_detected = True
+
+         # Check for very small property sizes
+         if any(word in text_to_analyze.lower() for word in ['2 sq ft', '1 sq ft', '3 sq ft', '4 sq ft', '5 sq ft']):
+             fake_detected = True
+
+         # If fake data is detected, return high fraud score immediately
+         if fake_detected:
+             return {
+                 'alert_level': 'high',
+                 'alert_score': 0.9,  # 90% fraud score for fake data
+                 'confidence_scores': {
+                     'high risk listing': 0.9,
+                     'potential fraud': 0.8,
+                     'suspicious listing': 0.7,
+                     'legitimate listing': 0.1
+                 },
+                 'high_risk': ['Fake data patterns detected'],
+                 'medium_risk': [],
+                 'low_risk': [],
+                 'reasoning': 'This property was classified as high risk due to detected fake data patterns (repeated numbers, suspiciously low values, unrealistic specifications).'
+             }
+
+         # Use a more lenient classification approach for legitimate-looking data
+         classifier = load_model("zero-shot-classification", "facebook/bart-large-mnli")
+
+         # More balanced risk categories
+         risk_categories = [
+             "legitimate listing",
+             "suspicious listing",
+             "potential fraud",
+             "high risk listing"
+         ]
+
+         # Classify the text
+         result = classifier(text_to_analyze[:1000], risk_categories, multi_label=False)
+
          fraud_classification = {
              'alert_level': 'minimal',
              'alert_score': 0.0,
+             'confidence_scores': {},
              'high_risk': [],
              'medium_risk': [],
              'low_risk': [],
+             'reasoning': ''
          }
+
+         # Process classification results - More lenient for legitimate data
          fraud_score = 0.0
          if isinstance(result, dict) and 'scores' in result:
              for label, score in zip(result.get('labels', []), result.get('scores', [])):
                  if label != "legitimate listing":
                      try:
                          score_val = float(score)
+                         # Reduce the impact of suspicious classifications
+                         if label == "suspicious listing":
+                             score_val *= 0.5  # Reduce suspicious impact by 50%
+                         elif label == "potential fraud":
+                             score_val *= 0.7  # Reduce potential fraud impact by 30%
+                         elif label == "high risk listing":
+                             score_val *= 0.8  # Reduce high risk impact by 20%
                      except Exception:
                          score_val = 0.0
                      fraud_score += score_val
                      fraud_classification['confidence_scores'][label] = score_val
          else:
              # Handle fallback result
+             fraud_score = 0.05  # Reduced from 0.1 to 0.05

+         # Normalize fraud score to 0-1 range with more lenient scaling
          try:
+             fraud_score = min(1.0, fraud_score / (len(risk_categories) - 1) * 0.7)  # Reduced by 30%
          except Exception:
              fraud_score = 0.0
          fraud_classification['alert_score'] = fraud_score
+
+         # Determine alert level with more lenient thresholds
+         if fraud_score >= 0.7:  # Increased from 0.6
+             fraud_classification['alert_level'] = 'high'
+         elif fraud_score >= 0.4:  # Increased from 0.3
+             fraud_classification['alert_level'] = 'medium'
+         elif fraud_score >= 0.2:  # Increased from 0.1
+             fraud_classification['alert_level'] = 'low'
+         else:
              fraud_classification['alert_level'] = 'minimal'
+
+         # Generate reasoning based on scores
+         reasoning_parts = []
+
+         if fraud_score < 0.2:
+             reasoning_parts.append("This property was classified as legitimate based on AI analysis of the listing details.")
+         elif fraud_score < 0.4:
+             reasoning_parts.append("This property was classified as low risk based on AI analysis of the listing details.")
+         elif fraud_score < 0.7:
+             reasoning_parts.append("This property was classified as medium risk based on AI analysis of the listing details.")
+         else:
+             reasoning_parts.append("This property was classified as high risk based on AI analysis of the listing details.")
+
+         # Add specific risk indicators if any
+         if fraud_classification['confidence_scores']:
+             highest_risk = max(fraud_classification['confidence_scores'].items(), key=lambda x: x[1])
+             if highest_risk[1] > 0.3:
+                 reasoning_parts.append(f"Primary concern: {highest_risk[0]} (confidence: {highest_risk[1]:.0%})")
+
+         fraud_classification['reasoning'] = " ".join(reasoning_parts)
+
          return fraud_classification
+
      except Exception as e:
          logger.error(f"Error in fraud classification: {str(e)}")
          return {
+             'alert_level': 'minimal',
+             'alert_score': 0.05,  # Reduced from 0.1
+             'confidence_scores': {},
+             'high_risk': [],
              'medium_risk': [],
              'low_risk': [],
+             'reasoning': f'Fraud analysis failed: {str(e)}'
          }

  def simple_fraud_classification(text, categories):
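
With this change, classify_fraud short-circuits on the regex fake-data checks before the zero-shot model is ever loaded, and the classifier path is deliberately dampened. A rough check of that dampening arithmetic, using assumed (not real) classifier scores; with multi_label=False the pipeline's scores form a distribution summing to 1:

    # Sketch of the classifier-path arithmetic in classify_fraud above.
    # The score values are assumed for illustration, not real model output.
    scores = {'legitimate listing': 0.70, 'suspicious listing': 0.20,
              'potential fraud': 0.07, 'high risk listing': 0.03}

    fraud_score = (scores['suspicious listing'] * 0.5      # dampened by 50%
                   + scores['potential fraud'] * 0.7       # dampened by 30%
                   + scores['high risk listing'] * 0.8)    # dampened by 20%
    fraud_score = min(1.0, fraud_score / 3 * 0.7)          # normalize: /(4 - 1), then * 0.7
    print(round(fraud_score, 3))                           # 0.04 -> 'minimal'

Because the three non-legitimate scores sum to at most 1.0 and the largest dampening factor is 0.8, this path is capped near 0.8 * 0.7 / 3 ≈ 0.19, just below the 'low' threshold of 0.2; alert levels above 'minimal' are effectively reachable only through the fake-data early return.
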
models/trust_score.py CHANGED
@@ -2,54 +2,173 @@

  from .model_loader import load_model
  from .logging_config import logger
+ import re

  def generate_trust_score(text, image_analysis, pdf_analysis):
      try:
-         # Use a simpler approach to avoid timeouts
-         trust_score = 50.0  # Start with neutral score
+         # Start with a much lower base score and be very strict
+         trust_score = 20.0  # Drastically reduced from 60.0
          reasoning_parts = []

          # Simple text-based trust indicators
          text_lower = str(text).lower()

-         # Positive indicators
+         # CRITICAL: Check for obvious fake data patterns
+         fake_patterns = [
+             r'\b\d+\s*$',  # Numbers at end of lines
+             r'^\d+$',  # Only numbers
+             r'\b\d{1,2}\s*$',  # Single or double digits
+             r'price.*\d{1,3}',  # Very low prices
+             r'size.*\d{1,3}',  # Very small sizes
+             r'bedrooms.*\d{1,2}',  # Very few bedrooms
+             r'bathrooms.*\d{1,2}',  # Very few bathrooms
+         ]
+
+         fake_detected = False
+         for pattern in fake_patterns:
+             if re.search(pattern, text_lower):
+                 fake_detected = True
+                 trust_score -= 30  # Heavy penalty for fake patterns
+                 reasoning_parts.append("Detected suspicious number patterns")
+                 break
+
+         # Check for repeated numbers (like "2, 2, 2, 2")
+         numbers = re.findall(r'\b\d+\b', text_lower)
+         if len(numbers) >= 3:
+             unique_numbers = set(numbers)
+             if len(unique_numbers) <= 2:  # If most numbers are the same
+                 fake_detected = True
+                 trust_score -= 40  # Very heavy penalty
+                 reasoning_parts.append("Detected repeated number patterns (likely fake data)")
+
+         # Check for extremely low values
+         if any(word in text_lower for word in ['₹2', '₹1', '₹3', '₹4', '₹5']):
+             fake_detected = True
+             trust_score -= 50  # Extremely heavy penalty
+             reasoning_parts.append("Detected suspiciously low pricing")
+
+         # Check for very small property sizes
+         if any(word in text_lower for word in ['2 sq ft', '1 sq ft', '3 sq ft', '4 sq ft', '5 sq ft']):
+             fake_detected = True
+             trust_score -= 40
+             reasoning_parts.append("Detected unrealistic property size")
+
+         # Check for generic property names
+         if any(word in text_lower for word in ['2', '1', '3', '4', '5']) and len(text.strip()) < 50:
+             fake_detected = True
+             trust_score -= 30
+             reasoning_parts.append("Detected generic/numeric property name")
+
+         # Positive indicators - Much more strict
          positive_indicators = [
              'verified', 'authentic', 'genuine', 'real', 'legitimate',
-             'complete', 'detailed', 'professional', 'official', 'certified'
+             'complete', 'detailed', 'professional', 'official', 'certified',
+             'luxurious', 'modern', 'spacious', 'well-maintained', 'prime location',
+             'amenities', 'security', 'parking', 'garden', 'balcony',
+             'renovated', 'furnished', 'semi-furnished', 'ready to move',
+             'clear title', 'no litigation', 'approved', 'registered'
          ]

-         # Negative indicators
+         # Negative indicators - More comprehensive
          negative_indicators = [
              'fake', 'scam', 'fraud', 'suspicious', 'unverified',
-             'incomplete', 'missing', 'unclear', 'doubtful', 'questionable'
+             'incomplete', 'missing', 'unclear', 'doubtful', 'questionable',
+             'urgent sale', 'quick sale', 'no documents needed', 'cash only',
+             'below market', 'distress sale', 'owner abroad', 'inheritance',
+             'unclear title', 'litigation', 'dispute', 'encroachment'
          ]

          # Count positive and negative indicators
          positive_count = sum(1 for indicator in positive_indicators if indicator in text_lower)
          negative_count = sum(1 for indicator in negative_indicators if indicator in text_lower)

-         # Adjust score based on indicators
-         if positive_count > 0:
-             trust_score += min(20, positive_count * 5)
+         # Adjust score based on indicators - Much stricter
+         if positive_count > 0 and not fake_detected:
+             trust_score += min(15, positive_count * 2)  # Reduced from 25 to 15
              reasoning_parts.append(f"Found {positive_count} positive trust indicators")

          if negative_count > 0:
-             trust_score -= min(30, negative_count * 10)
+             trust_score -= min(30, negative_count * 8)  # Increased penalty from 20 to 30
              reasoning_parts.append(f"Found {negative_count} negative trust indicators")

-         # Image analysis contribution
+         # Image analysis contribution - Much stricter
          if image_analysis:
              image_count = len(image_analysis) if isinstance(image_analysis, list) else 1
              if image_count > 0:
-                 trust_score += min(15, image_count * 3)
-                 reasoning_parts.append(f"Property has {image_count} images")
+                 # Check if images are actually property-related
+                 property_related_count = sum(1 for img in image_analysis if img.get('is_property_related', False))
+                 if property_related_count > 0:
+                     trust_score += min(10, property_related_count * 3)  # Reduced from 20 to 10
+                     reasoning_parts.append(f"Property has {property_related_count} property-related images")
+                 else:
+                     trust_score -= 20  # Penalty for non-property images
+                     reasoning_parts.append("No property-related images detected")
+
+                 # Bonus for multiple high-quality images
+                 if property_related_count >= 3:
+                     trust_score += 5
+                     reasoning_parts.append("Multiple property images provided")

-         # PDF analysis contribution
+         # PDF analysis contribution - Much stricter
          if pdf_analysis:
              pdf_count = len(pdf_analysis) if isinstance(pdf_analysis, list) else 1
              if pdf_count > 0:
-                 trust_score += min(15, pdf_count * 5)
-                 reasoning_parts.append(f"Property has {pdf_count} documents")
+                 # Check if documents are actually property-related
+                 property_related_docs = sum(1 for doc in pdf_analysis if doc.get('is_property_related', False))
+                 if property_related_docs > 0:
+                     trust_score += min(10, property_related_docs * 4)  # Reduced from 20 to 10
+                     reasoning_parts.append(f"Property has {property_related_docs} property-related documents")
+                 else:
+                     trust_score -= 15  # Penalty for non-property documents
+                     reasoning_parts.append("No property-related documents detected")
+
+                 # Bonus for multiple documents
+                 if property_related_docs >= 2:
+                     trust_score += 3
+                     reasoning_parts.append("Multiple supporting documents provided")
+
+         # Text quality assessment - Much stricter
+         if text and len(text) > 200 and not fake_detected:
+             trust_score += 8
+             reasoning_parts.append("Detailed property description provided")
+         elif text and len(text) > 100 and not fake_detected:
+             trust_score += 4
+             reasoning_parts.append("Adequate property description provided")
+         elif len(text) < 50:
+             trust_score -= 20  # Heavy penalty for very short descriptions
+             reasoning_parts.append("Very short property description")
+
+         # Location quality assessment - Much stricter
+         if 'hyderabad' in text_lower or 'mumbai' in text_lower or 'delhi' in text_lower or 'bangalore' in text_lower:
+             if not fake_detected:
+                 trust_score += 3
+                 reasoning_parts.append("Property in major city")
+
+         # Property type assessment - Much stricter
+         if any(prop_type in text_lower for prop_type in ['apartment', 'flat', 'house', 'villa', 'bungalow']):
+             if not fake_detected:
+                 trust_score += 2
+                 reasoning_parts.append("Clear property type mentioned")
+
+         # Amenities assessment - Much stricter
+         amenities_count = sum(1 for amenity in ['pool', 'gym', 'garden', 'parking', 'security', 'lift', 'balcony']
+                               if amenity in text_lower)
+         if amenities_count > 0 and not fake_detected:
+             trust_score += min(5, amenities_count * 1)  # Reduced from 10 to 5
+             reasoning_parts.append(f"Property has {amenities_count} amenities mentioned")
+
+         # CRITICAL: Additional fake data checks
+         # Check if all major fields are just numbers
+         numeric_fields = ['property_name', 'bedrooms', 'bathrooms', 'sq_ft', 'market_value']
+         numeric_count = 0
+         for field in numeric_fields:
+             if field in text_lower and re.search(r'\b\d{1,2}\b', text_lower):
+                 numeric_count += 1
+
+         if numeric_count >= 3:  # If 3+ fields are just numbers
+             fake_detected = True
+             trust_score -= 60  # Extremely heavy penalty
+             reasoning_parts.append("Multiple fields contain only numbers (highly suspicious)")

          # Ensure score is within bounds
          trust_score = max(0, min(100, trust_score))
@@ -64,4 +183,4 @@ def generate_trust_score(text, image_analysis, pdf_analysis):

      except Exception as e:
          logger.error(f"Error in trust score generation: {str(e)}")
-         return 35.0, f"Trust analysis failed: {str(e)}"
+         return 10.0, f"Trust analysis failed: {str(e)}"  # Reduced from 50.0 to 10.0
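
The rewritten generate_trust_score is a bounded additive model: a 20-point base, capped bonuses, heavy fake-data penalties, and a final clamp to [0, 100]. A hand-computed walk-through under assumed inputs (a hypothetical clean listing; none of these values come from real analysis output):

    # Hypothetical inputs: a >200-char description of a Hyderabad apartment
    # mentioning 'verified' and 'parking', 3 property-related images,
    # 2 property-related documents, and no fake-data patterns detected.
    score = 20.0                     # strict base score
    score += min(15, 2 * 2)          # 2 positive indicators          -> +4
    score += min(10, 3 * 3) + 5      # 3 property images + >=3 bonus  -> +14
    score += min(10, 2 * 4) + 3      # 2 property docs + >=2 bonus    -> +11
    score += 8                       # detailed description (> 200 chars)
    score += 3                       # major city ('hyderabad')
    score += 2                       # clear property type ('apartment')
    score += min(5, 1 * 1)           # 1 amenity keyword ('parking')
    print(max(0, min(100, score)))   # 63.0

Even with every bonus maxed out, the ceiling is 81 (20 + 15 + 15 + 13 + 8 + 3 + 2 + 5), so no listing can approach 100 under these rules: the function trades optimistic scores for a hard cap, and anything scoring in the 60s or above must have cleared every fake-data check.
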