sksameermujahid committed on
Commit
7ac74a0
·
verified ·
1 Parent(s): 01dfef8

Upload 21 files

Browse files
models/address_verification.py CHANGED
@@ -139,7 +139,9 @@ def verify_address(data):
139
  elif city and state:
140
  verification_points = min(1.0, verification_points + 0.05) # 5% bonus
141
 
142
- address_results['verification_score'] = verification_points * 100 # Convert to percentage
 
 
143
 
144
  return address_results
145
  except Exception as e:
 
139
  elif city and state:
140
  verification_points = min(1.0, verification_points + 0.05) # 5% bonus
141
 
142
+ # Ensure verification score is properly capped at 100%
143
+ verification_score = min(100.0, verification_points * 100) # Convert to percentage and cap at 100%
144
+ address_results['verification_score'] = verification_score
145
 
146
  return address_results
147
  except Exception as e:
models/cross_validation.py CHANGED
@@ -539,16 +539,25 @@ def perform_cross_validation(data: Dict[str, Any]) -> List[Dict[str, Any]]:
539
  'recommendation': 'Provide a valid property name (not just numbers)'
540
  })
541
 
542
- # Property type validation
543
  property_type = data.get('property_type', '').strip()
544
- if not property_type:
545
  analysis_sections['basic_info'].append({
546
  'check': 'property_type',
547
- 'status': 'missing',
548
- 'message': 'Property type is required.',
549
- 'details': 'Please specify the property type.',
550
  'severity': 'high' if fake_data_detected else 'medium',
551
- 'recommendation': 'Specify property type (apartment, house, etc.)'
 
 
 
 
 
 
 
 
 
552
  })
553
 
554
  # Status validation
@@ -615,8 +624,9 @@ def perform_cross_validation(data: Dict[str, Any]) -> List[Dict[str, Any]]:
615
  bedrooms = safe_int_convert(data.get('bedrooms', 0))
616
  bathrooms = safe_float_convert(data.get('bathrooms', 0))
617
  year_built = safe_int_convert(data.get('year_built', 0))
 
618
 
619
- # Much more lenient validation ranges
620
  if bedrooms < 0 or bedrooms > 50: # Increased range from 20 to 50
621
  analysis_sections['specifications'].append({
622
  'check': 'bedrooms',
@@ -626,6 +636,15 @@ def perform_cross_validation(data: Dict[str, Any]) -> List[Dict[str, Any]]:
626
  'severity': 'high' if bedrooms < 0 else 'medium',
627
  'recommendation': 'Provide realistic bedroom count'
628
  })
 
 
 
 
 
 
 
 
 
629
 
630
  if bathrooms < 0 or bathrooms > 30: # Increased range from 15 to 30
631
  analysis_sections['specifications'].append({
@@ -636,6 +655,15 @@ def perform_cross_validation(data: Dict[str, Any]) -> List[Dict[str, Any]]:
636
  'severity': 'high' if bathrooms < 0 else 'medium',
637
  'recommendation': 'Provide realistic bathroom count'
638
  })
 
 
 
 
 
 
 
 
 
639
 
640
  current_year = datetime.now().year
641
  if year_built > current_year + 5 or year_built < 1800: # More lenient future year
@@ -648,7 +676,7 @@ def perform_cross_validation(data: Dict[str, Any]) -> List[Dict[str, Any]]:
648
  'recommendation': 'Provide realistic year built'
649
  })
650
 
651
- # Pricing validation - Handle flat data structure - Much more lenient
652
  if market_value <= 0:
653
  analysis_sections['pricing'].append({
654
  'check': 'market_value',
@@ -658,15 +686,56 @@ def perform_cross_validation(data: Dict[str, Any]) -> List[Dict[str, Any]]:
658
  'severity': 'high',
659
  'recommendation': 'Provide property market value'
660
  })
661
- elif market_value < 10000: # Much more lenient minimum price
662
- analysis_sections['pricing'].append({
663
- 'check': 'market_value',
664
- 'status': 'fraudulent' if market_value < 1000 else 'suspicious',
665
- 'message': 'Unusually low market value.',
666
- 'details': f'Market value: ₹{market_value:,.0f}',
667
- 'severity': 'high' if market_value < 1000 else 'medium',
668
- 'recommendation': 'Verify market value is accurate'
669
- })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
670
 
671
  # Description validation - Much more lenient
672
  description = data.get('description', '').strip()
 
539
  'recommendation': 'Provide a valid property name (not just numbers)'
540
  })
541
 
542
+ # Property type validation - Much stricter
543
  property_type = data.get('property_type', '').strip()
544
+ if not property_type or property_type.lower() in ['unknown', 'none', 'null', '']:
545
  analysis_sections['basic_info'].append({
546
  'check': 'property_type',
547
+ 'status': 'suspicious', # Changed from 'missing' to 'suspicious'
548
+ 'message': 'Property type is unclear or missing.',
549
+ 'details': f'Property type: {property_type}',
550
  'severity': 'high' if fake_data_detected else 'medium',
551
+ 'recommendation': 'Specify clear property type (apartment, house, villa, etc.)'
552
+ })
553
+ elif property_type.lower() in ['unknown', 'none', 'null']:
554
+ analysis_sections['basic_info'].append({
555
+ 'check': 'property_type',
556
+ 'status': 'suspicious',
557
+ 'message': 'Property type is marked as unknown.',
558
+ 'details': f'Property type: {property_type}',
559
+ 'severity': 'medium',
560
+ 'recommendation': 'Provide a specific property type instead of "unknown"'
561
  })
562
 
563
  # Status validation
 
624
  bedrooms = safe_int_convert(data.get('bedrooms', 0))
625
  bathrooms = safe_float_convert(data.get('bathrooms', 0))
626
  year_built = safe_int_convert(data.get('year_built', 0))
627
+ square_feet = safe_float_convert(data.get('sq_ft', 0))
628
 
629
+ # Much more lenient validation ranges, but stricter for 0 values
630
  if bedrooms < 0 or bedrooms > 50: # Increased range from 20 to 50
631
  analysis_sections['specifications'].append({
632
  'check': 'bedrooms',
 
636
  'severity': 'high' if bedrooms < 0 else 'medium',
637
  'recommendation': 'Provide realistic bedroom count'
638
  })
639
+ elif bedrooms == 0 and square_feet > 200: # Suspicious if 0 bedrooms but large property
640
+ analysis_sections['specifications'].append({
641
+ 'check': 'bedrooms',
642
+ 'status': 'suspicious',
643
+ 'message': 'No bedrooms specified for a large property.',
644
+ 'details': f'Bedrooms: {bedrooms}, Square feet: {square_feet}',
645
+ 'severity': 'medium',
646
+ 'recommendation': 'Specify bedroom count for this property size'
647
+ })
648
 
649
  if bathrooms < 0 or bathrooms > 30: # Increased range from 15 to 30
650
  analysis_sections['specifications'].append({
 
655
  'severity': 'high' if bathrooms < 0 else 'medium',
656
  'recommendation': 'Provide realistic bathroom count'
657
  })
658
+ elif bathrooms == 0 and square_feet > 100: # Suspicious if 0 bathrooms but significant property
659
+ analysis_sections['specifications'].append({
660
+ 'check': 'bathrooms',
661
+ 'status': 'suspicious',
662
+ 'message': 'No bathrooms specified for this property size.',
663
+ 'details': f'Bathrooms: {bathrooms}, Square feet: {square_feet}',
664
+ 'severity': 'medium',
665
+ 'recommendation': 'Specify bathroom count for this property size'
666
+ })
667
 
668
  current_year = datetime.now().year
669
  if year_built > current_year + 5 or year_built < 1800: # More lenient future year
 
676
  'recommendation': 'Provide realistic year built'
677
  })
678
 
679
+ # Pricing validation - Handle flat data structure - Much more lenient and context-aware
680
  if market_value <= 0:
681
  analysis_sections['pricing'].append({
682
  'check': 'market_value',
 
686
  'severity': 'high',
687
  'recommendation': 'Provide property market value'
688
  })
689
+ else:
690
+ # Context-aware pricing validation
691
+ square_feet = safe_float_convert(data.get('sq_ft', 0))
692
+ property_type = data.get('property_type', '').lower()
693
+ is_rental = data.get('is_rental', False)
694
+
695
+ # Calculate price per sq ft
696
+ price_per_sqft = market_value / square_feet if square_feet > 0 else 0
697
+
698
+ # Different thresholds based on property type and rental status
699
+ if is_rental:
700
+ # Rental properties - monthly rates
701
+ if price_per_sqft < 5: # Very low rental rate
702
+ analysis_sections['pricing'].append({
703
+ 'check': 'market_value',
704
+ 'status': 'suspicious',
705
+ 'message': 'Unusually low rental rate.',
706
+ 'details': f'Rental rate: ₹{price_per_sqft:.2f}/sq ft/month',
707
+ 'severity': 'medium',
708
+ 'recommendation': 'Verify rental rate is accurate'
709
+ })
710
+ elif price_per_sqft > 100: # Very high rental rate
711
+ analysis_sections['pricing'].append({
712
+ 'check': 'market_value',
713
+ 'status': 'suspicious',
714
+ 'message': 'Unusually high rental rate.',
715
+ 'details': f'Rental rate: ₹{price_per_sqft:.2f}/sq ft/month',
716
+ 'severity': 'medium',
717
+ 'recommendation': 'Verify rental rate is accurate'
718
+ })
719
+ else:
720
+ # Purchase properties
721
+ if price_per_sqft < 1000: # Very low purchase price
722
+ analysis_sections['pricing'].append({
723
+ 'check': 'market_value',
724
+ 'status': 'suspicious',
725
+ 'message': 'Unusually low purchase price.',
726
+ 'details': f'Price per sq ft: ₹{price_per_sqft:.2f}',
727
+ 'severity': 'medium',
728
+ 'recommendation': 'Verify purchase price is accurate'
729
+ })
730
+ elif price_per_sqft > 50000: # Very high purchase price
731
+ analysis_sections['pricing'].append({
732
+ 'check': 'market_value',
733
+ 'status': 'suspicious',
734
+ 'message': 'Unusually high purchase price.',
735
+ 'details': f'Price per sq ft: ₹{price_per_sqft:.2f}',
736
+ 'severity': 'medium',
737
+ 'recommendation': 'Verify purchase price is accurate'
738
+ })
739
 
740
  # Description validation - Much more lenient
741
  description = data.get('description', '').strip()
models/text_quality.py CHANGED
@@ -35,19 +35,19 @@ def assess_text_quality(text):
35
  classifier = load_model("zero-shot-classification") # Use standard model instead of typeform
36
  except Exception as e:
37
  logger.error(f"Error loading model in text quality: {str(e)}")
38
- # Fallback scoring for when model fails
39
  text_length = len(text)
40
  if text_length > 200:
41
- fallback_score = 60
42
  assessment = 'good'
43
  elif text_length > 100:
44
- fallback_score = 40
45
  assessment = 'adequate'
46
  elif text_length > 50:
47
- fallback_score = 25
48
  assessment = 'basic'
49
  else:
50
- fallback_score = 15
51
  assessment = 'basic'
52
 
53
  return {
@@ -100,16 +100,16 @@ def assess_text_quality(text):
100
 
101
  # Calculate final score (0-100) with better handling of edge cases
102
  base_score = (positive_score - negative_score + 1) * 50
103
- quality_score = max(10, min(100, int(base_score))) # Ensure minimum 10% score
104
 
105
- # Determine assessment
106
- if quality_score >= 80:
107
  assessment = 'excellent'
108
- elif quality_score >= 60:
109
  assessment = 'good'
110
- elif quality_score >= 40:
111
  assessment = 'adequate'
112
- elif quality_score >= 20:
113
  assessment = 'basic'
114
  else:
115
  assessment = 'basic' # Changed from 'very poor' to 'basic'
@@ -138,9 +138,9 @@ def assess_text_quality(text):
138
 
139
  except Exception as e:
140
  logger.error(f"Error in text quality assessment: {str(e)}")
141
- # Return reasonable fallback instead of 0
142
  text_length = len(str(text)) if text else 0
143
- fallback_score = max(10, min(50, text_length // 2)) # Basic scoring based on length
144
 
145
  return {
146
  'assessment': 'basic',
 
35
  classifier = load_model("zero-shot-classification") # Use standard model instead of typeform
36
  except Exception as e:
37
  logger.error(f"Error loading model in text quality: {str(e)}")
38
+ # Much more lenient fallback scoring for when model fails
39
  text_length = len(text)
40
  if text_length > 200:
41
+ fallback_score = 70 # Increased from 60
42
  assessment = 'good'
43
  elif text_length > 100:
44
+ fallback_score = 50 # Increased from 40
45
  assessment = 'adequate'
46
  elif text_length > 50:
47
+ fallback_score = 35 # Increased from 25
48
  assessment = 'basic'
49
  else:
50
+ fallback_score = 25 # Increased from 15
51
  assessment = 'basic'
52
 
53
  return {
 
100
 
101
  # Calculate final score (0-100) with better handling of edge cases
102
  base_score = (positive_score - negative_score + 1) * 50
103
+ quality_score = max(20, min(100, int(base_score))) # Increased minimum from 10% to 20%
104
 
105
+ # Much more lenient assessment thresholds
106
+ if quality_score >= 70: # Reduced from 80
107
  assessment = 'excellent'
108
+ elif quality_score >= 50: # Reduced from 60
109
  assessment = 'good'
110
+ elif quality_score >= 30: # Reduced from 40
111
  assessment = 'adequate'
112
+ elif quality_score >= 20: # Unchanged (still 20)
113
  assessment = 'basic'
114
  else:
115
  assessment = 'basic' # Changed from 'very poor' to 'basic'
 
138
 
139
  except Exception as e:
140
  logger.error(f"Error in text quality assessment: {str(e)}")
141
+ # Return much more reasonable fallback instead of 0
142
  text_length = len(str(text)) if text else 0
143
+ fallback_score = max(25, min(60, text_length // 2 + 20)) # Much more lenient scoring based on length
144
 
145
  return {
146
  'assessment': 'basic',