Upload 21 files
Browse files
- models/address_verification.py +3 -1
- models/cross_validation.py +86 -17
- models/text_quality.py +13 -13
models/address_verification.py
CHANGED
|
@@ -139,7 +139,9 @@ def verify_address(data):
|
|
| 139 |
elif city and state:
|
| 140 |
verification_points = min(1.0, verification_points + 0.05) # 5% bonus
|
| 141 |
|
| 142 |
-
|
|
|
|
|
|
|
| 143 |
|
| 144 |
return address_results
|
| 145 |
except Exception as e:
|
|
|
|
| 139 |
elif city and state:
|
| 140 |
verification_points = min(1.0, verification_points + 0.05) # 5% bonus
|
| 141 |
|
| 142 |
+
# Ensure verification score is properly capped at 100%
|
| 143 |
+
verification_score = min(100.0, verification_points * 100) # Convert to percentage and cap at 100%
|
| 144 |
+
address_results['verification_score'] = verification_score
|
| 145 |
|
| 146 |
return address_results
|
| 147 |
except Exception as e:
|
models/cross_validation.py
CHANGED
|
@@ -539,16 +539,25 @@ def perform_cross_validation(data: Dict[str, Any]) -> List[Dict[str, Any]]:
|
|
| 539 |
'recommendation': 'Provide a valid property name (not just numbers)'
|
| 540 |
})
|
| 541 |
|
| 542 |
-
# Property type validation
|
| 543 |
property_type = data.get('property_type', '').strip()
|
| 544 |
-
if not property_type:
|
| 545 |
analysis_sections['basic_info'].append({
|
| 546 |
'check': 'property_type',
|
| 547 |
-
'status': 'missing'
|
| 548 |
-
'message': 'Property type is
|
| 549 |
-
'details': '
|
| 550 |
'severity': 'high' if fake_data_detected else 'medium',
|
| 551 |
-
'recommendation': 'Specify property type (apartment, house, etc.)'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 552 |
})
|
| 553 |
|
| 554 |
# Status validation
|
|
@@ -615,8 +624,9 @@ def perform_cross_validation(data: Dict[str, Any]) -> List[Dict[str, Any]]:
|
|
| 615 |
bedrooms = safe_int_convert(data.get('bedrooms', 0))
|
| 616 |
bathrooms = safe_float_convert(data.get('bathrooms', 0))
|
| 617 |
year_built = safe_int_convert(data.get('year_built', 0))
|
|
|
|
| 618 |
|
| 619 |
-
# Much more lenient validation ranges
|
| 620 |
if bedrooms < 0 or bedrooms > 50: # Increased range from 20 to 50
|
| 621 |
analysis_sections['specifications'].append({
|
| 622 |
'check': 'bedrooms',
|
|
@@ -626,6 +636,15 @@ def perform_cross_validation(data: Dict[str, Any]) -> List[Dict[str, Any]]:
|
|
| 626 |
'severity': 'high' if bedrooms < 0 else 'medium',
|
| 627 |
'recommendation': 'Provide realistic bedroom count'
|
| 628 |
})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 629 |
|
| 630 |
if bathrooms < 0 or bathrooms > 30: # Increased range from 15 to 30
|
| 631 |
analysis_sections['specifications'].append({
|
|
@@ -636,6 +655,15 @@ def perform_cross_validation(data: Dict[str, Any]) -> List[Dict[str, Any]]:
|
|
| 636 |
'severity': 'high' if bathrooms < 0 else 'medium',
|
| 637 |
'recommendation': 'Provide realistic bathroom count'
|
| 638 |
})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 639 |
|
| 640 |
current_year = datetime.now().year
|
| 641 |
if year_built > current_year + 5 or year_built < 1800: # More lenient future year
|
|
@@ -648,7 +676,7 @@ def perform_cross_validation(data: Dict[str, Any]) -> List[Dict[str, Any]]:
|
|
| 648 |
'recommendation': 'Provide realistic year built'
|
| 649 |
})
|
| 650 |
|
| 651 |
-
# Pricing validation - Handle flat data structure - Much more lenient
|
| 652 |
if market_value <= 0:
|
| 653 |
analysis_sections['pricing'].append({
|
| 654 |
'check': 'market_value',
|
|
@@ -658,15 +686,56 @@ def perform_cross_validation(data: Dict[str, Any]) -> List[Dict[str, Any]]:
|
|
| 658 |
'severity': 'high',
|
| 659 |
'recommendation': 'Provide property market value'
|
| 660 |
})
|
| 661 |
-
|
| 662 |
-
|
| 663 |
-
|
| 664 |
-
|
| 665 |
-
|
| 666 |
-
|
| 667 |
-
|
| 668 |
-
|
| 669 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 670 |
|
| 671 |
# Description validation - Much more lenient
|
| 672 |
description = data.get('description', '').strip()
|
|
|
|
| 539 |
'recommendation': 'Provide a valid property name (not just numbers)'
|
| 540 |
})
|
| 541 |
|
| 542 |
+
# Property type validation - Much stricter
|
| 543 |
property_type = data.get('property_type', '').strip()
|
| 544 |
+
if not property_type or property_type.lower() in ['unknown', 'none', 'null', '']:
|
| 545 |
analysis_sections['basic_info'].append({
|
| 546 |
'check': 'property_type',
|
| 547 |
+
'status': 'suspicious', # Changed from 'missing' to 'suspicious'
|
| 548 |
+
'message': 'Property type is unclear or missing.',
|
| 549 |
+
'details': f'Property type: {property_type}',
|
| 550 |
'severity': 'high' if fake_data_detected else 'medium',
|
| 551 |
+
'recommendation': 'Specify clear property type (apartment, house, villa, etc.)'
|
| 552 |
+
})
|
| 553 |
+
elif property_type.lower() in ['unknown', 'none', 'null']:
|
| 554 |
+
analysis_sections['basic_info'].append({
|
| 555 |
+
'check': 'property_type',
|
| 556 |
+
'status': 'suspicious',
|
| 557 |
+
'message': 'Property type is marked as unknown.',
|
| 558 |
+
'details': f'Property type: {property_type}',
|
| 559 |
+
'severity': 'medium',
|
| 560 |
+
'recommendation': 'Provide a specific property type instead of "unknown"'
|
| 561 |
})
|
| 562 |
|
| 563 |
# Status validation
|
|
|
|
| 624 |
bedrooms = safe_int_convert(data.get('bedrooms', 0))
|
| 625 |
bathrooms = safe_float_convert(data.get('bathrooms', 0))
|
| 626 |
year_built = safe_int_convert(data.get('year_built', 0))
|
| 627 |
+
square_feet = safe_float_convert(data.get('sq_ft', 0))
|
| 628 |
|
| 629 |
+
# Much more lenient validation ranges, but stricter for 0 values
|
| 630 |
if bedrooms < 0 or bedrooms > 50: # Increased range from 20 to 50
|
| 631 |
analysis_sections['specifications'].append({
|
| 632 |
'check': 'bedrooms',
|
|
|
|
| 636 |
'severity': 'high' if bedrooms < 0 else 'medium',
|
| 637 |
'recommendation': 'Provide realistic bedroom count'
|
| 638 |
})
|
| 639 |
+
elif bedrooms == 0 and square_feet > 200: # Suspicious if 0 bedrooms but large property
|
| 640 |
+
analysis_sections['specifications'].append({
|
| 641 |
+
'check': 'bedrooms',
|
| 642 |
+
'status': 'suspicious',
|
| 643 |
+
'message': 'No bedrooms specified for a large property.',
|
| 644 |
+
'details': f'Bedrooms: {bedrooms}, Square feet: {square_feet}',
|
| 645 |
+
'severity': 'medium',
|
| 646 |
+
'recommendation': 'Specify bedroom count for this property size'
|
| 647 |
+
})
|
| 648 |
|
| 649 |
if bathrooms < 0 or bathrooms > 30: # Increased range from 15 to 30
|
| 650 |
analysis_sections['specifications'].append({
|
|
|
|
| 655 |
'severity': 'high' if bathrooms < 0 else 'medium',
|
| 656 |
'recommendation': 'Provide realistic bathroom count'
|
| 657 |
})
|
| 658 |
+
elif bathrooms == 0 and square_feet > 100: # Suspicious if 0 bathrooms but significant property
|
| 659 |
+
analysis_sections['specifications'].append({
|
| 660 |
+
'check': 'bathrooms',
|
| 661 |
+
'status': 'suspicious',
|
| 662 |
+
'message': 'No bathrooms specified for this property size.',
|
| 663 |
+
'details': f'Bathrooms: {bathrooms}, Square feet: {square_feet}',
|
| 664 |
+
'severity': 'medium',
|
| 665 |
+
'recommendation': 'Specify bathroom count for this property size'
|
| 666 |
+
})
|
| 667 |
|
| 668 |
current_year = datetime.now().year
|
| 669 |
if year_built > current_year + 5 or year_built < 1800: # More lenient future year
|
|
|
|
| 676 |
'recommendation': 'Provide realistic year built'
|
| 677 |
})
|
| 678 |
|
| 679 |
+
# Pricing validation - Handle flat data structure - Much more lenient and context-aware
|
| 680 |
if market_value <= 0:
|
| 681 |
analysis_sections['pricing'].append({
|
| 682 |
'check': 'market_value',
|
|
|
|
| 686 |
'severity': 'high',
|
| 687 |
'recommendation': 'Provide property market value'
|
| 688 |
})
|
| 689 |
+
else:
|
| 690 |
+
# Context-aware pricing validation
|
| 691 |
+
square_feet = safe_float_convert(data.get('sq_ft', 0))
|
| 692 |
+
property_type = data.get('property_type', '').lower()
|
| 693 |
+
is_rental = data.get('is_rental', False)
|
| 694 |
+
|
| 695 |
+
# Calculate price per sq ft
|
| 696 |
+
price_per_sqft = market_value / square_feet if square_feet > 0 else 0
|
| 697 |
+
|
| 698 |
+
# Different thresholds based on property type and rental status
|
| 699 |
+
if is_rental:
|
| 700 |
+
# Rental properties - monthly rates
|
| 701 |
+
if price_per_sqft < 5: # Very low rental rate
|
| 702 |
+
analysis_sections['pricing'].append({
|
| 703 |
+
'check': 'market_value',
|
| 704 |
+
'status': 'suspicious',
|
| 705 |
+
'message': 'Unusually low rental rate.',
|
| 706 |
+
'details': f'Rental rate: ₹{price_per_sqft:.2f}/sq ft/month',
|
| 707 |
+
'severity': 'medium',
|
| 708 |
+
'recommendation': 'Verify rental rate is accurate'
|
| 709 |
+
})
|
| 710 |
+
elif price_per_sqft > 100: # Very high rental rate
|
| 711 |
+
analysis_sections['pricing'].append({
|
| 712 |
+
'check': 'market_value',
|
| 713 |
+
'status': 'suspicious',
|
| 714 |
+
'message': 'Unusually high rental rate.',
|
| 715 |
+
'details': f'Rental rate: ₹{price_per_sqft:.2f}/sq ft/month',
|
| 716 |
+
'severity': 'medium',
|
| 717 |
+
'recommendation': 'Verify rental rate is accurate'
|
| 718 |
+
})
|
| 719 |
+
else:
|
| 720 |
+
# Purchase properties
|
| 721 |
+
if price_per_sqft < 1000: # Very low purchase price
|
| 722 |
+
analysis_sections['pricing'].append({
|
| 723 |
+
'check': 'market_value',
|
| 724 |
+
'status': 'suspicious',
|
| 725 |
+
'message': 'Unusually low purchase price.',
|
| 726 |
+
'details': f'Price per sq ft: ₹{price_per_sqft:.2f}',
|
| 727 |
+
'severity': 'medium',
|
| 728 |
+
'recommendation': 'Verify purchase price is accurate'
|
| 729 |
+
})
|
| 730 |
+
elif price_per_sqft > 50000: # Very high purchase price
|
| 731 |
+
analysis_sections['pricing'].append({
|
| 732 |
+
'check': 'market_value',
|
| 733 |
+
'status': 'suspicious',
|
| 734 |
+
'message': 'Unusually high purchase price.',
|
| 735 |
+
'details': f'Price per sq ft: ₹{price_per_sqft:.2f}',
|
| 736 |
+
'severity': 'medium',
|
| 737 |
+
'recommendation': 'Verify purchase price is accurate'
|
| 738 |
+
})
|
| 739 |
|
| 740 |
# Description validation - Much more lenient
|
| 741 |
description = data.get('description', '').strip()
|
models/text_quality.py
CHANGED
|
@@ -35,19 +35,19 @@ def assess_text_quality(text):
|
|
| 35 |
classifier = load_model("zero-shot-classification") # Use standard model instead of typeform
|
| 36 |
except Exception as e:
|
| 37 |
logger.error(f"Error loading model in text quality: {str(e)}")
|
| 38 |
-
#
|
| 39 |
text_length = len(text)
|
| 40 |
if text_length > 200:
|
| 41 |
-
fallback_score = 60
|
| 42 |
assessment = 'good'
|
| 43 |
elif text_length > 100:
|
| 44 |
-
fallback_score = 40
|
| 45 |
assessment = 'adequate'
|
| 46 |
elif text_length > 50:
|
| 47 |
-
fallback_score = 25
|
| 48 |
assessment = 'basic'
|
| 49 |
else:
|
| 50 |
-
fallback_score = 15
|
| 51 |
assessment = 'basic'
|
| 52 |
|
| 53 |
return {
|
|
@@ -100,16 +100,16 @@ def assess_text_quality(text):
|
|
| 100 |
|
| 101 |
# Calculate final score (0-100) with better handling of edge cases
|
| 102 |
base_score = (positive_score - negative_score + 1) * 50
|
| 103 |
-
quality_score = max(
|
| 104 |
|
| 105 |
-
#
|
| 106 |
-
if quality_score >= 80
|
| 107 |
assessment = 'excellent'
|
| 108 |
-
elif quality_score >= 60
|
| 109 |
assessment = 'good'
|
| 110 |
-
elif quality_score >= 40
|
| 111 |
assessment = 'adequate'
|
| 112 |
-
elif quality_score >= 20:
|
| 113 |
assessment = 'basic'
|
| 114 |
else:
|
| 115 |
assessment = 'basic' # Changed from 'very poor' to 'basic'
|
|
@@ -138,9 +138,9 @@ def assess_text_quality(text):
|
|
| 138 |
|
| 139 |
except Exception as e:
|
| 140 |
logger.error(f"Error in text quality assessment: {str(e)}")
|
| 141 |
-
# Return reasonable fallback instead of 0
|
| 142 |
text_length = len(str(text)) if text else 0
|
| 143 |
-
fallback_score = max(
|
| 144 |
|
| 145 |
return {
|
| 146 |
'assessment': 'basic',
|
|
|
|
| 35 |
classifier = load_model("zero-shot-classification") # Use standard model instead of typeform
|
| 36 |
except Exception as e:
|
| 37 |
logger.error(f"Error loading model in text quality: {str(e)}")
|
| 38 |
+
# Much more lenient fallback scoring for when model fails
|
| 39 |
text_length = len(text)
|
| 40 |
if text_length > 200:
|
| 41 |
+
fallback_score = 70 # Increased from 60
|
| 42 |
assessment = 'good'
|
| 43 |
elif text_length > 100:
|
| 44 |
+
fallback_score = 50 # Increased from 40
|
| 45 |
assessment = 'adequate'
|
| 46 |
elif text_length > 50:
|
| 47 |
+
fallback_score = 35 # Increased from 25
|
| 48 |
assessment = 'basic'
|
| 49 |
else:
|
| 50 |
+
fallback_score = 25 # Increased from 15
|
| 51 |
assessment = 'basic'
|
| 52 |
|
| 53 |
return {
|
|
|
|
| 100 |
|
| 101 |
# Calculate final score (0-100) with better handling of edge cases
|
| 102 |
base_score = (positive_score - negative_score + 1) * 50
|
| 103 |
+
quality_score = max(20, min(100, int(base_score))) # Increased minimum from 10% to 20%
|
| 104 |
|
| 105 |
+
# Much more lenient assessment thresholds
|
| 106 |
+
if quality_score >= 70: # Reduced from 80
|
| 107 |
assessment = 'excellent'
|
| 108 |
+
elif quality_score >= 50: # Reduced from 60
|
| 109 |
assessment = 'good'
|
| 110 |
+
elif quality_score >= 30: # Reduced from 40
|
| 111 |
assessment = 'adequate'
|
| 112 |
+
elif quality_score >= 20: # Threshold unchanged at 20
|
| 113 |
assessment = 'basic'
|
| 114 |
else:
|
| 115 |
assessment = 'basic' # Changed from 'very poor' to 'basic'
|
|
|
|
| 138 |
|
| 139 |
except Exception as e:
|
| 140 |
logger.error(f"Error in text quality assessment: {str(e)}")
|
| 141 |
+
# Return much more reasonable fallback instead of 0
|
| 142 |
text_length = len(str(text)) if text else 0
|
| 143 |
+
fallback_score = max(25, min(60, text_length // 2 + 20)) # Much more lenient scoring based on length
|
| 144 |
|
| 145 |
return {
|
| 146 |
'assessment': 'basic',
|