Upload 22 files
Browse files
- app.py +19 -0
- models/cross_validation.py +17 -2
- models/parallel_processor.py +1 -0
- models/price_analysis.py +113 -42
app.py
CHANGED
@@ -596,6 +596,11 @@ def verify_property():
|
|
596 |
image_model_used.add(result['model_used'])
|
597 |
if 'parallelization_info' in result:
|
598 |
image_parallel_info.append(result['parallelization_info'])
|
|
|
|
|
|
|
|
|
|
|
599 |
# Process PDFs in parallel
|
600 |
pdf_texts = []
|
601 |
pdf_analysis = []
|
@@ -621,6 +626,10 @@ def verify_property():
|
|
621 |
pdf_analysis.append(result)
|
622 |
if 'parallelization_info' in result:
|
623 |
pdf_parallel_info.append(result['parallelization_info'])
|
|
|
|
|
|
|
|
|
624 |
|
625 |
# Create consolidated text for analysis
|
626 |
consolidated_text = f"""
|
@@ -640,6 +649,16 @@ def verify_property():
|
|
640 |
Legal Details: {data['legal_details']}
|
641 |
"""
|
642 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
643 |
# Process description translation if needed
|
644 |
try:
|
645 |
description = data['description']
|
|
|
596 |
image_model_used.add(result['model_used'])
|
597 |
if 'parallelization_info' in result:
|
598 |
image_parallel_info.append(result['parallelization_info'])
|
599 |
+
|
600 |
+
# Add image count to data for cross-validation
|
601 |
+
data['image_count'] = len(images)
|
602 |
+
data['has_images'] = len(images) > 0
|
603 |
+
|
604 |
# Process PDFs in parallel
|
605 |
pdf_texts = []
|
606 |
pdf_analysis = []
|
|
|
626 |
pdf_analysis.append(result)
|
627 |
if 'parallelization_info' in result:
|
628 |
pdf_parallel_info.append(result['parallelization_info'])
|
629 |
+
|
630 |
+
# Add document count to data for cross-validation
|
631 |
+
data['document_count'] = len(pdf_texts)
|
632 |
+
data['has_documents'] = len(pdf_texts) > 0
|
633 |
|
634 |
# Create consolidated text for analysis
|
635 |
consolidated_text = f"""
|
|
|
649 |
Legal Details: {data['legal_details']}
|
650 |
"""
|
651 |
|
652 |
+
# Detect if this is a rental property
|
653 |
+
is_rental = any(keyword in data['status'].lower() for keyword in ['rent', 'lease', 'let', 'hiring'])
|
654 |
+
if not is_rental:
|
655 |
+
# Check description for rental keywords
|
656 |
+
is_rental = any(keyword in data['description'].lower() for keyword in ['rent', 'lease', 'let', 'hiring', 'monthly', 'per month'])
|
657 |
+
|
658 |
+
# Add rental detection to data
|
659 |
+
data['is_rental'] = is_rental
|
660 |
+
data['property_status'] = 'rental' if is_rental else 'sale'
|
661 |
+
|
662 |
# Process description translation if needed
|
663 |
try:
|
664 |
description = data['description']
|
models/cross_validation.py
CHANGED
@@ -747,7 +747,9 @@ def perform_cross_validation(data: Dict[str, Any]) -> List[Dict[str, Any]]:
|
|
747 |
|
748 |
# Document analysis - More lenient
|
749 |
documents = data.get('documents', [])
|
750 |
-
|
|
|
|
|
751 |
if check_files_exist(documents):
|
752 |
# Files exist but couldn't be analyzed
|
753 |
analysis_sections['documents'].append({
|
@@ -790,7 +792,20 @@ def perform_cross_validation(data: Dict[str, Any]) -> List[Dict[str, Any]]:
|
|
790 |
|
791 |
# Image analysis - More lenient
|
792 |
images = data.get('images', [])
|
793 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
794 |
if check_files_exist(images):
|
795 |
# Files exist but couldn't be analyzed
|
796 |
analysis_sections['documents'].append({
|
|
|
747 |
|
748 |
# Document analysis - More lenient
|
749 |
documents = data.get('documents', [])
|
750 |
+
has_documents = data.get('has_documents', False) or data.get('document_count', 0) > 0
|
751 |
+
|
752 |
+
if media_analysis['total_documents'] == 0 and not has_documents:
|
753 |
if check_files_exist(documents):
|
754 |
# Files exist but couldn't be analyzed
|
755 |
analysis_sections['documents'].append({
|
|
|
792 |
|
793 |
# Image analysis - More lenient
|
794 |
images = data.get('images', [])
|
795 |
+
has_images = data.get('has_images', False) or data.get('image_count', 0) > 0
|
796 |
+
|
797 |
+
# If images were uploaded but media analysis didn't detect them, consider them as valid
|
798 |
+
if has_images and media_analysis['total_images'] == 0:
|
799 |
+
# Images were uploaded but not analyzed by media analysis - this is normal
|
800 |
+
analysis_sections['documents'].append({
|
801 |
+
'check': 'images_validation',
|
802 |
+
'status': 'valid',
|
803 |
+
'message': 'Property images uploaded successfully.',
|
804 |
+
'details': f'{data.get("image_count", 0)} images were uploaded and processed.',
|
805 |
+
'severity': 'low',
|
806 |
+
'recommendation': 'Images are being analyzed.'
|
807 |
+
})
|
808 |
+
elif media_analysis['total_images'] == 0 and not has_images:
|
809 |
if check_files_exist(images):
|
810 |
# Files exist but couldn't be analyzed
|
811 |
analysis_sections['documents'].append({
|
models/parallel_processor.py
CHANGED
@@ -293,6 +293,7 @@ class ParallelProcessor:
|
|
293 |
"""Run price analysis"""
|
294 |
try:
|
295 |
from .price_analysis import analyze_price
|
|
|
296 |
return analyze_price(data, price_context, data.get('latitude'), data.get('longitude'), property_data)
|
297 |
except Exception as e:
|
298 |
logger.error(f"Error in price analysis: {str(e)}")
|
|
|
293 |
"""Run price analysis"""
|
294 |
try:
|
295 |
from .price_analysis import analyze_price
|
296 |
+
# Rental information is not a separate argument: analyze_price reads it from data.get('is_rental')
|
297 |
return analyze_price(data, price_context, data.get('latitude'), data.get('longitude'), property_data)
|
298 |
except Exception as e:
|
299 |
logger.error(f"Error in price analysis: {str(e)}")
|
models/price_analysis.py
CHANGED
@@ -537,12 +537,32 @@ def analyze_price(data, context_text=None, latitude=None, longitude=None, proper
|
|
537 |
|
538 |
city = data.get('city', '').strip() or 'Unknown'
|
539 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
540 |
# Calculate price per sq.ft properly
|
541 |
price_per_sqft = price / sq_ft if sq_ft > 0 else price
|
542 |
|
543 |
# Get market data for comparison
|
544 |
market_data = get_hyderabad_market_data()
|
545 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
546 |
|
547 |
# Calculate deviation from market average
|
548 |
if market_avg > 0:
|
@@ -551,52 +571,101 @@ def analyze_price(data, context_text=None, latitude=None, longitude=None, proper
|
|
551 |
deviation = 0
|
552 |
|
553 |
# Determine assessment based on deviation and price reasonableness - Much more lenient
|
554 |
-
if
|
555 |
-
|
556 |
-
|
557 |
-
|
558 |
-
|
559 |
-
|
560 |
-
|
561 |
-
|
562 |
-
|
563 |
-
|
564 |
-
|
565 |
-
|
566 |
-
|
567 |
-
|
568 |
-
|
569 |
-
|
570 |
-
|
571 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
572 |
|
573 |
# Generate risk indicators - Much more lenient
|
574 |
risk_indicators = []
|
575 |
-
if
|
576 |
-
|
577 |
-
|
578 |
-
|
579 |
-
|
580 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
581 |
|
582 |
# Price ranges for the city - Much more lenient
|
583 |
-
|
584 |
-
|
585 |
-
'
|
586 |
-
|
587 |
-
|
588 |
-
|
589 |
-
|
590 |
-
'
|
591 |
-
|
592 |
-
|
593 |
-
|
594 |
-
|
595 |
-
'
|
596 |
-
|
597 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
598 |
}
|
599 |
-
}
|
600 |
|
601 |
# Determine price range
|
602 |
price_range = "unknown"
|
@@ -616,6 +685,7 @@ def analyze_price(data, context_text=None, latitude=None, longitude=None, proper
|
|
616 |
'location_price_assessment': f"Price analysis: {assessment}",
|
617 |
'has_price': True,
|
618 |
'has_sqft': True,
|
|
|
619 |
'market_trends': {
|
620 |
'trend': market_data.get('market_trend', 'unknown') if market_data else 'unknown',
|
621 |
'growth_rate': market_data.get('growth_rate', 0) if market_data else 0,
|
@@ -626,7 +696,7 @@ def analyze_price(data, context_text=None, latitude=None, longitude=None, proper
|
|
626 |
'price_factors': {
|
627 |
'market_average': market_avg,
|
628 |
'deviation_percentage': deviation,
|
629 |
-
'price_reasonableness': 'suspicious' if price_per_sqft <
|
630 |
},
|
631 |
'risk_indicators': risk_indicators,
|
632 |
'market_average': market_avg,
|
@@ -649,6 +719,7 @@ def analyze_price(data, context_text=None, latitude=None, longitude=None, proper
|
|
649 |
'location_price_assessment': 'unknown',
|
650 |
'has_price': False,
|
651 |
'has_sqft': False,
|
|
|
652 |
'market_trends': {},
|
653 |
'price_factors': {},
|
654 |
'risk_indicators': [],
|
|
|
537 |
|
538 |
city = data.get('city', '').strip() or 'Unknown'
|
539 |
|
540 |
+
# Detect if this is a rental property
|
541 |
+
is_rental = data.get('is_rental', False)
|
542 |
+
if not is_rental:
|
543 |
+
# Check status and description for rental keywords
|
544 |
+
status = data.get('status', '').lower()
|
545 |
+
description = data.get('description', '').lower()
|
546 |
+
is_rental = any(keyword in status for keyword in ['rent', 'lease', 'let', 'hiring']) or \
|
547 |
+
any(keyword in description for keyword in ['rent', 'lease', 'let', 'hiring', 'monthly', 'per month'])
|
548 |
+
|
549 |
# Calculate price per sq.ft properly
|
550 |
price_per_sqft = price / sq_ft if sq_ft > 0 else price
|
551 |
|
552 |
# Get market data for comparison
|
553 |
market_data = get_hyderabad_market_data()
|
554 |
+
|
555 |
+
# Adjust market data based on rental vs purchase
|
556 |
+
if is_rental:
|
557 |
+
# For rental properties, use monthly rental rates
|
558 |
+
market_avg = 25 # ₹25/sq ft/month average for Hyderabad rentals
|
559 |
+
market_min = 15 # ₹15/sq ft/month minimum
|
560 |
+
market_max = 80 # ₹80/sq ft/month maximum
|
561 |
+
else:
|
562 |
+
# For purchase properties, use purchase rates
|
563 |
+
market_avg = market_data.get('avg_price_per_sqft', 8500) if market_data else 8500
|
564 |
+
market_min = market_data.get('min_price_per_sqft', 4500) if market_data else 4500
|
565 |
+
market_max = market_data.get('max_price_per_sqft', 25000) if market_data else 25000
|
566 |
|
567 |
# Calculate deviation from market average
|
568 |
if market_avg > 0:
|
|
|
571 |
deviation = 0
|
572 |
|
573 |
# Determine assessment based on deviation and price reasonableness - Much more lenient
|
574 |
+
if is_rental:
|
575 |
+
# Rental property pricing logic
|
576 |
+
if price_per_sqft < 5: # Extremely low rental price
|
577 |
+
assessment = "suspicious_pricing"
|
578 |
+
confidence = 0.3 # Increased from 0.2
|
579 |
+
elif price_per_sqft < market_avg * 0.3: # Very below market rental
|
580 |
+
assessment = "below_market"
|
581 |
+
confidence = 0.5 # Increased from 0.4
|
582 |
+
elif price_per_sqft < market_avg * 0.7: # Below market rental
|
583 |
+
assessment = "below_market"
|
584 |
+
confidence = 0.8 # Increased from 0.7
|
585 |
+
elif price_per_sqft <= market_avg * 1.5: # Market rate rental
|
586 |
+
assessment = "market_rate"
|
587 |
+
confidence = 0.9 # Increased from 0.8
|
588 |
+
elif price_per_sqft <= market_avg * 2.0: # Above market rental
|
589 |
+
assessment = "above_market"
|
590 |
+
confidence = 0.8 # Increased from 0.7
|
591 |
+
else: # Very above market rental
|
592 |
+
assessment = "premium_pricing"
|
593 |
+
confidence = 0.6 # Increased from 0.5
|
594 |
+
else:
|
595 |
+
# Purchase property pricing logic (existing logic)
|
596 |
+
if price_per_sqft < 50: # Extremely low price - threshold reduced from 100
|
597 |
+
assessment = "suspicious_pricing"
|
598 |
+
confidence = 0.2 # Increased from 0.1
|
599 |
+
elif price_per_sqft < market_avg * 0.2: # Very below market - reduced from 0.3
|
600 |
+
assessment = "below_market"
|
601 |
+
confidence = 0.4 # Increased from 0.3
|
602 |
+
elif price_per_sqft < market_avg * 0.6: # Below market - reduced from 0.7
|
603 |
+
assessment = "below_market"
|
604 |
+
confidence = 0.7 # Increased from 0.6
|
605 |
+
elif price_per_sqft <= market_avg * 1.5: # Market rate - increased from 1.3
|
606 |
+
assessment = "market_rate"
|
607 |
+
confidence = 0.9 # Increased from 0.8
|
608 |
+
elif price_per_sqft <= market_avg * 2.5: # Above market - increased from 2.0
|
609 |
+
assessment = "above_market"
|
610 |
+
confidence = 0.8 # Increased from 0.7
|
611 |
+
else: # Very above market
|
612 |
+
assessment = "premium_pricing"
|
613 |
+
confidence = 0.6 # Increased from 0.5
|
614 |
|
615 |
# Generate risk indicators - Much more lenient
|
616 |
risk_indicators = []
|
617 |
+
if is_rental:
|
618 |
+
if price_per_sqft < 5: # Extremely low rental price (same threshold as the rental assessment)
|
619 |
+
risk_indicators.append("⚠️ Property priced extremely low (suspicious)")
|
620 |
+
elif price_per_sqft < market_avg * 0.3: # Very below market rental
|
621 |
+
risk_indicators.append("⚠️ Property priced significantly below market average")
|
622 |
+
elif price_per_sqft > market_avg * 2.0: # Very above market rental
|
623 |
+
risk_indicators.append("⚠️ Property priced significantly above market average")
|
624 |
+
else:
|
625 |
+
if price_per_sqft < 50: # Threshold reduced from 100
|
626 |
+
risk_indicators.append("⚠️ Property priced extremely low (suspicious)")
|
627 |
+
elif price_per_sqft < market_avg * 0.2: # Reduced from 0.3
|
628 |
+
risk_indicators.append("⚠️ Property priced significantly below market average")
|
629 |
+
elif price_per_sqft > market_avg * 2.5: # Increased from 2.0
|
630 |
+
risk_indicators.append("⚠️ Property priced significantly above market average")
|
631 |
|
632 |
# Price ranges for the city - Much more lenient
|
633 |
+
if is_rental:
|
634 |
+
price_ranges = {
|
635 |
+
'budget': {
|
636 |
+
'min': market_avg * 0.4, # Reduced from 0.5
|
637 |
+
'max': market_avg * 0.8,
|
638 |
+
'description': f'Budget rental properties in {city}'
|
639 |
+
},
|
640 |
+
'mid_range': {
|
641 |
+
'min': market_avg * 0.8,
|
642 |
+
'max': market_avg * 1.4, # Increased from 1.2
|
643 |
+
'description': f'Mid-range rental properties in {city}'
|
644 |
+
},
|
645 |
+
'premium': {
|
646 |
+
'min': market_avg * 1.4, # Increased from 1.2
|
647 |
+
'max': market_avg * 2.5, # Increased from 2.0
|
648 |
+
'description': f'Premium rental properties in {city}'
|
649 |
+
}
|
650 |
+
}
|
651 |
+
else:
|
652 |
+
price_ranges = {
|
653 |
+
'budget': {
|
654 |
+
'min': market_avg * 0.3, # Reduced from 0.5
|
655 |
+
'max': market_avg * 0.8,
|
656 |
+
'description': f'Budget properties in {city}'
|
657 |
+
},
|
658 |
+
'mid_range': {
|
659 |
+
'min': market_avg * 0.8,
|
660 |
+
'max': market_avg * 1.4, # Increased from 1.2
|
661 |
+
'description': f'Mid-range properties in {city}'
|
662 |
+
},
|
663 |
+
'premium': {
|
664 |
+
'min': market_avg * 1.4, # Increased from 1.2
|
665 |
+
'max': market_avg * 2.5, # Increased from 2.0
|
666 |
+
'description': f'Premium properties in {city}'
|
667 |
+
}
|
668 |
}
|
|
|
669 |
|
670 |
# Determine price range
|
671 |
price_range = "unknown"
|
|
|
685 |
'location_price_assessment': f"Price analysis: {assessment}",
|
686 |
'has_price': True,
|
687 |
'has_sqft': True,
|
688 |
+
'is_rental': is_rental,
|
689 |
'market_trends': {
|
690 |
'trend': market_data.get('market_trend', 'unknown') if market_data else 'unknown',
|
691 |
'growth_rate': market_data.get('growth_rate', 0) if market_data else 0,
|
|
|
696 |
'price_factors': {
|
697 |
'market_average': market_avg,
|
698 |
'deviation_percentage': deviation,
|
699 |
+
'price_reasonableness': 'suspicious' if price_per_sqft < (5 if is_rental else 50) else 'reasonable'
|
700 |
},
|
701 |
'risk_indicators': risk_indicators,
|
702 |
'market_average': market_avg,
|
|
|
719 |
'location_price_assessment': 'unknown',
|
720 |
'has_price': False,
|
721 |
'has_sqft': False,
|
722 |
+
'is_rental': False,
|
723 |
'market_trends': {},
|
724 |
'price_factors': {},
|
725 |
'risk_indicators': [],
|