Spaces:

sksameermujahid
/

propertyverification

Sleeping

App Files Files Community

propertyverification / models /trust_score.py

sksameermujahid

Upload 22 files

ebb3d5e verified 8 days ago

raw

history blame contribute delete

9.59 kB

	# models/trust_score.py

	from .model_loader import load_model
	from .logging_config import logger
	import re

	# Keyword tables used by the scoring heuristics (all lower-case, matched
	# against the lower-cased listing text).
	_POSITIVE_INDICATORS = (
	    'apartment', 'flat', 'house', 'villa', 'bungalow', 'property', 'real estate',
	    'bedroom', 'bathroom', 'kitchen', 'living', 'dining', 'balcony', 'parking',
	    'amenities', 'facilities', 'security', 'lift', 'gym', 'pool', 'garden',
	    'hyderabad', 'mumbai', 'delhi', 'bangalore', 'chennai', 'kolkata', 'pune',
	    'verified', 'authentic', 'genuine', 'legitimate', 'original', 'certified',
	    'pg', 'hostel', 'office', 'commercial', 'retail', 'warehouse', 'industrial',
	)
	_NEGATIVE_INDICATORS = (
	    'fake', 'fraud', 'scam', 'suspicious', 'doubtful', 'unverified', 'unauthentic',
	    'illegal', 'unauthorized', 'forged', 'counterfeit', 'bogus', 'phony',
	)
	_MAJOR_CITIES = ('hyderabad', 'mumbai', 'delhi', 'bangalore')
	_PROPERTY_TYPES = ('apartment', 'flat', 'house', 'villa', 'bungalow', 'pg', 'office')
	_AMENITIES = ('pool', 'gym', 'garden', 'parking', 'security', 'lift', 'balcony')
	_NUMERIC_FIELDS = ('property_name', 'bedrooms', 'bathrooms', 'sq_ft', 'market_value')

	# The whole description is nothing but a 1-3 digit number.
	_BARE_NUMBER_RE = re.compile(r'\d{1,3}')
	# Stand-alone integers, used for the "same number repeated" check.
	_NUMBER_RE = re.compile(r'\b\d+\b')
	# A price of exactly 1 or 2 rupees: the digit must not continue into a larger
	# number ("₹1,50,000", "₹10", "₹1.5") nor carry a magnitude word ("₹2 crore").
	_LOW_PRICE_RE = re.compile(
	    r'₹\s*[12](?![.,]?\d)(?!\s*(?:k|l|lakhs?|lacs?|cr|crores?|m|mn|million)\b)'
	)
	# An area of exactly 1 or 2 "sq ...": the digit must not be the tail of a
	# larger number ("1201 sq ft", "1.2 sq").
	_SMALL_SIZE_RE = re.compile(r'(?<![\d.,])[12]\s+sq')


	def _contains_term(text_lower, term):
	    """Return True if *term* occurs in *text_lower* without a letter glued in front.

	    The letter guard stops "unverified" from also counting as "verified" and
	    "upgrade" from counting as "pg", while still accepting plurals and
	    digit-prefixed forms such as "3bedroom".
	    """
	    return re.search(r'(?<![a-z])' + re.escape(term), text_lower) is not None


	def _count_terms(text_lower, terms):
	    """Return how many distinct entries of *terms* occur in *text_lower*."""
	    return sum(1 for term in terms if _contains_term(text_lower, term))


	def _count_property_related(analysis):
	    """Return the number of entries whose 'is_property_related' value is truthy.

	    Accepts either a list/tuple of result dicts or a single result dict;
	    entries that are not dicts are ignored instead of raising.
	    """
	    entries = analysis if isinstance(analysis, (list, tuple)) else [analysis]
	    return sum(
	        1 for entry in entries
	        if isinstance(entry, dict) and entry.get('is_property_related', False)
	    )


	def _count_numeric_only_fields(text_lower):
	    """Return how many major field names are directly followed by a 1-2 digit value.

	    NOTE(review): assumes the text embeds fields as "name: value" style pairs
	    (at most a few punctuation/space characters between name and value) --
	    confirm against the caller that builds the text.
	    """
	    return sum(
	        1 for field in _NUMERIC_FIELDS
	        if re.search(re.escape(field) + r'\W{0,4}\d{1,2}\b(?![.,]\d)', text_lower)
	    )


	def generate_trust_score(text, image_analysis, pdf_analysis):
	    """Compute a heuristic trust score (0-100) for a property listing.

	    The score starts at 50 and is adjusted by keyword, length, image and
	    document heuristics. Fake-data checks run first; when any of them fires,
	    the text-based bonuses are withheld.

	    Args:
	        text: Listing description. ``None`` is treated as an empty string and
	            any other non-string value is converted with ``str()``.
	        image_analysis: Falsy for "no images", otherwise a list of per-image
	            result dicts (or a single dict) read via the
	            ``'is_property_related'`` key.
	        pdf_analysis: Same shape as ``image_analysis`` but for documents.

	    Returns:
	        A ``(trust_score, reasoning)`` tuple: the clamped float score and a
	        sentence-joined explanation. If an unexpected error occurs,
	        ``(35.0, "Trust analysis failed: ...")`` is returned instead of raising.
	    """
	    try:
	        description = '' if text is None else str(text)
	        text_lower = description.lower()

	        trust_score = 50.0
	        reasoning_parts = []
	        fake_detected = False

	        # --- Fake-data detection (runs before any bonus is granted) ---
	        if _BARE_NUMBER_RE.fullmatch(description.strip()):
	            fake_detected = True
	            trust_score -= 10
	            reasoning_parts.append("Detected suspicious number patterns")

	        # Five or more numbers that are all identical (e.g. "2, 2, 2, 2, 2").
	        numbers = _NUMBER_RE.findall(text_lower)
	        if len(numbers) >= 5 and len(set(numbers)) == 1:
	            fake_detected = True
	            trust_score -= 15
	            reasoning_parts.append("Detected repeated number patterns (likely fake data)")

	        if _LOW_PRICE_RE.search(text_lower):
	            fake_detected = True
	            trust_score -= 20
	            reasoning_parts.append("Detected suspiciously low pricing")

	        if _SMALL_SIZE_RE.search(text_lower):
	            fake_detected = True
	            trust_score -= 15
	            reasoning_parts.append("Detected suspiciously small property size")

	        if _count_numeric_only_fields(text_lower) >= 4:
	            fake_detected = True
	            trust_score -= 25
	            reasoning_parts.append("Multiple fields contain only numbers (highly suspicious)")

	        # --- Keyword indicators ---
	        positive_count = _count_terms(text_lower, _POSITIVE_INDICATORS)
	        negative_count = _count_terms(text_lower, _NEGATIVE_INDICATORS)

	        if positive_count > 0 and not fake_detected:
	            trust_score += min(25, positive_count * 4)
	            reasoning_parts.append(f"Found {positive_count} positive trust indicators")

	        if negative_count > 0:
	            trust_score -= min(20, negative_count * 4)
	            reasoning_parts.append(f"Found {negative_count} negative trust indicators")

	        # --- Image analysis contribution ---
	        if image_analysis:
	            related_images = _count_property_related(image_analysis)
	            if related_images > 0:
	                trust_score += min(20, related_images * 5)
	                reasoning_parts.append(f"Property has {related_images} property-related images")
	            else:
	                trust_score -= 10
	                reasoning_parts.append("No property-related images detected")

	            if related_images >= 3:
	                trust_score += 12
	                reasoning_parts.append("Multiple property images provided")

	        # --- PDF analysis contribution ---
	        if pdf_analysis:
	            related_docs = _count_property_related(pdf_analysis)
	            if related_docs > 0:
	                trust_score += min(20, related_docs * 6)
	                reasoning_parts.append(f"Property has {related_docs} property-related documents")
	            else:
	                trust_score -= 8
	                reasoning_parts.append("No property-related documents detected")

	            if related_docs >= 2:
	                trust_score += 8
	                reasoning_parts.append("Multiple supporting documents provided")

	        # --- Text quality: short text is penalised even when flagged as fake,
	        # long text is only rewarded when nothing looked fake ---
	        description_length = len(description)
	        if description_length < 50:
	            trust_score -= 10
	            reasoning_parts.append("Very short property description")
	        elif not fake_detected:
	            if description_length > 200:
	                trust_score += 15
	                reasoning_parts.append("Detailed property description provided")
	            elif description_length > 100:
	                trust_score += 10
	                reasoning_parts.append("Adequate property description provided")

	        # --- Location, property type and amenities bonuses ---
	        if not fake_detected:
	            if any(_contains_term(text_lower, city) for city in _MAJOR_CITIES):
	                trust_score += 8
	                reasoning_parts.append("Property in major city")

	            if any(_contains_term(text_lower, kind) for kind in _PROPERTY_TYPES):
	                trust_score += 6
	                reasoning_parts.append("Clear property type mentioned")

	            amenities_count = _count_terms(text_lower, _AMENITIES)
	            if amenities_count > 0:
	                trust_score += min(12, amenities_count * 3)
	                reasoning_parts.append(f"Property has {amenities_count} amenities mentioned")

	        # Listings that supplied any image or document never drop below 20.
	        if trust_score < 20 and (image_analysis or pdf_analysis):
	            trust_score = 20.0

	        trust_score = max(0.0, min(100.0, trust_score))

	        if reasoning_parts:
	            reasoning = ". ".join(reasoning_parts) + "."
	        else:
	            reasoning = "Basic trust assessment completed."

	        return trust_score, reasoning

	    except Exception as e:
	        # Best-effort boundary: callers always receive a (score, reasoning) tuple.
	        logger.error(f"Error in trust score generation: {str(e)}")
	        return 35.0, f"Trust analysis failed: {str(e)}"