|
|
|
|
|
from .model_loader import load_model |
|
from .logging_config import logger |
|
import re |
|
|
|
# Keyword tables used by generate_trust_score. Matching is plain substring
# containment on the lower-cased text.
# NOTE: because matching is by substring, "unverified" also counts as the
# positive indicator "verified" (and "unauthentic" as "authentic").
_POSITIVE_INDICATORS = (
    'apartment', 'flat', 'house', 'villa', 'bungalow', 'property', 'real estate',
    'bedroom', 'bathroom', 'kitchen', 'living', 'dining', 'balcony', 'parking',
    'amenities', 'facilities', 'security', 'lift', 'gym', 'pool', 'garden',
    'hyderabad', 'mumbai', 'delhi', 'bangalore', 'chennai', 'kolkata', 'pune',
    'verified', 'authentic', 'genuine', 'legitimate', 'original', 'certified',
    'pg', 'hostel', 'office', 'commercial', 'retail', 'warehouse', 'industrial',
)
_NEGATIVE_INDICATORS = (
    'fake', 'fraud', 'scam', 'suspicious', 'doubtful', 'unverified', 'unauthentic',
    'illegal', 'unauthorized', 'forged', 'counterfeit', 'bogus', 'phony',
)
_MAJOR_CITIES = ('hyderabad', 'mumbai', 'delhi', 'bangalore')
_PROPERTY_TYPES = ('apartment', 'flat', 'house', 'villa', 'bungalow', 'pg', 'office')
_AMENITIES = ('pool', 'gym', 'garden', 'parking', 'security', 'lift', 'balcony')
_NUMERIC_FIELDS = ('property_name', 'bedrooms', 'bathrooms', 'sq_ft', 'market_value')

_DIGITS_ONLY_RE = re.compile(r'^\d+$')
_STANDALONE_NUMBER_RE = re.compile(r'\b\d+\b')
_SHORT_NUMBER_RE = re.compile(r'\b\d{1,2}\b')
# A price of exactly 1 or 2 rupees: the digit must not continue into a larger
# number ("₹25,00,000", "₹1.5").
_LOW_PRICE_RE = re.compile(r'₹[12](?!\d|[.,]\d)')
# A size of exactly 1 or 2 "sq ...": the digit must not be the tail of a
# larger number ("852 sq ft", "1,201 sq ft").
_TINY_SIZE_RE = re.compile(r'(?<!\d)(?<!\d[.,])[12] sq')


def _count_property_related(analysis):
    """Count entries of *analysis* whose ``is_property_related`` value is truthy.

    *analysis* may be a single dict-like entry or an iterable of entries.
    Entries without a ``get`` method are ignored instead of raising.
    """
    if callable(getattr(analysis, 'get', None)):
        # A lone mapping: treat it as one entry rather than iterating its keys.
        entries = [analysis]
    else:
        try:
            entries = list(analysis)
        except TypeError:
            entries = [analysis]

    count = 0
    for entry in entries:
        getter = getattr(entry, 'get', None)
        if callable(getter) and getter('is_property_related', False):
            count += 1
    return count


def generate_trust_score(text, image_analysis, pdf_analysis):
    """Compute a heuristic trust score for a property listing.

    The score starts at 50 and is adjusted by keyword, pattern, attachment and
    description-length heuristics, then clamped to the range 0-100.

    Args:
        text: Listing description. ``None`` is treated as an empty string and
            any other non-string value is converted with ``str()``.
        image_analysis: A dict-like entry or an iterable of such entries; only
            the ``is_property_related`` key is read. Falsy values skip the
            image heuristics.
        pdf_analysis: Same shape as ``image_analysis``, for documents.

    Returns:
        A ``(trust_score, reasoning)`` tuple: a float in ``[0, 100]`` and a
        sentence-joined explanation of the adjustments applied. If an
        unexpected exception occurs it is logged and
        ``(35.0, "Trust analysis failed: ...")`` is returned instead.
    """
    try:
        trust_score = 50.0
        reasoning_parts = []

        # Normalise once so every later check works on a real string.
        text = '' if text is None else str(text)
        text_lower = text.lower()

        fake_detected = False

        # A description consisting solely of a very short number.
        if _DIGITS_ONLY_RE.search(text_lower) and len(text.strip()) <= 3:
            fake_detected = True
            trust_score -= 10
            reasoning_parts.append("Detected suspicious number patterns")

        # Five or more standalone numbers that are all the same value.
        numbers = _STANDALONE_NUMBER_RE.findall(text_lower)
        if len(numbers) >= 5 and len(set(numbers)) <= 1:
            fake_detected = True
            trust_score -= 15
            reasoning_parts.append("Detected repeated number patterns (likely fake data)")

        if _LOW_PRICE_RE.search(text_lower):
            fake_detected = True
            trust_score -= 20
            reasoning_parts.append("Detected suspiciously low pricing")

        if _TINY_SIZE_RE.search(text_lower):
            fake_detected = True
            trust_score -= 15
            reasoning_parts.append("Detected suspiciously small property size")

        positive_count = sum(1 for word in _POSITIVE_INDICATORS if word in text_lower)
        negative_count = sum(1 for word in _NEGATIVE_INDICATORS if word in text_lower)

        # Positive keywords only count when nothing looked fake.
        if positive_count > 0 and not fake_detected:
            trust_score += min(25, positive_count * 4)
            reasoning_parts.append(f"Found {positive_count} positive trust indicators")

        if negative_count > 0:
            trust_score -= min(20, negative_count * 4)
            reasoning_parts.append(f"Found {negative_count} negative trust indicators")

        # Image contribution.
        if image_analysis:
            property_related_count = _count_property_related(image_analysis)
            if property_related_count > 0:
                trust_score += min(20, property_related_count * 5)
                reasoning_parts.append(f"Property has {property_related_count} property-related images")
            else:
                trust_score -= 10
                reasoning_parts.append("No property-related images detected")

            if property_related_count >= 3:
                trust_score += 12
                reasoning_parts.append("Multiple property images provided")

        # Document contribution.
        if pdf_analysis:
            property_related_docs = _count_property_related(pdf_analysis)
            if property_related_docs > 0:
                trust_score += min(20, property_related_docs * 6)
                reasoning_parts.append(f"Property has {property_related_docs} property-related documents")
            else:
                trust_score -= 8
                reasoning_parts.append("No property-related documents detected")

            if property_related_docs >= 2:
                trust_score += 8
                reasoning_parts.append("Multiple supporting documents provided")

        # Description length. Bonuses are withheld when fake data was detected;
        # the short-description penalty applies regardless.
        if text and len(text) > 200 and not fake_detected:
            trust_score += 15
            reasoning_parts.append("Detailed property description provided")
        elif text and len(text) > 100 and not fake_detected:
            trust_score += 10
            reasoning_parts.append("Adequate property description provided")
        elif len(text) < 50:
            trust_score -= 10
            reasoning_parts.append("Very short property description")

        if not fake_detected:
            if any(city in text_lower for city in _MAJOR_CITIES):
                trust_score += 8
                reasoning_parts.append("Property in major city")

            if any(prop_type in text_lower for prop_type in _PROPERTY_TYPES):
                trust_score += 6
                reasoning_parts.append("Clear property type mentioned")

        amenities_count = sum(1 for amenity in _AMENITIES if amenity in text_lower)
        if amenities_count > 0 and not fake_detected:
            trust_score += min(12, amenities_count * 3)
            reasoning_parts.append(f"Property has {amenities_count} amenities mentioned")

        # Counts field names present in the text, but only when the text also
        # contains at least one standalone 1-2 digit number somewhere.
        if _SHORT_NUMBER_RE.search(text_lower):
            numeric_count = sum(1 for field in _NUMERIC_FIELDS if field in text_lower)
        else:
            numeric_count = 0

        if numeric_count >= 4:
            trust_score -= 25
            reasoning_parts.append("Multiple fields contain only numbers (highly suspicious)")

        # Listings that supplied any image or document never drop below 20.
        if trust_score < 20 and (image_analysis or pdf_analysis):
            trust_score = 20.0

        # Clamp with float bounds so the result is always a float.
        trust_score = max(0.0, min(100.0, trust_score))

        if reasoning_parts:
            reasoning = ". ".join(reasoning_parts) + "."
        else:
            reasoning = "Basic trust assessment completed."

        return trust_score, reasoning

    except Exception as e:
        # Boundary handler: callers always get a (score, reasoning) tuple.
        logger.error(f"Error in trust score generation: {str(e)}")
        return 35.0, f"Trust analysis failed: {str(e)}"
|
|