Upload app.py
Browse files
app.py
CHANGED
@@ -15,6 +15,8 @@ from datetime import datetime
|
|
15 |
# from deep_translator import GoogleTranslator
|
16 |
from models.logging_config import logger
|
17 |
from models.model_loader import load_model, clear_model_cache
|
|
|
|
|
18 |
from models.image_analysis import analyze_image
|
19 |
from models.pdf_analysis import extract_pdf_text, analyze_pdf_content
|
20 |
from models.property_summary import generate_property_summary
|
@@ -43,28 +45,16 @@ CORS(app) # Enable CORS for frontend
|
|
43 |
geocoder = Nominatim(user_agent="indian_property_verifier", timeout=10)
|
44 |
|
45 |
# Pre-load models to avoid loading delays during requests
|
|
|
46 |
def preload_models():
|
47 |
"""Pre-load essential models to improve response times."""
|
48 |
try:
|
49 |
-
logger.info("Pre-loading essential models...")
|
50 |
|
51 |
-
#
|
52 |
-
|
53 |
-
futures = {
|
54 |
-
executor.submit(load_model, "zero-shot-classification"): "zero-shot-classification",
|
55 |
-
executor.submit(load_model, "summarization"): "summarization",
|
56 |
-
executor.submit(load_model, "text-classification"): "text-classification"
|
57 |
-
}
|
58 |
-
|
59 |
-
for future in concurrent.futures.as_completed(futures, timeout=60):
|
60 |
-
model_type = futures[future]
|
61 |
-
try:
|
62 |
-
model = future.result()
|
63 |
-
logger.info(f"Successfully pre-loaded {model_type} model")
|
64 |
-
except Exception as e:
|
65 |
-
logger.warning(f"Failed to pre-load {model_type} model: {str(e)}")
|
66 |
|
67 |
-
logger.info("Model pre-loading completed")
|
68 |
except Exception as e:
|
69 |
logger.error(f"Error during model pre-loading: {str(e)}")
|
70 |
|
@@ -189,6 +179,42 @@ def get_location():
|
|
189 |
'message': str(e)
|
190 |
}), 500
|
191 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
192 |
def calculate_final_verdict(results):
|
193 |
"""
|
194 |
Calculate a comprehensive final verdict based on all analysis results.
|
@@ -198,140 +224,255 @@ def calculate_final_verdict(results):
|
|
198 |
try:
|
199 |
# Defensive: ensure results is a dict
|
200 |
if not isinstance(results, dict):
|
201 |
-
logger.warning(f"
|
202 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
203 |
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
215 |
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
220 |
|
221 |
-
|
222 |
-
if check.get('status') in ['invalid', 'suspicious', 'inconsistent']:
|
223 |
-
red_flags_count += 1
|
224 |
-
if check.get('status') == 'invalid':
|
225 |
-
critical_issues.append(check.get('message', 'Invalid data detected'))
|
226 |
-
else:
|
227 |
-
warnings.append(check.get('message', 'Suspicious data detected'))
|
228 |
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
233 |
|
234 |
-
if
|
235 |
-
|
236 |
-
warnings.append("Suspicious pricing detected")
|
237 |
|
238 |
-
if
|
239 |
-
|
240 |
-
warnings.append("No documents provided")
|
241 |
|
242 |
-
if
|
243 |
-
|
244 |
-
warnings.append("No images provided")
|
245 |
|
246 |
-
|
247 |
-
|
248 |
-
'trust': max(0, trust_score - (red_flags_count * 10)),
|
249 |
-
'fraud': max(0, 100 - (fraud_classification.get('alert_score', 0) * 100) - (red_flags_count * 15)),
|
250 |
-
'quality': max(0, quality_assessment.get('score', 0) - (red_flags_count * 5)),
|
251 |
-
'specs': max(0, specs_verification.get('verification_score', 0) - (red_flags_count * 10)),
|
252 |
-
'location': max(0, location_analysis.get('completeness_score', 0) - (red_flags_count * 5)),
|
253 |
-
'price': max(0, (price_analysis.get('confidence', 0) * 100) - (red_flags_count * 10)) if price_analysis.get('has_price') else 0,
|
254 |
-
'legal': max(0, legal_analysis.get('completeness_score', 0) - (red_flags_count * 5)),
|
255 |
-
'documents': max(0, min(100, (document_analysis.get('pdf_count', 0) / 3) * 100) - (red_flags_count * 10)),
|
256 |
-
'images': max(0, min(100, (image_analysis.get('image_count', 0) / 5) * 100) - (red_flags_count * 10))
|
257 |
-
}
|
258 |
|
259 |
-
|
260 |
-
|
261 |
-
'trust': 0.20,
|
262 |
-
'fraud': 0.25, # Increased weight for fraud detection
|
263 |
-
'quality': 0.15,
|
264 |
-
'specs': 0.10,
|
265 |
-
'location': 0.10,
|
266 |
-
'price': 0.05,
|
267 |
-
'legal': 0.05,
|
268 |
-
'documents': 0.05,
|
269 |
-
'images': 0.05
|
270 |
-
}
|
271 |
|
272 |
-
|
|
|
273 |
|
274 |
-
|
275 |
-
|
276 |
|
277 |
-
|
278 |
-
|
279 |
-
high_risk_indicators = len(fraud_classification.get('high_risk', []))
|
280 |
-
medium_risk_indicators = len(fraud_classification.get('medium_risk', []))
|
281 |
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
elif fraud_level in ['critical', 'high'] or high_risk_indicators > 0:
|
287 |
-
verdict = 'fraudulent'
|
288 |
-
confidence = 'high'
|
289 |
-
elif fraud_level == 'medium' or medium_risk_indicators > 2 or red_flags_count >= 3:
|
290 |
-
verdict = 'suspicious'
|
291 |
-
confidence = 'medium'
|
292 |
-
elif final_score >= 80 and red_flags_count <= 1:
|
293 |
-
verdict = 'legitimate'
|
294 |
-
confidence = 'high'
|
295 |
-
elif final_score >= 60 and red_flags_count <= 2:
|
296 |
-
verdict = 'legitimate'
|
297 |
-
confidence = 'medium'
|
298 |
-
elif final_score >= 40 and red_flags_count <= 3:
|
299 |
-
verdict = 'suspicious'
|
300 |
-
confidence = 'medium'
|
301 |
-
else:
|
302 |
-
verdict = 'suspicious'
|
303 |
-
confidence = 'high'
|
304 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
305 |
return {
|
306 |
'verdict': verdict,
|
307 |
-
'confidence':
|
308 |
-
'
|
309 |
-
'
|
310 |
-
'
|
311 |
-
'
|
312 |
-
'
|
313 |
-
'
|
314 |
-
'
|
315 |
-
|
316 |
-
|
317 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
318 |
}
|
|
|
319 |
except Exception as e:
|
320 |
logger.error(f"Error calculating final verdict: {str(e)}")
|
321 |
return {
|
322 |
-
'verdict': '
|
323 |
-
'confidence':
|
324 |
-
'
|
325 |
-
'
|
326 |
-
'
|
327 |
-
'risk_indicators': {'high': 0, 'medium': 0, 'total_red_flags': 0},
|
328 |
-
'critical_issues': ['Error in calculation'],
|
329 |
-
'warnings': ['System error occurred']
|
330 |
}
|
331 |
|
332 |
@app.route('/verify', methods=['POST'])
|
333 |
def verify_property():
|
334 |
try:
|
|
|
|
|
335 |
if not request.form and not request.files:
|
336 |
logger.warning("No form data or files provided")
|
337 |
return jsonify({
|
@@ -374,51 +515,46 @@ def verify_property():
|
|
374 |
'status': 'error'
|
375 |
}), 400
|
376 |
|
377 |
-
# Process images
|
378 |
images = []
|
379 |
image_analysis = []
|
380 |
if 'images' in request.files:
|
381 |
-
|
382 |
-
image_files = {}
|
383 |
for img_file in request.files.getlist('images'):
|
384 |
if img_file.filename and img_file.filename.lower().endswith(('.jpg', '.jpeg', '.png')):
|
385 |
-
image_files
|
386 |
-
|
387 |
-
|
388 |
-
|
389 |
-
|
390 |
-
|
391 |
-
|
392 |
-
|
393 |
-
|
394 |
-
|
395 |
-
image_analysis.append(
|
396 |
-
|
397 |
-
|
398 |
-
image_analysis.append({'error': str(e), 'is_property_related': False})
|
399 |
-
|
400 |
-
# Process PDFs
|
401 |
pdf_texts = []
|
402 |
pdf_analysis = []
|
403 |
if 'documents' in request.files:
|
404 |
-
|
405 |
-
pdf_files = {}
|
406 |
for pdf_file in request.files.getlist('documents'):
|
407 |
if pdf_file.filename and pdf_file.filename.lower().endswith('.pdf'):
|
408 |
-
pdf_files
|
409 |
-
|
410 |
-
|
411 |
-
|
412 |
-
|
413 |
-
|
|
|
414 |
pdf_texts.append({
|
415 |
-
'filename':
|
416 |
-
'text':
|
417 |
})
|
418 |
-
pdf_analysis.append(
|
419 |
-
|
420 |
-
|
421 |
-
pdf_analysis.append({'error': str(e)})
|
422 |
|
423 |
# Create consolidated text for analysis
|
424 |
consolidated_text = f"""
|
@@ -442,13 +578,6 @@ def verify_property():
|
|
442 |
try:
|
443 |
description = data['description']
|
444 |
if description and len(description) > 10:
|
445 |
-
# Temporarily disable translation to avoid import errors
|
446 |
-
# text_language = detect(description)
|
447 |
-
# if text_language != 'en':
|
448 |
-
# translated_description = GoogleTranslator(source=text_language, target='en').translate(description)
|
449 |
-
# data['description_translated'] = translated_description
|
450 |
-
# else:
|
451 |
-
# data['description_translated'] = description
|
452 |
data['description_translated'] = description
|
453 |
else:
|
454 |
data['description_translated'] = description
|
@@ -456,68 +585,50 @@ def verify_property():
|
|
456 |
logger.error(f"Error in language detection/translation: {str(e)}")
|
457 |
data['description_translated'] = data['description']
|
458 |
|
459 |
-
# Run all analyses in parallel using
|
460 |
-
|
461 |
-
|
462 |
-
|
463 |
-
|
464 |
-
# Prepare property data for price analysis
|
465 |
-
property_data = {}
|
466 |
-
if data.get('sq_ft'):
|
467 |
-
try:
|
468 |
-
property_data['size'] = float(data['sq_ft'])
|
469 |
-
except ValueError:
|
470 |
-
pass
|
471 |
-
if data.get('market_value'):
|
472 |
-
try:
|
473 |
-
property_data['price'] = float(data['market_value'].replace('₹', '').replace(',', ''))
|
474 |
-
except ValueError:
|
475 |
-
pass
|
476 |
-
if data.get('year_built'):
|
477 |
-
try:
|
478 |
-
current_year = datetime.now().year
|
479 |
-
property_data['property_age'] = current_year - int(data['year_built'])
|
480 |
-
except ValueError:
|
481 |
-
pass
|
482 |
-
|
483 |
-
# Create context text for price analysis
|
484 |
-
price_context = f"""
|
485 |
-
Property: {data['property_name']}
|
486 |
-
Type: {data['property_type']}
|
487 |
-
Location: {data['address']}, {data['city']}, {data['state']}
|
488 |
-
Size: {data['sq_ft']} sq ft
|
489 |
-
Market Value: ₹{data['market_value']}
|
490 |
-
Description: {data['description']}
|
491 |
-
Amenities: {data['amenities']}
|
492 |
-
"""
|
493 |
-
|
494 |
-
tasks = [
|
495 |
-
loop.run_in_executor(executor, generate_property_summary, data),
|
496 |
-
loop.run_in_executor(executor, classify_fraud, consolidated_text, data),
|
497 |
-
loop.run_in_executor(executor, generate_trust_score, consolidated_text, image_analysis, pdf_analysis),
|
498 |
-
loop.run_in_executor(executor, generate_suggestions, consolidated_text, data),
|
499 |
-
loop.run_in_executor(executor, assess_text_quality, data['description_translated']),
|
500 |
-
loop.run_in_executor(executor, verify_address, data),
|
501 |
-
loop.run_in_executor(executor, perform_cross_validation, data),
|
502 |
-
loop.run_in_executor(executor, analyze_location, data),
|
503 |
-
loop.run_in_executor(executor, lambda: analyze_price(data, price_context, data.get('latitude'), data.get('longitude'), property_data)),
|
504 |
-
loop.run_in_executor(executor, analyze_legal_details, data['legal_details']),
|
505 |
-
loop.run_in_executor(executor, verify_property_specs, data),
|
506 |
-
loop.run_in_executor(executor, analyze_market_value, data)
|
507 |
-
]
|
508 |
-
results = await asyncio.gather(*tasks)
|
509 |
-
return results
|
510 |
-
|
511 |
-
# Run analyses and get results
|
512 |
loop = asyncio.new_event_loop()
|
513 |
asyncio.set_event_loop(loop)
|
514 |
-
|
515 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
516 |
|
517 |
# Unpack results
|
518 |
-
summary
|
519 |
-
|
520 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
521 |
|
522 |
# Prepare response
|
523 |
document_analysis = {
|
@@ -553,13 +664,20 @@ def verify_property():
|
|
553 |
'image_analysis': image_results,
|
554 |
'specs_verification': specs_verification,
|
555 |
'market_analysis': market_analysis,
|
556 |
-
'images': images
|
|
|
|
|
|
|
|
|
557 |
}
|
558 |
|
559 |
# Calculate final verdict
|
560 |
final_verdict = calculate_final_verdict(results)
|
561 |
results['final_verdict'] = final_verdict
|
562 |
|
|
|
|
|
|
|
563 |
return jsonify(make_json_serializable(results))
|
564 |
|
565 |
except Exception as e:
|
|
|
15 |
# from deep_translator import GoogleTranslator
|
16 |
from models.logging_config import logger
|
17 |
from models.model_loader import load_model, clear_model_cache
|
18 |
+
from models.parallel_processor import parallel_processor
|
19 |
+
from models.performance_optimizer import performance_optimizer, optimize_model_loading, timed_function
|
20 |
from models.image_analysis import analyze_image
|
21 |
from models.pdf_analysis import extract_pdf_text, analyze_pdf_content
|
22 |
from models.property_summary import generate_property_summary
|
|
|
45 |
geocoder = Nominatim(user_agent="indian_property_verifier", timeout=10)
|
46 |
|
47 |
# Pre-load models to avoid loading delays during requests
|
48 |
+
@timed_function
def preload_models():
    """Pre-load essential models so requests do not pay the loading delay.

    The actual loading is delegated to ``optimize_model_loading``. Any
    exception raised while loading is logged and swallowed, so this function
    never raises to its caller.
    """
    try:
        logger.info("Pre-loading essential models with performance optimization...")
        # Loading itself is handled by the performance optimizer helper.
        optimize_model_loading()
        logger.info("Model pre-loading completed with optimization")
    except Exception as exc:
        logger.error(f"Error during model pre-loading: {str(exc)}")
|
60 |
|
|
|
179 |
'message': str(e)
|
180 |
}), 500
|
181 |
|
182 |
+
@app.route('/performance', methods=['GET'])
def get_performance_metrics():
    """Get system performance metrics and cache statistics.

    Returns:
        A JSON response with 'status', 'metrics' and a 'timestamp' formatted
        as ``YYYY-MM-DD HH:MM:SS``. If collecting the metrics raises, the
        error is logged and a JSON error payload is returned with HTTP 500.
    """
    try:
        # The helper has the same name as this view. Import it under an alias
        # so the call below is clearly the helper and not a recursive call.
        from models.performance_optimizer import (
            get_performance_metrics as collect_performance_metrics,
        )

        metrics = collect_performance_metrics()

        return jsonify({
            'status': 'success',
            'metrics': metrics,
            'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        })
    except Exception as e:
        logger.error(f"Error getting performance metrics: {str(e)}")
        return jsonify({
            'status': 'error',
            'message': str(e)
        }), 500
|
201 |
+
|
202 |
+
@app.route('/clear-cache', methods=['POST'])
def clear_cache():
    """Clear all cached results held by the performance optimizer.

    Returns:
        A JSON success payload, or a JSON error payload with HTTP 500 when
        clearing the cache raises (the error is logged first).
    """
    try:
        performance_optimizer.clear_cache()
    except Exception as exc:
        logger.error(f"Error clearing cache: {str(exc)}")
        failure = {
            'status': 'error',
            'message': str(exc)
        }
        return jsonify(failure), 500

    success = {
        'status': 'success',
        'message': 'Cache cleared successfully'
    }
    return jsonify(success)
|
217 |
+
|
218 |
def _coerce_score(value, default=0.0):
    """Return *value* as a float, or *default* when it is missing or non-numeric."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return default


def _positive_fraction(value, scale=1.0):
    """Return ``value / scale`` for a positive numeric *value*, otherwise 0.0."""
    number = _coerce_score(value)
    return number / scale if number > 0 else 0.0


def _dict_section(results, key):
    """Return ``results[key]`` when it is a dict, otherwise an empty dict."""
    section = results.get(key)
    return section if isinstance(section, dict) else {}


def _normalize_trust(trust_score_data):
    """Extract a 0-1 trust score from a ``{'score': ...}`` dict or a ``(score, ...)`` tuple."""
    if isinstance(trust_score_data, dict):
        trust = _coerce_score(trust_score_data.get('score', 0.0))
    elif isinstance(trust_score_data, tuple) and len(trust_score_data) > 0:
        trust = _coerce_score(trust_score_data[0])
    else:
        return 0.0
    # Values above 1 are treated as percentages and converted to a fraction.
    return trust / 100.0 if trust > 1.0 else trust


def calculate_final_verdict(results):
    """
    Calculate a comprehensive final verdict based on all analysis results.

    Args:
        results: Dict of per-analysis outputs. The keys read here are
            'fraud_classification', 'trust_score', 'address_verification',
            'cross_validation', 'location_analysis', 'price_analysis',
            'legal_analysis', 'specs_verification' and 'quality_assessment'.
            Missing keys, non-dict sections and non-numeric scores are treated
            as "no data" (score 0) instead of aborting the calculation.

    Returns:
        dict with 'verdict', 'confidence' (capped at 1.0), 'reasoning',
        'risk_level' and 'overall_score' (integer percentage). On the normal
        path it also carries a 'scores' breakdown. Non-dict input or an
        unexpected error yields a 'VERIFICATION REQUIRED' fallback with
        overall_score 25.
    """
    try:
        # Defensive: ensure results is a dict
        if not isinstance(results, dict):
            logger.warning(f"Results is not a dict: {type(results)}")
            return {
                'verdict': 'VERIFICATION REQUIRED',
                'confidence': 0.0,
                'reasoning': 'Insufficient data for verification',
                'risk_level': 'medium',
                'overall_score': 25
            }

        # Extract key metrics; anything that is not a dict counts as empty.
        fraud_classification = _dict_section(results, 'fraud_classification')
        trust_score_data = results.get('trust_score', {})
        address_verification = _dict_section(results, 'address_verification')
        cross_validation = results.get('cross_validation', [])
        location_analysis = _dict_section(results, 'location_analysis')
        price_analysis = _dict_section(results, 'price_analysis')
        legal_analysis = _dict_section(results, 'legal_analysis')
        specs_verification = _dict_section(results, 'specs_verification')
        quality_assessment = _dict_section(results, 'quality_assessment')

        # Fraud risk: alert-level multiplier times the reported alert score.
        fraud_level = fraud_classification.get('alert_level', 'minimal')
        fraud_alert_score = _coerce_score(fraud_classification.get('alert_score', 0.0))
        fraud_score_mapping = {
            'critical': 1.0,
            'high': 0.8,
            'medium': 0.6,
            'low': 0.3,
            'minimal': 0.1
        }
        fraud_score = fraud_score_mapping.get(fraud_level, 0.1) * fraud_alert_score

        trust_score = _normalize_trust(trust_score_data)

        # These sections report percentages; the weighted sum works on 0-1.
        address_score = _positive_fraction(
            address_verification.get('verification_score', 0.0), 100.0)
        location_score = _positive_fraction(
            location_analysis.get('completeness_score', 0.0), 100.0)
        specs_score = _positive_fraction(
            specs_verification.get('verification_score', 0.0), 100.0)
        quality_score = _positive_fraction(
            quality_assessment.get('score', 0.0), 100.0)

        # Price and legal confidences are used as given (no /100 scaling).
        price_score = _positive_fraction(price_analysis.get('confidence', 0.0))
        legal_score = _positive_fraction(legal_analysis.get('confidence', 0.0))

        # Count cross validation issues by severity.
        cross_validation_issues = 0
        high_severity_issues = 0
        medium_severity_issues = 0

        if isinstance(cross_validation, list):
            cross_validation_issues = len(cross_validation)
            for issue in cross_validation:
                if not isinstance(issue, dict):
                    continue
                severity = issue.get('severity', 'low')
                if severity == 'high':
                    high_severity_issues += 1
                elif severity == 'medium':
                    medium_severity_issues += 1

        # Weighted scoring system
        weights = {
            'fraud': 0.30,     # Fraud detection
            'trust': 0.25,     # Trust score
            'address': 0.15,   # Address verification
            'location': 0.10,  # Location analysis
            'price': 0.10,     # Price analysis
            'legal': 0.05,     # Legal analysis
            'specs': 0.03,     # Specs verification
            'quality': 0.02    # Quality assessment
        }

        # Fraud contributes inversely: a low fraud score raises the total.
        weighted_score = (
            (1.0 - fraud_score) * weights['fraud'] +
            trust_score * weights['trust'] +
            address_score * weights['address'] +
            location_score * weights['location'] +
            price_score * weights['price'] +
            legal_score * weights['legal'] +
            specs_score * weights['specs'] +
            quality_score * weights['quality']
        )

        # Debug logging
        logger.info(f"Score components: fraud={fraud_score:.3f}, trust={trust_score:.3f}, address={address_score:.3f}, location={location_score:.3f}, price={price_score:.3f}, legal={legal_score:.3f}, specs={specs_score:.3f}, quality={quality_score:.3f}")
        logger.info(f"Weighted score before penalty: {weighted_score:.3f}")

        # Penalty: 8% per high severity issue, 4% per medium severity issue.
        issue_penalty = high_severity_issues * 0.08 + medium_severity_issues * 0.04
        weighted_score = max(0.0, weighted_score - issue_penalty)

        logger.info(f"Issue penalty: {issue_penalty:.3f}, Final weighted score: {weighted_score:.3f}")

        # Ensure a 15% minimum when any of the main components has data.
        if any([trust_score > 0, address_score > 0, location_score > 0, price_score > 0]):
            weighted_score = max(0.15, weighted_score)

        # Determine verdict and risk level
        if weighted_score >= 0.75 and fraud_score < 0.2 and high_severity_issues == 0:
            verdict = 'VERIFIED REAL ESTATE LISTING'
            risk_level = 'low'
        elif weighted_score >= 0.60 and fraud_score < 0.4 and high_severity_issues <= 1:
            verdict = 'LIKELY LEGITIMATE'
            risk_level = 'low'
        elif weighted_score >= 0.40 and fraud_score < 0.6 and high_severity_issues <= 2:
            verdict = 'SUSPICIOUS LISTING'
            risk_level = 'medium'
        elif fraud_score >= 0.6 or weighted_score < 0.20 or high_severity_issues >= 3:
            verdict = 'HIGH RISK LISTING'
            risk_level = 'high'
        elif weighted_score >= 0.20:
            verdict = 'VERIFICATION REQUIRED'
            risk_level = 'medium'
        else:
            # NOTE: the previous branch already implies weighted_score >= 0.20
            # for any ordinary number, so this is only reachable for NaN.
            verdict = 'INSUFFICIENT DATA'
            risk_level = 'medium'

        # Generate detailed reasoning
        reasoning_parts = []

        if fraud_score > 0.3:
            reasoning_parts.append(f"Fraud risk detected (level: {fraud_level})")

        if trust_score < 0.5:
            reasoning_parts.append(f"Low trust score ({trust_score:.1%})")

        if address_score < 0.5:
            reasoning_parts.append("Address verification issues")

        if location_score < 0.5:
            reasoning_parts.append("Location verification issues")

        if price_score < 0.5:
            reasoning_parts.append("Price analysis concerns")

        if legal_score < 0.5:
            reasoning_parts.append("Legal documentation issues")

        if high_severity_issues > 0:
            reasoning_parts.append(f"{high_severity_issues} critical validation issues")

        if medium_severity_issues > 0:
            reasoning_parts.append(f"{medium_severity_issues} moderate validation issues")

        if not reasoning_parts:
            reasoning_parts.append("All verification checks passed successfully")

        reasoning = ". ".join(reasoning_parts)

        # Overall score as a percentage, clamped to 0-100.
        overall_score = max(0, min(100, int(weighted_score * 100)))

        # Ensure minimum score for any valid data
        if overall_score == 0 and any([trust_score > 0, address_score > 0, location_score > 0]):
            overall_score = 15

        # Final floor depends on how many high severity issues were found.
        if high_severity_issues >= 3:
            overall_score = max(10, overall_score)
        elif high_severity_issues >= 1:
            overall_score = max(15, overall_score)
        else:
            overall_score = max(20, overall_score)

        return {
            'verdict': verdict,
            'confidence': min(1.0, weighted_score),
            'reasoning': reasoning,
            'risk_level': risk_level,
            'overall_score': overall_score,
            'scores': {
                'fraud_score': fraud_score,
                'trust_score': trust_score,
                'address_score': address_score,
                'location_score': location_score,
                'price_score': price_score,
                'legal_score': legal_score,
                'specs_score': specs_score,
                'quality_score': quality_score,
                'weighted_score': weighted_score,
                'cross_validation_issues': cross_validation_issues,
                'high_severity_issues': high_severity_issues
            }
        }

    except Exception as e:
        logger.error(f"Error calculating final verdict: {str(e)}")
        return {
            'verdict': 'VERIFICATION REQUIRED',
            'confidence': 0.0,
            'reasoning': f'Error in verdict calculation: {str(e)}',
            'risk_level': 'medium',
            'overall_score': 25
        }
|
470 |
|
471 |
@app.route('/verify', methods=['POST'])
|
472 |
def verify_property():
|
473 |
try:
|
474 |
+
start_time = time.time()
|
475 |
+
|
476 |
if not request.form and not request.files:
|
477 |
logger.warning("No form data or files provided")
|
478 |
return jsonify({
|
|
|
515 |
'status': 'error'
|
516 |
}), 400
|
517 |
|
518 |
+
# Process images in parallel
|
519 |
images = []
|
520 |
image_analysis = []
|
521 |
if 'images' in request.files:
|
522 |
+
image_files = []
|
|
|
523 |
for img_file in request.files.getlist('images'):
|
524 |
if img_file.filename and img_file.filename.lower().endswith(('.jpg', '.jpeg', '.png')):
|
525 |
+
image_files.append(img_file)
|
526 |
+
|
527 |
+
if image_files:
|
528 |
+
# Process images in parallel
|
529 |
+
image_results = parallel_processor.process_images_parallel(image_files)
|
530 |
+
for result in image_results:
|
531 |
+
if 'image_data' in result:
|
532 |
+
images.append(result['image_data'])
|
533 |
+
image_analysis.append(result['analysis'])
|
534 |
+
else:
|
535 |
+
image_analysis.append(result)
|
536 |
+
|
537 |
+
# Process PDFs in parallel
|
|
|
|
|
|
|
538 |
pdf_texts = []
|
539 |
pdf_analysis = []
|
540 |
if 'documents' in request.files:
|
541 |
+
pdf_files = []
|
|
|
542 |
for pdf_file in request.files.getlist('documents'):
|
543 |
if pdf_file.filename and pdf_file.filename.lower().endswith('.pdf'):
|
544 |
+
pdf_files.append(pdf_file)
|
545 |
+
|
546 |
+
if pdf_files:
|
547 |
+
# Process PDFs in parallel
|
548 |
+
pdf_results = parallel_processor.process_pdfs_parallel(pdf_files)
|
549 |
+
for result in pdf_results:
|
550 |
+
if 'filename' in result:
|
551 |
pdf_texts.append({
|
552 |
+
'filename': result['filename'],
|
553 |
+
'text': result['text']
|
554 |
})
|
555 |
+
pdf_analysis.append(result['analysis'])
|
556 |
+
else:
|
557 |
+
pdf_analysis.append(result)
|
|
|
558 |
|
559 |
# Create consolidated text for analysis
|
560 |
consolidated_text = f"""
|
|
|
578 |
try:
|
579 |
description = data['description']
|
580 |
if description and len(description) > 10:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
581 |
data['description_translated'] = description
|
582 |
else:
|
583 |
data['description_translated'] = description
|
|
|
585 |
logger.error(f"Error in language detection/translation: {str(e)}")
|
586 |
data['description_translated'] = data['description']
|
587 |
|
588 |
+
# Run all analyses in parallel using the new parallel processor
|
589 |
+
analysis_start_time = time.time()
|
590 |
+
|
591 |
+
# Create new event loop for async operations
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
592 |
loop = asyncio.new_event_loop()
|
593 |
asyncio.set_event_loop(loop)
|
594 |
+
|
595 |
+
try:
|
596 |
+
analysis_results = loop.run_until_complete(
|
597 |
+
parallel_processor.run_analyses_parallel(data, consolidated_text, image_analysis, pdf_analysis)
|
598 |
+
)
|
599 |
+
finally:
|
600 |
+
loop.close()
|
601 |
+
|
602 |
+
analysis_time = time.time() - analysis_start_time
|
603 |
+
logger.info(f"Analysis completed in {analysis_time:.2f} seconds")
|
604 |
|
605 |
# Unpack results
|
606 |
+
summary = analysis_results.get('summary', "Property summary unavailable.")
|
607 |
+
|
608 |
+
# Ensure summary is not placeholder text
|
609 |
+
if summary and isinstance(summary, str):
|
610 |
+
if "[Insert Property Description Here]" in summary or "[insert property price here]" in summary:
|
611 |
+
# Generate a basic summary if placeholder text is detected
|
612 |
+
# app.py imports its helpers absolutely from the `models` package; a relative import has no parent package here and raises ImportError (assumes create_basic_summary lives in models/property_summary.py - confirm).
from models.property_summary import create_basic_summary
|
613 |
+
summary = create_basic_summary(data)
|
614 |
+
|
615 |
+
fraud_classification = analysis_results.get('fraud', {})
|
616 |
+
trust_result = analysis_results.get('trust', (0.0, "Trust analysis failed"))
|
617 |
+
suggestions = analysis_results.get('suggestions', {})
|
618 |
+
quality_assessment = analysis_results.get('quality', {})
|
619 |
+
address_verification = analysis_results.get('address', {})
|
620 |
+
cross_validation = analysis_results.get('cross_validation', [])
|
621 |
+
location_analysis = analysis_results.get('location', {})
|
622 |
+
price_analysis = analysis_results.get('price', {})
|
623 |
+
legal_analysis = analysis_results.get('legal', {})
|
624 |
+
specs_verification = analysis_results.get('specs', {})
|
625 |
+
market_analysis = analysis_results.get('market', {})
|
626 |
+
|
627 |
+
# Handle trust score result
|
628 |
+
if isinstance(trust_result, tuple):
|
629 |
+
trust_score, trust_reasoning = trust_result
|
630 |
+
else:
|
631 |
+
trust_score, trust_reasoning = 0.0, "Trust analysis failed"
|
632 |
|
633 |
# Prepare response
|
634 |
document_analysis = {
|
|
|
664 |
'image_analysis': image_results,
|
665 |
'specs_verification': specs_verification,
|
666 |
'market_analysis': market_analysis,
|
667 |
+
'images': images,
|
668 |
+
'processing_time': {
|
669 |
+
'total_time': time.time() - start_time,
|
670 |
+
'analysis_time': analysis_time
|
671 |
+
}
|
672 |
}
|
673 |
|
674 |
# Calculate final verdict
|
675 |
final_verdict = calculate_final_verdict(results)
|
676 |
results['final_verdict'] = final_verdict
|
677 |
|
678 |
+
total_time = time.time() - start_time
|
679 |
+
logger.info(f"Total verification completed in {total_time:.2f} seconds")
|
680 |
+
|
681 |
return jsonify(make_json_serializable(results))
|
682 |
|
683 |
except Exception as e:
|