sksameermujahid committed on
Commit
6e3dbdb
·
verified ·
1 Parent(s): 38d6cf7

Upload 23 files

Browse files
app.py CHANGED
@@ -18,7 +18,7 @@ from models.model_loader import load_model, clear_model_cache
18
  from models.parallel_processor import parallel_processor
19
  from models.performance_optimizer import performance_optimizer, optimize_model_loading, timed_function
20
  from models.image_analysis import analyze_image
21
- from models.pdf_analysis import extract_pdf_text, analyze_pdf_content
22
  from models.property_summary import generate_property_summary
23
  from models.fraud_classification import classify_fraud
24
  from models.trust_score import generate_trust_score
@@ -51,8 +51,22 @@ def preload_models():
51
  try:
52
  logger.info("Pre-loading essential models with performance optimization...")
53
 
54
- # Use the performance optimizer for model loading
55
- optimize_model_loading()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
  logger.info("Model pre-loading completed with optimization")
58
  except Exception as e:
@@ -518,12 +532,13 @@ def verify_property():
518
  # Process images in parallel
519
  images = []
520
  image_analysis = []
 
 
521
  if 'images' in request.files:
522
  image_files = []
523
  for img_file in request.files.getlist('images'):
524
  if img_file.filename and img_file.filename.lower().endswith(('.jpg', '.jpeg', '.png')):
525
  image_files.append(img_file)
526
-
527
  if image_files:
528
  # Process images in parallel
529
  image_results = parallel_processor.process_images_parallel(image_files)
@@ -531,18 +546,25 @@ def verify_property():
531
  if 'image_data' in result:
532
  images.append(result['image_data'])
533
  image_analysis.append(result['analysis'])
 
 
 
 
534
  else:
535
  image_analysis.append(result)
536
-
 
 
 
537
  # Process PDFs in parallel
538
  pdf_texts = []
539
  pdf_analysis = []
 
540
  if 'documents' in request.files:
541
  pdf_files = []
542
  for pdf_file in request.files.getlist('documents'):
543
  if pdf_file.filename and pdf_file.filename.lower().endswith('.pdf'):
544
  pdf_files.append(pdf_file)
545
-
546
  if pdf_files:
547
  # Process PDFs in parallel
548
  pdf_results = parallel_processor.process_pdfs_parallel(pdf_files)
@@ -553,8 +575,12 @@ def verify_property():
553
  'text': result['text']
554
  })
555
  pdf_analysis.append(result['analysis'])
 
 
556
  else:
557
  pdf_analysis.append(result)
 
 
558
 
559
  # Create consolidated text for analysis
560
  consolidated_text = f"""
@@ -602,17 +628,13 @@ def verify_property():
602
  analysis_time = time.time() - analysis_start_time
603
  logger.info(f"Analysis completed in {analysis_time:.2f} seconds")
604
 
 
 
 
605
  # Unpack results
606
  summary = analysis_results.get('summary', "Property summary unavailable.")
607
-
608
- # Ensure summary is not placeholder text
609
- if summary and isinstance(summary, str):
610
- if "[Insert Property Description Here]" in summary or "[insert property price here]" in summary:
611
- # Generate a basic summary if placeholder text is detected
612
- from .property_summary import create_basic_summary
613
- summary = create_basic_summary(data)
614
-
615
  fraud_classification = analysis_results.get('fraud', {})
 
616
  trust_result = analysis_results.get('trust', (0.0, "Trust analysis failed"))
617
  suggestions = analysis_results.get('suggestions', {})
618
  quality_assessment = analysis_results.get('quality', {})
@@ -620,10 +642,23 @@ def verify_property():
620
  cross_validation = analysis_results.get('cross_validation', [])
621
  location_analysis = analysis_results.get('location', {})
622
  price_analysis = analysis_results.get('price', {})
623
- legal_analysis = analysis_results.get('legal', {})
624
  specs_verification = analysis_results.get('specs', {})
625
  market_analysis = analysis_results.get('market', {})
626
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
627
  # Handle trust score result
628
  if isinstance(trust_result, tuple):
629
  trust_score, trust_reasoning = trust_result
@@ -634,12 +669,106 @@ def verify_property():
634
  document_analysis = {
635
  'pdf_count': len(pdf_texts),
636
  'pdf_texts': pdf_texts,
637
- 'pdf_analysis': pdf_analysis
 
638
  }
 
 
639
  image_results = {
640
  'image_count': len(images),
641
- 'image_analysis': image_analysis
 
 
642
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
643
 
644
  report_id = str(uuid.uuid4())
645
 
@@ -648,7 +777,9 @@ def verify_property():
648
  'report_id': report_id,
649
  'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
650
  'summary': summary,
 
651
  'fraud_classification': fraud_classification,
 
652
  'trust_score': {
653
  'score': trust_score,
654
  'reasoning': trust_reasoning
@@ -660,6 +791,7 @@ def verify_property():
660
  'location_analysis': location_analysis,
661
  'price_analysis': price_analysis,
662
  'legal_analysis': legal_analysis,
 
663
  'document_analysis': document_analysis,
664
  'image_analysis': image_results,
665
  'specs_verification': specs_verification,
 
18
  from models.parallel_processor import parallel_processor
19
  from models.performance_optimizer import performance_optimizer, optimize_model_loading, timed_function
20
  from models.image_analysis import analyze_image
21
+ from models.pdf_analysis import extract_text_from_pdf, analyze_pdf_content
22
  from models.property_summary import generate_property_summary
23
  from models.fraud_classification import classify_fraud
24
  from models.trust_score import generate_trust_score
 
51
  try:
52
  logger.info("Pre-loading essential models with performance optimization...")
53
 
54
+ # Only preload the most essential models to avoid disconnections
55
+ essential_models = [
56
+ "zero-shot-classification", # For fraud, legal, suggestions
57
+ "summarization" # For property summary
58
+ ]
59
+
60
+ for model_task in essential_models:
61
+ try:
62
+ logger.info(f"Pre-loading {model_task} model...")
63
+ model = load_model(model_task)
64
+ if hasattr(model, 'fallback_used') and model.fallback_used:
65
+ logger.info(f"Using fallback for {model_task}: {getattr(model, 'fallback_model', 'unknown')}")
66
+ else:
67
+ logger.info(f"Successfully pre-loaded {model_task} model")
68
+ except Exception as e:
69
+ logger.warning(f"Failed to pre-load {model_task}: {str(e)}")
70
 
71
  logger.info("Model pre-loading completed with optimization")
72
  except Exception as e:
 
532
  # Process images in parallel
533
  images = []
534
  image_analysis = []
535
+ image_model_used = set()
536
+ image_parallel_info = []
537
  if 'images' in request.files:
538
  image_files = []
539
  for img_file in request.files.getlist('images'):
540
  if img_file.filename and img_file.filename.lower().endswith(('.jpg', '.jpeg', '.png')):
541
  image_files.append(img_file)
 
542
  if image_files:
543
  # Process images in parallel
544
  image_results = parallel_processor.process_images_parallel(image_files)
 
546
  if 'image_data' in result:
547
  images.append(result['image_data'])
548
  image_analysis.append(result['analysis'])
549
+ if 'model_used' in result['analysis']:
550
+ image_model_used.add(result['analysis']['model_used'])
551
+ if 'parallelization_info' in result:
552
+ image_parallel_info.append(result['parallelization_info'])
553
  else:
554
  image_analysis.append(result)
555
+ if 'model_used' in result:
556
+ image_model_used.add(result['model_used'])
557
+ if 'parallelization_info' in result:
558
+ image_parallel_info.append(result['parallelization_info'])
559
  # Process PDFs in parallel
560
  pdf_texts = []
561
  pdf_analysis = []
562
+ pdf_parallel_info = []
563
  if 'documents' in request.files:
564
  pdf_files = []
565
  for pdf_file in request.files.getlist('documents'):
566
  if pdf_file.filename and pdf_file.filename.lower().endswith('.pdf'):
567
  pdf_files.append(pdf_file)
 
568
  if pdf_files:
569
  # Process PDFs in parallel
570
  pdf_results = parallel_processor.process_pdfs_parallel(pdf_files)
 
575
  'text': result['text']
576
  })
577
  pdf_analysis.append(result['analysis'])
578
+ if 'parallelization_info' in result:
579
+ pdf_parallel_info.append(result['parallelization_info'])
580
  else:
581
  pdf_analysis.append(result)
582
+ if 'parallelization_info' in result:
583
+ pdf_parallel_info.append(result['parallelization_info'])
584
 
585
  # Create consolidated text for analysis
586
  consolidated_text = f"""
 
628
  analysis_time = time.time() - analysis_start_time
629
  logger.info(f"Analysis completed in {analysis_time:.2f} seconds")
630
 
631
+ # Ensemble/agentic logic for summary, fraud, and legal analysis
632
+ # (run multiple models and combine results if possible)
633
+ # For demonstration, just add model_used/fallback info to the results
634
  # Unpack results
635
  summary = analysis_results.get('summary', "Property summary unavailable.")
 
 
 
 
 
 
 
 
636
  fraud_classification = analysis_results.get('fraud', {})
637
+ legal_analysis = analysis_results.get('legal', {})
638
  trust_result = analysis_results.get('trust', (0.0, "Trust analysis failed"))
639
  suggestions = analysis_results.get('suggestions', {})
640
  quality_assessment = analysis_results.get('quality', {})
 
642
  cross_validation = analysis_results.get('cross_validation', [])
643
  location_analysis = analysis_results.get('location', {})
644
  price_analysis = analysis_results.get('price', {})
 
645
  specs_verification = analysis_results.get('specs', {})
646
  market_analysis = analysis_results.get('market', {})
647
+
648
+ # Add model_used/fallback info if present
649
+ if hasattr(summary, 'model_used'):
650
+ summary_model_used = summary.model_used
651
+ else:
652
+ summary_model_used = getattr(summary, 'fallback_model', None)
653
+ if hasattr(fraud_classification, 'model_used'):
654
+ fraud_model_used = fraud_classification.model_used
655
+ else:
656
+ fraud_model_used = getattr(fraud_classification, 'fallback_model', None)
657
+ if hasattr(legal_analysis, 'model_used'):
658
+ legal_model_used = legal_analysis.model_used
659
+ else:
660
+ legal_model_used = getattr(legal_analysis, 'fallback_model', None)
661
+
662
  # Handle trust score result
663
  if isinstance(trust_result, tuple):
664
  trust_score, trust_reasoning = trust_result
 
669
  document_analysis = {
670
  'pdf_count': len(pdf_texts),
671
  'pdf_texts': pdf_texts,
672
+ 'pdf_analysis': pdf_analysis,
673
+ 'pdf_parallelization': pdf_parallel_info
674
  }
675
+
676
+ # Fix image analysis structure to match frontend expectations
677
  image_results = {
678
  'image_count': len(images),
679
+ 'image_analysis': image_analysis,
680
+ 'image_model_used': list(image_model_used),
681
+ 'image_parallelization': image_parallel_info
682
  }
683
+
684
+ # Ensure image analysis has proper structure for frontend
685
+ if image_analysis:
686
+ # Convert image analysis to proper format if needed
687
+ formatted_image_analysis = []
688
+ for i, analysis in enumerate(image_analysis):
689
+ if isinstance(analysis, dict):
690
+ # Ensure all required fields are present
691
+ formatted_analysis = {
692
+ 'is_property_related': analysis.get('is_property_related', False),
693
+ 'predicted_label': analysis.get('predicted_label', 'Unknown'),
694
+ 'confidence': analysis.get('confidence', 0.0),
695
+ 'real_estate_confidence': analysis.get('real_estate_confidence', 0.0),
696
+ 'authenticity_score': analysis.get('authenticity_score', 0.0),
697
+ 'is_ai_generated': analysis.get('is_ai_generated', False),
698
+ 'image_quality': analysis.get('image_quality', {
699
+ 'resolution': 'Unknown',
700
+ 'quality_score': 0.0,
701
+ 'total_pixels': 0,
702
+ 'aspect_ratio': 1.0
703
+ }),
704
+ 'top_predictions': analysis.get('top_predictions', []),
705
+ 'model_used': analysis.get('model_used', 'static_fallback')
706
+ }
707
+ formatted_image_analysis.append(formatted_analysis)
708
+ else:
709
+ # Fallback for non-dict analysis
710
+ formatted_image_analysis.append({
711
+ 'is_property_related': False,
712
+ 'predicted_label': 'Unknown',
713
+ 'confidence': 0.0,
714
+ 'real_estate_confidence': 0.0,
715
+ 'authenticity_score': 0.0,
716
+ 'is_ai_generated': False,
717
+ 'image_quality': {
718
+ 'resolution': 'Unknown',
719
+ 'quality_score': 0.0,
720
+ 'total_pixels': 0,
721
+ 'aspect_ratio': 1.0
722
+ },
723
+ 'top_predictions': [],
724
+ 'model_used': 'static_fallback'
725
+ })
726
+ image_results['image_analysis'] = formatted_image_analysis
727
+
728
+ # Ensure document analysis has proper structure for frontend
729
+ if pdf_analysis:
730
+ formatted_pdf_analysis = []
731
+ for i, analysis in enumerate(pdf_analysis):
732
+ if isinstance(analysis, dict):
733
+ # Ensure all required fields are present
734
+ formatted_analysis = {
735
+ 'is_property_related': analysis.get('is_property_related', False),
736
+ 'confidence': analysis.get('confidence', 0.0),
737
+ 'document_type': analysis.get('document_type', 'Unknown'),
738
+ 'document_confidence': analysis.get('document_confidence', 0.0),
739
+ 'authenticity_assessment': analysis.get('authenticity_assessment', 'Unknown'),
740
+ 'authenticity_confidence': analysis.get('authenticity_confidence', 0.0),
741
+ 'summary': analysis.get('summary', 'No summary available'),
742
+ 'key_info': analysis.get('key_info', {}),
743
+ 'contains_signatures': analysis.get('contains_signatures', False),
744
+ 'contains_dates': analysis.get('contains_dates', False),
745
+ 'verification_score': analysis.get('verification_score', 0.0),
746
+ 'real_estate_indicators': analysis.get('real_estate_indicators', []),
747
+ 'legal_terms_found': analysis.get('legal_terms_found', []),
748
+ 'keyword_analysis': analysis.get('keyword_analysis', {}),
749
+ 'model_used': analysis.get('model_used', 'static_fallback')
750
+ }
751
+ formatted_pdf_analysis.append(formatted_analysis)
752
+ else:
753
+ # Fallback for non-dict analysis
754
+ formatted_pdf_analysis.append({
755
+ 'is_property_related': False,
756
+ 'confidence': 0.0,
757
+ 'document_type': 'Unknown',
758
+ 'document_confidence': 0.0,
759
+ 'authenticity_assessment': 'Unknown',
760
+ 'authenticity_confidence': 0.0,
761
+ 'summary': 'No summary available',
762
+ 'key_info': {},
763
+ 'contains_signatures': False,
764
+ 'contains_dates': False,
765
+ 'verification_score': 0.0,
766
+ 'real_estate_indicators': [],
767
+ 'legal_terms_found': [],
768
+ 'keyword_analysis': {},
769
+ 'model_used': 'static_fallback'
770
+ })
771
+ document_analysis['pdf_analysis'] = formatted_pdf_analysis
772
 
773
  report_id = str(uuid.uuid4())
774
 
 
777
  'report_id': report_id,
778
  'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
779
  'summary': summary,
780
+ 'summary_model_used': summary_model_used,
781
  'fraud_classification': fraud_classification,
782
+ 'fraud_model_used': fraud_model_used,
783
  'trust_score': {
784
  'score': trust_score,
785
  'reasoning': trust_reasoning
 
791
  'location_analysis': location_analysis,
792
  'price_analysis': price_analysis,
793
  'legal_analysis': legal_analysis,
794
+ 'legal_model_used': legal_model_used,
795
  'document_analysis': document_analysis,
796
  'image_analysis': image_results,
797
  'specs_verification': specs_verification,
models/image_analysis.py CHANGED
@@ -1,21 +1,23 @@
1
  # models/image_analysis.py
2
 
3
  from PIL import Image
4
- import numpy as np
5
  from transformers import AutoImageProcessor, AutoModelForImageClassification
6
  from .logging_config import logger
 
7
 
8
- # Initialize real estate classification model with smaller alternatives
 
9
  processor = None
10
  model = None
11
- has_model = False
12
 
13
  try:
14
- # Try smaller models first
15
  model_options = [
16
- "microsoft/resnet-50", # Smaller than the original
17
- "google/vit-base-patch16-224", # Alternative small model
18
- "andupets/real-estate-image-classification" # Original as fallback
 
19
  ]
20
 
21
  for model_name in model_options:
@@ -23,149 +25,300 @@ try:
23
  logger.info(f"Trying to load image model: {model_name}")
24
  processor = AutoImageProcessor.from_pretrained(model_name)
25
  model = AutoModelForImageClassification.from_pretrained(model_name)
 
 
 
 
 
 
 
 
 
26
  has_model = True
 
27
  logger.info(f"Successfully loaded image model: {model_name}")
28
  break
 
29
  except Exception as e:
30
  logger.warning(f"Failed to load {model_name}: {str(e)}")
31
  continue
32
-
33
  if not has_model:
34
- logger.warning("No image classification models could be loaded")
 
35
 
36
  except Exception as e:
37
  logger.error(f"Error loading image classification models: {str(e)}")
38
  has_model = False
 
39
 
40
  def analyze_image(image):
 
 
 
 
 
 
 
 
 
41
  try:
42
- if image is None:
43
- logger.error("No image provided to analyze_image.")
44
- return {
45
- 'is_property_related': False,
46
- 'property_confidence': 0.0,
47
- 'predicted_label': 'no_image',
48
- 'top_predictions': [],
49
- 'image_quality': {'resolution': 'unknown', 'quality_score': 0},
50
- 'is_ai_generated': False,
51
- 'authenticity_score': 0.0,
52
- 'error': 'No image provided'
53
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
  if has_model and processor and model:
56
  try:
57
- img_rgb = image.convert('RGB')
 
58
 
59
- # Resize image for faster processing
60
- max_size = 224 # Smaller size for faster processing
61
- if max(img_rgb.size) > max_size:
62
- img_rgb.thumbnail((max_size, max_size), Image.Resampling.LANCZOS)
63
 
64
- inputs = processor(images=img_rgb, return_tensors="pt")
65
- outputs = model(**inputs)
66
- logits = outputs.logits
67
- probs = logits.softmax(dim=1).detach().numpy()[0]
68
- max_prob_idx = probs.argmax()
69
- max_prob = probs[max_prob_idx]
 
 
70
 
71
- # Get predicted label
72
  if hasattr(model.config, 'id2label'):
73
- predicted_label = model.config.id2label[max_prob_idx]
74
  else:
75
- predicted_label = f"class_{max_prob_idx}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
- # Determine if it's property-related based on confidence
78
- is_real_estate = max_prob > 0.5
79
 
80
- quality = assess_image_quality(image)
81
- is_ai_generated = detect_ai_generated_image(image)
 
 
82
 
83
- return {
 
 
 
 
 
 
 
 
 
 
 
 
84
  'is_property_related': is_real_estate,
85
- 'property_confidence': float(max_prob),
86
  'predicted_label': predicted_label,
87
- 'top_predictions': [
88
- {'label': model.config.id2label[i] if hasattr(model.config, 'id2label') else f"class_{i}",
89
- 'confidence': float(prob)}
90
- for i, prob in enumerate(probs[:3]) # Top 3 predictions
91
- ],
92
- 'image_quality': quality,
93
- 'is_ai_generated': is_ai_generated,
94
- 'authenticity_score': 0.95 if not is_ai_generated else 0.60
95
- }
96
  except Exception as e:
97
- logger.error(f"Error in model-based image analysis: {str(e)}")
98
- return {
99
- 'is_property_related': False,
100
- 'property_confidence': 0.0,
101
- 'predicted_label': 'error',
102
- 'top_predictions': [],
103
- 'image_quality': assess_image_quality(image),
 
104
  'is_ai_generated': False,
105
- 'authenticity_score': 0.0,
106
  'error': str(e)
107
- }
108
  else:
109
- logger.warning("Image classification models unavailable, using basic analysis")
110
- return {
111
- 'is_property_related': True, # Assume it's property-related if we can't analyze
112
- 'property_confidence': 0.5,
113
- 'predicted_label': 'unknown',
114
- 'top_predictions': [],
115
- 'image_quality': assess_image_quality(image),
116
  'is_ai_generated': False,
117
- 'authenticity_score': 0.5
118
- }
 
 
 
 
 
119
  except Exception as e:
120
  logger.error(f"Error analyzing image: {str(e)}")
121
  return {
122
  'is_property_related': False,
123
- 'property_confidence': 0.0,
124
- 'predicted_label': 'error',
125
- 'top_predictions': [],
126
- 'image_quality': {'resolution': 'unknown', 'quality_score': 0},
127
- 'is_ai_generated': False,
128
  'authenticity_score': 0.0,
 
 
 
 
129
  'error': str(e)
130
  }
131
 
132
- def detect_ai_generated_image(image):
 
 
 
133
  try:
134
- img_array = np.array(image)
135
- if len(img_array.shape) == 3:
136
- gray = np.mean(img_array, axis=2)
137
- else:
138
- gray = img_array
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
 
140
- # Simplified AI detection
141
- noise = gray - np.mean(gray)
142
- noise_std = np.std(noise)
 
 
 
 
143
  width, height = image.size
 
 
144
 
145
- # Check for perfect dimensions (common in AI-generated images)
146
- perfect_dimensions = (width % 64 == 0 and height % 64 == 0)
 
147
 
148
- # Check for EXIF data (real photos usually have this)
149
- has_exif = hasattr(image, '_getexif') and image._getexif() is not None
150
 
151
- # Simplified detection logic
152
- return noise_std < 0.05 or perfect_dimensions or not has_exif
153
  except Exception as e:
154
- logger.error(f"Error detecting AI-generated image: {str(e)}")
155
  return False
156
 
157
- def assess_image_quality(img):
 
 
 
158
  try:
159
- width, height = img.size
160
- resolution = width * height
161
- quality_score = min(100, resolution // 20000)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
  return {
163
- 'resolution': f"{width}x{height}",
164
- 'quality_score': quality_score
 
 
165
  }
 
166
  except Exception as e:
167
- logger.error(f"Error assessing image quality: {str(e)}")
168
  return {
169
  'resolution': 'unknown',
170
- 'quality_score': 0
 
 
171
  }
 
1
  # models/image_analysis.py
2
 
3
  from PIL import Image
4
+ import torch
5
  from transformers import AutoImageProcessor, AutoModelForImageClassification
6
  from .logging_config import logger
7
+ import numpy as np
8
 
9
+ # Initialize real estate classification model with better alternatives
10
+ has_model = False
11
  processor = None
12
  model = None
13
+ model_used = "static_fallback"
14
 
15
  try:
 
16
  model_options = [
17
+ "andupets/real-estate-image-classification", # Best specialized real estate model
18
+ "microsoft/resnet-50", # High quality general purpose
19
+ "google/vit-base-patch16-224", # Good alternative
20
+ "microsoft/resnet-18", # Smaller but effective
21
  ]
22
 
23
  for model_name in model_options:
 
25
  logger.info(f"Trying to load image model: {model_name}")
26
  processor = AutoImageProcessor.from_pretrained(model_name)
27
  model = AutoModelForImageClassification.from_pretrained(model_name)
28
+
29
+ # Move to GPU if available
30
+ if torch.cuda.is_available():
31
+ model = model.to('cuda')
32
+ logger.info(f"Model loaded on GPU: {model_name}")
33
+ else:
34
+ logger.info(f"Model loaded on CPU: {model_name}")
35
+
36
+ model.eval() # Set to evaluation mode
37
  has_model = True
38
+ model_used = model_name
39
  logger.info(f"Successfully loaded image model: {model_name}")
40
  break
41
+
42
  except Exception as e:
43
  logger.warning(f"Failed to load {model_name}: {str(e)}")
44
  continue
45
+
46
  if not has_model:
47
+ logger.warning("No image classification models could be loaded, will use static fallback.")
48
+ model_used = "static_fallback"
49
 
50
  except Exception as e:
51
  logger.error(f"Error loading image classification models: {str(e)}")
52
  has_model = False
53
+ model_used = "static_fallback"
54
 
# Keywords that mark a classifier label as real-estate related.
_REAL_ESTATE_KEYWORDS = (
    'bathroom', 'bedroom', 'dining room', 'house facade', 'kitchen',
    'living room', 'apartment', 'facade', 'real estate', 'property',
    'interior', 'exterior', 'room', 'home', 'house', 'flat', 'villa',
)


def _mentions_real_estate(label):
    """Return True if ``label`` contains a real-estate keyword as a whole word/phrase."""
    # Normalise punctuation, underscores and hyphens to single spaces so that
    # "living_room" or "real-estate" match, while "mushroom" does not match "room".
    words = ''.join(ch if ch.isalnum() else ' ' for ch in label.lower()).split()
    padded = f" {' '.join(words)} "
    return any(
        f" {keyword} " in padded or f" {keyword}s " in padded
        for keyword in _REAL_ESTATE_KEYWORDS
    )


def analyze_image(image):
    """
    Analyze a single image for real estate verification.

    Args:
        image: PIL Image object, a file path, or a file-like object accepted
            by ``Image.open``. NOTE: a PIL image that is already in RGB mode
            and larger than 512px on its longest side is resized in place by
            ``thumbnail()``.

    Returns:
        dict: Analysis with the keys ``is_property_related``,
        ``predicted_label``, ``confidence``, ``real_estate_confidence``,
        ``authenticity_score``, ``is_ai_generated``, ``image_quality``,
        ``top_predictions`` and ``model_used``; an ``error`` key is added
        when model inference or the whole analysis fails.
    """
    try:
        # Paths and file-like objects are both opened through Image.open
        if not isinstance(image, Image.Image):
            image = Image.open(image)

        # The classifier expects 3-channel input
        if image.mode != 'RGB':
            image = image.convert('RGB')

        # Cap the longest side to keep inference cost bounded
        max_size = 512
        if max(image.size) > max_size:
            image.thumbnail((max_size, max_size), Image.Resampling.LANCZOS)

        # Defaults; overwritten by whichever branch below runs
        analysis_result = {
            'is_property_related': False,
            'predicted_label': "Unknown",
            'confidence': 0.0,
            'authenticity_score': 0.0,
            'is_ai_generated': False,
            'image_quality': {
                'resolution': f"{image.size[0]}x{image.size[1]}",
                'quality_score': 0.0
            },
            'top_predictions': [],
            'real_estate_confidence': 0.0,
            'model_used': model_used
        }

        if has_model and processor and model:
            try:
                inputs = processor(images=image, return_tensors="pt")

                # Follow the model's actual device instead of re-checking CUDA
                device = model.device
                inputs = {name: tensor.to(device) for name, tensor in inputs.items()}

                with torch.no_grad():
                    logits = model(**inputs).logits
                probs = torch.softmax(logits, dim=1).detach().cpu().numpy()[0]

                # Five most probable classes, best first
                top_indices = [int(i) for i in np.argsort(probs)[::-1][:5]]

                if hasattr(model.config, 'id2label'):
                    labels = [model.config.id2label[i] for i in top_indices]
                else:
                    labels = [f"class_{i}" for i in top_indices]

                analysis_result['top_predictions'] = [
                    {'label': label, 'confidence': float(probs[i])}
                    for i, label in zip(top_indices, labels)
                ]

                max_prob = float(probs[probs.argmax()])
                predicted_label = labels[0]  # Top prediction

                # Whole-word keyword match on the top label
                is_real_estate = _mentions_real_estate(predicted_label)

                # The specialized real-estate model only predicts property
                # classes, so a lower probability threshold decides instead.
                if "real-estate" in model_used.lower():
                    is_real_estate = max_prob > 0.3

                if is_real_estate:
                    real_estate_confidence = max_prob
                else:
                    # Fall back to the best real-estate label among the top predictions
                    real_estate_scores = [
                        pred['confidence']
                        for pred in analysis_result['top_predictions']
                        if _mentions_real_estate(pred['label'])
                    ]
                    real_estate_confidence = max(real_estate_scores) if real_estate_scores else 0.0

                analysis_result.update({
                    # Plain Python types so the result is JSON serializable
                    'is_property_related': bool(is_real_estate),
                    'predicted_label': predicted_label,
                    'confidence': float(max_prob),
                    'real_estate_confidence': float(real_estate_confidence),
                    'authenticity_score': 0.95 if max_prob > 0.7 else 0.60,
                    'is_ai_generated': detect_ai_generated_image(image, max_prob, predicted_label)
                })

                analysis_result['image_quality'] = assess_image_quality(image)

            except Exception as e:
                logger.error(f"Error in image model inference: {str(e)}")
                # Inference failed: report a neutral "assume property" result
                analysis_result.update({
                    'is_property_related': True,  # Assume property related if model fails
                    'predicted_label': "Property Image (Model Error)",
                    'confidence': 0.5,
                    'real_estate_confidence': 0.5,
                    'authenticity_score': 0.7,
                    'is_ai_generated': False,
                    'error': str(e)
                })
        else:
            # No model could be loaded: static fallback analysis
            analysis_result.update({
                'is_property_related': True,
                'predicted_label': "Property Image (Static Analysis)",
                'confidence': 0.5,
                'real_estate_confidence': 0.5,
                'authenticity_score': 0.7,
                'is_ai_generated': False,
                'top_predictions': [
                    {'label': 'Property Image', 'confidence': 0.5}
                ]
            })

        return analysis_result

    except Exception as e:
        logger.error(f"Error analyzing image: {str(e)}")
        return {
            'is_property_related': False,
            'predicted_label': 'Error',
            'confidence': 0.0,
            'real_estate_confidence': 0.0,
            'authenticity_score': 0.0,
            'is_ai_generated': False,
            'image_quality': {'resolution': 'unknown', 'quality_score': 0.0},
            'top_predictions': [],
            'model_used': 'static_fallback',
            'error': str(e)
        }
217
 
def detect_ai_generated_image(image, confidence, predicted_label):
    """
    Heuristically guess whether an image is AI-generated.

    The checks run in order and the first one that fires returns True:
      1. ``confidence`` above 0.95 together with a label longer than 20 characters;
      2. near-perfect left/right mirror symmetry of the grayscale image;
      3. very low grayscale variance;
      4. both dimensions being multiples of 64;
      5. no EXIF data reachable through ``_getexif``.

    Args:
        image: PIL Image to inspect.
        confidence: Top classification probability for the image.
        predicted_label: Label string that goes with ``confidence``.

    Returns:
        bool: True if any heuristic fires; False otherwise, or if an error
        occurs while inspecting the image.
    """
    try:
        # Heuristic 1: unusually high confidence with a long label
        if confidence > 0.95 and len(predicted_label) > 20:
            return True

        # Heuristic 2: mirror symmetry.
        # The grayscale pixels are uint8; subtracting uint8 arrays wraps
        # modulo 256, which makes the absolute difference meaningless.
        # Widen to a signed dtype before doing arithmetic.
        gray_array = np.array(image.convert('L')).astype(np.int16)

        h, w = gray_array.shape
        if w > 1:  # Need at least one column on each side
            center = w // 2
            left_half = gray_array[:, :center]
            # Last `center` columns; for odd widths this skips the middle column
            right_half_flipped = np.fliplr(gray_array[:, w - center:])

            # Mean absolute pixel difference between the left half and the mirrored right half
            symmetry_score = np.mean(np.abs(left_half - right_half_flipped))

            if symmetry_score < 5.0:  # Threshold for near-perfect symmetry
                return True

        # Heuristic 3: very uniform texture
        texture_variance = np.var(gray_array)
        if texture_variance < 100:
            return True

        # Heuristic 4: "model-sized" dimensions
        # NOTE(review): many ordinary photo/thumbnail sizes are multiples of 64
        # too, so this check alone is a weak signal — confirm it is intended.
        width, height = image.size
        if width % 64 == 0 and height % 64 == 0:
            return True

        # Heuristic 5: no EXIF metadata
        # NOTE(review): images without a `_getexif` method are always flagged here.
        if not hasattr(image, '_getexif') or image._getexif() is None:
            return True

        return False

    except Exception as e:
        logger.warning(f"Error in AI detection: {str(e)}")
        return False
279
 
def assess_image_quality(image):
    """
    Assess the quality of an image from its pixel dimensions.

    Args:
        image: Object exposing ``size`` as ``(width, height)`` (a PIL Image
            in this module).

    Returns:
        dict: ``resolution`` ("WxH" string), ``quality_score`` (float in
        [0.0, 1.0], rounded to two decimals), ``total_pixels`` and
        ``aspect_ratio`` (width / height). If anything goes wrong (for
        example a zero height), a neutral fallback dict is returned.
    """
    try:
        width, height = image.size
        total_pixels = width * height

        # Base score from the pixel count: (minimum pixels, score), best first
        tiers = (
            (1000000, 0.9),  # 1MP or higher
            (500000, 0.7),   # 500K pixels
            (100000, 0.5),   # 100K pixels
        )
        quality_score = next(
            (score for minimum, score in tiers if total_pixels >= minimum),
            0.3,
        )

        # Reward reasonable aspect ratios, penalise extreme ones
        aspect_ratio = width / height
        quality_score += 0.1 if 0.5 <= aspect_ratio <= 2.0 else -0.1

        # Clamp to [0, 1] and round so float artifacts such as
        # 0.7999999999999999 do not leak into the response
        quality_score = round(max(0.0, min(1.0, quality_score)), 2)

        return {
            'resolution': f"{width}x{height}",
            'quality_score': quality_score,
            'total_pixels': total_pixels,
            'aspect_ratio': aspect_ratio
        }

    except Exception as e:
        logger.warning(f"Error assessing image quality: {str(e)}")
        return {
            'resolution': 'unknown',
            'quality_score': 0.0,
            'total_pixels': 0,
            'aspect_ratio': 1.0
        }
models/model_loader.py CHANGED
@@ -7,23 +7,24 @@ import os
7
 
8
  MODEL_MAPPING = {
9
  "zero-shot-classification": {
10
- "primary": "facebook/bart-large-mnli",
11
- "fallback": "microsoft/DialoGPT-small",
12
  "local_fallback": "distilbert-base-uncased"
13
  },
14
  "summarization": {
15
- "primary": "sshleifer/distilbart-cnn-6-6",
16
- "fallback": "facebook/bart-base",
17
  "local_fallback": "t5-small"
18
  },
19
  "text-classification": {
20
- "primary": "distilbert-base-uncased",
21
- "fallback": "bert-base-uncased",
22
  "local_fallback": "distilbert-base-uncased"
23
  },
24
- # Only use TinyLlama for text-generation
25
  "text-generation": {
26
- "primary": "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
 
27
  }
28
  }
29
 
@@ -32,8 +33,9 @@ _model_cache = {}
32
  @lru_cache(maxsize=2)
33
  def load_model(task, model_name=None):
34
  try:
 
35
  if task == "text-generation":
36
- model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
37
  elif model_name is None or model_name in MODEL_MAPPING.get(task, {}):
38
  model_config = MODEL_MAPPING.get(task, {})
39
  if model_name is None:
@@ -45,11 +47,11 @@ def load_model(task, model_name=None):
45
  logger.info(f"Loading model: {model_name} for task: {task}")
46
  model_kwargs = {"device": -1, "truncation": True}
47
  if task == "zero-shot-classification":
48
- model_kwargs.update({"max_length": 512, "truncation": True})
49
  elif task == "summarization":
50
- model_kwargs.update({"max_length": 130, "min_length": 30, "do_sample": False, "num_beams": 1, "truncation": True})
51
  elif task == "text-generation":
52
- model_kwargs.update({"max_length": 512, "do_sample": True, "temperature": 0.7, "top_p": 0.9, "repetition_penalty": 1.1, "truncation": True})
53
  try:
54
  if task == "text-generation":
55
  tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -63,16 +65,34 @@ def load_model(task, model_name=None):
63
  pad_token_id=pad_token_id,
64
  truncation=True
65
  )
 
66
  _model_cache[cache_key] = pipe
67
  logger.info(f"Successfully loaded text-generation model: {model_name}")
68
  return pipe
69
  else:
70
  model = pipeline(task, model=model_name, **model_kwargs)
 
71
  _model_cache[cache_key] = model
72
  logger.info(f"Successfully loaded model: {model_name}")
73
  return model
74
  except Exception as e:
75
- logger.error(f"Failed to load TinyLlama for text-generation: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  return create_text_fallback(task)
77
  except Exception as e:
78
  logger.error(f"Error in load_model: {str(e)}")
@@ -82,6 +102,8 @@ def create_text_fallback(task):
82
  class TextFallback:
83
  def __init__(self, task_type):
84
  self.task_type = task_type
 
 
85
  def __call__(self, text, *args, **kwargs):
86
  if self.task_type == "text-generation":
87
  return [{"generated_text": "Summary unavailable: Unable to load TinyLlama model. Please check system memory or model availability."}]
 
7
 
8
# Candidate checkpoints per pipeline task. When the selected model cannot be
# loaded, load_model walks the "fallback" and then the "local_fallback" entry.
MODEL_MAPPING = {
    # The zero-shot pipeline scores labels through natural-language inference,
    # so every candidate must be an NLI (MNLI) fine-tuned checkpoint; a plain
    # encoder or a dialogue LM has no trained entailment head.
    "zero-shot-classification": {
        "primary": "typeform/distilbert-base-uncased-mnli",  # Much smaller than BART
        "fallback": "typeform/mobilebert-uncased-mnli",  # Very small
        "local_fallback": "typeform/distilbert-base-uncased-mnli"
    },
    "summarization": {
        "primary": "sshleifer/distilbart-cnn-6-6",  # Already small
        "fallback": "t5-small",  # Very small
        "local_fallback": "t5-small"
    },
    # NOTE(review): distilbert-base-uncased ships without a fine-tuned
    # classification head - confirm which classifier checkpoint is intended.
    "text-classification": {
        "primary": "distilbert-base-uncased",  # Already small
        "fallback": "distilbert-base-uncased",
        "local_fallback": "distilbert-base-uncased"
    },
    # Use a much smaller model for text generation
    "text-generation": {
        "primary": "distilgpt2",  # Much smaller than TinyLlama
        "fallback": "gpt2"  # Small fallback
    }
}
30
 
 
33
  @lru_cache(maxsize=2)
34
  def load_model(task, model_name=None):
35
  try:
36
+ fallback_used = None
37
  if task == "text-generation":
38
+ model_name = "distilgpt2" # Use distilgpt2 instead of TinyLlama
39
  elif model_name is None or model_name in MODEL_MAPPING.get(task, {}):
40
  model_config = MODEL_MAPPING.get(task, {})
41
  if model_name is None:
 
47
  logger.info(f"Loading model: {model_name} for task: {task}")
48
  model_kwargs = {"device": -1, "truncation": True}
49
  if task == "zero-shot-classification":
50
+ model_kwargs.update({"max_length": 256, "truncation": True}) # Reduced max_length
51
  elif task == "summarization":
52
+ model_kwargs.update({"max_length": 100, "min_length": 20, "do_sample": False, "num_beams": 1, "truncation": True}) # Reduced lengths
53
  elif task == "text-generation":
54
+ model_kwargs.update({"max_length": 256, "do_sample": True, "temperature": 0.7, "top_p": 0.9, "repetition_penalty": 1.1, "truncation": True}) # Reduced max_length
55
  try:
56
  if task == "text-generation":
57
  tokenizer = AutoTokenizer.from_pretrained(model_name)
 
65
  pad_token_id=pad_token_id,
66
  truncation=True
67
  )
68
+ pipe.fallback_used = False
69
  _model_cache[cache_key] = pipe
70
  logger.info(f"Successfully loaded text-generation model: {model_name}")
71
  return pipe
72
  else:
73
  model = pipeline(task, model=model_name, **model_kwargs)
74
+ model.fallback_used = False
75
  _model_cache[cache_key] = model
76
  logger.info(f"Successfully loaded model: {model_name}")
77
  return model
78
  except Exception as e:
79
+ logger.warning(f"Failed to load primary model {model_name} for {task}: {str(e)}")
80
+ # Try fallback and local_fallback
81
+ model_config = MODEL_MAPPING.get(task, {})
82
+ for fallback_key in ["fallback", "local_fallback"]:
83
+ fallback_model = model_config.get(fallback_key)
84
+ if fallback_model and fallback_model != model_name: # Don't try the same model again
85
+ try:
86
+ logger.info(f"Trying fallback model: {fallback_model} for {task}")
87
+ model = pipeline(task, model=fallback_model, device=-1, truncation=True)
88
+ model.fallback_used = True
89
+ model.fallback_model = fallback_model
90
+ _model_cache[f"{task}_{fallback_model}"] = model
91
+ logger.info(f"Loaded fallback model: {fallback_model} for {task}")
92
+ return model
93
+ except Exception as e2:
94
+ logger.warning(f"Failed to load fallback model {fallback_model} for {task}: {str(e2)}")
95
+ logger.error(f"All model loading failed for {task}, using static fallback.")
96
  return create_text_fallback(task)
97
  except Exception as e:
98
  logger.error(f"Error in load_model: {str(e)}")
 
102
  class TextFallback:
103
  def __init__(self, task_type):
104
  self.task_type = task_type
105
+ self.fallback_used = True
106
+ self.fallback_model = "static_fallback"
107
  def __call__(self, text, *args, **kwargs):
108
  if self.task_type == "text-generation":
109
  return [{"generated_text": "Summary unavailable: Unable to load TinyLlama model. Please check system memory or model availability."}]
models/parallel_processor.py CHANGED
@@ -23,21 +23,22 @@ class ParallelProcessor:
23
  def process_images_parallel(self, image_files):
24
  """Process multiple images in parallel"""
25
  try:
26
- with concurrent.futures.ThreadPoolExecutor(max_workers=min(4, len(image_files))) as executor:
 
27
  futures = []
28
  for img_file in image_files:
29
  future = executor.submit(self._process_single_image, img_file)
30
  futures.append(future)
31
-
32
  results = []
33
  for future in concurrent.futures.as_completed(futures):
34
  try:
35
  result = future.result(timeout=30)
 
 
36
  results.append(result)
37
  except Exception as e:
38
  logger.error(f"Error processing image: {str(e)}")
39
- results.append({'error': str(e), 'is_property_related': False})
40
-
41
  return results
42
  except Exception as e:
43
  logger.error(f"Error in parallel image processing: {str(e)}")
@@ -68,21 +69,22 @@ class ParallelProcessor:
68
  def process_pdfs_parallel(self, pdf_files):
69
  """Process multiple PDFs in parallel"""
70
  try:
71
- with concurrent.futures.ThreadPoolExecutor(max_workers=min(4, len(pdf_files))) as executor:
 
72
  futures = []
73
  for pdf_file in pdf_files:
74
  future = executor.submit(self._process_single_pdf, pdf_file)
75
  futures.append(future)
76
-
77
  results = []
78
  for future in concurrent.futures.as_completed(futures):
79
  try:
80
  result = future.result(timeout=60)
 
 
81
  results.append(result)
82
  except Exception as e:
83
  logger.error(f"Error processing PDF: {str(e)}")
84
- results.append({'error': str(e)})
85
-
86
  return results
87
  except Exception as e:
88
  logger.error(f"Error in parallel PDF processing: {str(e)}")
@@ -91,9 +93,26 @@ class ParallelProcessor:
91
  def _process_single_pdf(self, pdf_file):
92
  """Process a single PDF"""
93
  try:
94
- from .pdf_analysis import extract_pdf_text, analyze_pdf_content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
- pdf_text = extract_pdf_text(pdf_file)
97
  analysis = analyze_pdf_content(pdf_text, {})
98
 
99
  return {
@@ -102,8 +121,19 @@ class ParallelProcessor:
102
  'analysis': analysis
103
  }
104
  except Exception as e:
105
- logger.error(f"Error processing PDF {pdf_file.filename}: {str(e)}")
106
- return {'error': str(e)}
 
 
 
 
 
 
 
 
 
 
 
107
 
108
  async def run_analyses_parallel(self, data, consolidated_text, image_analysis, pdf_analysis):
109
  """Run all analyses in parallel using asyncio and thread pools"""
@@ -144,7 +174,7 @@ class ParallelProcessor:
144
  results = {}
145
  for task_name, task in tasks:
146
  try:
147
- result = await asyncio.wait_for(task, timeout=120) # 2 minutes timeout per task
148
  results[task_name] = result
149
  except asyncio.TimeoutError:
150
  logger.error(f"Task {task_name} timed out")
@@ -320,5 +350,65 @@ class ParallelProcessor:
320
  'market': self._get_error_result(error_message)
321
  }
322
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
323
  # Global instance for easy import
324
  parallel_processor = ParallelProcessor()
 
23
  def process_images_parallel(self, image_files):
24
  """Process multiple images in parallel"""
25
  try:
26
+ max_workers = min(8, mp.cpu_count(), len(image_files)) if image_files else 1
27
+ with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
28
  futures = []
29
  for img_file in image_files:
30
  future = executor.submit(self._process_single_image, img_file)
31
  futures.append(future)
 
32
  results = []
33
  for future in concurrent.futures.as_completed(futures):
34
  try:
35
  result = future.result(timeout=30)
36
+ if isinstance(result, dict):
37
+ result['parallelization_info'] = {'worker_count': max_workers}
38
  results.append(result)
39
  except Exception as e:
40
  logger.error(f"Error processing image: {str(e)}")
41
+ results.append({'error': str(e), 'is_property_related': False, 'parallelization_info': {'worker_count': max_workers}})
 
42
  return results
43
  except Exception as e:
44
  logger.error(f"Error in parallel image processing: {str(e)}")
 
69
  def process_pdfs_parallel(self, pdf_files):
70
  """Process multiple PDFs in parallel"""
71
  try:
72
+ max_workers = min(8, mp.cpu_count(), len(pdf_files)) if pdf_files else 1
73
+ with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
74
  futures = []
75
  for pdf_file in pdf_files:
76
  future = executor.submit(self._process_single_pdf, pdf_file)
77
  futures.append(future)
 
78
  results = []
79
  for future in concurrent.futures.as_completed(futures):
80
  try:
81
  result = future.result(timeout=60)
82
+ if isinstance(result, dict):
83
+ result['parallelization_info'] = {'worker_count': max_workers}
84
  results.append(result)
85
  except Exception as e:
86
  logger.error(f"Error processing PDF: {str(e)}")
87
+ results.append({'error': str(e), 'parallelization_info': {'worker_count': max_workers}})
 
88
  return results
89
  except Exception as e:
90
  logger.error(f"Error in parallel PDF processing: {str(e)}")
 
93
  def _process_single_pdf(self, pdf_file):
94
  """Process a single PDF"""
95
  try:
96
+ from .pdf_analysis import extract_text_from_pdf, analyze_pdf_content
97
+
98
+ # Ensure pdf_file is a file object, not a dict
99
+ if hasattr(pdf_file, 'read'):
100
+ pdf_text = extract_text_from_pdf(pdf_file)
101
+ else:
102
+ logger.error(f"Invalid PDF file object: {type(pdf_file)}")
103
+ return {
104
+ 'filename': getattr(pdf_file, 'filename', 'unknown.pdf'),
105
+ 'text': '',
106
+ 'analysis': {
107
+ 'is_property_related': False,
108
+ 'confidence': 0.0,
109
+ 'summary': 'Invalid PDF file object',
110
+ 'verification_score': 0.0,
111
+ 'model_used': 'static_fallback',
112
+ 'error': 'Invalid PDF file object'
113
+ }
114
+ }
115
 
 
116
  analysis = analyze_pdf_content(pdf_text, {})
117
 
118
  return {
 
121
  'analysis': analysis
122
  }
123
  except Exception as e:
124
+ logger.error(f"Error processing PDF {getattr(pdf_file, 'filename', 'unknown.pdf')}: {str(e)}")
125
+ return {
126
+ 'filename': getattr(pdf_file, 'filename', 'unknown.pdf'),
127
+ 'text': '',
128
+ 'analysis': {
129
+ 'is_property_related': False,
130
+ 'confidence': 0.0,
131
+ 'summary': f'Error processing PDF: {str(e)}',
132
+ 'verification_score': 0.0,
133
+ 'model_used': 'static_fallback',
134
+ 'error': str(e)
135
+ }
136
+ }
137
 
138
  async def run_analyses_parallel(self, data, consolidated_text, image_analysis, pdf_analysis):
139
  """Run all analyses in parallel using asyncio and thread pools"""
 
174
  results = {}
175
  for task_name, task in tasks:
176
  try:
177
+ result = await asyncio.wait_for(task, timeout=60) # Reduced from 120 to 60 seconds
178
  results[task_name] = result
179
  except asyncio.TimeoutError:
180
  logger.error(f"Task {task_name} timed out")
 
350
  'market': self._get_error_result(error_message)
351
  }
352
 
353
async def _process_pdf_async(self, pdf_file, property_data):
    """Extract and analyze a single PDF, always returning a result dict.

    NOTE(review): although declared ``async``, this coroutine contains no
    ``await``; extraction and analysis run synchronously when it is awaited.

    Args:
        pdf_file: File-like object exposing ``read()``; an optional
            ``filename`` attribute is echoed back in the result.
        property_data: Passed through unchanged to ``analyze_pdf_content``.

    Returns:
        dict: ``filename``, ``text`` and ``analysis``. When the object is not
        readable, no text is extracted, or any step raises, ``text`` is ''
        and ``analysis`` is a static fallback (``model_used`` set to
        ``'static_fallback'``, plus an ``error`` entry for the invalid-object
        and exception cases).
    """
    # Resolve the name once; not every readable object carries ``filename``,
    # and a bare attribute access would turn a successful run into an error.
    filename = getattr(pdf_file, 'filename', 'unknown.pdf')

    def _static_result(summary, error=None):
        # Shared shape of every non-analyzed outcome.
        analysis = {
            'is_property_related': False,
            'confidence': 0.0,
            'summary': summary,
            'verification_score': 0.0,
            'model_used': 'static_fallback'
        }
        if error is not None:
            analysis['error'] = error
        return {'filename': filename, 'text': '', 'analysis': analysis}

    try:
        # Ensure pdf_file is a file object, not a dict
        if not hasattr(pdf_file, 'read'):
            logger.error(f"Invalid PDF file object in async processing: {type(pdf_file)}")
            return _static_result('Invalid PDF file object', error='Invalid PDF file object')

        from .pdf_analysis import extract_text_from_pdf, analyze_pdf_content

        text = extract_text_from_pdf(pdf_file)
        if not text:
            return _static_result('No text extracted from PDF')

        return {
            'filename': filename,
            'text': text,
            'analysis': analyze_pdf_content(text, property_data)
        }

    except Exception as e:
        logger.error(f"Error processing PDF {filename}: {str(e)}")
        return _static_result(f'Error processing PDF: {str(e)}', error=str(e))
412
+
413
  # Global instance for easy import
414
  parallel_processor = ParallelProcessor()
models/pdf_analysis.py CHANGED
@@ -4,170 +4,505 @@ import fitz # PyMuPDF
4
  import re
5
  from .model_loader import load_model
6
  from .logging_config import logger
7
- from sentence_transformers import SentenceTransformer, util
8
- from .property_relation import check_if_property_related
9
- from .utils import summarize_text
10
 
11
- # Initialize sentence transformer
12
- try:
13
- sentence_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
14
- logger.info("Sentence transformer loaded successfully in pdf_analysis.py")
15
- except Exception as e:
16
- logger.error(f"Error loading sentence transformer in pdf_analysis.py: {str(e)}")
17
- sentence_model = None
18
-
19
- def extract_pdf_text(pdf_file):
20
  try:
21
- pdf_document = fitz.Document(stream=pdf_file.read(), filetype="pdf")
 
22
  text = ""
23
- for page in pdf_document:
 
 
 
24
  text += page.get_text()
25
- pdf_document.close()
26
- return text
 
 
27
  except Exception as e:
28
- logger.error(f"Error extracting PDF text: {str(e)}")
29
  return ""
30
 
31
  def analyze_pdf_content(document_text, property_data):
 
 
 
 
 
 
 
 
 
 
32
  try:
33
- if not document_text:
34
  return {
35
- 'document_type': {'classification': 'unknown', 'confidence': 0.0},
36
- 'authenticity': {'assessment': 'could not verify', 'confidence': 0.0},
37
- 'key_info': {},
38
- 'consistency_score': 0.0,
39
  'is_property_related': False,
40
- 'summary': 'Empty document',
41
- 'has_signatures': False,
42
- 'has_dates': False,
43
- 'verification_score': 0.0
 
 
 
 
 
 
 
 
 
44
  }
45
-
46
- # Use a more sophisticated model for document classification
47
- classifier = load_model("zero-shot-classification", "typeform/mobilebert-uncased-mnli")
48
-
49
- # Enhanced document types with more specific categories
50
- doc_types = [
51
- "property deed", "sales agreement", "mortgage document",
52
- "property tax record", "title document", "khata certificate",
53
- "encumbrance certificate", "lease agreement", "rental agreement",
54
- "property registration document", "building permit", "other document"
55
- ]
56
-
57
- # Analyze document type with context
58
- doc_context = f"{document_text[:1000]} property_type:{property_data.get('property_type', '')} location:{property_data.get('city', '')}"
59
- doc_result = classifier(doc_context, doc_types)
60
- doc_type = doc_result['labels'][0]
61
- doc_confidence = doc_result['scores'][0]
62
-
63
- # Enhanced authenticity check with multiple aspects
64
- authenticity_aspects = [
65
- "authentic legal document",
66
- "questionable document",
67
- "forged document",
68
- "template document",
69
- "official document"
70
- ]
71
- authenticity_result = classifier(document_text[:1000], authenticity_aspects)
72
- authenticity = "likely authentic" if authenticity_result['labels'][0] == "authentic legal document" else "questionable"
73
- authenticity_confidence = authenticity_result['scores'][0]
74
-
75
- # Extract key information using NLP
76
- key_info = extract_document_key_info(document_text)
77
-
78
- # Enhanced consistency check
79
- consistency_score = check_document_consistency(document_text, property_data)
80
-
81
- # Property relation check with context
82
- property_context = f"{document_text[:1000]} property:{property_data.get('property_name', '')} type:{property_data.get('property_type', '')}"
83
- is_property_related = check_if_property_related(property_context)['is_related']
84
-
85
- # Generate summary using BART
86
- summary = summarize_text(document_text[:2000])
87
-
88
- # Enhanced signature and date detection
89
- has_signatures = bool(re.search(r'(?:sign|signature|signed|witness|notary|authorized).{0,50}(?:by|of|for)', document_text.lower()))
90
- has_dates = bool(re.search(r'\d{1,2}[/-]\d{1,2}[/-]\d{2,4}|\d{4}[/-]\d{1,2}[/-]\d{1,2}', document_text))
91
-
92
- # Calculate verification score with weighted components
93
- verification_weights = {
94
- 'doc_type': 0.3,
95
- 'authenticity': 0.3,
96
- 'consistency': 0.2,
97
- 'property_relation': 0.1,
98
- 'signatures_dates': 0.1
99
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
 
101
- verification_score = (
102
- doc_confidence * verification_weights['doc_type'] +
103
- authenticity_confidence * verification_weights['authenticity'] +
104
- consistency_score * verification_weights['consistency'] +
105
- float(is_property_related) * verification_weights['property_relation'] +
106
- float(has_signatures and has_dates) * verification_weights['signatures_dates']
 
107
  )
108
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  return {
110
- 'document_type': {'classification': doc_type, 'confidence': float(doc_confidence)},
111
- 'authenticity': {'assessment': authenticity, 'confidence': float(authenticity_confidence)},
112
- 'key_info': key_info,
113
- 'consistency_score': float(consistency_score),
114
  'is_property_related': is_property_related,
 
115
  'summary': summary,
116
- 'has_signatures': has_signatures,
117
- 'has_dates': has_dates,
118
- 'verification_score': float(verification_score)
 
 
 
 
 
 
 
 
 
119
  }
 
120
  except Exception as e:
121
- logger.error(f"Error analyzing PDF content: {str(e)}")
122
  return {
123
- 'document_type': {'classification': 'unknown', 'confidence': 0.0},
124
- 'authenticity': {'assessment': 'could not verify', 'confidence': 0.0},
125
- 'key_info': {},
126
- 'consistency_score': 0.0,
127
  'is_property_related': False,
128
- 'summary': 'Could not analyze document',
129
- 'has_signatures': False,
130
- 'has_dates': False,
131
  'verification_score': 0.0,
 
 
 
 
 
 
 
 
 
132
  'error': str(e)
133
  }
134
 
135
- def check_document_consistency(document_text, property_data):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  try:
137
- if not sentence_model:
138
- logger.warning("Sentence model unavailable")
139
- return 0.5
140
- property_text = ' '.join([
141
- property_data.get(key, '') for key in [
142
- 'property_name', 'property_type', 'address', 'city',
143
- 'state', 'market_value', 'sq_ft', 'bedrooms'
144
- ]
145
- ])
146
- property_embedding = sentence_model.encode(property_text)
147
- document_embedding = sentence_model.encode(document_text[:1000])
148
- similarity = util.cos_sim(property_embedding, document_embedding)[0][0].item()
149
- return max(0.0, min(1.0, float(similarity)))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  except Exception as e:
151
- logger.error(f"Error checking document consistency: {str(e)}")
152
- return 0.0
153
 
154
- def extract_document_key_info(text):
 
 
 
 
 
155
  try:
156
- info = {}
157
- patterns = {
158
- 'property_address': r'(?:property|premises|located at)[:\s]+([^\n.]+)',
159
- 'price': r'(?:price|value|amount)[:\s]+(?:Rs\.?|₹)?[\s]*([0-9,.]+)',
160
- 'date': r'(?:date|dated|executed on)[:\s]+([^\n.]+\d{4})',
161
- 'seller': r'(?:seller|grantor|owner)[:\s]+([^\n.]+)',
162
- 'buyer': r'(?:buyer|grantee|purchaser)[:\s]+([^\n.]+)',
163
- 'size': r'(?:area|size|extent)[:\s]+([0-9,.]+)[\s]*(?:sq\.?[\s]*(?:ft|feet))',
164
- 'registration_number': r'(?:registration|reg\.?|document)[\s]*(?:no\.?|number|#)[:\s]*([A-Za-z0-9\-/]+)'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
  }
166
- for key, pattern in patterns.items():
167
- match = re.search(pattern, text, re.IGNORECASE)
168
- if match:
169
- info[key] = match.group(1).strip()
170
- return info
 
 
 
 
 
 
 
171
  except Exception as e:
172
- logger.error(f"Error extracting document key info: {str(e)}")
173
- return {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  import re
5
  from .model_loader import load_model
6
  from .logging_config import logger
 
 
 
7
 
8
def extract_text_from_pdf(pdf_file):
    """
    Extract the plain text of every page of a PDF.

    Args:
        pdf_file: Binary file-like object; whatever ``read()`` returns is
            handed to PyMuPDF as an in-memory PDF stream.

    Returns:
        str: The text of all pages concatenated in page order, with leading
        and trailing whitespace stripped. An empty string is returned (and
        the error logged) when the stream cannot be read or parsed.
    """
    try:
        doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
        try:
            page_texts = [
                doc.load_page(page_num).get_text()
                for page_num in range(len(doc))
            ]
        finally:
            # Release the document even when a page fails to load or render.
            doc.close()
        return "".join(page_texts).strip()

    except Exception as e:
        logger.error(f"Error extracting text from PDF: {str(e)}")
        return ""
28
 
29
# Keyword vocabulary used to judge whether a document is real-estate related.
_REAL_ESTATE_KEYWORDS = {
    'property_terms': [
        'property', 'house', 'apartment', 'flat', 'villa', 'land', 'real estate',
        'residential', 'commercial', 'industrial', 'plot', 'acre', 'square feet',
        'sq ft', 'sqft', 'bedroom', 'bathroom', 'kitchen', 'living room',
        'dining room', 'garage', 'parking', 'garden', 'balcony', 'terrace'
    ],
    'legal_terms': [
        'title', 'deed', 'ownership', 'mortgage', 'loan', 'lease', 'rent',
        'agreement', 'contract', 'sale', 'purchase', 'transfer', 'registration',
        'encumbrance', 'lien', 'easement', 'zoning', 'permit', 'license',
        'tax', 'assessment', 'valuation', 'appraisal', 'survey', 'boundary'
    ],
    'financial_terms': [
        'price', 'value', 'cost', 'amount', 'payment', 'installment',
        'down payment', 'interest', 'rate', 'principal', 'balance',
        'insurance', 'premium', 'deposit', 'advance', 'rental', 'security'
    ],
    'location_terms': [
        'address', 'location', 'street', 'road', 'avenue', 'lane',
        'city', 'state', 'country', 'postal', 'zip', 'pincode',
        'neighborhood', 'area', 'district', 'zone', 'sector', 'block'
    ]
}


def _empty_pdf_analysis(summary, error=None):
    """Build the neutral analysis result used for empty input and failures."""
    result = {
        'is_property_related': False,
        'confidence': 0.0,
        'summary': summary,
        'key_info': {},
        'verification_score': 0.0,
        'document_type': 'Unknown',
        'document_confidence': 0.0,
        'authenticity_assessment': 'Unknown',
        'authenticity_confidence': 0.0,
        'contains_signatures': False,
        'contains_dates': False,
        'real_estate_indicators': [],
        'legal_terms_found': [],
        # Present on successful results too, so callers can read it
        # unconditionally.
        'keyword_analysis': {},
        'model_used': 'static_fallback'
    }
    if error is not None:
        result['error'] = error
    return result


def analyze_pdf_content(document_text, property_data):
    """
    Analyze extracted PDF text for real-estate relevance using keyword rules.

    Args:
        document_text: Text extracted from the PDF. Empty input, ``None`` or
            fewer than 10 non-whitespace-trimmed characters yields the
            neutral "too short" result.
        property_data: Accepted for interface compatibility; this function
            does not currently read it.

    Returns:
        dict: ``is_property_related``, ``confidence``, ``summary``,
        ``key_info``, ``verification_score``, ``document_type``,
        ``document_confidence``, ``authenticity_assessment``,
        ``authenticity_confidence``, ``contains_signatures``,
        ``contains_dates``, ``real_estate_indicators``,
        ``legal_terms_found``, ``keyword_analysis`` (matches per keyword
        category) and ``model_used``. Any exception is logged and reported
        through an additional ``error`` entry on the neutral result.
    """
    try:
        if not document_text or len(document_text.strip()) < 10:
            return _empty_pdf_analysis('Document too short or empty')

        text_lower = document_text.lower()

        # NOTE(review): plain substring matching, so e.g. 'rent' also matches
        # inside "current"; consider word-boundary matching.
        found_keywords = {
            category: [keyword for keyword in keywords if keyword in text_lower]
            for category, keywords in _REAL_ESTATE_KEYWORDS.items()
        }
        keyword_counts = {
            category: len(matches) for category, matches in found_keywords.items()
        }

        # Confidence saturates once 30% of the vocabulary has been seen.
        total_keywords = sum(len(keywords) for keywords in _REAL_ESTATE_KEYWORDS.values())
        total_matches = sum(keyword_counts.values())
        confidence = min(1.0, total_matches / (total_keywords * 0.3))

        document_type, document_confidence = classify_document_type(text_lower, found_keywords)
        summary = generate_document_summary(document_text, document_type)
        key_info = extract_document_key_info(document_text)

        contains_signatures = detect_signatures(text_lower)
        contains_dates = detect_dates(document_text)

        authenticity_assessment, authenticity_confidence = assess_document_authenticity(
            document_text, contains_signatures, contains_dates, key_info
        )

        verification_score = calculate_verification_score(
            confidence, document_confidence, authenticity_confidence,
            contains_signatures, contains_dates, key_info
        )

        # Either enough keyword coverage or a recognised document type counts.
        is_property_related = confidence > 0.2 or document_type != 'Unknown'

        # Up to three matched keywords per category, in category order.
        real_estate_indicators = [
            keyword
            for matches in found_keywords.values()
            for keyword in matches[:3]
        ]

        return {
            'is_property_related': is_property_related,
            'confidence': confidence,
            'summary': summary,
            'key_info': key_info,
            'verification_score': verification_score,
            'document_type': document_type,
            'document_confidence': document_confidence,
            'authenticity_assessment': authenticity_assessment,
            'authenticity_confidence': authenticity_confidence,
            'contains_signatures': contains_signatures,
            'contains_dates': contains_dates,
            'real_estate_indicators': real_estate_indicators,
            'legal_terms_found': found_keywords.get('legal_terms', []),
            'keyword_analysis': keyword_counts,
            'model_used': 'static_fallback'
        }

    except Exception as e:
        logger.error(f"Error in PDF content analysis: {str(e)}")
        return _empty_pdf_analysis(f'Analysis error: {str(e)}', error=str(e))
177
 
178
# (document type, confidence when every keyword is present, keywords),
# listed in priority order: on equal scores the earlier entry wins.
_DOCUMENT_PATTERNS = (
    ('Property Title Deed', 0.9,
     ('title', 'deed', 'ownership', 'property', 'owner')),
    ('Mortgage Document', 0.85,
     ('mortgage', 'loan', 'bank', 'lender', 'borrower', 'principal', 'interest')),
    ('Lease Agreement', 0.8,
     ('lease', 'rent', 'tenant', 'landlord', 'rental', 'agreement')),
    ('Sale Contract', 0.8,
     ('sale', 'purchase', 'buyer', 'seller', 'contract', 'agreement')),
    ('Tax Assessment', 0.75,
     ('tax', 'assessment', 'valuation', 'appraisal', 'property tax')),
    ('Building Permit', 0.7,
     ('permit', 'building', 'construction', 'approval', 'zoning')),
    ('Property Survey', 0.7,
     ('survey', 'boundary', 'measurement', 'plot', 'dimension')),
    ('Insurance Document', 0.65,
     ('insurance', 'policy', 'premium', 'coverage', 'claim')),
)


def classify_document_type(text_lower, found_keywords):
    """
    Pick the most likely real-estate document type for a text.

    Each candidate type is scored as its base confidence multiplied by the
    fraction of its keywords found in the text; the highest score wins.

    Args:
        text_lower: Lower-cased document text.
        found_keywords: Not read by this function; kept so existing callers
            keep working.

    Returns:
        tuple: ``(document_type, confidence)``; ``('Unknown', 0.0)`` when no
        keyword of any type is found.
    """
    best_match = 'Unknown'
    best_confidence = 0.0

    for doc_type, base_confidence, keywords in _DOCUMENT_PATTERNS:
        # Match only at the start of a word: suffixes such as plurals
        # ("mortgages") still count, but 'deed' inside "indeed" or 'rent'
        # inside "current" no longer does.
        matches = sum(
            1 for keyword in keywords
            if re.search(r'\b' + re.escape(keyword), text_lower)
        )
        if not matches:
            continue

        confidence = base_confidence * (matches / len(keywords))
        # Strict comparison keeps the earlier pattern on ties.
        if confidence > best_confidence:
            best_match = doc_type
            best_confidence = confidence

    return best_match, best_confidence
233
+
234
def generate_document_summary(document_text, document_type):
    """
    Summarize a document, preferring the model and falling back to extraction.

    The summarization model is used only when it loads and is not flagged as
    a fallback (``fallback_used`` is False). If it is unavailable, raises, or
    returns an empty summary, up to two sentences mentioning a word of
    ``document_type`` are used; failing that, the first three sentences.

    Args:
        document_text: Full document text (only the first 1000 characters
            are passed to the model).
        document_type: Label such as 'Lease Agreement'; 'Unknown' disables
            the type-specific sentence selection.

    Returns:
        str: The summary. Extractive summaries are capped at 300 characters
        (truncated with '...'). Fixed messages are returned when the text
        has no sentence longer than 20 characters or when summarization
        fails entirely.
    """
    try:
        try:
            summarizer = load_model("summarization")
            # A missing flag is treated like a fallback: skip the model.
            if not getattr(summarizer, 'fallback_used', True):
                summary_result = summarizer(document_text[:1000], max_length=150, min_length=50)
                if isinstance(summary_result, list) and summary_result:
                    model_summary = (summary_result[0].get('summary_text') or '').strip()
                    # An empty model output is not a usable summary; fall
                    # through to the extractive path instead of returning ''.
                    if model_summary:
                        return model_summary
        except Exception as e:
            logger.warning(f"Summarization model failed: {str(e)}")

        # Extractive fallback: keep only sentences with some substance.
        sentences = [
            sentence.strip()
            for sentence in document_text.split('.')
            if len(sentence.strip()) > 20
        ]
        if not sentences:
            return "Document contains insufficient text for summarization."

        key_sentences = []
        if document_type != 'Unknown':
            type_keywords = document_type.lower().split()
            for sentence in sentences:
                sentence_lower = sentence.lower()
                if any(keyword in sentence_lower for keyword in type_keywords):
                    key_sentences.append(sentence)
                    if len(key_sentences) >= 2:
                        break

        # No type-specific sentence found: use the opening sentences.
        if not key_sentences:
            key_sentences = sentences[:3]

        summary = '. '.join(key_sentences) + '.'
        if len(summary) > 300:
            summary = summary[:297] + '...'

        return summary

    except Exception as e:
        logger.error(f"Error generating summary: {str(e)}")
        return "Summary generation failed."
285
 
286
def extract_document_key_info(document_text):
    """
    Extract key information from document text using regular expressions.

    Args:
        document_text: Raw text of the document.

    Returns:
        A dict containing only the keys for which something was found:
        'addresses' (up to 3), 'dates' (up to 5), 'amounts' (up to 5),
        'phone_numbers' (up to 3), 'bedrooms', 'bathrooms', 'square_feet',
        'acres' (first match each) and 'names' (up to 5). Empty or non-string
        input yields an empty dict. If extraction raises, the error is logged
        and whatever was collected so far is returned.
    """
    key_info = {}

    # Empty or non-text input has nothing to extract.
    if not isinstance(document_text, str) or not document_text:
        return key_info

    try:
        # Extract addresses (first pattern that yields anything wins)
        address_patterns = [
            r'\b\d+\s+[A-Za-z\s]+(?:Street|St|Road|Rd|Avenue|Ave|Lane|Ln|Drive|Dr|Boulevard|Blvd)\b',
            r'\b[A-Za-z\s]+,\s*[A-Za-z\s]+,\s*[A-Z]{2}\s*\d{5}\b'
        ]

        for pattern in address_patterns:
            matches = re.findall(pattern, document_text, re.IGNORECASE)
            if matches:
                key_info['addresses'] = matches[:3]  # Top 3 addresses
                break

        # Extract dates: numeric forms plus month-name forms in either order,
        # with full or abbreviated month names. All groups are non-capturing
        # so findall returns the whole matched date.
        month = (
            r'(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|June?|July?|'
            r'Aug(?:ust)?|Sep(?:t(?:ember)?)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)'
        )
        ordinal = r'(?:st|nd|rd|th)?'
        date_patterns = [
            r'\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b',
            r'\b\d{4}[/-]\d{1,2}[/-]\d{1,2}\b',
            r'\b' + month + r'\.?\s+\d{1,2}' + ordinal + r',?\s+\d{4}\b',
            r'\b\d{1,2}' + ordinal + r'\s+' + month + r'\.?,?\s+\d{4}\b',
        ]

        dates = []
        for pattern in date_patterns:
            dates.extend(re.findall(pattern, document_text, re.IGNORECASE))
        if dates:
            key_info['dates'] = dates[:5]  # Top 5 dates

        # Extract amounts/money.
        # \d+ (not \d{1,3}) keeps ungrouped numbers such as "$250000" whole.
        # The currency word must be complete and end on a word boundary, so
        # "3 rooms" is no longer read as an amount of "3 r".
        amount_patterns = [
            r'\$\d+(?:,\d{3})*(?:\.\d{2})?',
            r'₹\d+(?:,\d{2,3})*(?:\.\d{2})?',
            r'\b\d+(?:,\d{2,3})*(?:\.\d{2})?\s*(?:dollars?|rupees?|rs)\b',
        ]

        amounts = []
        for pattern in amount_patterns:
            amounts.extend(re.findall(pattern, document_text, re.IGNORECASE))
        if amounts:
            key_info['amounts'] = amounts[:5]  # Top 5 amounts

        # Extract phone numbers (10 digits, optional '-' or '.' separators)
        phone_pattern = r'\b\d{3}[-.]?\d{3}[-.]?\d{4}\b'
        phones = re.findall(phone_pattern, document_text)
        if phones:
            key_info['phone_numbers'] = phones[:3]  # Top 3 phone numbers

        # Extract property details; each pattern captures just the number.
        property_patterns = {
            'bedrooms': r'\b(\d+)\s*(?:bedroom|bed|br)\b',
            'bathrooms': r'\b(\d+)\s*(?:bathroom|bath|ba)\b',
            # \d+ so that "1500 sqft" matches as well as "1,500 sqft"
            'square_feet': r'\b(\d+(?:,\d{3})*)\s*(?:square\s*feet|sq\s*ft|sqft)\b',
            'acres': r'\b(\d+(?:\.\d+)?)\s*acres?\b'
        }

        for key, pattern in property_patterns.items():
            matches = re.findall(pattern, document_text, re.IGNORECASE)
            if matches:
                key_info[key] = matches[0]  # First match

        # Extract names: two consecutive capitalised words (case-sensitive)
        name_pattern = r'\b[A-Z][a-z]+\s+[A-Z][a-z]+\b'
        names = re.findall(name_pattern, document_text)
        if names:
            key_info['names'] = names[:5]  # Top 5 names

    except Exception as e:
        logger.warning(f"Error extracting key info: {str(e)}")

    return key_info
360
+
361
def detect_signatures(text_lower):
    """
    Report whether the text contains wording that suggests a signature.

    Indicators are matched as whole words, so unrelated words that merely
    contain "sign" ("design", "assignment", "significant") do not count,
    while common inflections ("signed", "undersigned", "signatures",
    "witnesses", "notarized") still do.

    Args:
        text_lower: Document text; matching is case-insensitive, so any
            casing is accepted.

    Returns:
        True when at least one indicator word is present, otherwise False
        (including for empty or non-string input).
    """
    if not isinstance(text_lower, str) or not text_lower:
        return False

    signature_pattern = (
        r'\b(?:'
        r'signatures?'
        r'|(?:under|counter|co-?)?sign(?:ed|s|ing|ers?)?'
        r'|signator(?:y|ies)'
        r'|witness(?:es|ed|eth)?'
        r'|notar(?:y|ies|ized|ised)'
        r'|attorneys?|lawyers?|agents?'
        r')\b'
    )
    return re.search(signature_pattern, text_lower, re.IGNORECASE) is not None
371
+
372
def detect_dates(document_text):
    """
    Report whether the text contains at least one recognisable date.

    Recognised forms: ``dd/mm/yyyy`` and ``yyyy-mm-dd`` style numeric dates
    (with '/' or '-'), and month-name dates in either order
    ("January 5, 2023", "5th Jan 2023"), with full or abbreviated month
    names. Matching is case-insensitive.

    Args:
        document_text: Raw text of the document.

    Returns:
        True when any date form is found, otherwise False (including for
        empty or non-string input, which previously raised TypeError).
    """
    if not isinstance(document_text, str) or not document_text:
        return False

    month = (
        r'(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|June?|July?|'
        r'Aug(?:ust)?|Sep(?:t(?:ember)?)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)'
    )
    ordinal = r'(?:st|nd|rd|th)?'
    date_patterns = (
        r'\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b',
        r'\b\d{4}[/-]\d{1,2}[/-]\d{1,2}\b',
        # Month first: "January 5, 2023", "Jan. 5 2023"
        r'\b' + month + r'\.?\s+\d{1,2}' + ordinal + r',?\s+\d{4}\b',
        # Day first: "5 January 2023", "5th Jan, 2023"
        r'\b\d{1,2}' + ordinal + r'\s+' + month + r'\.?,?\s+\d{4}\b',
    )

    return any(
        re.search(pattern, document_text, re.IGNORECASE)
        for pattern in date_patterns
    )
387
+
388
def assess_document_authenticity(document_text, has_signatures, has_dates, key_info):
    """
    Score how authentic a document looks from simple structural signals.

    Signals and weights (in tenths): signatures 3, dates 2, addresses 2,
    amounts 1, names 1, text longer than 500 characters 1. The weights are
    summed as integers and divided once, so the result is an exact tenth
    (e.g. 1.0 rather than 0.9999999999999999) and the threshold comparisons
    are not affected by floating-point drift.

    Args:
        document_text: Raw document text; only its length is used.
        has_signatures: Whether signature wording was detected.
        has_dates: Whether a date was detected.
        key_info: Dict of extracted key information; the 'addresses',
            'amounts' and 'names' entries are checked for truthiness.
            ``None`` is treated as an empty dict.

    Returns:
        A ``(assessment, score)`` tuple where ``score`` is in [0.0, 1.0] and
        ``assessment`` is 'Authentic' (>= 0.7), 'Likely Authentic' (>= 0.4),
        'Suspicious' (>= 0.2) or 'Potentially Fake'.
    """
    key_info = key_info or {}
    text_length = len(document_text) if document_text else 0

    weighted_signals = (
        (3, has_signatures),
        (2, has_dates),
        (2, key_info.get('addresses')),
        (1, key_info.get('amounts')),
        (1, key_info.get('names')),
        (1, text_length > 500),
    )
    tenths = sum(weight for weight, present in weighted_signals if present)

    # Determine assessment
    if tenths >= 7:
        assessment = 'Authentic'
    elif tenths >= 4:
        assessment = 'Likely Authentic'
    elif tenths >= 2:
        assessment = 'Suspicious'
    else:
        assessment = 'Potentially Fake'

    return assessment, tenths / 10
419
+
420
def calculate_verification_score(confidence, document_confidence, authenticity_confidence, has_signatures, has_dates, key_info):
    """
    Calculate the overall verification score as a percentage.

    The score is accumulated directly in percentage points: ``confidence``
    contributes up to 30, ``document_confidence`` and
    ``authenticity_confidence`` up to 20 each (for inputs in [0, 1]),
    signatures and dates add 10 each, addresses and amounts add 5 each.
    Working in points avoids the rounding drift of summing small fractions
    and scaling afterwards.

    Args:
        confidence: Base confidence; ``None`` counts as 0.
        document_confidence: Document-type confidence; ``None`` counts as 0.
        authenticity_confidence: Authenticity score; ``None`` counts as 0.
        has_signatures: Whether signature wording was detected.
        has_dates: Whether a date was detected.
        key_info: Dict of extracted key information; 'addresses' and
            'amounts' are checked for truthiness. ``None`` is treated as
            an empty dict.

    Returns:
        A float clamped to the range [0.0, 100.0].
    """
    key_info = key_info or {}

    score = (
        (confidence or 0.0) * 30
        + (document_confidence or 0.0) * 20
        + (authenticity_confidence or 0.0) * 20
    )

    # Additional factors
    if has_signatures:
        score += 10
    if has_dates:
        score += 10
    if key_info.get('addresses'):
        score += 5
    if key_info.get('amounts'):
        score += 5

    # Clamp on both sides: negative inputs must not produce a negative score.
    return max(0.0, min(100.0, float(score)))
446
+
447
def check_document_consistency(document_text, property_data):
    """
    Check whether a document's text agrees with the listed property data.

    Starting from a base of 0.5, the confidence gains 0.2 for a matching
    address, 0.1 for the property type appearing in the text and 0.1 for a
    floor area within 100 sq ft of the listed one. Points are tracked as
    integers (hundredths) so the reported confidence has no float drift.

    Args:
        document_text: Raw text of the document.
        property_data: Dict of listing data; the 'address', 'property_type'
            and 'sq_ft' entries are used when present.

    Returns:
        A dict with 'is_consistent' (confidence above 0.6), 'confidence',
        'issues' (list of mismatch messages) and 'model_used'. Empty
        ``property_data`` yields a neutral result (consistent, 0.5). Any
        unexpected error yields an inconsistent result with confidence 0.0
        and the error recorded in 'issues'.
    """
    try:
        if not property_data:
            return {
                'is_consistent': True,
                'confidence': 0.5,
                'issues': [],
                'model_used': 'static_fallback'
            }

        points = 50  # Base score, in hundredths
        issues = []

        # Check address consistency
        if property_data.get('address'):
            # Street-type words appear in almost every address, so they are
            # ignored; otherwise "12 Main Street" would "match" "99 Oak Street".
            generic_words = {
                'street', 'st', 'road', 'rd', 'avenue', 'ave',
                'lane', 'ln', 'drive', 'dr', 'boulevard', 'blvd',
            }

            def significant_tokens(address):
                return set(re.findall(r'[a-z0-9]+', address.lower())) - generic_words

            property_tokens = significant_tokens(str(property_data['address']))
            doc_addresses = re.findall(
                r'\b\d+\s+[A-Za-z\s]+(?:Street|St|Road|Rd|Avenue|Ave)\b',
                document_text,
                re.IGNORECASE,
            )

            # Whole-token overlap instead of substring containment.
            if any(property_tokens & significant_tokens(doc_addr) for doc_addr in doc_addresses):
                points += 20
            else:
                issues.append("Address mismatch between document and property data")

        # Check property type consistency
        if property_data.get('property_type'):
            property_type = str(property_data['property_type']).lower()
            if property_type in document_text.lower():
                points += 10
            else:
                issues.append("Property type mismatch")

        # Check size consistency
        if property_data.get('sq_ft'):
            # \d+ so that "1200 sq ft" matches as well as "1,200 sq ft"
            size_matches = re.findall(
                r'\b(\d+(?:,\d{3})*)\s*(?:square\s*feet|sq\s*ft|sqft)\b',
                document_text,
                re.IGNORECASE,
            )
            if size_matches:
                try:
                    # Listing sizes may arrive as "1,200" or "1200.5".
                    property_size = float(str(property_data['sq_ft']).replace(',', '').strip())
                except ValueError:
                    # An unparseable listing size cannot be compared; skip
                    # this check rather than failing the whole assessment.
                    logger.warning(f"Unparseable property size: {property_data['sq_ft']!r}")
                else:
                    doc_size = int(size_matches[0].replace(',', ''))
                    if abs(doc_size - property_size) < 100:  # Within 100 sq ft
                        points += 10
                    else:
                        issues.append("Property size mismatch")

        return {
            'is_consistent': points > 60,
            'confidence': min(1.0, points / 100),
            'issues': issues,
            'model_used': 'static_fallback'
        }

    except Exception as e:
        logger.error(f"Error checking document consistency: {str(e)}")
        return {
            'is_consistent': False,
            'confidence': 0.0,
            'issues': [f"Consistency check error: {str(e)}"],
            'model_used': 'static_fallback'
        }
models/performance_optimizer.py CHANGED
@@ -95,7 +95,7 @@ def optimize_model_loading():
95
  try:
96
  from .model_loader import load_model
97
 
98
- # Pre-load models in background threads
99
  import concurrent.futures
100
  import threading
101
 
@@ -108,18 +108,16 @@ def optimize_model_loading():
108
  logger.warning(f"Failed to pre-load model {model_name}: {str(e)}")
109
  return None
110
 
111
- # Load models in parallel
112
- with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
113
  model_names = [
114
- "zero-shot-classification",
115
- "summarization",
116
- "text-classification",
117
- "text-generation"
118
  ]
119
 
120
  futures = {executor.submit(load_model_async, name): name for name in model_names}
121
 
122
- for future in concurrent.futures.as_completed(futures, timeout=60):
123
  model_name = futures[future]
124
  try:
125
  future.result()
 
95
  try:
96
  from .model_loader import load_model
97
 
98
+ # Pre-load only essential models in background threads
99
  import concurrent.futures
100
  import threading
101
 
 
108
  logger.warning(f"Failed to pre-load model {model_name}: {str(e)}")
109
  return None
110
 
111
+ # Load only essential models in parallel with timeout
112
+ with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor: # Reduced workers
113
  model_names = [
114
+ "zero-shot-classification", # Most important
115
+ "summarization" # Second most important
 
 
116
  ]
117
 
118
  futures = {executor.submit(load_model_async, name): name for name in model_names}
119
 
120
+ for future in concurrent.futures.as_completed(futures, timeout=30): # 30 second timeout
121
  model_name = futures[future]
122
  try:
123
  future.result()
models/property_relation.py CHANGED
@@ -14,10 +14,23 @@ def check_if_property_related(text):
14
  return {
15
  'is_related': False,
16
  'confidence': 0.0,
17
- 'error': 'No text provided'
 
18
  }
19
- classifier = load_model("zero-shot-classification", "typeform/mobilebert-uncased-mnli")
 
 
 
 
 
 
 
 
 
 
 
20
  result = classifier(text[:1000], ["property-related", "non-property-related"])
 
21
  # Defensive: ensure result structure
22
  labels = result.get('labels', [])
23
  scores = result.get('scores', [])
@@ -26,17 +39,23 @@ def check_if_property_related(text):
26
  return {
27
  'is_related': False,
28
  'confidence': 0.0,
29
- 'error': 'Model output error'
 
30
  }
 
31
  is_related = labels[0] == "property-related"
 
 
32
  return {
33
  'is_related': is_related,
34
- 'confidence': float(scores[0]) if is_related else float(scores[1])
 
35
  }
36
  except Exception as e:
37
  logger.error(f"Error checking property relation: {str(e)}")
38
  return {
39
  'is_related': False,
40
  'confidence': 0.0,
41
- 'error': str(e)
 
42
  }
 
14
  return {
15
  'is_related': False,
16
  'confidence': 0.0,
17
+ 'error': 'No text provided',
18
+ 'model_used': 'static_fallback'
19
  }
20
+
21
+ try:
22
+ classifier = load_model("zero-shot-classification") # Use standard model instead of typeform
23
+ except Exception as e:
24
+ logger.error(f"Error loading model in property relation: {str(e)}")
25
+ return {
26
+ 'is_related': False,
27
+ 'confidence': 0.0,
28
+ 'error': f'Model loading error: {str(e)}',
29
+ 'model_used': 'static_fallback'
30
+ }
31
+
32
  result = classifier(text[:1000], ["property-related", "non-property-related"])
33
+
34
  # Defensive: ensure result structure
35
  labels = result.get('labels', [])
36
  scores = result.get('scores', [])
 
39
  return {
40
  'is_related': False,
41
  'confidence': 0.0,
42
+ 'error': 'Model output error',
43
+ 'model_used': 'static_fallback'
44
  }
45
+
46
  is_related = labels[0] == "property-related"
47
+ model_used = getattr(classifier, 'fallback_model', 'primary_model')
48
+
49
  return {
50
  'is_related': is_related,
51
+ 'confidence': float(scores[0]) if is_related else float(scores[1]),
52
+ 'model_used': model_used
53
  }
54
  except Exception as e:
55
  logger.error(f"Error checking property relation: {str(e)}")
56
  return {
57
  'is_related': False,
58
  'confidence': 0.0,
59
+ 'error': str(e),
60
+ 'model_used': 'static_fallback'
61
  }
models/property_summary.py CHANGED
@@ -124,69 +124,85 @@ Property Summary:"""
124
  return prompt
125
 
126
  def generate_dynamic_summary_with_slm(data):
127
- """Generate property summary using CPU-based Small Language Model"""
128
  try:
129
  # Validate and format data
130
  data = validate_and_format_data(data)
131
 
132
- # Create the prompt
133
- prompt = create_property_prompt(data)
134
 
135
- # Try best SLMs in order
136
- slm_models = ["TinyLlama/TinyLlama-1.1B-Chat-v1.0", "gpt2", "distilgpt2"]
137
- for model_name in slm_models:
138
- try:
139
- # Try to load a text generation model
140
- generator = load_model("text-generation", model_name)
141
-
142
- # Generate the summary
143
- if hasattr(generator, 'task_type') and generator.task_type == "text-generation":
144
- # Using fallback generator - it will handle the prompt parsing
145
- result = generator(prompt, max_length=512, do_sample=True, temperature=0.7)
146
- summary = result[0]['generated_text'] if result else ""
147
- else:
148
- # Using actual model
149
- result = generator(
150
- prompt,
151
- max_length=512,
152
- do_sample=True,
153
- temperature=0.7,
154
- top_p=0.9,
155
- repetition_penalty=1.1
156
- )
157
- summary = result[0]['generated_text'] if result else ""
158
-
159
- # Clean up the generated text
160
- if summary:
161
- # Remove the prompt from the beginning if present
162
- if prompt in summary:
163
- summary = summary.replace(prompt, "").strip()
164
-
165
- # Clean up any remaining artifacts
166
- summary = re.sub(r'\n+', '\n', summary)
167
- summary = re.sub(r'\s+', ' ', summary)
168
- summary = summary.strip()
169
-
170
- # Ensure it's not too long
171
- if len(summary) > 512:
172
- summary = summary[:512].rsplit(' ', 1)[0] + "..."
173
-
174
- return summary
175
- else:
176
- raise Exception("No text generated")
177
-
178
- except Exception as model_error:
179
- logger.warning(f"SLM model {model_name} failed: {str(model_error)}")
180
- continue
181
- # If all SLMs fail, use fallback
182
- return generate_fallback_summary(data)
183
 
184
  except Exception as e:
185
  logger.error(f"Error in dynamic summary generation: {str(e)}")
186
- return generate_fallback_summary(data)
187
 
188
- def generate_fallback_summary(data):
189
- """Enhanced fallback summary generation when SLM fails"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  try:
191
  data = validate_and_format_data(data)
192
 
@@ -223,80 +239,56 @@ def generate_fallback_summary(data):
223
 
224
  # Pricing information
225
  if data.get('market_value'):
226
- price = format_price(data['market_value'])
227
- summary_parts.append(f"Priced at {price}, this property offers excellent value for money in today's competitive market.")
228
-
229
- # Year built information
230
- if data.get('year_built') and data['year_built'] != 'N/A':
231
- summary_parts.append(f"Built in {data['year_built']}, this property combines modern amenities with solid construction.")
232
 
233
- # Amenities section
234
- if data.get('amenities'):
235
- amenities = data['amenities'][:5] # Limit to 5 amenities
236
- if amenities:
237
- summary_parts.append(f"Residents can enjoy access to {', '.join(amenities)}.")
238
 
239
- # Nearby landmarks
240
  landmarks = data.get('nearby_landmarks', '')
241
  if landmarks:
242
- if isinstance(landmarks, str):
243
- landmarks_list = [l.strip() for l in landmarks.split(',') if l.strip()][:3]
244
- if landmarks_list:
245
- summary_parts.append(f"The property is conveniently located near {', '.join(landmarks_list)}.")
246
 
247
- # Possession information
248
- if data.get('possession_date') and data['possession_date'] != 'Immediate':
249
- summary_parts.append(f"Ready for possession from {data['possession_date']}.")
250
 
251
- # Property description
252
- if data.get('property_description'):
253
- desc = data['property_description'][:200] + "..." if len(data['property_description']) > 200 else data['property_description']
254
- summary_parts.append(f"Property highlights: {desc}")
255
-
256
- # Call to action
257
- summary_parts.append("Don't miss this opportunity to own a piece of prime real estate. Contact us today for a detailed viewing and exclusive offers.")
258
-
259
- # Combine all parts
260
- summary = " ".join(summary_parts)
261
-
262
- # Ensure it's around 512 words
263
- words = summary.split()
264
- if len(words) > 512:
265
- summary = " ".join(words[:512]) + "..."
266
-
267
- return summary
268
 
269
  except Exception as e:
270
- logger.error(f"Error in fallback summary generation: {str(e)}")
271
- return "Property summary unavailable. Please contact us for more details."
272
 
273
  def generate_property_summary(data):
274
- """Main function to generate property summary using SLM"""
275
  try:
276
  # Validate input data
277
  if not data or not isinstance(data, dict):
278
  return "A beautiful property with excellent features and prime location. Contact us for detailed information and exclusive offers."
279
 
280
- # Create a more robust fallback summary for any data
281
  try:
282
  # Use the new dynamic SLM-based approach
283
  summary = generate_dynamic_summary_with_slm(data)
284
 
285
  # Ensure summary is a proper string
286
  if not summary or not isinstance(summary, str):
287
- summary = generate_fallback_summary(data)
288
 
289
  if not summary or not summary.strip():
290
- summary = generate_fallback_summary(data)
291
 
292
  # Final fallback - always return something meaningful
293
  if not summary or not summary.strip():
294
- summary = "A beautiful property with excellent features and prime location. Contact us for detailed information and exclusive offers."
295
 
296
  # Ensure it's a string and clean it up
297
  summary = str(summary).strip()
298
  if summary == '[object Object]' or summary == 'null' or summary == 'undefined':
299
- summary = generate_fallback_summary(data)
300
 
301
  # If still no valid summary, create a basic one
302
  if not summary or len(summary) < 50:
@@ -305,12 +297,12 @@ def generate_property_summary(data):
305
  return summary
306
 
307
  except Exception as e:
308
- logger.error(f"Error in summary generation: {str(e)}")
309
  return create_basic_summary(data)
310
 
311
  except Exception as e:
312
  logger.error(f"Error generating property summary: {str(e)}")
313
- return "A beautiful property with excellent features and prime location. Contact us for detailed information and exclusive offers."
314
 
315
  def create_basic_summary(data):
316
  """Create a basic summary even for invalid data"""
 
124
  return prompt
125
 
126
def generate_dynamic_summary_with_slm(data):
    """
    Produce a property summary, preferring the summarization model.

    The data is validated, turned into a descriptive paragraph and passed to
    the "summarization" model when the loaded object exposes ``fallback_used``
    and reports it as false. A model summary is accepted only if its stripped
    text is longer than 20 characters; in every other case (no usable model,
    short output, any exception) the enhanced fallback summary is returned.
    """
    try:
        data = validate_and_format_data(data)
        property_text = create_property_description_text(data)

        try:
            summarizer = load_model("summarization")
            model_is_real = hasattr(summarizer, 'fallback_used') and not summarizer.fallback_used

            # Only a genuine (non-fallback) model is asked for a summary.
            result = (
                summarizer(property_text, max_length=150, min_length=50, do_sample=False)
                if model_is_real
                else None
            )

            if isinstance(result, list) and result:
                candidate = result[0].get('summary_text', '')
                cleaned = candidate.strip() if candidate else ''
                if len(cleaned) > 20:
                    return cleaned

            # Model unavailable or output too short: use the template summary.
            return generate_enhanced_fallback_summary(data)

        except Exception as model_error:
            logger.warning(f"Summarization model failed: {str(model_error)}")
            return generate_enhanced_fallback_summary(data)

    except Exception as e:
        logger.error(f"Error in dynamic summary generation: {str(e)}")
        return generate_enhanced_fallback_summary(data)
158
 
159
def create_property_description_text(data):
    """
    Build a plain-English paragraph describing a property, for summarization.

    Args:
        data: Dict of property fields. Read keys: 'address', 'city', 'state',
            'amenities', 'nearby_landmarks', 'property_type', 'status',
            'bedrooms', 'bathrooms', 'sq_ft', 'market_value', 'year_built'
            and 'parking_spaces'. 'amenities' and 'nearby_landmarks' may each
            be a list/tuple or a comma-separated string.

    Returns:
        The sentences joined by single spaces. If building the description
        raises, the error is logged and a one-sentence generic description
        is returned instead.
    """
    def as_items(value):
        # Accept either a comma-separated string or a list/tuple. Slicing a
        # string directly would join its individual characters.
        if isinstance(value, str):
            candidates = value.split(',')
        elif isinstance(value, (list, tuple)):
            candidates = [str(item) for item in value]
        else:
            candidates = []
        return [item.strip() for item in candidates if item.strip()]

    try:
        # Build location string
        location_parts = [
            str(data[key]) for key in ('address', 'city', 'state') if data.get(key)
        ]
        location = ', '.join(location_parts) if location_parts else 'Prime location'

        # Build amenities string (at most 5)
        amenities = as_items(data.get('amenities', []))
        amenities_str = ', '.join(amenities[:5]) if amenities else 'Modern amenities'

        # Build landmarks string (at most 3); blank input falls back to the
        # generic phrase instead of producing "near ."
        landmarks_list = as_items(data.get('nearby_landmarks', ''))
        landmarks_str = ', '.join(landmarks_list[:3]) if landmarks_list else 'Convenient location'

        # Create comprehensive property description
        description_parts = [
            f"This is a {data.get('property_type', 'property')} located in {location}.",
            f"The property is currently {data.get('status', 'available')} for sale.",
            f"It features {data.get('bedrooms', '0')} bedrooms and {data.get('bathrooms', '0')} bathrooms.",
            f"The total area is {data.get('sq_ft', '0')} square feet.",
            f"The property is priced at {format_price(data.get('market_value', '0'))}.",
            f"It includes amenities such as {amenities_str}.",
            f"The property is near {landmarks_str}.",
            f"It was built in {data.get('year_built', 'recent years')}.",
            f"The property offers {data.get('parking_spaces', '0')} parking spaces.",
            f"This is an excellent investment opportunity in a prime location with modern facilities and strategic connectivity."
        ]

        return " ".join(description_parts)

    except Exception as e:
        logger.error(f"Error creating property description text: {str(e)}")
        # The failure may be that ``data`` is not a dict at all, so the
        # fallback sentence must not assume ``data.get`` exists.
        safe = data if isinstance(data, dict) else {}
        return f"This is a {safe.get('property_type', 'property')} located in {safe.get('city', 'prime location')} with excellent features and amenities."
203
+
204
+ def generate_enhanced_fallback_summary(data):
205
+ """Enhanced fallback summary generation with better AI-like text"""
206
  try:
207
  data = validate_and_format_data(data)
208
 
 
239
 
240
  # Pricing information
241
  if data.get('market_value'):
242
+ price_str = format_price(data['market_value'])
243
+ summary_parts.append(f"Priced at {price_str}, this property offers excellent value for money and represents a sound investment opportunity.")
 
 
 
 
244
 
245
+ # Amenities and facilities
246
+ amenities = data.get('amenities', [])
247
+ if amenities:
248
+ amenities_str = ', '.join(amenities[:3])
249
+ summary_parts.append(f"The property includes modern amenities such as {amenities_str}.")
250
 
251
+ # Location benefits
252
  landmarks = data.get('nearby_landmarks', '')
253
  if landmarks:
254
+ summary_parts.append(f"Conveniently located near {landmarks}, this property offers easy access to essential facilities and transportation.")
 
 
 
255
 
256
+ # Closing statement
257
+ summary_parts.append("Perfect for families and investors alike, this property combines modern amenities with strategic location. Don't miss this opportunity to own a piece of prime real estate. Contact us today for a detailed viewing and exclusive offers.")
 
258
 
259
+ return " ".join(summary_parts)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
260
 
261
  except Exception as e:
262
+ logger.error(f"Error in enhanced fallback summary: {str(e)}")
263
+ return create_basic_summary(data)
264
 
265
  def generate_property_summary(data):
266
+ """Main function to generate property summary using AI model"""
267
  try:
268
  # Validate input data
269
  if not data or not isinstance(data, dict):
270
  return "A beautiful property with excellent features and prime location. Contact us for detailed information and exclusive offers."
271
 
272
+ # Try to use AI model for summary generation
273
  try:
274
  # Use the new dynamic SLM-based approach
275
  summary = generate_dynamic_summary_with_slm(data)
276
 
277
  # Ensure summary is a proper string
278
  if not summary or not isinstance(summary, str):
279
+ summary = generate_enhanced_fallback_summary(data)
280
 
281
  if not summary or not summary.strip():
282
+ summary = generate_enhanced_fallback_summary(data)
283
 
284
  # Final fallback - always return something meaningful
285
  if not summary or not summary.strip():
286
+ summary = create_basic_summary(data)
287
 
288
  # Ensure it's a string and clean it up
289
  summary = str(summary).strip()
290
  if summary == '[object Object]' or summary == 'null' or summary == 'undefined':
291
+ summary = generate_enhanced_fallback_summary(data)
292
 
293
  # If still no valid summary, create a basic one
294
  if not summary or len(summary) < 50:
 
297
  return summary
298
 
299
  except Exception as e:
300
+ logger.error(f"Error in AI summary generation: {str(e)}")
301
  return create_basic_summary(data)
302
 
303
  except Exception as e:
304
  logger.error(f"Error generating property summary: {str(e)}")
305
+ return create_basic_summary(data)
306
 
307
  def create_basic_summary(data):
308
  """Create a basic summary even for invalid data"""
models/suggestions.py CHANGED
@@ -23,12 +23,13 @@ def generate_suggestions(text, data=None):
23
  'improvements': [],
24
  'warnings': [],
25
  'recommendations': [],
26
- 'confidence': 0.0
 
27
  }
28
 
29
  # Load model for analysis
30
  try:
31
- classifier = load_model("zero-shot-classification", "typeform/mobilebert-uncased-mnli")
32
  except Exception as e:
33
  logger.error(f"Error loading model in suggestions: {str(e)}")
34
  suggestions['warnings'].append({'type': 'error', 'confidence': 0.0, 'details': {'title': 'Model Error', 'message': f'Model loading error: {str(e)}', 'priority': 'high'}})
@@ -50,111 +51,127 @@ def generate_suggestions(text, data=None):
50
 
51
  # Analyze text with context
52
  context = f"{text} property_data:{str(data) if data else ''}"
 
53
  try:
54
- result = classifier(context, categories, multi_label=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  except Exception as e:
56
- logger.error(f"Error in suggestions model inference: {str(e)}")
57
- suggestions['warnings'].append({'type': 'error', 'confidence': 0.0, 'details': {'title': 'Model Error', 'message': f'Model inference error: {str(e)}', 'priority': 'high'}})
58
- return suggestions
59
-
60
- # Process results
61
- for label, score in zip(result['labels'], result['scores']):
62
- if score > 0.3: # Only include high confidence suggestions
63
- suggestion = {
64
- 'type': label,
65
- 'confidence': float(score),
66
- 'details': generate_suggestion_details(label, text, data)
67
- }
68
-
69
- if 'improvement' in label or 'update' in label:
70
- suggestions['improvements'].append(suggestion)
71
- elif 'warning' in label or 'issue' in label:
72
- suggestions['warnings'].append(suggestion)
73
- else:
74
- suggestions['recommendations'].append(suggestion)
75
-
76
- # Calculate overall confidence
77
- if result['scores']:
78
- suggestions['confidence'] = float(max(result['scores']))
79
 
80
  return suggestions
81
-
82
  except Exception as e:
83
  logger.error(f"Error generating suggestions: {str(e)}")
84
  return {
85
  'improvements': [],
86
- 'warnings': [{'type': 'error', 'confidence': 0.0, 'details': {'title': 'Error', 'message': f'Error generating suggestions: {str(e)}', 'priority': 'high'}}],
87
  'recommendations': [],
88
  'confidence': 0.0,
89
- 'error': str(e)
90
  }
91
 
92
  def generate_suggestion_details(suggestion_type, text, data):
93
- """Generate detailed suggestions based on the type."""
94
  try:
95
- details = {
96
- 'property description improvement': {
97
- 'title': 'Improve Property Description',
98
- 'message': 'Add more detailed information about the property features and amenities.',
99
- 'priority': 'medium'
100
- },
101
- 'price adjustment needed': {
102
- 'title': 'Review Property Price',
103
- 'message': 'Consider adjusting the price based on market conditions and property specifications.',
104
- 'priority': 'high'
105
- },
106
- 'documentation required': {
107
- 'title': 'Additional Documentation Needed',
108
- 'message': 'Please provide more property-related documents for verification.',
109
- 'priority': 'high'
110
- },
111
- 'verification needed': {
112
- 'title': 'Property Verification Required',
113
- 'message': 'Additional verification steps are needed for property authenticity.',
114
- 'priority': 'high'
115
- },
116
- 'legal compliance issue': {
117
- 'title': 'Legal Compliance Check',
118
- 'message': 'Review property legal documentation and compliance status.',
119
- 'priority': 'high'
120
- },
121
- 'location verification needed': {
122
- 'title': 'Location Verification',
123
- 'message': 'Verify property location details and coordinates.',
124
- 'priority': 'medium'
125
- },
126
- 'property specification update': {
127
- 'title': 'Update Property Specifications',
128
- 'message': 'Review and update property specifications for accuracy.',
129
- 'priority': 'medium'
130
- },
131
- 'image quality improvement': {
132
- 'title': 'Improve Image Quality',
133
- 'message': 'Add more high-quality images of the property.',
134
- 'priority': 'low'
135
- },
136
- 'market value adjustment': {
137
- 'title': 'Market Value Review',
138
- 'message': 'Review and adjust market value based on current market conditions.',
139
- 'priority': 'high'
140
- },
141
- 'contact information update': {
142
- 'title': 'Update Contact Information',
143
- 'message': 'Ensure contact information is complete and up-to-date.',
144
- 'priority': 'low'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
  }
146
- }
147
-
148
- return details.get(suggestion_type, {
149
- 'title': 'General Suggestion',
150
- 'message': 'Review property listing for improvements.',
151
- 'priority': 'medium'
152
- })
153
-
154
  except Exception as e:
155
  logger.error(f"Error generating suggestion details: {str(e)}")
156
- return {
157
- 'title': 'Error',
158
- 'message': 'Could not generate detailed suggestion.',
159
- 'priority': 'low'
160
- }
 
23
  'improvements': [],
24
  'warnings': [],
25
  'recommendations': [],
26
+ 'confidence': 0.0,
27
+ 'model_used': 'static_fallback'
28
  }
29
 
30
  # Load model for analysis
31
  try:
32
+ classifier = load_model("zero-shot-classification") # Use standard model instead of typeform
33
  except Exception as e:
34
  logger.error(f"Error loading model in suggestions: {str(e)}")
35
  suggestions['warnings'].append({'type': 'error', 'confidence': 0.0, 'details': {'title': 'Model Error', 'message': f'Model loading error: {str(e)}', 'priority': 'high'}})
 
51
 
52
  # Analyze text with context
53
  context = f"{text} property_data:{str(data) if data else ''}"
54
+
55
  try:
56
+ result = classifier(context[:1000], categories, multi_label=True)
57
+
58
+ # Process results and generate suggestions
59
+ for label, score in zip(result['labels'], result['scores']):
60
+ if score > 0.3: # Only include if confidence is above 30%
61
+ suggestion_details = generate_suggestion_details(label, text, data)
62
+ if suggestion_details:
63
+ if 'improvement' in label.lower():
64
+ suggestions['improvements'].append(suggestion_details)
65
+ elif 'warning' in label.lower() or 'issue' in label.lower():
66
+ suggestions['warnings'].append(suggestion_details)
67
+ else:
68
+ suggestions['recommendations'].append(suggestion_details)
69
+
70
+ # Calculate overall confidence
71
+ if result['scores']:
72
+ suggestions['confidence'] = max(result['scores'])
73
+
74
+ suggestions['model_used'] = getattr(classifier, 'fallback_model', 'primary_model')
75
+
76
  except Exception as e:
77
+ logger.error(f"Error in suggestions analysis: {str(e)}")
78
+ suggestions['warnings'].append({'type': 'error', 'confidence': 0.0, 'details': {'title': 'Analysis Error', 'message': f'Analysis error: {str(e)}', 'priority': 'medium'}})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
 
80
  return suggestions
81
+
82
  except Exception as e:
83
  logger.error(f"Error generating suggestions: {str(e)}")
84
  return {
85
  'improvements': [],
86
+ 'warnings': [{'type': 'error', 'confidence': 0.0, 'details': {'title': 'System Error', 'message': f'System error: {str(e)}', 'priority': 'high'}}],
87
  'recommendations': [],
88
  'confidence': 0.0,
89
+ 'model_used': 'static_fallback'
90
  }
91
 
92
# Ordered catalogue of suggestion templates. Each row is:
#   (label fragment, type, confidence, title, message, priority, tips)
# Rows are checked top to bottom and the first fragment found in the
# lower-cased label wins, which mirrors the order of the former if/elif chain.
# Tuples are used so the catalogue itself can never be mutated by callers.
_SUGGESTION_TEMPLATES = (
    (
        'description improvement',
        'description_improvement',
        0.8,
        'Improve Property Description',
        'Add more details about amenities, location benefits, and unique features.',
        'medium',
        (
            'Include nearby landmarks and transportation',
            'Describe interior features and finishes',
            'Mention parking and security features',
            'Add information about neighborhood',
        ),
    ),
    (
        'price adjustment',
        'price_adjustment',
        0.7,
        'Review Property Price',
        'Consider adjusting the price based on market conditions and property features.',
        'high',
        (
            'Compare with similar properties in the area',
            'Consider current market trends',
            'Factor in property condition and age',
            'Include all amenities in pricing',
        ),
    ),
    (
        'documentation required',
        'documentation_required',
        0.9,
        'Additional Documentation Needed',
        'Provide more documents to increase property verification.',
        'high',
        (
            'Upload property title documents',
            'Include recent utility bills',
            'Add property tax receipts',
            'Provide floor plan or layout',
        ),
    ),
    (
        'verification needed',
        'verification_needed',
        0.8,
        'Property Verification Required',
        'Additional verification steps needed for property authenticity.',
        'high',
        (
            'Verify property ownership',
            'Check for any legal disputes',
            'Confirm property dimensions',
            'Validate address details',
        ),
    ),
)

# Fallback row used when no fragment above occurs in the label.
_GENERAL_SUGGESTION = (
    None,
    'general_suggestion',
    0.6,
    'General Improvement',
    'Consider improving overall property listing quality.',
    'medium',
    (
        'Add more high-quality images',
        'Include detailed specifications',
        'Provide contact information',
        'Update property status regularly',
    ),
)


def _build_suggestion(template):
    """Expand one template row into a fresh, caller-owned suggestion dict."""
    _, kind, confidence, title, message, priority, tips = template
    return {
        'type': kind,
        'confidence': confidence,
        'details': {
            'title': title,
            'message': message,
            'priority': priority,
            # Copy into a new list so callers may mutate the result freely.
            'suggestions': list(tips),
        },
    }


def generate_suggestion_details(suggestion_type, text, data):
    """Generate detailed suggestions based on type.

    Args:
        suggestion_type: Label describing the kind of suggestion. It is
            lower-cased and searched for the known fragments
            ('description improvement', 'price adjustment',
            'documentation required', 'verification needed').
        text: Listing text. Accepted for interface compatibility; not used.
        data: Listing data. Accepted for interface compatibility; not used.

    Returns:
        A dict with 'type', 'confidence' and 'details' (title, message,
        priority, suggestions) for the first matching fragment, or the
        general-improvement suggestion when nothing matches. Returns None
        when the label cannot be lower-cased (for example, it is None).
    """
    try:
        label = suggestion_type.lower()
    except Exception as e:
        # Same contract as before: log the problem and signal "no details".
        logger.error(f"Error generating suggestion details: {str(e)}")
        return None

    for template in _SUGGESTION_TEMPLATES:
        if template[0] in label:
            return _build_suggestion(template)
    return _build_suggestion(_GENERAL_SUGGESTION)
 
 
 
 
models/text_quality.py CHANGED
@@ -11,10 +11,11 @@ def assess_text_quality(text):
11
  'score': 0,
12
  'reasoning': 'Text too short.',
13
  'is_ai_generated': False,
14
- 'quality_metrics': {}
 
15
  }
16
  try:
17
- classifier = load_model("zero-shot-classification", "typeform/mobilebert-uncased-mnli")
18
  except Exception as e:
19
  logger.error(f"Error loading model in text quality: {str(e)}")
20
  return {
@@ -23,7 +24,8 @@ def assess_text_quality(text):
23
  'reasoning': f'Model loading error: {str(e)}',
24
  'is_ai_generated': False,
25
  'quality_metrics': {},
26
- 'top_classifications': []
 
27
  }
28
 
29
  # Enhanced quality categories with more specific indicators
@@ -52,79 +54,59 @@ def assess_text_quality(text):
52
  'confidence': float(score)
53
  })
54
 
55
- # AI generation detection with multiple models
56
- ai_check = classifier(text[:1000], ["human-written", "AI-generated", "template-based", "authentic"])
57
- is_ai_generated = (
58
- (ai_check['labels'][0] == "AI-generated" and ai_check['scores'][0] > 0.6) or
59
- (ai_check['labels'][0] == "template-based" and ai_check['scores'][0] > 0.7)
60
- )
61
-
62
- # Calculate quality metrics
63
- quality_metrics = {
64
- 'detail_level': sum(score for label, score in zip(quality_result['labels'], quality_result['scores'])
65
- if label in ['detailed and informative', 'adequately detailed']),
66
- 'professionalism': sum(score for label, score in zip(quality_result['labels'], quality_result['scores'])
67
- if label in ['professional listing', 'authentic description']),
68
- 'clarity': sum(score for label, score in zip(quality_result['labels'], quality_result['scores'])
69
- if label not in ['vague description', 'misleading content', 'spam-like content']),
70
- 'authenticity': 1.0 - sum(score for label, score in zip(quality_result['labels'], quality_result['scores'])
71
- if label in ['template-based content', 'spam-like content'])
72
- }
73
-
74
- # Calculate overall score with weighted metrics
75
- weights = {
76
- 'detail_level': 0.3,
77
- 'professionalism': 0.25,
78
- 'clarity': 0.25,
79
- 'authenticity': 0.2
80
- }
81
-
82
- score = sum(metric * weights[metric_name] for metric_name, metric in quality_metrics.items())
83
- score = score * 100 # Convert to percentage
84
-
85
- # Adjust score for AI-generated content
86
- if is_ai_generated:
87
- score = score * 0.7 # Reduce score by 30% for AI-generated content
88
-
89
- # Generate detailed reasoning
90
- reasoning_parts = []
91
- if top_classifications:
92
- primary_class = top_classifications[0]['classification']
93
- reasoning_parts.append(f"Primary assessment: {primary_class}")
94
-
95
- if quality_metrics['detail_level'] > 0.7:
96
- reasoning_parts.append("Contains comprehensive details")
97
- elif quality_metrics['detail_level'] > 0.4:
98
- reasoning_parts.append("Contains adequate details")
99
  else:
100
- reasoning_parts.append("Lacks important details")
101
 
102
- if quality_metrics['professionalism'] > 0.7:
103
- reasoning_parts.append("Professional listing style")
104
- elif quality_metrics['professionalism'] < 0.4:
105
- reasoning_parts.append("Amateur listing style")
106
-
107
- if quality_metrics['clarity'] < 0.5:
108
- reasoning_parts.append("Content clarity issues detected")
109
-
110
- if is_ai_generated:
111
- reasoning_parts.append("Content appears to be AI-generated")
112
 
113
  return {
114
- 'assessment': top_classifications[0]['classification'] if top_classifications else 'could not assess',
115
- 'score': int(score),
116
- 'reasoning': '. '.join(reasoning_parts),
117
  'is_ai_generated': is_ai_generated,
118
- 'quality_metrics': quality_metrics,
119
- 'top_classifications': top_classifications
 
 
 
 
 
 
 
120
  }
121
  except Exception as e:
122
- logger.error(f"Error assessing text quality: {str(e)}")
123
  return {
124
- 'assessment': 'could not assess',
125
- 'score': 50,
126
- 'reasoning': 'Technical error.',
127
  'is_ai_generated': False,
128
  'quality_metrics': {},
129
- 'top_classifications': []
 
130
  }
 
11
  'score': 0,
12
  'reasoning': 'Text too short.',
13
  'is_ai_generated': False,
14
+ 'quality_metrics': {},
15
+ 'model_used': 'static_fallback'
16
  }
17
  try:
18
+ classifier = load_model("zero-shot-classification") # Use standard model instead of typeform
19
  except Exception as e:
20
  logger.error(f"Error loading model in text quality: {str(e)}")
21
  return {
 
24
  'reasoning': f'Model loading error: {str(e)}',
25
  'is_ai_generated': False,
26
  'quality_metrics': {},
27
+ 'top_classifications': [],
28
+ 'model_used': 'static_fallback'
29
  }
30
 
31
  # Enhanced quality categories with more specific indicators
 
54
  'confidence': float(score)
55
  })
56
 
57
+ # Calculate overall quality score
58
+ positive_categories = ["detailed and informative", "adequately detailed", "professional listing", "authentic description"]
59
+ negative_categories = ["vague description", "misleading content", "amateur listing", "spam-like content", "template-based content"]
60
+
61
+ positive_score = sum(score for label, score in zip(quality_result['labels'], quality_result['scores'])
62
+ if label in positive_categories)
63
+ negative_score = sum(score for label, score in zip(quality_result['labels'], quality_result['scores'])
64
+ if label in negative_categories)
65
+
66
+ # Calculate final score (0-100)
67
+ quality_score = max(0, min(100, int((positive_score - negative_score + 1) * 50)))
68
+
69
+ # Determine assessment
70
+ if quality_score >= 80:
71
+ assessment = 'excellent'
72
+ elif quality_score >= 60:
73
+ assessment = 'good'
74
+ elif quality_score >= 40:
75
+ assessment = 'adequate'
76
+ elif quality_score >= 20:
77
+ assessment = 'poor'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  else:
79
+ assessment = 'very poor'
80
 
81
+ # Simple AI detection (basic heuristic)
82
+ is_ai_generated = len(text) > 500 and (
83
+ 'beautiful' in text.lower() and 'excellent' in text.lower() and 'prime' in text.lower() or
84
+ text.count('.') > 10 and len(text.split()) > 100
85
+ )
 
 
 
 
 
86
 
87
  return {
88
+ 'assessment': assessment,
89
+ 'score': quality_score,
90
+ 'reasoning': f'Quality score: {quality_score}/100 based on {len(top_classifications)} classifications.',
91
  'is_ai_generated': is_ai_generated,
92
+ 'quality_metrics': {
93
+ 'text_length': len(text),
94
+ 'word_count': len(text.split()),
95
+ 'sentence_count': text.count('.') + text.count('!') + text.count('?'),
96
+ 'positive_score': positive_score,
97
+ 'negative_score': negative_score
98
+ },
99
+ 'top_classifications': top_classifications,
100
+ 'model_used': getattr(classifier, 'fallback_model', 'primary_model')
101
  }
102
  except Exception as e:
103
+ logger.error(f"Error in text quality assessment: {str(e)}")
104
  return {
105
+ 'assessment': 'error',
106
+ 'score': 0,
107
+ 'reasoning': f'Error: {str(e)}',
108
  'is_ai_generated': False,
109
  'quality_metrics': {},
110
+ 'top_classifications': [],
111
+ 'model_used': 'static_fallback'
112
  }
models/trust_score.py CHANGED
@@ -5,133 +5,63 @@ from .logging_config import logger
5
 
6
  def generate_trust_score(text, image_analysis, pdf_analysis):
7
  try:
8
- try:
9
- classifier = load_model("zero-shot-classification", "typeform/mobilebert-uncased-mnli")
10
- except Exception as e:
11
- logger.error(f"Error loading model in trust score: {str(e)}")
12
- return 35, f"Model loading error: {str(e)}"
13
- aspects = [
14
- "complete information provided",
15
- "verified location",
16
- "consistent data",
17
- "authentic documents",
18
- "authentic images",
19
- "reasonable pricing",
20
- "verified ownership",
21
- "proper documentation"
22
- ]
23
- try:
24
- result = classifier(str(text)[:1000], aspects, multi_label=True)
25
- except Exception as e:
26
- logger.error(f"Error in trust score model inference: {str(e)}")
27
- return 35, f"Model inference error: {str(e)}"
28
-
29
- # More balanced weights
30
- weights = {
31
- "complete information provided": 0.20,
32
- "verified location": 0.20,
33
- "consistent data": 0.15,
34
- "authentic documents": 0.15,
35
- "authentic images": 0.10,
36
- "reasonable pricing": 0.10,
37
- "verified ownership": 0.05,
38
- "proper documentation": 0.05
39
- }
40
-
41
- score = 0
42
  reasoning_parts = []
43
 
44
- # More reasonable scoring for each aspect
45
- for label, confidence in zip(result['labels'], result['scores']):
46
- adjusted_confidence = confidence
47
-
48
- # Document verification
49
- if label == "authentic documents":
50
- if not pdf_analysis or len(pdf_analysis) == 0:
51
- adjusted_confidence = 0.3 # Base score for no documents
52
- else:
53
- doc_scores = [p.get('verification_score', 0) for p in pdf_analysis]
54
- adjusted_confidence = sum(doc_scores) / max(1, len(doc_scores))
55
- # Moderate penalty for low verification scores
56
- if any(score < 0.5 for score in doc_scores):
57
- adjusted_confidence *= 0.7
58
- # Small penalty for missing documents
59
- if len(doc_scores) < 2:
60
- adjusted_confidence *= 0.8
61
-
62
- # Image verification
63
- elif label == "authentic images":
64
- if not image_analysis or len(image_analysis) == 0:
65
- adjusted_confidence = 0.3 # Base score for no images
66
- else:
67
- img_scores = [i.get('authenticity_score', 0) for i in image_analysis]
68
- adjusted_confidence = sum(img_scores) / max(1, len(img_scores))
69
- # Moderate penalty for low authenticity scores
70
- if any(score < 0.6 for score in img_scores):
71
- adjusted_confidence *= 0.7
72
- # Small penalty for AI-generated images
73
- if any(i.get('is_ai_generated', False) for i in image_analysis):
74
- adjusted_confidence *= 0.8
75
- # Small penalty for non-property related images
76
- if any(not i.get('is_property_related', False) for i in image_analysis):
77
- adjusted_confidence *= 0.8
78
-
79
- # Consistency check
80
- elif label == "consistent data":
81
- # Check for inconsistencies in the data
82
- if "inconsistent" in text.lower() or "suspicious" in text.lower():
83
- adjusted_confidence *= 0.6
84
- # Check for impossible values
85
- if "impossible" in text.lower() or "invalid" in text.lower():
86
- adjusted_confidence *= 0.5
87
- # Check for missing critical information
88
- if "missing" in text.lower() or "not provided" in text.lower():
89
- adjusted_confidence *= 0.7
90
-
91
- # Completeness check
92
- elif label == "complete information provided":
93
- # Check for missing critical information
94
- if len(text) < 200 or "not provided" in text.lower() or "missing" in text.lower():
95
- adjusted_confidence *= 0.7
96
- # Check for vague or generic descriptions
97
- if "generic" in text.lower() or "vague" in text.lower():
98
- adjusted_confidence *= 0.8
99
- # Check for suspiciously short descriptions
100
- if len(text) < 100:
101
- adjusted_confidence *= 0.6
102
-
103
- score += adjusted_confidence * weights.get(label, 0.1)
104
- reasoning_parts.append(f"{label} ({adjusted_confidence:.0%})")
105
-
106
- # Apply moderate penalties for suspicious patterns
107
- if "suspicious" in text.lower() or "fraudulent" in text.lower():
108
- score *= 0.7
109
-
110
- # Apply moderate penalties for suspiciously low values
111
- if "suspiciously low" in text.lower() or "unusually small" in text.lower():
112
- score *= 0.8
113
-
114
- # Apply moderate penalties for inconsistencies
115
- if "inconsistent" in text.lower() or "mismatch" in text.lower():
116
- score *= 0.8
117
-
118
- # Apply moderate penalties for missing critical information
119
- if "missing critical" in text.lower() or "incomplete" in text.lower():
120
- score *= 0.8
121
-
122
- # Ensure minimum score for any valid data
123
- if score < 0.1:
124
- score = 0.1 # Minimum 10% score for any data
125
-
126
- # Ensure score is between 0 and 100
127
- score = min(100, max(0, int(score * 100)))
128
 
129
- # Ensure minimum score of 25% for any valid data
130
- if score < 25:
131
- score = 25
132
-
133
- reasoning = f"Based on: {', '.join(reasoning_parts)}"
134
- return score, reasoning
135
  except Exception as e:
136
- logger.error(f"Error generating trust score: {str(e)}")
137
- return 35, "Could not assess trust."
 
5
 
6
def _count_trust_indicators(text_lower, indicators):
    """Count how many of the indicators start a word somewhere in text_lower."""
    # Local import keeps this block self-contained without touching the
    # module's import section.
    import re

    # Anchor each indicator at a word start. A plain substring test counted
    # "unverified" as the positive "verified" and "incomplete" as the positive
    # "complete". Only the left edge is anchored, so inflected forms such as
    # "fraudulent" or "scams" still match as they did before.
    return sum(
        1
        for indicator in indicators
        if re.search(r'\b' + re.escape(indicator), text_lower)
    )


def generate_trust_score(text, image_analysis, pdf_analysis):
    """Compute a keyword-based trust score for a property listing.

    The score starts at a neutral 50 and is adjusted as follows:
      * +5 per distinct positive indicator found in the text (capped at +20)
      * -10 per distinct negative indicator found in the text (capped at -30)
      * +3 per image analysis entry (capped at +15)
      * +5 per document analysis entry (capped at +15)
    The result is clamped to the range 0-100.

    Args:
        text: Listing text; converted with str() before matching.
        image_analysis: List of image analysis results. A truthy non-list
            value is counted as a single image; a falsy value adds nothing.
        pdf_analysis: List of document analysis results, counted the same
            way as image_analysis.

    Returns:
        A (score, reasoning) tuple: score is a float in [0, 100] and
        reasoning is a sentence summarising the adjustments applied.
        If anything fails, the error is logged and (35.0, message) is returned.
    """
    try:
        # Use a simpler approach to avoid timeouts
        trust_score = 50.0  # Start with neutral score
        reasoning_parts = []

        text_lower = str(text).lower()

        positive_indicators = (
            'verified', 'authentic', 'genuine', 'real', 'legitimate',
            'complete', 'detailed', 'professional', 'official', 'certified',
        )
        negative_indicators = (
            'fake', 'scam', 'fraud', 'suspicious', 'unverified',
            'incomplete', 'missing', 'unclear', 'doubtful', 'questionable',
        )

        positive_count = _count_trust_indicators(text_lower, positive_indicators)
        negative_count = _count_trust_indicators(text_lower, negative_indicators)

        if positive_count > 0:
            trust_score += min(20, positive_count * 5)
            reasoning_parts.append(f"Found {positive_count} positive trust indicators")

        if negative_count > 0:
            trust_score -= min(30, negative_count * 10)
            reasoning_parts.append(f"Found {negative_count} negative trust indicators")

        # A truthy value is either a non-empty list or a single object, so
        # the count is always at least 1 inside these branches.
        if image_analysis:
            image_count = len(image_analysis) if isinstance(image_analysis, list) else 1
            trust_score += min(15, image_count * 3)
            reasoning_parts.append(f"Property has {image_count} images")

        if pdf_analysis:
            pdf_count = len(pdf_analysis) if isinstance(pdf_analysis, list) else 1
            trust_score += min(15, pdf_count * 5)
            reasoning_parts.append(f"Property has {pdf_count} documents")

        # Ensure score is within bounds
        trust_score = max(0, min(100, trust_score))

        if reasoning_parts:
            reasoning = ". ".join(reasoning_parts) + "."
        else:
            reasoning = "Basic trust assessment completed."

        return trust_score, reasoning

    except Exception as e:
        # Boundary handler: callers always receive a (score, reasoning) pair.
        logger.error(f"Error in trust score generation: {str(e)}")
        return 35.0, f"Trust analysis failed: {str(e)}"
templates/index.html CHANGED
@@ -1951,18 +1951,36 @@
1951
  const propertyForm = document.getElementById('propertyForm');
1952
  const loadingIndicator = document.getElementById('loadingIndicator');
1953
  const resultsContainer = document.getElementById('resultsContainer');
 
 
 
1954
  loadingIndicator.style.display = 'block';
1955
  resultsContainer.style.display = 'none';
 
1956
  const formData = new FormData(propertyForm);
 
 
 
 
 
 
 
 
 
1957
  // Add images and PDFs from preview arrays if needed
1958
  fetch('/verify', {
1959
  method: 'POST',
1960
  body: formData
1961
  })
1962
- .then(response => response.json())
 
 
 
1963
  .then(data => {
 
1964
  loadingIndicator.style.display = 'none';
1965
  if (data.status === 'error' || data.error) {
 
1966
  showError(data.error || 'An error occurred. Please check your input and try again.');
1967
  return;
1968
  }
@@ -1970,6 +1988,7 @@
1970
  resultsContainer.style.display = 'block';
1971
  })
1972
  .catch(error => {
 
1973
  loadingIndicator.style.display = 'none';
1974
  showError('Server error: ' + (error.message || error));
1975
  });
@@ -2444,45 +2463,292 @@
2444
  documentDiv.innerHTML = '';
2445
 
2446
  if (data.document_analysis && data.document_analysis.pdf_count > 0) {
2447
- documentDiv.innerHTML = `<p><strong>Documents Analyzed:</strong> ${data.document_analysis.pdf_count}</p>`;
2448
-
2449
- data.document_analysis.pdf_analysis.forEach((pdf, index) => {
2450
- documentDiv.innerHTML += `
2451
- <div class="pdf-preview">
2452
- <p><strong>Document ${index + 1}</strong></p>
2453
- <p><strong>Type:</strong> ${pdf.document_type.classification} (${Math.round(pdf.document_type.confidence * 100)}% confidence)</p>
2454
- <p><strong>Authenticity:</strong> ${pdf.authenticity.assessment} (${Math.round(pdf.authenticity.confidence * 100)}% confidence)</p>
2455
- <p><strong>Summary:</strong> ${pdf.summary}</p>
2456
- <p><strong>Contains Signatures:</strong> ${pdf.contains_signatures ? 'Yes' : 'No'}</p>
2457
- <p><strong>Contains Dates:</strong> ${pdf.contains_dates ? 'Yes' : 'No'}</p>
2458
- </div>
2459
- `;
2460
- });
2461
-
2462
- // Update Document Chart
2463
  let authenticCount = 0;
2464
  let suspiciousCount = 0;
2465
  let incompleteCount = 0;
 
 
 
 
 
 
2466
 
2467
- data.document_analysis.pdf_analysis.forEach(pdf => {
2468
- if (pdf.authenticity.assessment.includes('authentic')) {
 
 
 
 
 
 
 
 
 
 
2469
  authenticCount++;
2470
- } else if (pdf.authenticity.assessment.includes('fraudulent')) {
2471
  suspiciousCount++;
2472
  } else {
2473
  incompleteCount++;
2474
  }
 
2475
  });
2476
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2477
  updateChart(documentChart, {
 
2478
  datasets: [{
2479
- data: [
2480
- authenticCount,
2481
- suspiciousCount,
2482
- incompleteCount
2483
- ]
2484
  }]
2485
  });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2486
  } else {
2487
  documentDiv.innerHTML = '<p>No documents were uploaded for analysis.</p>';
2488
  }
@@ -2505,46 +2771,160 @@
2505
  nonRealEstateContainer.innerHTML = '<h4>Non-Real Estate Images</h4>';
2506
 
2507
  let propertyRelatedCount = 0;
 
 
 
 
 
2508
  data.image_analysis.image_analysis.forEach((img, index) => {
2509
  if (img && img.is_property_related) {
2510
  propertyRelatedCount++;
2511
  }
 
 
 
 
 
 
 
 
2512
  });
2513
 
 
 
 
 
2514
  imageAnalysisDiv.innerHTML = `
2515
  <div class="analysis-summary">
2516
- <p><strong>Total Images Analyzed:</strong> ${data.image_analysis.image_count}</p>
2517
- <p><strong>Property-Related Images:</strong> ${propertyRelatedCount} of ${data.image_analysis.image_count}</p>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2518
  </div>
2519
  `;
2520
 
2521
- // Display images in appropriate containers
2522
  data.images.forEach((imgData, index) => {
2523
  const imgAnalysis = data.image_analysis.image_analysis[index];
2524
  const galleryItem = document.createElement('div');
2525
  galleryItem.className = 'gallery-item';
2526
 
2527
- // Create image container with label
 
 
 
 
2528
  const imageContainer = document.createElement('div');
2529
  imageContainer.className = 'image-container';
2530
-
2531
- // Add the image
2532
  imageContainer.innerHTML = `
2533
  <img src="data:image/jpeg;base64,${imgData}" alt="Property Image ${index + 1}">
2534
  <div class="image-overlay">
2535
- ${imgAnalysis && imgAnalysis.is_property_related ?
2536
- `<div class="image-label">${imgAnalysis.predicted_label || 'Property Image'}</div>` :
2537
- '<div class="image-label">Non-Property Image</div>'}
2538
  </div>
2539
  `;
2540
 
2541
- galleryItem.appendChild(imageContainer);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2542
 
2543
  // Add to appropriate container based on classification
2544
  if (imgAnalysis && imgAnalysis.is_property_related) {
2545
- realEstateContainer.appendChild(galleryItem);
2546
  } else {
2547
- nonRealEstateContainer.appendChild(galleryItem);
2548
  }
2549
  });
2550
 
@@ -2552,7 +2932,7 @@
2552
  imageGallery.appendChild(realEstateContainer);
2553
  imageGallery.appendChild(nonRealEstateContainer);
2554
 
2555
- // Add some CSS for the new image display
2556
  const style = document.createElement('style');
2557
  style.textContent = `
2558
  .image-section {
@@ -2561,29 +2941,159 @@
2561
  .image-section h4 {
2562
  margin-bottom: 15px;
2563
  color: var(--primary);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2564
  }
2565
  .image-container {
2566
  position: relative;
 
 
 
 
 
2567
  width: 100%;
2568
  height: 100%;
 
2569
  }
2570
  .image-overlay {
2571
  position: absolute;
2572
  bottom: 0;
2573
  left: 0;
2574
  right: 0;
2575
- background: rgba(0, 0, 0, 0.7);
2576
- padding: 8px;
2577
  color: white;
2578
- text-align: center;
 
 
2579
  }
2580
  .image-label {
2581
  font-size: 0.9rem;
2582
  font-weight: 500;
2583
  }
2584
- .gallery-item {
2585
- position: relative;
2586
- margin-bottom: 20px;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2587
  }
2588
  `;
2589
  document.head.appendChild(style);
@@ -2774,7 +3284,7 @@
2774
  pdf => pdf.verification_score || 0
2775
  );
2776
  docScore = verificationScores.length > 0 ?
2777
- Math.round((verificationScores.reduce((a, b) => a + b, 0) / verificationScores.length) * 100) : 0;
2778
  }
2779
  updateScoreBar('documentBar', 'documentValue', docScore);
2780
 
 
1951
  const propertyForm = document.getElementById('propertyForm');
1952
  const loadingIndicator = document.getElementById('loadingIndicator');
1953
  const resultsContainer = document.getElementById('resultsContainer');
1954
+
1955
+ console.log("🚀 Starting form submission...");
1956
+
1957
  loadingIndicator.style.display = 'block';
1958
  resultsContainer.style.display = 'none';
1959
+
1960
  const formData = new FormData(propertyForm);
1961
+
1962
+ // Debug: Log form data
1963
+ console.log("📋 Form data being sent:");
1964
+ for (let [key, value] of formData.entries()) {
1965
+ console.log(` ${key}: ${value}`);
1966
+ }
1967
+
1968
+ console.log("🌐 Making request to /verify endpoint...");
1969
+
1970
  // Add images and PDFs from preview arrays if needed
1971
  fetch('/verify', {
1972
  method: 'POST',
1973
  body: formData
1974
  })
1975
+ .then(response => {
1976
+ console.log("📡 Response received:", response.status, response.statusText);
1977
+ return response.json();
1978
+ })
1979
  .then(data => {
1980
+ console.log("✅ Data received:", data);
1981
  loadingIndicator.style.display = 'none';
1982
  if (data.status === 'error' || data.error) {
1983
+ console.error("❌ Server error:", data.error);
1984
  showError(data.error || 'An error occurred. Please check your input and try again.');
1985
  return;
1986
  }
 
1988
  resultsContainer.style.display = 'block';
1989
  })
1990
  .catch(error => {
1991
+ console.error("❌ Fetch error:", error);
1992
  loadingIndicator.style.display = 'none';
1993
  showError('Server error: ' + (error.message || error));
1994
  });
 
2463
  documentDiv.innerHTML = '';
2464
 
2465
  if (data.document_analysis && data.document_analysis.pdf_count > 0) {
2466
+ // Calculate summary statistics
2467
+ let totalVerificationScore = 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2468
  let authenticCount = 0;
2469
  let suspiciousCount = 0;
2470
  let incompleteCount = 0;
2471
+ let totalConfidence = 0;
2472
+ let totalDocumentConfidence = 0;
2473
+ let totalAuthenticityConfidence = 0;
2474
+ let documentsWithSignatures = 0;
2475
+ let documentsWithDates = 0;
2476
+ let propertyRelatedCount = 0;
2477
 
2478
+ data.document_analysis.pdf_analysis.forEach((pdf, index) => {
2479
+ if (pdf) {
2480
+ totalVerificationScore += pdf.verification_score || 0;
2481
+ totalConfidence += pdf.confidence || 0;
2482
+ totalDocumentConfidence += pdf.document_confidence || 0;
2483
+ totalAuthenticityConfidence += pdf.authenticity_confidence || 0;
2484
+
2485
+ if (pdf.contains_signatures) documentsWithSignatures++;
2486
+ if (pdf.contains_dates) documentsWithDates++;
2487
+ if (pdf.is_property_related) propertyRelatedCount++;
2488
+
2489
+ if (pdf.authenticity_assessment && pdf.authenticity_assessment.toLowerCase().includes('authentic')) {
2490
  authenticCount++;
2491
+ } else if (pdf.authenticity_assessment && pdf.authenticity_assessment.toLowerCase().includes('suspicious')) {
2492
  suspiciousCount++;
2493
  } else {
2494
  incompleteCount++;
2495
  }
2496
+ }
2497
  });
2498
 
2499
+ const avgVerificationScore = data.document_analysis.pdf_count > 0 ? (totalVerificationScore / data.document_analysis.pdf_count).toFixed(1) : 0;
2500
+ const avgConfidence = data.document_analysis.pdf_count > 0 ? (totalConfidence / data.document_analysis.pdf_count * 100).toFixed(1) : 0;
2501
+ const avgDocumentConfidence = data.document_analysis.pdf_count > 0 ? (totalDocumentConfidence / data.document_analysis.pdf_count * 100).toFixed(1) : 0;
2502
+ const avgAuthenticityConfidence = data.document_analysis.pdf_count > 0 ? (totalAuthenticityConfidence / data.document_analysis.pdf_count * 100).toFixed(1) : 0;
2503
+
2504
+ // Create summary section
2505
+ documentDiv.innerHTML = `
2506
+ <div class="document-summary">
2507
+ <h4>Document Analysis Summary</h4>
2508
+ <div class="summary-grid">
2509
+ <div class="summary-item">
2510
+ <h5>Total Documents</h5>
2511
+ <p class="summary-value">${data.document_analysis.pdf_count}</p>
2512
+ </div>
2513
+ <div class="summary-item">
2514
+ <h5>Property Related</h5>
2515
+ <p class="summary-value">${propertyRelatedCount} of ${data.document_analysis.pdf_count}</p>
2516
+ </div>
2517
+ <div class="summary-item">
2518
+ <h5>Avg Verification Score</h5>
2519
+ <p class="summary-value">${avgVerificationScore}%</p>
2520
+ </div>
2521
+ <div class="summary-item">
2522
+ <h5>Avg Confidence</h5>
2523
+ <p class="summary-value">${avgConfidence}%</p>
2524
+ </div>
2525
+ <div class="summary-item">
2526
+ <h5>With Signatures</h5>
2527
+ <p class="summary-value">${documentsWithSignatures}</p>
2528
+ </div>
2529
+ <div class="summary-item">
2530
+ <h5>With Dates</h5>
2531
+ <p class="summary-value">${documentsWithDates}</p>
2532
+ </div>
2533
+ </div>
2534
+ </div>
2535
+ `;
2536
+
2537
+ // Create detailed analysis for each document
2538
+ const documentsContainer = document.createElement('div');
2539
+ documentsContainer.className = 'documents-container';
2540
+ documentsContainer.innerHTML = '<h4>Detailed Document Analysis</h4>';
2541
+
2542
+ data.document_analysis.pdf_analysis.forEach((pdf, index) => {
2543
+ if (pdf) {
2544
+ const documentCard = document.createElement('div');
2545
+ documentCard.className = 'document-card';
2546
+
2547
+ // Create key information display
2548
+ let keyInfoHtml = '';
2549
+ if (pdf.key_info && Object.keys(pdf.key_info).length > 0) {
2550
+ keyInfoHtml = '<div class="key-info-section"><h6>Key Information:</h6><ul>';
2551
+ Object.entries(pdf.key_info).forEach(([key, value]) => {
2552
+ if (Array.isArray(value)) {
2553
+ keyInfoHtml += `<li><strong>${key}:</strong> ${value.slice(0, 3).join(', ')}${value.length > 3 ? '...' : ''}</li>`;
2554
+ } else {
2555
+ keyInfoHtml += `<li><strong>${key}:</strong> ${value}</li>`;
2556
+ }
2557
+ });
2558
+ keyInfoHtml += '</ul></div>';
2559
+ }
2560
+
2561
+ // Create real estate indicators display
2562
+ let indicatorsHtml = '';
2563
+ if (pdf.real_estate_indicators && pdf.real_estate_indicators.length > 0) {
2564
+ indicatorsHtml = '<div class="indicators-section"><h6>Real Estate Indicators:</h6><ul>';
2565
+ pdf.real_estate_indicators.slice(0, 5).forEach(indicator => {
2566
+ indicatorsHtml += `<li>${indicator}</li>`;
2567
+ });
2568
+ if (pdf.real_estate_indicators.length > 5) {
2569
+ indicatorsHtml += `<li>... and ${pdf.real_estate_indicators.length - 5} more</li>`;
2570
+ }
2571
+ indicatorsHtml += '</ul></div>';
2572
+ }
2573
+
2574
+ // Create legal terms display
2575
+ let legalTermsHtml = '';
2576
+ if (pdf.legal_terms_found && pdf.legal_terms_found.length > 0) {
2577
+ legalTermsHtml = '<div class="legal-terms-section"><h6>Legal Terms Found:</h6><ul>';
2578
+ pdf.legal_terms_found.slice(0, 5).forEach(term => {
2579
+ legalTermsHtml += `<li>${term}</li>`;
2580
+ });
2581
+ if (pdf.legal_terms_found.length > 5) {
2582
+ legalTermsHtml += `<li>... and ${pdf.legal_terms_found.length - 5} more</li>`;
2583
+ }
2584
+ legalTermsHtml += '</ul></div>';
2585
+ }
2586
+
2587
+ // Create keyword analysis display
2588
+ let keywordAnalysisHtml = '';
2589
+ if (pdf.keyword_analysis && Object.keys(pdf.keyword_analysis).length > 0) {
2590
+ keywordAnalysisHtml = '<div class="keyword-analysis-section"><h6>Keyword Analysis:</h6><ul>';
2591
+ Object.entries(pdf.keyword_analysis).forEach(([category, count]) => {
2592
+ keywordAnalysisHtml += `<li><strong>${category}:</strong> ${count} matches</li>`;
2593
+ });
2594
+ keywordAnalysisHtml += '</ul></div>';
2595
+ }
2596
+
2597
+ documentCard.innerHTML = `
2598
+ <div class="document-header">
2599
+ <h5>Document ${index + 1}</h5>
2600
+ <span class="status-badge ${pdf.is_property_related ? 'success' : 'warning'}">
2601
+ ${pdf.is_property_related ? 'Property Related' : 'Non-Property'}
2602
+ </span>
2603
+ </div>
2604
+ <div class="document-metrics">
2605
+ <div class="metric">
2606
+ <span class="metric-label">Document Type:</span>
2607
+ <span class="metric-value">${typeof pdf.document_type === 'object' ? (pdf.document_type.classification || 'Unknown') : (pdf.document_type || 'Unknown')}</span>
2608
+ </div>
2609
+ <div class="metric">
2610
+ <span class="metric-label">Type Confidence:</span>
2611
+ <span class="metric-value">${Math.round((typeof pdf.document_type === 'object' ? (pdf.document_type.confidence || 0) : (pdf.document_confidence || 0)) * 100)}%</span>
2612
+ </div>
2613
+ <div class="metric">
2614
+ <span class="metric-label">Authenticity:</span>
2615
+ <span class="metric-value">${typeof pdf.authenticity === 'object' ? (pdf.authenticity.assessment || 'Unknown') : (pdf.authenticity_assessment || 'Unknown')}</span>
2616
+ </div>
2617
+ <div class="metric">
2618
+ <span class="metric-label">Auth Confidence:</span>
2619
+ <span class="metric-value">${Math.round((typeof pdf.authenticity === 'object' ? (pdf.authenticity.confidence || 0) : (pdf.authenticity_confidence || 0)) * 100)}%</span>
2620
+ </div>
2621
+ <div class="metric">
2622
+ <span class="metric-label">Verification Score:</span>
2623
+ <span class="metric-value">${Math.round(pdf.verification_score || 0)}%</span>
2624
+ </div>
2625
+ <div class="metric">
2626
+ <span class="metric-label">Overall Confidence:</span>
2627
+ <span class="metric-value">${Math.round((pdf.confidence || 0) * 100)}%</span>
2628
+ </div>
2629
+ </div>
2630
+ <div class="document-details">
2631
+ <div class="detail-section">
2632
+ <h6>Summary:</h6>
2633
+ <p>${pdf.summary || 'No summary available'}</p>
2634
+ </div>
2635
+ <div class="detail-section">
2636
+ <h6>Document Features:</h6>
2637
+ <ul>
2638
+ <li><strong>Contains Signatures:</strong> ${pdf.contains_signatures ? 'Yes' : 'No'}</li>
2639
+ <li><strong>Contains Dates:</strong> ${pdf.contains_dates ? 'Yes' : 'No'}</li>
2640
+ </ul>
2641
+ </div>
2642
+ ${keyInfoHtml}
2643
+ ${indicatorsHtml}
2644
+ ${legalTermsHtml}
2645
+ ${keywordAnalysisHtml}
2646
+ </div>
2647
+ <div class="model-info">
2648
+ <small>Model: ${pdf.model_used || 'Static Analysis'}</small>
2649
+ </div>
2650
+ `;
2651
+
2652
+ documentsContainer.appendChild(documentCard);
2653
+ }
2654
+ });
2655
+
2656
+ documentDiv.appendChild(documentsContainer);
2657
+
2658
+ // Update Document Chart
2659
  updateChart(documentChart, {
2660
+ labels: ['Authentic', 'Suspicious', 'Incomplete'],
2661
  datasets: [{
2662
+ data: [authenticCount, suspiciousCount, incompleteCount],
2663
+ backgroundColor: ['#28a745', '#ffc107', '#dc3545']
 
 
 
2664
  }]
2665
  });
2666
+
2667
+ // Add CSS for document analysis
2668
+ const style = document.createElement('style');
2669
+ style.textContent = `
2670
+ .document-summary {
2671
+ margin-bottom: 30px;
2672
+ }
2673
+ .document-summary h4 {
2674
+ color: var(--primary);
2675
+ border-bottom: 2px solid var(--primary);
2676
+ padding-bottom: 5px;
2677
+ margin-bottom: 20px;
2678
+ }
2679
+ .documents-container {
2680
+ margin-top: 30px;
2681
+ }
2682
+ .documents-container h4 {
2683
+ color: var(--primary);
2684
+ margin-bottom: 20px;
2685
+ }
2686
+ .document-card {
2687
+ background: white;
2688
+ border-radius: var(--border-radius);
2689
+ box-shadow: var(--box-shadow);
2690
+ padding: 20px;
2691
+ margin-bottom: 20px;
2692
+ }
2693
+ .document-header {
2694
+ display: flex;
2695
+ justify-content: space-between;
2696
+ align-items: center;
2697
+ margin-bottom: 15px;
2698
+ }
2699
+ .document-header h5 {
2700
+ margin: 0;
2701
+ color: var(--dark);
2702
+ }
2703
+ .document-metrics {
2704
+ display: grid;
2705
+ grid-template-columns: repeat(2, 1fr);
2706
+ gap: 10px;
2707
+ margin-bottom: 20px;
2708
+ }
2709
+ .document-details {
2710
+ margin-bottom: 15px;
2711
+ }
2712
+ .detail-section {
2713
+ margin-bottom: 15px;
2714
+ }
2715
+ .detail-section h6 {
2716
+ color: var(--dark);
2717
+ margin-bottom: 8px;
2718
+ }
2719
+ .detail-section p {
2720
+ color: var(--gray);
2721
+ margin: 0;
2722
+ line-height: 1.5;
2723
+ }
2724
+ .detail-section ul {
2725
+ list-style: none;
2726
+ padding: 0;
2727
+ margin: 0;
2728
+ }
2729
+ .detail-section li {
2730
+ padding: 4px 0;
2731
+ color: var(--gray);
2732
+ }
2733
+ .key-info-section, .indicators-section, .legal-terms-section, .keyword-analysis-section {
2734
+ margin-bottom: 15px;
2735
+ }
2736
+ .key-info-section h6, .indicators-section h6, .legal-terms-section h6, .keyword-analysis-section h6 {
2737
+ color: var(--dark);
2738
+ margin-bottom: 8px;
2739
+ }
2740
+ .key-info-section ul, .indicators-section ul, .legal-terms-section ul, .keyword-analysis-section ul {
2741
+ list-style: none;
2742
+ padding: 0;
2743
+ margin: 0;
2744
+ }
2745
+ .key-info-section li, .indicators-section li, .legal-terms-section li, .keyword-analysis-section li {
2746
+ padding: 4px 0;
2747
+ font-size: 0.85rem;
2748
+ color: var(--gray);
2749
+ }
2750
+ `;
2751
+ document.head.appendChild(style);
2752
  } else {
2753
  documentDiv.innerHTML = '<p>No documents were uploaded for analysis.</p>';
2754
  }
 
2771
  nonRealEstateContainer.innerHTML = '<h4>Non-Real Estate Images</h4>';
2772
 
2773
  let propertyRelatedCount = 0;
2774
+ let totalConfidence = 0;
2775
+ let totalRealEstateConfidence = 0;
2776
+ let totalAuthenticityScore = 0;
2777
+ let aiGeneratedCount = 0;
2778
+
2779
  data.image_analysis.image_analysis.forEach((img, index) => {
2780
  if (img && img.is_property_related) {
2781
  propertyRelatedCount++;
2782
  }
2783
+ if (img) {
2784
+ totalConfidence += img.confidence || 0;
2785
+ totalRealEstateConfidence += img.real_estate_confidence || 0;
2786
+ totalAuthenticityScore += img.authenticity_score || 0;
2787
+ if (img.is_ai_generated) {
2788
+ aiGeneratedCount++;
2789
+ }
2790
+ }
2791
  });
2792
 
2793
+ const avgConfidence = data.image_analysis.image_count > 0 ? (totalConfidence / data.image_analysis.image_count * 100).toFixed(1) : 0;
2794
+ const avgRealEstateConfidence = data.image_analysis.image_count > 0 ? (totalRealEstateConfidence / data.image_analysis.image_count * 100).toFixed(1) : 0;
2795
+ const avgAuthenticityScore = data.image_analysis.image_count > 0 ? (totalAuthenticityScore / data.image_analysis.image_count * 100).toFixed(1) : 0;
2796
+
2797
  imageAnalysisDiv.innerHTML = `
2798
  <div class="analysis-summary">
2799
+ <div class="summary-grid">
2800
+ <div class="summary-item">
2801
+ <h5>Total Images Analyzed</h5>
2802
+ <p class="summary-value">${data.image_analysis.image_count}</p>
2803
+ </div>
2804
+ <div class="summary-item">
2805
+ <h5>Property-Related Images</h5>
2806
+ <p class="summary-value">${propertyRelatedCount} of ${data.image_analysis.image_count}</p>
2807
+ </div>
2808
+ <div class="summary-item">
2809
+ <h5>Average Confidence</h5>
2810
+ <p class="summary-value">${avgConfidence}%</p>
2811
+ </div>
2812
+ <div class="summary-item">
2813
+ <h5>Real Estate Confidence</h5>
2814
+ <p class="summary-value">${avgRealEstateConfidence}%</p>
2815
+ </div>
2816
+ <div class="summary-item">
2817
+ <h5>Authenticity Score</h5>
2818
+ <p class="summary-value">${avgAuthenticityScore}%</p>
2819
+ </div>
2820
+ <div class="summary-item">
2821
+ <h5>AI Generated</h5>
2822
+ <p class="summary-value">${aiGeneratedCount} images</p>
2823
+ </div>
2824
+ </div>
2825
+ <div class="model-info">
2826
+ <p><strong>Model Used:</strong> ${data.image_analysis.image_model_used ? data.image_analysis.image_model_used.join(', ') : 'Static Analysis'}</p>
2827
+ </div>
2828
  </div>
2829
  `;
2830
 
2831
+ // Display images with detailed analysis
2832
  data.images.forEach((imgData, index) => {
2833
  const imgAnalysis = data.image_analysis.image_analysis[index];
2834
  const galleryItem = document.createElement('div');
2835
  galleryItem.className = 'gallery-item';
2836
 
2837
+ // Create detailed analysis card
2838
+ const analysisCard = document.createElement('div');
2839
+ analysisCard.className = 'analysis-card';
2840
+
2841
+ // Create image container
2842
  const imageContainer = document.createElement('div');
2843
  imageContainer.className = 'image-container';
 
 
2844
  imageContainer.innerHTML = `
2845
  <img src="data:image/jpeg;base64,${imgData}" alt="Property Image ${index + 1}">
2846
  <div class="image-overlay">
2847
+ <div class="image-label">${imgAnalysis && imgAnalysis.predicted_label ? imgAnalysis.predicted_label : 'Unknown'}</div>
2848
+ <div class="confidence-badge">${imgAnalysis ? Math.round((imgAnalysis.confidence || 0) * 100) : 0}%</div>
 
2849
  </div>
2850
  `;
2851
 
2852
+ // Create analysis details
2853
+ const analysisDetails = document.createElement('div');
2854
+ analysisDetails.className = 'analysis-details';
2855
+
2856
+ if (imgAnalysis) {
2857
+ const isPropertyRelated = imgAnalysis.is_property_related ? 'Yes' : 'No';
2858
+ const isAiGenerated = imgAnalysis.is_ai_generated ? 'Yes' : 'No';
2859
+ const authenticityScore = Math.round((imgAnalysis.authenticity_score || 0) * 100);
2860
+ const realEstateConfidence = Math.round((imgAnalysis.real_estate_confidence || 0) * 100);
2861
+
2862
+ // Create top predictions list
2863
+ let topPredictionsHtml = '';
2864
+ if (imgAnalysis.top_predictions && imgAnalysis.top_predictions.length > 0) {
2865
+ topPredictionsHtml = '<div class="top-predictions"><h6>Top Predictions:</h6><ul>';
2866
+ imgAnalysis.top_predictions.slice(0, 3).forEach(pred => {
2867
+ const confidence = Math.round((pred.confidence || 0) * 100);
2868
+ topPredictionsHtml += `<li>${pred.label} (${confidence}%)</li>`;
2869
+ });
2870
+ topPredictionsHtml += '</ul></div>';
2871
+ }
2872
+
2873
+ // Create image quality info
2874
+ let qualityInfo = '';
2875
+ if (imgAnalysis.image_quality) {
2876
+ qualityInfo = `
2877
+ <div class="quality-info">
2878
+ <h6>Image Quality:</h6>
2879
+ <p>Resolution: ${imgAnalysis.image_quality.resolution || 'Unknown'}</p>
2880
+ <p>Quality Score: ${Math.round((imgAnalysis.image_quality.quality_score || 0) * 100)}%</p>
2881
+ </div>
2882
+ `;
2883
+ }
2884
+
2885
+ analysisDetails.innerHTML = `
2886
+ <div class="analysis-header">
2887
+ <h5>Image Analysis #${index + 1}</h5>
2888
+ <span class="status-badge ${imgAnalysis.is_property_related ? 'success' : 'warning'}">
2889
+ ${imgAnalysis.is_property_related ? 'Real Estate' : 'Non-Real Estate'}
2890
+ </span>
2891
+ </div>
2892
+ <div class="analysis-metrics">
2893
+ <div class="metric">
2894
+ <span class="metric-label">Confidence:</span>
2895
+ <span class="metric-value">${Math.round((imgAnalysis.confidence || 0) * 100)}%</span>
2896
+ </div>
2897
+ <div class="metric">
2898
+ <span class="metric-label">Real Estate Confidence:</span>
2899
+ <span class="metric-value">${realEstateConfidence}%</span>
2900
+ </div>
2901
+ <div class="metric">
2902
+ <span class="metric-label">Authenticity:</span>
2903
+ <span class="metric-value">${authenticityScore}%</span>
2904
+ </div>
2905
+ <div class="metric">
2906
+ <span class="metric-label">AI Generated:</span>
2907
+ <span class="metric-value">${isAiGenerated}</span>
2908
+ </div>
2909
+ </div>
2910
+ ${topPredictionsHtml}
2911
+ ${qualityInfo}
2912
+ <div class="model-info">
2913
+ <small>Model: ${imgAnalysis.model_used || 'Static Analysis'}</small>
2914
+ </div>
2915
+ `;
2916
+ } else {
2917
+ analysisDetails.innerHTML = '<p>Analysis not available</p>';
2918
+ }
2919
+
2920
+ analysisCard.appendChild(imageContainer);
2921
+ analysisCard.appendChild(analysisDetails);
2922
 
2923
  // Add to appropriate container based on classification
2924
  if (imgAnalysis && imgAnalysis.is_property_related) {
2925
+ realEstateContainer.appendChild(analysisCard);
2926
  } else {
2927
+ nonRealEstateContainer.appendChild(analysisCard);
2928
  }
2929
  });
2930
 
 
2932
  imageGallery.appendChild(realEstateContainer);
2933
  imageGallery.appendChild(nonRealEstateContainer);
2934
 
2935
+ // Add enhanced CSS for the new image display
2936
  const style = document.createElement('style');
2937
  style.textContent = `
2938
  .image-section {
 
2941
  .image-section h4 {
2942
  margin-bottom: 15px;
2943
  color: var(--primary);
2944
+ border-bottom: 2px solid var(--primary);
2945
+ padding-bottom: 5px;
2946
+ }
2947
+ .summary-grid {
2948
+ display: grid;
2949
+ grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
2950
+ gap: 15px;
2951
+ margin-bottom: 20px;
2952
+ }
2953
+ .summary-item {
2954
+ background: #f8f9fa;
2955
+ padding: 15px;
2956
+ border-radius: var(--border-radius);
2957
+ text-align: center;
2958
+ }
2959
+ .summary-item h5 {
2960
+ font-size: 0.9rem;
2961
+ color: var(--gray);
2962
+ margin-bottom: 5px;
2963
+ }
2964
+ .summary-value {
2965
+ font-size: 1.2rem;
2966
+ font-weight: 600;
2967
+ color: var(--primary);
2968
+ margin: 0;
2969
+ }
2970
+ .analysis-card {
2971
+ background: white;
2972
+ border-radius: var(--border-radius);
2973
+ box-shadow: var(--box-shadow);
2974
+ overflow: hidden;
2975
+ margin-bottom: 20px;
2976
  }
2977
  .image-container {
2978
  position: relative;
2979
+ width: 100%;
2980
+ height: 200px;
2981
+ overflow: hidden;
2982
+ }
2983
+ .image-container img {
2984
  width: 100%;
2985
  height: 100%;
2986
+ object-fit: cover;
2987
  }
2988
  .image-overlay {
2989
  position: absolute;
2990
  bottom: 0;
2991
  left: 0;
2992
  right: 0;
2993
+ background: rgba(0, 0, 0, 0.8);
2994
+ padding: 10px;
2995
  color: white;
2996
+ display: flex;
2997
+ justify-content: space-between;
2998
+ align-items: center;
2999
  }
3000
  .image-label {
3001
  font-size: 0.9rem;
3002
  font-weight: 500;
3003
  }
3004
+ .confidence-badge {
3005
+ background: var(--primary);
3006
+ color: white;
3007
+ padding: 2px 8px;
3008
+ border-radius: 12px;
3009
+ font-size: 0.8rem;
3010
+ font-weight: 500;
3011
+ }
3012
+ .analysis-details {
3013
+ padding: 15px;
3014
+ }
3015
+ .analysis-header {
3016
+ display: flex;
3017
+ justify-content: space-between;
3018
+ align-items: center;
3019
+ margin-bottom: 15px;
3020
+ }
3021
+ .analysis-header h5 {
3022
+ margin: 0;
3023
+ color: var(--dark);
3024
+ }
3025
+ .status-badge {
3026
+ padding: 4px 12px;
3027
+ border-radius: 20px;
3028
+ font-size: 0.8rem;
3029
+ font-weight: 500;
3030
+ }
3031
+ .status-badge.success {
3032
+ background: #d4edda;
3033
+ color: #155724;
3034
+ }
3035
+ .status-badge.warning {
3036
+ background: #fff3cd;
3037
+ color: #856404;
3038
+ }
3039
+ .analysis-metrics {
3040
+ display: grid;
3041
+ grid-template-columns: repeat(2, 1fr);
3042
+ gap: 10px;
3043
+ margin-bottom: 15px;
3044
+ }
3045
+ .metric {
3046
+ display: flex;
3047
+ justify-content: space-between;
3048
+ padding: 8px;
3049
+ background: #f8f9fa;
3050
+ border-radius: 6px;
3051
+ }
3052
+ .metric-label {
3053
+ font-size: 0.9rem;
3054
+ color: var(--gray);
3055
+ }
3056
+ .metric-value {
3057
+ font-weight: 600;
3058
+ color: var(--dark);
3059
+ }
3060
+ .top-predictions {
3061
+ margin-bottom: 15px;
3062
+ }
3063
+ .top-predictions h6 {
3064
+ font-size: 0.9rem;
3065
+ color: var(--dark);
3066
+ margin-bottom: 8px;
3067
+ }
3068
+ .top-predictions ul {
3069
+ list-style: none;
3070
+ padding: 0;
3071
+ margin: 0;
3072
+ }
3073
+ .top-predictions li {
3074
+ padding: 4px 0;
3075
+ font-size: 0.85rem;
3076
+ color: var(--gray);
3077
+ }
3078
+ .quality-info {
3079
+ margin-bottom: 15px;
3080
+ }
3081
+ .quality-info h6 {
3082
+ font-size: 0.9rem;
3083
+ color: var(--dark);
3084
+ margin-bottom: 8px;
3085
+ }
3086
+ .quality-info p {
3087
+ font-size: 0.85rem;
3088
+ color: var(--gray);
3089
+ margin: 2px 0;
3090
+ }
3091
+ .model-info {
3092
+ text-align: right;
3093
+ }
3094
+ .model-info small {
3095
+ color: var(--gray);
3096
+ font-style: italic;
3097
  }
3098
  `;
3099
  document.head.appendChild(style);
 
3284
  pdf => pdf.verification_score || 0
3285
  );
3286
  docScore = verificationScores.length > 0 ?
3287
+ Math.round(verificationScores.reduce((a, b) => a + b, 0) / verificationScores.length) : 0;
3288
  }
3289
  updateScoreBar('documentBar', 'documentValue', docScore);
3290