Spaces:

sksameermujahid
/

propertyverification

Sleeping

App Files Community

sksameermujahid committed on Jul 22

Commit

6e3dbdb

verified ·

1 Parent(s): 38d6cf7

Upload 23 files

Browse files

Files changed (12) hide show

app.py +150 -18
models/image_analysis.py +250 -97
models/model_loader.py +35 -13
models/parallel_processor.py +103 -13
models/pdf_analysis.py +470 -135
models/performance_optimizer.py +6 -8
models/property_relation.py +24 -5
models/property_summary.py +92 -100
models/suggestions.py +110 -93
models/text_quality.py +50 -68
models/trust_score.py +56 -126
templates/index.html +555 -45

app.py CHANGED Viewed

@@ -18,7 +18,7 @@ from models.model_loader import load_model, clear_model_cache
 from models.parallel_processor import parallel_processor
 from models.performance_optimizer import performance_optimizer, optimize_model_loading, timed_function
 from models.image_analysis import analyze_image
-from models.pdf_analysis import extract_pdf_text, analyze_pdf_content
 from models.property_summary import generate_property_summary
 from models.fraud_classification import classify_fraud
 from models.trust_score import generate_trust_score
@@ -51,8 +51,22 @@ def preload_models():
     try:
         logger.info("Pre-loading essential models with performance optimization...")
-        # Use the performance optimizer for model loading
-        optimize_model_loading()
         logger.info("Model pre-loading completed with optimization")
     except Exception as e:
@@ -518,12 +532,13 @@ def verify_property():
         # Process images in parallel
         images = []
         image_analysis = []
         if 'images' in request.files:
             image_files = []
             for img_file in request.files.getlist('images'):
                 if img_file.filename and img_file.filename.lower().endswith(('.jpg', '.jpeg', '.png')):
                     image_files.append(img_file)
             if image_files:
                 # Process images in parallel
                 image_results = parallel_processor.process_images_parallel(image_files)
@@ -531,18 +546,25 @@ def verify_property():
                     if 'image_data' in result:
                         images.append(result['image_data'])
                         image_analysis.append(result['analysis'])
                     else:
                         image_analysis.append(result)
         # Process PDFs in parallel
         pdf_texts = []
         pdf_analysis = []
         if 'documents' in request.files:
             pdf_files = []
             for pdf_file in request.files.getlist('documents'):
                 if pdf_file.filename and pdf_file.filename.lower().endswith('.pdf'):
                     pdf_files.append(pdf_file)
             if pdf_files:
                 # Process PDFs in parallel
                 pdf_results = parallel_processor.process_pdfs_parallel(pdf_files)
@@ -553,8 +575,12 @@ def verify_property():
                             'text': result['text']
                         })
                         pdf_analysis.append(result['analysis'])
                     else:
                         pdf_analysis.append(result)
         # Create consolidated text for analysis
         consolidated_text = f"""
@@ -602,17 +628,13 @@ def verify_property():
         analysis_time = time.time() - analysis_start_time
         logger.info(f"Analysis completed in {analysis_time:.2f} seconds")
         # Unpack results
         summary = analysis_results.get('summary', "Property summary unavailable.")
-        # Ensure summary is not placeholder text
-        if summary and isinstance(summary, str):
-            if "[Insert Property Description Here]" in summary or "[insert property price here]" in summary:
-                # Generate a basic summary if placeholder text is detected
-                from .property_summary import create_basic_summary
-                summary = create_basic_summary(data)
         fraud_classification = analysis_results.get('fraud', {})
         trust_result = analysis_results.get('trust', (0.0, "Trust analysis failed"))
         suggestions = analysis_results.get('suggestions', {})
         quality_assessment = analysis_results.get('quality', {})
@@ -620,10 +642,23 @@ def verify_property():
         cross_validation = analysis_results.get('cross_validation', [])
         location_analysis = analysis_results.get('location', {})
         price_analysis = analysis_results.get('price', {})
-        legal_analysis = analysis_results.get('legal', {})
         specs_verification = analysis_results.get('specs', {})
         market_analysis = analysis_results.get('market', {})
         # Handle trust score result
         if isinstance(trust_result, tuple):
             trust_score, trust_reasoning = trust_result
@@ -634,12 +669,106 @@ def verify_property():
         document_analysis = {
             'pdf_count': len(pdf_texts),
             'pdf_texts': pdf_texts,
-            'pdf_analysis': pdf_analysis
         }
         image_results = {
             'image_count': len(images),
-            'image_analysis': image_analysis
         }
         report_id = str(uuid.uuid4())
@@ -648,7 +777,9 @@ def verify_property():
             'report_id': report_id,
             'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
             'summary': summary,
             'fraud_classification': fraud_classification,
             'trust_score': {
                 'score': trust_score,
                 'reasoning': trust_reasoning
@@ -660,6 +791,7 @@ def verify_property():
             'location_analysis': location_analysis,
             'price_analysis': price_analysis,
             'legal_analysis': legal_analysis,
             'document_analysis': document_analysis,
             'image_analysis': image_results,
             'specs_verification': specs_verification,

 from models.parallel_processor import parallel_processor
 from models.performance_optimizer import performance_optimizer, optimize_model_loading, timed_function
 from models.image_analysis import analyze_image
+from models.pdf_analysis import extract_text_from_pdf, analyze_pdf_content
 from models.property_summary import generate_property_summary
 from models.fraud_classification import classify_fraud
 from models.trust_score import generate_trust_score
     try:
         logger.info("Pre-loading essential models with performance optimization...")
+        # Only preload the most essential models to avoid disconnections
+        essential_models = [
+            "zero-shot-classification",  # For fraud, legal, suggestions
+            "summarization"  # For property summary
+        ]
+        for model_task in essential_models:
+            try:
+                logger.info(f"Pre-loading {model_task} model...")
+                model = load_model(model_task)
+                if hasattr(model, 'fallback_used') and model.fallback_used:
+                    logger.info(f"Using fallback for {model_task}: {getattr(model, 'fallback_model', 'unknown')}")
+                else:
+                    logger.info(f"Successfully pre-loaded {model_task} model")
+            except Exception as e:
+                logger.warning(f"Failed to pre-load {model_task}: {str(e)}")
         logger.info("Model pre-loading completed with optimization")
     except Exception as e:
         # Process images in parallel
         images = []
         image_analysis = []
+        image_model_used = set()
+        image_parallel_info = []
         if 'images' in request.files:
             image_files = []
             for img_file in request.files.getlist('images'):
                 if img_file.filename and img_file.filename.lower().endswith(('.jpg', '.jpeg', '.png')):
                     image_files.append(img_file)
             if image_files:
                 # Process images in parallel
                 image_results = parallel_processor.process_images_parallel(image_files)
                     if 'image_data' in result:
                         images.append(result['image_data'])
                         image_analysis.append(result['analysis'])
+                        if 'model_used' in result['analysis']:
+                            image_model_used.add(result['analysis']['model_used'])
+                        if 'parallelization_info' in result:
+                            image_parallel_info.append(result['parallelization_info'])
                     else:
                         image_analysis.append(result)
+                        if 'model_used' in result:
+                            image_model_used.add(result['model_used'])
+                        if 'parallelization_info' in result:
+                            image_parallel_info.append(result['parallelization_info'])
         # Process PDFs in parallel
         pdf_texts = []
         pdf_analysis = []
+        pdf_parallel_info = []
         if 'documents' in request.files:
             pdf_files = []
             for pdf_file in request.files.getlist('documents'):
                 if pdf_file.filename and pdf_file.filename.lower().endswith('.pdf'):
                     pdf_files.append(pdf_file)
             if pdf_files:
                 # Process PDFs in parallel
                 pdf_results = parallel_processor.process_pdfs_parallel(pdf_files)
                             'text': result['text']
                         })
                         pdf_analysis.append(result['analysis'])
+                        if 'parallelization_info' in result:
+                            pdf_parallel_info.append(result['parallelization_info'])
                     else:
                         pdf_analysis.append(result)
+                        if 'parallelization_info' in result:
+                            pdf_parallel_info.append(result['parallelization_info'])
         # Create consolidated text for analysis
         consolidated_text = f"""
         analysis_time = time.time() - analysis_start_time
         logger.info(f"Analysis completed in {analysis_time:.2f} seconds")
+        # Ensemble/agentic logic for summary, fraud, and legal analysis
+        # (run multiple models and combine results if possible)
+        # For demonstration, just add model_used/fallback info to the results
         # Unpack results
         summary = analysis_results.get('summary', "Property summary unavailable.")
         fraud_classification = analysis_results.get('fraud', {})
+        legal_analysis = analysis_results.get('legal', {})
         trust_result = analysis_results.get('trust', (0.0, "Trust analysis failed"))
         suggestions = analysis_results.get('suggestions', {})
         quality_assessment = analysis_results.get('quality', {})
         cross_validation = analysis_results.get('cross_validation', [])
         location_analysis = analysis_results.get('location', {})
         price_analysis = analysis_results.get('price', {})
         specs_verification = analysis_results.get('specs', {})
         market_analysis = analysis_results.get('market', {})
+        # Add model_used/fallback info if present
+        if hasattr(summary, 'model_used'):
+            summary_model_used = summary.model_used
+        else:
+            summary_model_used = getattr(summary, 'fallback_model', None)
+        if hasattr(fraud_classification, 'model_used'):
+            fraud_model_used = fraud_classification.model_used
+        else:
+            fraud_model_used = getattr(fraud_classification, 'fallback_model', None)
+        if hasattr(legal_analysis, 'model_used'):
+            legal_model_used = legal_analysis.model_used
+        else:
+            legal_model_used = getattr(legal_analysis, 'fallback_model', None)
         # Handle trust score result
         if isinstance(trust_result, tuple):
             trust_score, trust_reasoning = trust_result
         document_analysis = {
             'pdf_count': len(pdf_texts),
             'pdf_texts': pdf_texts,
+            'pdf_analysis': pdf_analysis,
+            'pdf_parallelization': pdf_parallel_info
         }
+        # Fix image analysis structure to match frontend expectations
         image_results = {
             'image_count': len(images),
+            'image_analysis': image_analysis,
+            'image_model_used': list(image_model_used),
+            'image_parallelization': image_parallel_info
         }
+        # Ensure image analysis has proper structure for frontend
+        if image_analysis:
+            # Convert image analysis to proper format if needed
+            formatted_image_analysis = []
+            for i, analysis in enumerate(image_analysis):
+                if isinstance(analysis, dict):
+                    # Ensure all required fields are present
+                    formatted_analysis = {
+                        'is_property_related': analysis.get('is_property_related', False),
+                        'predicted_label': analysis.get('predicted_label', 'Unknown'),
+                        'confidence': analysis.get('confidence', 0.0),
+                        'real_estate_confidence': analysis.get('real_estate_confidence', 0.0),
+                        'authenticity_score': analysis.get('authenticity_score', 0.0),
+                        'is_ai_generated': analysis.get('is_ai_generated', False),
+                        'image_quality': analysis.get('image_quality', {
+                            'resolution': 'Unknown',
+                            'quality_score': 0.0,
+                            'total_pixels': 0,
+                            'aspect_ratio': 1.0
+                        }),
+                        'top_predictions': analysis.get('top_predictions', []),
+                        'model_used': analysis.get('model_used', 'static_fallback')
+                    }
+                    formatted_image_analysis.append(formatted_analysis)
+                else:
+                    # Fallback for non-dict analysis
+                    formatted_image_analysis.append({
+                        'is_property_related': False,
+                        'predicted_label': 'Unknown',
+                        'confidence': 0.0,
+                        'real_estate_confidence': 0.0,
+                        'authenticity_score': 0.0,
+                        'is_ai_generated': False,
+                        'image_quality': {
+                            'resolution': 'Unknown',
+                            'quality_score': 0.0,
+                            'total_pixels': 0,
+                            'aspect_ratio': 1.0
+                        },
+                        'top_predictions': [],
+                        'model_used': 'static_fallback'
+                    })
+            image_results['image_analysis'] = formatted_image_analysis
+        # Ensure document analysis has proper structure for frontend
+        if pdf_analysis:
+            formatted_pdf_analysis = []
+            for i, analysis in enumerate(pdf_analysis):
+                if isinstance(analysis, dict):
+                    # Ensure all required fields are present
+                    formatted_analysis = {
+                        'is_property_related': analysis.get('is_property_related', False),
+                        'confidence': analysis.get('confidence', 0.0),
+                        'document_type': analysis.get('document_type', 'Unknown'),
+                        'document_confidence': analysis.get('document_confidence', 0.0),
+                        'authenticity_assessment': analysis.get('authenticity_assessment', 'Unknown'),
+                        'authenticity_confidence': analysis.get('authenticity_confidence', 0.0),
+                        'summary': analysis.get('summary', 'No summary available'),
+                        'key_info': analysis.get('key_info', {}),
+                        'contains_signatures': analysis.get('contains_signatures', False),
+                        'contains_dates': analysis.get('contains_dates', False),
+                        'verification_score': analysis.get('verification_score', 0.0),
+                        'real_estate_indicators': analysis.get('real_estate_indicators', []),
+                        'legal_terms_found': analysis.get('legal_terms_found', []),
+                        'keyword_analysis': analysis.get('keyword_analysis', {}),
+                        'model_used': analysis.get('model_used', 'static_fallback')
+                    }
+                    formatted_pdf_analysis.append(formatted_analysis)
+                else:
+                    # Fallback for non-dict analysis
+                    formatted_pdf_analysis.append({
+                        'is_property_related': False,
+                        'confidence': 0.0,
+                        'document_type': 'Unknown',
+                        'document_confidence': 0.0,
+                        'authenticity_assessment': 'Unknown',
+                        'authenticity_confidence': 0.0,
+                        'summary': 'No summary available',
+                        'key_info': {},
+                        'contains_signatures': False,
+                        'contains_dates': False,
+                        'verification_score': 0.0,
+                        'real_estate_indicators': [],
+                        'legal_terms_found': [],
+                        'keyword_analysis': {},
+                        'model_used': 'static_fallback'
+                    })
+            document_analysis['pdf_analysis'] = formatted_pdf_analysis
         report_id = str(uuid.uuid4())
             'report_id': report_id,
             'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
             'summary': summary,
+            'summary_model_used': summary_model_used,
             'fraud_classification': fraud_classification,
+            'fraud_model_used': fraud_model_used,
             'trust_score': {
                 'score': trust_score,
                 'reasoning': trust_reasoning
             'location_analysis': location_analysis,
             'price_analysis': price_analysis,
             'legal_analysis': legal_analysis,
+            'legal_model_used': legal_model_used,
             'document_analysis': document_analysis,
             'image_analysis': image_results,
             'specs_verification': specs_verification,

models/image_analysis.py CHANGED Viewed

@@ -1,21 +1,23 @@
 # models/image_analysis.py
 from PIL import Image
-import numpy as np
 from transformers import AutoImageProcessor, AutoModelForImageClassification
 from .logging_config import logger
-# Initialize real estate classification model with smaller alternatives
 processor = None
 model = None
-has_model = False
 try:
-    # Try smaller models first
     model_options = [
-        "microsoft/resnet-50",  # Smaller than the original
-        "google/vit-base-patch16-224",  # Alternative small model
-        "andupets/real-estate-image-classification"  # Original as fallback
     ]
     for model_name in model_options:
@@ -23,149 +25,300 @@ try:
             logger.info(f"Trying to load image model: {model_name}")
             processor = AutoImageProcessor.from_pretrained(model_name)
             model = AutoModelForImageClassification.from_pretrained(model_name)
             has_model = True
             logger.info(f"Successfully loaded image model: {model_name}")
             break
         except Exception as e:
             logger.warning(f"Failed to load {model_name}: {str(e)}")
             continue
     if not has_model:
-        logger.warning("No image classification models could be loaded")
 except Exception as e:
     logger.error(f"Error loading image classification models: {str(e)}")
     has_model = False
 def analyze_image(image):
     try:
-        if image is None:
-            logger.error("No image provided to analyze_image.")
-            return {
-                'is_property_related': False,
-                'property_confidence': 0.0,
-                'predicted_label': 'no_image',
-                'top_predictions': [],
-                'image_quality': {'resolution': 'unknown', 'quality_score': 0},
-                'is_ai_generated': False,
-                'authenticity_score': 0.0,
-                'error': 'No image provided'
-            }
         if has_model and processor and model:
             try:
-                img_rgb = image.convert('RGB')
-                # Resize image for faster processing
-                max_size = 224  # Smaller size for faster processing
-                if max(img_rgb.size) > max_size:
-                    img_rgb.thumbnail((max_size, max_size), Image.Resampling.LANCZOS)
-                inputs = processor(images=img_rgb, return_tensors="pt")
-                outputs = model(**inputs)
-                logits = outputs.logits
-                probs = logits.softmax(dim=1).detach().numpy()[0]
-                max_prob_idx = probs.argmax()
-                max_prob = probs[max_prob_idx]
-                # Get predicted label
                 if hasattr(model.config, 'id2label'):
-                    predicted_label = model.config.id2label[max_prob_idx]
                 else:
-                    predicted_label = f"class_{max_prob_idx}"
-                # Determine if it's property-related based on confidence
-                is_real_estate = max_prob > 0.5
-                quality = assess_image_quality(image)
-                is_ai_generated = detect_ai_generated_image(image)
-                return {
                     'is_property_related': is_real_estate,
-                    'property_confidence': float(max_prob),
                     'predicted_label': predicted_label,
-                    'top_predictions': [
-                        {'label': model.config.id2label[i] if hasattr(model.config, 'id2label') else f"class_{i}",
-                         'confidence': float(prob)}
-                        for i, prob in enumerate(probs[:3])  # Top 3 predictions
-                    ],
-                    'image_quality': quality,
-                    'is_ai_generated': is_ai_generated,
-                    'authenticity_score': 0.95 if not is_ai_generated else 0.60
-                }
             except Exception as e:
-                logger.error(f"Error in model-based image analysis: {str(e)}")
-                return {
-                    'is_property_related': False,
-                    'property_confidence': 0.0,
-                    'predicted_label': 'error',
-                    'top_predictions': [],
-                    'image_quality': assess_image_quality(image),
                     'is_ai_generated': False,
-                    'authenticity_score': 0.0,
                     'error': str(e)
-                }
         else:
-            logger.warning("Image classification models unavailable, using basic analysis")
-            return {
-                'is_property_related': True,  # Assume it's property-related if we can't analyze
-                'property_confidence': 0.5,
-                'predicted_label': 'unknown',
-                'top_predictions': [],
-                'image_quality': assess_image_quality(image),
                 'is_ai_generated': False,
-                'authenticity_score': 0.5
-            }
     except Exception as e:
         logger.error(f"Error analyzing image: {str(e)}")
         return {
             'is_property_related': False,
-            'property_confidence': 0.0,
-            'predicted_label': 'error',
-            'top_predictions': [],
-            'image_quality': {'resolution': 'unknown', 'quality_score': 0},
-            'is_ai_generated': False,
             'authenticity_score': 0.0,
             'error': str(e)
         }
-def detect_ai_generated_image(image):
     try:
-        img_array = np.array(image)
-        if len(img_array.shape) == 3:
-            gray = np.mean(img_array, axis=2)
-        else:
-            gray = img_array
-        # Simplified AI detection
-        noise = gray - np.mean(gray)
-        noise_std = np.std(noise)
         width, height = image.size
-        # Check for perfect dimensions (common in AI-generated images)
-        perfect_dimensions = (width % 64 == 0 and height % 64 == 0)
-        # Check for EXIF data (real photos usually have this)
-        has_exif = hasattr(image, '_getexif') and image._getexif() is not None
-        # Simplified detection logic
-        return noise_std < 0.05 or perfect_dimensions or not has_exif
     except Exception as e:
-        logger.error(f"Error detecting AI-generated image: {str(e)}")
         return False
-def assess_image_quality(img):
     try:
-        width, height = img.size
-        resolution = width * height
-        quality_score = min(100, resolution // 20000)
         return {
-            'resolution': f"{width}x{height}",
-            'quality_score': quality_score
         }
     except Exception as e:
-        logger.error(f"Error assessing image quality: {str(e)}")
         return {
             'resolution': 'unknown',
-            'quality_score': 0
         }

 # models/image_analysis.py
 from PIL import Image
+import torch
 from transformers import AutoImageProcessor, AutoModelForImageClassification
 from .logging_config import logger
+import numpy as np
+# Initialize real estate classification model with better alternatives
+has_model = False
 processor = None
 model = None
+model_used = "static_fallback"
 try:
     model_options = [
+        "andupets/real-estate-image-classification",  # Best specialized real estate model
+        "microsoft/resnet-50",  # High quality general purpose
+        "google/vit-base-patch16-224",  # Good alternative
+        "microsoft/resnet-18",  # Smaller but effective
     ]
     for model_name in model_options:
             logger.info(f"Trying to load image model: {model_name}")
             processor = AutoImageProcessor.from_pretrained(model_name)
             model = AutoModelForImageClassification.from_pretrained(model_name)
+            # Move to GPU if available
+            if torch.cuda.is_available():
+                model = model.to('cuda')
+                logger.info(f"Model loaded on GPU: {model_name}")
+            else:
+                logger.info(f"Model loaded on CPU: {model_name}")
+            model.eval()  # Set to evaluation mode
             has_model = True
+            model_used = model_name
             logger.info(f"Successfully loaded image model: {model_name}")
             break
         except Exception as e:
             logger.warning(f"Failed to load {model_name}: {str(e)}")
             continue
     if not has_model:
+        logger.warning("No image classification models could be loaded, will use static fallback.")
+        model_used = "static_fallback"
 except Exception as e:
     logger.error(f"Error loading image classification models: {str(e)}")
     has_model = False
+    model_used = "static_fallback"
 def analyze_image(image):
+    """
+    Analyze a single image for real estate verification with perfect classification.
+    Args:
+        image: PIL Image object or file path
+    Returns:
+        dict: Comprehensive analysis results
+    """
     try:
+        # Convert to PIL Image if needed
+        if isinstance(image, str):
+            image = Image.open(image)
+        elif not isinstance(image, Image.Image):
+            # Handle file-like objects
+            image = Image.open(image)
+        # Convert to RGB if needed
+        if image.mode != 'RGB':
+            image = image.convert('RGB')
+        # Resize for optimal processing
+        max_size = 512  # Increased for better accuracy
+        if max(image.size) > max_size:
+            image.thumbnail((max_size, max_size), Image.Resampling.LANCZOS)
+        # Initialize analysis results
+        analysis_result = {
+            'is_property_related': False,
+            'predicted_label': "Unknown",
+            'confidence': 0.0,
+            'authenticity_score': 0.0,
+            'is_ai_generated': False,
+            'image_quality': {
+                'resolution': f"{image.size[0]}x{image.size[1]}",
+                'quality_score': 0.0
+            },
+            'top_predictions': [],
+            'real_estate_confidence': 0.0,
+            'model_used': model_used
+        }
         if has_model and processor and model:
             try:
+                # Prepare image for model
+                inputs = processor(images=image, return_tensors="pt")
+                # Move inputs to same device as model
+                if torch.cuda.is_available():
+                    inputs = {k: v.to('cuda') for k, v in inputs.items()}
+                # Get predictions
+                with torch.no_grad():
+                    outputs = model(**inputs)
+                    logits = outputs.logits
+                    probs = torch.softmax(logits, dim=1).detach().cpu().numpy()[0]
+                # Get top predictions
+                top_indices = np.argsort(probs)[::-1][:5]  # Top 5 predictions
+                # Get predicted labels
                 if hasattr(model.config, 'id2label'):
+                    labels = [model.config.id2label[i] for i in top_indices]
                 else:
+                    labels = [f"class_{i}" for i in top_indices]
+                # Create top predictions list
+                analysis_result['top_predictions'] = [
+                    {
+                        'label': label,
+                        'confidence': float(probs[i])
+                    }
+                    for i, label in zip(top_indices, labels)
+                ]
+                # Get the highest probability and label
+                max_prob_idx = probs.argmax()
+                max_prob = probs[max_prob_idx]
+                predicted_label = labels[0]  # Top prediction
+                # Determine if it's real estate related
+                real_estate_keywords = [
+                    'bathroom', 'bedroom', 'dining room', 'house facade', 'kitchen',
+                    'living room', 'apartment', 'facade', 'real estate', 'property',
+                    'interior', 'exterior', 'room', 'home', 'house', 'flat', 'villa'
+                ]
+                # Check if any real estate keywords are in the predicted label
+                is_real_estate = any(keyword in predicted_label.lower() for keyword in real_estate_keywords)
+                # Additional check: if using the specialized real estate model
+                if "real-estate" in model_used.lower():
+                    # This model is specifically trained for real estate, so most predictions are real estate related
+                    is_real_estate = max_prob > 0.3  # Lower threshold for specialized model
+                # Calculate real estate confidence
+                if is_real_estate:
+                    real_estate_confidence = max_prob
+                else:
+                    # Check if any top predictions contain real estate keywords
+                    real_estate_scores = []
+                    for pred in analysis_result['top_predictions']:
+                        if any(keyword in pred['label'].lower() for keyword in real_estate_keywords):
+                            real_estate_scores.append(pred['confidence'])
+                    real_estate_confidence = max(real_estate_scores) if real_estate_scores else 0.0
+                # Update analysis result
+                analysis_result.update({
                     'is_property_related': is_real_estate,
                     'predicted_label': predicted_label,
+                    'confidence': float(max_prob),
+                    'real_estate_confidence': float(real_estate_confidence),
+                    'authenticity_score': 0.95 if max_prob > 0.7 else 0.60,
+                    'is_ai_generated': detect_ai_generated_image(image, max_prob, predicted_label)
+                })
+                # Assess image quality
+                analysis_result['image_quality'] = assess_image_quality(image)
             except Exception as e:
+                logger.error(f"Error in image model inference: {str(e)}")
+                # Fallback to static analysis
+                analysis_result.update({
+                    'is_property_related': True,  # Assume property related if model fails
+                    'predicted_label': "Property Image (Model Error)",
+                    'confidence': 0.5,
+                    'real_estate_confidence': 0.5,
+                    'authenticity_score': 0.7,
                     'is_ai_generated': False,
                     'error': str(e)
+                })
         else:
+            # Static fallback analysis
+            analysis_result.update({
+                'is_property_related': True,
+                'predicted_label': "Property Image (Static Analysis)",
+                'confidence': 0.5,
+                'real_estate_confidence': 0.5,
+                'authenticity_score': 0.7,
                 'is_ai_generated': False,
+                'top_predictions': [
+                    {'label': 'Property Image', 'confidence': 0.5}
+                ]
+            })
+        return analysis_result
     except Exception as e:
         logger.error(f"Error analyzing image: {str(e)}")
         return {
             'is_property_related': False,
+            'predicted_label': 'Error',
+            'confidence': 0.0,
+            'real_estate_confidence': 0.0,
             'authenticity_score': 0.0,
+            'is_ai_generated': False,
+            'image_quality': {'resolution': 'unknown', 'quality_score': 0.0},
+            'top_predictions': [],
+            'model_used': 'static_fallback',
             'error': str(e)
         }
def detect_ai_generated_image(image, confidence, predicted_label):
    """
    Guess whether an image is AI-generated from cheap pixel and metadata heuristics.

    Strong signals (any single one returns True):
      1. classifier confidence above 0.95 together with a label longer than
         20 characters
      2. near-perfect left/right mirror symmetry of the grayscale image
      3. very low grayscale pixel variance (uniform texture)

    Weak signals (return True only when BOTH hold, because each one alone
    also matches ordinary uploads such as PNGs or EXIF-stripped JPEGs):
      4. width and height are both multiples of 64
      5. the image carries no EXIF metadata

    Args:
        image: PIL-style image exposing ``convert`` and ``size`` and,
            optionally, ``getexif`` / ``_getexif``.
        confidence: top classifier probability for the image.
        predicted_label: label string of the top prediction.

    Returns:
        bool: True when the heuristics suggest an AI-generated image; False
        otherwise, including when the analysis itself raises.
    """
    try:
        # Heuristic 1: unusually high confidence with a long (generic) label.
        if confidence > 0.95 and len(predicted_label) > 20:
            return True

        # Use a signed dtype: subtracting uint8 arrays wraps modulo 256, which
        # would make strongly different halves look almost identical.
        gray_array = np.asarray(image.convert('L'), dtype=np.int16)
        _, w = gray_array.shape

        # Heuristic 2: mirror symmetry between the left and right halves.
        if w > 1:
            half = w // 2
            left_half = gray_array[:, :half]
            # Taking the last `half` columns skips the middle column of odd widths.
            right_half_flipped = np.fliplr(gray_array[:, w - half:])
            symmetry_score = np.mean(np.abs(left_half - right_half_flipped))
            if symmetry_score < 5.0:  # Mean absolute difference in gray levels
                return True

        # Heuristic 3: very uniform texture.
        if np.var(gray_array) < 100:
            return True

        # Heuristic 4 (weak): dimensions that generative models typically emit.
        width, height = image.size
        if width % 64 != 0 or height % 64 != 0:
            return False

        # Heuristic 5 (weak): missing EXIF metadata. Prefer the public Pillow
        # accessor; `_getexif` only exists on some image plugins.
        exif_reader = getattr(image, 'getexif', None) or getattr(image, '_getexif', None)
        has_exif = bool(exif_reader()) if callable(exif_reader) else False
        return not has_exif
    except Exception as e:
        # Best effort: a failed heuristic must not break the image analysis.
        logger.warning(f"Error in AI detection: {str(e)}")
        return False
def assess_image_quality(image):
    """
    Score an image's quality from its pixel dimensions alone.

    The base score comes from the pixel count (>= 1,000,000: 0.9,
    >= 500,000: 0.7, >= 100,000: 0.5, otherwise 0.3). It is raised by 0.1
    for aspect ratios between 0.5 and 2.0, lowered by 0.1 otherwise, then
    clamped to [0, 1] and rounded to two decimals.

    Args:
        image: object exposing ``size`` as a ``(width, height)`` pair.

    Returns:
        dict: ``resolution`` ("WxH"), ``quality_score``, ``total_pixels`` and
        ``aspect_ratio``. When the size is unreadable or either dimension is
        not positive, the placeholder record (``resolution`` "unknown",
        score 0.0) is returned instead.
    """
    unknown_quality = {
        'resolution': 'unknown',
        'quality_score': 0.0,
        'total_pixels': 0,
        'aspect_ratio': 1.0
    }
    try:
        width, height = image.size
        # A degenerate image has no aspect ratio; answer explicitly instead of
        # relying on a ZeroDivisionError reaching the handler below.
        if width <= 0 or height <= 0:
            return unknown_quality

        total_pixels = width * height
        if total_pixels >= 1000000:  # 1MP or higher
            quality_score = 0.9
        elif total_pixels >= 500000:  # 500K pixels
            quality_score = 0.7
        elif total_pixels >= 100000:  # 100K pixels
            quality_score = 0.5
        else:
            quality_score = 0.3

        # Prefer reasonable aspect ratios.
        aspect_ratio = width / height
        if 0.5 <= aspect_ratio <= 2.0:
            quality_score += 0.1
        else:
            quality_score -= 0.1

        # Clamp, then round away float noise such as 0.7999999999999999.
        quality_score = round(max(0.0, min(1.0, quality_score)), 2)

        return {
            'resolution': f"{width}x{height}",
            'quality_score': quality_score,
            'total_pixels': total_pixels,
            'aspect_ratio': aspect_ratio
        }
    except Exception as e:
        logger.warning(f"Error assessing image quality: {str(e)}")
        return unknown_quality

models/model_loader.py CHANGED Viewed

@@ -7,23 +7,24 @@ import os
 MODEL_MAPPING = {
     "zero-shot-classification": {
-        "primary": "facebook/bart-large-mnli",
-        "fallback": "microsoft/DialoGPT-small",
         "local_fallback": "distilbert-base-uncased"
     },
     "summarization": {
-        "primary": "sshleifer/distilbart-cnn-6-6",
-        "fallback": "facebook/bart-base",
         "local_fallback": "t5-small"
     },
     "text-classification": {
-        "primary": "distilbert-base-uncased",
-        "fallback": "bert-base-uncased",
         "local_fallback": "distilbert-base-uncased"
     },
-    # Only use TinyLlama for text-generation
     "text-generation": {
-        "primary": "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
     }
 }
@@ -32,8 +33,9 @@ _model_cache = {}
 @lru_cache(maxsize=2)
 def load_model(task, model_name=None):
     try:
         if task == "text-generation":
-            model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
         elif model_name is None or model_name in MODEL_MAPPING.get(task, {}):
             model_config = MODEL_MAPPING.get(task, {})
             if model_name is None:
@@ -45,11 +47,11 @@ def load_model(task, model_name=None):
         logger.info(f"Loading model: {model_name} for task: {task}")
         model_kwargs = {"device": -1, "truncation": True}
         if task == "zero-shot-classification":
-            model_kwargs.update({"max_length": 512, "truncation": True})
         elif task == "summarization":
-            model_kwargs.update({"max_length": 130, "min_length": 30, "do_sample": False, "num_beams": 1, "truncation": True})
         elif task == "text-generation":
-            model_kwargs.update({"max_length": 512, "do_sample": True, "temperature": 0.7, "top_p": 0.9, "repetition_penalty": 1.1, "truncation": True})
         try:
             if task == "text-generation":
                 tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -63,16 +65,34 @@ def load_model(task, model_name=None):
                     pad_token_id=pad_token_id,
                     truncation=True
                 )
                 _model_cache[cache_key] = pipe
                 logger.info(f"Successfully loaded text-generation model: {model_name}")
                 return pipe
             else:
                 model = pipeline(task, model=model_name, **model_kwargs)
                 _model_cache[cache_key] = model
                 logger.info(f"Successfully loaded model: {model_name}")
                 return model
         except Exception as e:
-            logger.error(f"Failed to load TinyLlama for text-generation: {str(e)}")
             return create_text_fallback(task)
     except Exception as e:
         logger.error(f"Error in load_model: {str(e)}")
@@ -82,6 +102,8 @@ def create_text_fallback(task):
     class TextFallback:
         def __init__(self, task_type):
             self.task_type = task_type
         def __call__(self, text, *args, **kwargs):
             if self.task_type == "text-generation":
                 return [{"generated_text": "Summary unavailable: Unable to load TinyLlama model. Please check system memory or model availability."}]

 MODEL_MAPPING = {
     "zero-shot-classification": {
+        "primary": "distilbert-base-uncased",  # Much smaller than BART
+        "fallback": "microsoft/DialoGPT-small",  # Very small
         "local_fallback": "distilbert-base-uncased"
     },
     "summarization": {
+        "primary": "sshleifer/distilbart-cnn-6-6",  # Already small
+        "fallback": "t5-small",  # Very small
         "local_fallback": "t5-small"
     },
     "text-classification": {
+        "primary": "distilbert-base-uncased",  # Already small
+        "fallback": "distilbert-base-uncased",
         "local_fallback": "distilbert-base-uncased"
     },
+    # Use a much smaller model for text generation
     "text-generation": {
+        "primary": "distilgpt2",  # Much smaller than TinyLlama
+        "fallback": "gpt2"  # Small fallback
     }
 }
 @lru_cache(maxsize=2)
 def load_model(task, model_name=None):
     try:
+        fallback_used = None
         if task == "text-generation":
+            model_name = "distilgpt2"  # Use distilgpt2 instead of TinyLlama
         elif model_name is None or model_name in MODEL_MAPPING.get(task, {}):
             model_config = MODEL_MAPPING.get(task, {})
             if model_name is None:
         logger.info(f"Loading model: {model_name} for task: {task}")
         model_kwargs = {"device": -1, "truncation": True}
         if task == "zero-shot-classification":
+            model_kwargs.update({"max_length": 256, "truncation": True})  # Reduced max_length
         elif task == "summarization":
+            model_kwargs.update({"max_length": 100, "min_length": 20, "do_sample": False, "num_beams": 1, "truncation": True})  # Reduced lengths
         elif task == "text-generation":
+            model_kwargs.update({"max_length": 256, "do_sample": True, "temperature": 0.7, "top_p": 0.9, "repetition_penalty": 1.1, "truncation": True})  # Reduced max_length
         try:
             if task == "text-generation":
                 tokenizer = AutoTokenizer.from_pretrained(model_name)
                     pad_token_id=pad_token_id,
                     truncation=True
                 )
+                pipe.fallback_used = False
                 _model_cache[cache_key] = pipe
                 logger.info(f"Successfully loaded text-generation model: {model_name}")
                 return pipe
             else:
                 model = pipeline(task, model=model_name, **model_kwargs)
+                model.fallback_used = False
                 _model_cache[cache_key] = model
                 logger.info(f"Successfully loaded model: {model_name}")
                 return model
         except Exception as e:
+            logger.warning(f"Failed to load primary model {model_name} for {task}: {str(e)}")
+            # Try fallback and local_fallback
+            model_config = MODEL_MAPPING.get(task, {})
+            for fallback_key in ["fallback", "local_fallback"]:
+                fallback_model = model_config.get(fallback_key)
+                if fallback_model and fallback_model != model_name:  # Don't try the same model again
+                    try:
+                        logger.info(f"Trying fallback model: {fallback_model} for {task}")
+                        model = pipeline(task, model=fallback_model, device=-1, truncation=True)
+                        model.fallback_used = True
+                        model.fallback_model = fallback_model
+                        _model_cache[f"{task}_{fallback_model}"] = model
+                        logger.info(f"Loaded fallback model: {fallback_model} for {task}")
+                        return model
+                    except Exception as e2:
+                        logger.warning(f"Failed to load fallback model {fallback_model} for {task}: {str(e2)}")
+            logger.error(f"All model loading failed for {task}, using static fallback.")
             return create_text_fallback(task)
     except Exception as e:
         logger.error(f"Error in load_model: {str(e)}")
     class TextFallback:
         def __init__(self, task_type):
             self.task_type = task_type
+            self.fallback_used = True
+            self.fallback_model = "static_fallback"
         def __call__(self, text, *args, **kwargs):
             if self.task_type == "text-generation":
                 return [{"generated_text": "Summary unavailable: Unable to load TinyLlama model. Please check system memory or model availability."}]

models/parallel_processor.py CHANGED Viewed

@@ -23,21 +23,22 @@ class ParallelProcessor:
     def process_images_parallel(self, image_files):
         """Process multiple images in parallel"""
         try:
-            with concurrent.futures.ThreadPoolExecutor(max_workers=min(4, len(image_files))) as executor:
                 futures = []
                 for img_file in image_files:
                     future = executor.submit(self._process_single_image, img_file)
                     futures.append(future)
                 results = []
                 for future in concurrent.futures.as_completed(futures):
                     try:
                         result = future.result(timeout=30)
                         results.append(result)
                     except Exception as e:
                         logger.error(f"Error processing image: {str(e)}")
-                        results.append({'error': str(e), 'is_property_related': False})
                 return results
         except Exception as e:
             logger.error(f"Error in parallel image processing: {str(e)}")
@@ -68,21 +69,22 @@ class ParallelProcessor:
     def process_pdfs_parallel(self, pdf_files):
         """Process multiple PDFs in parallel"""
         try:
-            with concurrent.futures.ThreadPoolExecutor(max_workers=min(4, len(pdf_files))) as executor:
                 futures = []
                 for pdf_file in pdf_files:
                     future = executor.submit(self._process_single_pdf, pdf_file)
                     futures.append(future)
                 results = []
                 for future in concurrent.futures.as_completed(futures):
                     try:
                         result = future.result(timeout=60)
                         results.append(result)
                     except Exception as e:
                         logger.error(f"Error processing PDF: {str(e)}")
-                        results.append({'error': str(e)})
                 return results
         except Exception as e:
             logger.error(f"Error in parallel PDF processing: {str(e)}")
@@ -91,9 +93,26 @@ class ParallelProcessor:
     def _process_single_pdf(self, pdf_file):
         """Process a single PDF"""
         try:
-            from .pdf_analysis import extract_pdf_text, analyze_pdf_content
-            pdf_text = extract_pdf_text(pdf_file)
             analysis = analyze_pdf_content(pdf_text, {})
             return {
@@ -102,8 +121,19 @@ class ParallelProcessor:
                 'analysis': analysis
             }
         except Exception as e:
-            logger.error(f"Error processing PDF {pdf_file.filename}: {str(e)}")
-            return {'error': str(e)}
     async def run_analyses_parallel(self, data, consolidated_text, image_analysis, pdf_analysis):
         """Run all analyses in parallel using asyncio and thread pools"""
@@ -144,7 +174,7 @@ class ParallelProcessor:
             results = {}
             for task_name, task in tasks:
                 try:
-                    result = await asyncio.wait_for(task, timeout=120)  # 2 minutes timeout per task
                     results[task_name] = result
                 except asyncio.TimeoutError:
                     logger.error(f"Task {task_name} timed out")
@@ -320,5 +350,65 @@ class ParallelProcessor:
             'market': self._get_error_result(error_message)
         }
 # Global instance for easy import
 parallel_processor = ParallelProcessor()

     def process_images_parallel(self, image_files):
         """Process multiple images in parallel"""
         try:
+            max_workers = min(8, mp.cpu_count(), len(image_files)) if image_files else 1
+            with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
                 futures = []
                 for img_file in image_files:
                     future = executor.submit(self._process_single_image, img_file)
                     futures.append(future)
                 results = []
                 for future in concurrent.futures.as_completed(futures):
                     try:
                         result = future.result(timeout=30)
+                        if isinstance(result, dict):
+                            result['parallelization_info'] = {'worker_count': max_workers}
                         results.append(result)
                     except Exception as e:
                         logger.error(f"Error processing image: {str(e)}")
+                        results.append({'error': str(e), 'is_property_related': False, 'parallelization_info': {'worker_count': max_workers}})
                 return results
         except Exception as e:
             logger.error(f"Error in parallel image processing: {str(e)}")
     def process_pdfs_parallel(self, pdf_files):
         """Process multiple PDFs in parallel"""
         try:
+            max_workers = min(8, mp.cpu_count(), len(pdf_files)) if pdf_files else 1
+            with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
                 futures = []
                 for pdf_file in pdf_files:
                     future = executor.submit(self._process_single_pdf, pdf_file)
                     futures.append(future)
                 results = []
                 for future in concurrent.futures.as_completed(futures):
                     try:
                         result = future.result(timeout=60)
+                        if isinstance(result, dict):
+                            result['parallelization_info'] = {'worker_count': max_workers}
                         results.append(result)
                     except Exception as e:
                         logger.error(f"Error processing PDF: {str(e)}")
+                        results.append({'error': str(e), 'parallelization_info': {'worker_count': max_workers}})
                 return results
         except Exception as e:
             logger.error(f"Error in parallel PDF processing: {str(e)}")
     def _process_single_pdf(self, pdf_file):
         """Process a single PDF"""
         try:
+            from .pdf_analysis import extract_text_from_pdf, analyze_pdf_content
+            # Ensure pdf_file is a file object, not a dict
+            if hasattr(pdf_file, 'read'):
+                pdf_text = extract_text_from_pdf(pdf_file)
+            else:
+                logger.error(f"Invalid PDF file object: {type(pdf_file)}")
+                return {
+                    'filename': getattr(pdf_file, 'filename', 'unknown.pdf'),
+                    'text': '',
+                    'analysis': {
+                        'is_property_related': False,
+                        'confidence': 0.0,
+                        'summary': 'Invalid PDF file object',
+                        'verification_score': 0.0,
+                        'model_used': 'static_fallback',
+                        'error': 'Invalid PDF file object'
+                    }
+                }
             analysis = analyze_pdf_content(pdf_text, {})
             return {
                 'analysis': analysis
             }
         except Exception as e:
+            logger.error(f"Error processing PDF {getattr(pdf_file, 'filename', 'unknown.pdf')}: {str(e)}")
+            return {
+                'filename': getattr(pdf_file, 'filename', 'unknown.pdf'),
+                'text': '',
+                'analysis': {
+                    'is_property_related': False,
+                    'confidence': 0.0,
+                    'summary': f'Error processing PDF: {str(e)}',
+                    'verification_score': 0.0,
+                    'model_used': 'static_fallback',
+                    'error': str(e)
+                }
+            }
     async def run_analyses_parallel(self, data, consolidated_text, image_analysis, pdf_analysis):
         """Run all analyses in parallel using asyncio and thread pools"""
             results = {}
             for task_name, task in tasks:
                 try:
+                    result = await asyncio.wait_for(task, timeout=60)  # Reduced from 120 to 60 seconds
                     results[task_name] = result
                 except asyncio.TimeoutError:
                     logger.error(f"Task {task_name} timed out")
             'market': self._get_error_result(error_message)
         }
+    async def _process_pdf_async(self, pdf_file, property_data):
+        """Process a single PDF file asynchronously"""
+        try:
+            from .pdf_analysis import extract_text_from_pdf, analyze_pdf_content
+            # Ensure pdf_file is a file object, not a dict
+            if hasattr(pdf_file, 'read'):
+                # Extract text from PDF
+                text = extract_text_from_pdf(pdf_file)
+                if not text:
+                    return {
+                        'filename': pdf_file.filename,
+                        'text': '',
+                        'analysis': {
+                            'is_property_related': False,
+                            'confidence': 0.0,
+                            'summary': 'No text extracted from PDF',
+                            'verification_score': 0.0,
+                            'model_used': 'static_fallback'
+                        }
+                    }
+                # Analyze the content
+                analysis = analyze_pdf_content(text, property_data)
+                return {
+                    'filename': pdf_file.filename,
+                    'text': text,
+                    'analysis': analysis
+                }
+            else:
+                logger.error(f"Invalid PDF file object in async processing: {type(pdf_file)}")
+                return {
+                    'filename': getattr(pdf_file, 'filename', 'unknown.pdf'),
+                    'text': '',
+                    'analysis': {
+                        'is_property_related': False,
+                        'confidence': 0.0,
+                        'summary': 'Invalid PDF file object',
+                        'verification_score': 0.0,
+                        'model_used': 'static_fallback',
+                        'error': 'Invalid PDF file object'
+                    }
+                }
+        except Exception as e:
+            logger.error(f"Error processing PDF {getattr(pdf_file, 'filename', 'unknown.pdf')}: {str(e)}")
+            return {
+                'filename': getattr(pdf_file, 'filename', 'unknown.pdf'),
+                'text': '',
+                'analysis': {
+                    'is_property_related': False,
+                    'confidence': 0.0,
+                    'summary': f'Error processing PDF: {str(e)}',
+                    'verification_score': 0.0,
+                    'model_used': 'static_fallback',
+                    'error': str(e)
+                }
+            }
 # Global instance for easy import
 parallel_processor = ParallelProcessor()

models/pdf_analysis.py CHANGED Viewed

@@ -4,170 +4,505 @@ import fitz  # PyMuPDF
 import re
 from .model_loader import load_model
 from .logging_config import logger
-from sentence_transformers import SentenceTransformer, util
-from .property_relation import check_if_property_related
-from .utils import summarize_text
-# Initialize sentence transformer
-try:
-    sentence_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
-    logger.info("Sentence transformer loaded successfully in pdf_analysis.py")
-except Exception as e:
-    logger.error(f"Error loading sentence transformer in pdf_analysis.py: {str(e)}")
-    sentence_model = None
-def extract_pdf_text(pdf_file):
     try:
-        pdf_document = fitz.Document(stream=pdf_file.read(), filetype="pdf")
         text = ""
-        for page in pdf_document:
             text += page.get_text()
-        pdf_document.close()
-        return text
     except Exception as e:
-        logger.error(f"Error extracting PDF text: {str(e)}")
         return ""
 def analyze_pdf_content(document_text, property_data):
     try:
-        if not document_text:
             return {
-                'document_type': {'classification': 'unknown', 'confidence': 0.0},
-                'authenticity': {'assessment': 'could not verify', 'confidence': 0.0},
-                'key_info': {},
-                'consistency_score': 0.0,
                 'is_property_related': False,
-                'summary': 'Empty document',
-                'has_signatures': False,
-                'has_dates': False,
-                'verification_score': 0.0
             }
-        # Use a more sophisticated model for document classification
-        classifier = load_model("zero-shot-classification", "typeform/mobilebert-uncased-mnli")
-        # Enhanced document types with more specific categories
-        doc_types = [
-            "property deed", "sales agreement", "mortgage document",
-            "property tax record", "title document", "khata certificate",
-            "encumbrance certificate", "lease agreement", "rental agreement",
-            "property registration document", "building permit", "other document"
-        ]
-        # Analyze document type with context
-        doc_context = f"{document_text[:1000]} property_type:{property_data.get('property_type', '')} location:{property_data.get('city', '')}"
-        doc_result = classifier(doc_context, doc_types)
-        doc_type = doc_result['labels'][0]
-        doc_confidence = doc_result['scores'][0]
-        # Enhanced authenticity check with multiple aspects
-        authenticity_aspects = [
-            "authentic legal document",
-            "questionable document",
-            "forged document",
-            "template document",
-            "official document"
-        ]
-        authenticity_result = classifier(document_text[:1000], authenticity_aspects)
-        authenticity = "likely authentic" if authenticity_result['labels'][0] == "authentic legal document" else "questionable"
-        authenticity_confidence = authenticity_result['scores'][0]
-        # Extract key information using NLP
-        key_info = extract_document_key_info(document_text)
-        # Enhanced consistency check
-        consistency_score = check_document_consistency(document_text, property_data)
-        # Property relation check with context
-        property_context = f"{document_text[:1000]} property:{property_data.get('property_name', '')} type:{property_data.get('property_type', '')}"
-        is_property_related = check_if_property_related(property_context)['is_related']
-        # Generate summary using BART
-        summary = summarize_text(document_text[:2000])
-        # Enhanced signature and date detection
-        has_signatures = bool(re.search(r'(?:sign|signature|signed|witness|notary|authorized).{0,50}(?:by|of|for)', document_text.lower()))
-        has_dates = bool(re.search(r'\d{1,2}[/-]\d{1,2}[/-]\d{2,4}|\d{4}[/-]\d{1,2}[/-]\d{1,2}', document_text))
-        # Calculate verification score with weighted components
-        verification_weights = {
-            'doc_type': 0.3,
-            'authenticity': 0.3,
-            'consistency': 0.2,
-            'property_relation': 0.1,
-            'signatures_dates': 0.1
         }
-        verification_score = (
-            doc_confidence * verification_weights['doc_type'] +
-            authenticity_confidence * verification_weights['authenticity'] +
-            consistency_score * verification_weights['consistency'] +
-            float(is_property_related) * verification_weights['property_relation'] +
-            float(has_signatures and has_dates) * verification_weights['signatures_dates']
         )
         return {
-            'document_type': {'classification': doc_type, 'confidence': float(doc_confidence)},
-            'authenticity': {'assessment': authenticity, 'confidence': float(authenticity_confidence)},
-            'key_info': key_info,
-            'consistency_score': float(consistency_score),
             'is_property_related': is_property_related,
             'summary': summary,
-            'has_signatures': has_signatures,
-            'has_dates': has_dates,
-            'verification_score': float(verification_score)
         }
     except Exception as e:
-        logger.error(f"Error analyzing PDF content: {str(e)}")
         return {
-            'document_type': {'classification': 'unknown', 'confidence': 0.0},
-            'authenticity': {'assessment': 'could not verify', 'confidence': 0.0},
-            'key_info': {},
-            'consistency_score': 0.0,
             'is_property_related': False,
-            'summary': 'Could not analyze document',
-            'has_signatures': False,
-            'has_dates': False,
             'verification_score': 0.0,
             'error': str(e)
         }
-def check_document_consistency(document_text, property_data):
     try:
-        if not sentence_model:
-            logger.warning("Sentence model unavailable")
-            return 0.5
-        property_text = ' '.join([
-            property_data.get(key, '') for key in [
-                'property_name', 'property_type', 'address', 'city',
-                'state', 'market_value', 'sq_ft', 'bedrooms'
-            ]
-        ])
-        property_embedding = sentence_model.encode(property_text)
-        document_embedding = sentence_model.encode(document_text[:1000])
-        similarity = util.cos_sim(property_embedding, document_embedding)[0][0].item()
-        return max(0.0, min(1.0, float(similarity)))
     except Exception as e:
-        logger.error(f"Error checking document consistency: {str(e)}")
-        return 0.0
-def extract_document_key_info(text):
     try:
-        info = {}
-        patterns = {
-            'property_address': r'(?:property|premises|located at)[:\s]+([^\n.]+)',
-            'price': r'(?:price|value|amount)[:\s]+(?:Rs\.?|₹)?[\s]*([0-9,.]+)',
-            'date': r'(?:date|dated|executed on)[:\s]+([^\n.]+\d{4})',
-            'seller': r'(?:seller|grantor|owner)[:\s]+([^\n.]+)',
-            'buyer': r'(?:buyer|grantee|purchaser)[:\s]+([^\n.]+)',
-            'size': r'(?:area|size|extent)[:\s]+([0-9,.]+)[\s]*(?:sq\.?[\s]*(?:ft|feet))',
-            'registration_number': r'(?:registration|reg\.?|document)[\s]*(?:no\.?|number|#)[:\s]*([A-Za-z0-9\-/]+)'
         }
-        for key, pattern in patterns.items():
-            match = re.search(pattern, text, re.IGNORECASE)
-            if match:
-                info[key] = match.group(1).strip()
-        return info
     except Exception as e:
-        logger.error(f"Error extracting document key info: {str(e)}")
-        return {}

 import re
 from .model_loader import load_model
 from .logging_config import logger
+def extract_text_from_pdf(pdf_file):
+    """
+    Extract the plain text of every page of a PDF.
+    Args:
+        pdf_file: Binary file-like object; the bytes returned by its
+            ``read()`` are opened as an in-memory PDF stream.
+    Returns:
+        str: Concatenated page text with surrounding whitespace stripped,
+        or "" when the PDF cannot be opened or read (the error is logged).
+    """
+    # NOTE(review): ``fitz`` must be imported at module level; that import is
+    # not visible in this hunk - confirm it is still present.
+    doc = None
     try:
+        # Open the PDF from memory
+        doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
         text = ""
+        # Extract text from all pages
+        for page_num in range(len(doc)):
+            page = doc.load_page(page_num)
             text += page.get_text()
+        return text.strip()
     except Exception as e:
+        logger.error(f"Error extracting text from PDF: {str(e)}")
         return ""
+    finally:
+        # Release the document even when a page fails to load or extract
+        if doc is not None:
+            doc.close()
 def analyze_pdf_content(document_text, property_data):
+    """
+    Analyze extracted PDF text with keyword and regex heuristics to judge
+    whether it looks like a real-estate document.
+    Args:
+        document_text: Extracted text from PDF
+        property_data: Property information for cross-validation
+            (NOTE(review): accepted but never read in this function)
+    Returns:
+        dict: Analysis results. 'confidence' is capped at 1.0, while
+        'verification_score' comes from calculate_verification_score and is
+        on a 0-100 scale. 'model_used' is always 'static_fallback'. On an
+        exception the same keys are returned with neutral values plus an
+        'error' entry.
+    """
     try:
+        # Fewer than 10 non-whitespace-trimmed characters: nothing to analyze
+        if not document_text or len(document_text.strip()) < 10:
             return {
                 'is_property_related': False,
+                'confidence': 0.0,
+                'summary': 'Document too short or empty',
+                'key_info': {},
+                'verification_score': 0.0,
+                'document_type': 'Unknown',
+                'document_confidence': 0.0,
+                'authenticity_assessment': 'Unknown',
+                'authenticity_confidence': 0.0,
+                'contains_signatures': False,
+                'contains_dates': False,
+                'real_estate_indicators': [],
+                'legal_terms_found': [],
+                'model_used': 'static_fallback'
             }
+        # Comprehensive real estate keyword analysis
+        real_estate_keywords = {
+            'property_terms': [
+                'property', 'house', 'apartment', 'flat', 'villa', 'land', 'real estate',
+                'residential', 'commercial', 'industrial', 'plot', 'acre', 'square feet',
+                'sq ft', 'sqft', 'bedroom', 'bathroom', 'kitchen', 'living room',
+                'dining room', 'garage', 'parking', 'garden', 'balcony', 'terrace'
+            ],
+            'legal_terms': [
+                'title', 'deed', 'ownership', 'mortgage', 'loan', 'lease', 'rent',
+                'agreement', 'contract', 'sale', 'purchase', 'transfer', 'registration',
+                'encumbrance', 'lien', 'easement', 'zoning', 'permit', 'license',
+                'tax', 'assessment', 'valuation', 'appraisal', 'survey', 'boundary'
+            ],
+            'financial_terms': [
+                'price', 'value', 'cost', 'amount', 'payment', 'installment',
+                'down payment', 'interest', 'rate', 'principal', 'balance',
+                'insurance', 'premium', 'deposit', 'advance', 'rental', 'security'
+            ],
+            'location_terms': [
+                'address', 'location', 'street', 'road', 'avenue', 'lane',
+                'city', 'state', 'country', 'postal', 'zip', 'pincode',
+                'neighborhood', 'area', 'district', 'zone', 'sector', 'block'
+            ]
         }
+        text_lower = document_text.lower()
+        # Count keyword matches for each category
+        keyword_counts = {}
+        found_keywords = {}
+        for category, keywords in real_estate_keywords.items():
+            matches = []
+            for keyword in keywords:
+                # NOTE(review): plain substring test, so 'rent' also matches
+                # inside 'current' and 'rate' inside 'generate'
+                if keyword in text_lower:
+                    matches.append(keyword)
+            keyword_counts[category] = len(matches)
+            found_keywords[category] = matches
+        # Calculate overall confidence: reaches 1.0 once 30% of all listed
+        # keywords are present
+        total_keywords = sum(len(keywords) for keywords in real_estate_keywords.values())
+        total_matches = sum(keyword_counts.values())
+        confidence = min(1.0, total_matches / (total_keywords * 0.3))  # 30% threshold
+        # Determine the most likely document type from keyword overlap
+        document_type, document_confidence = classify_document_type(text_lower, found_keywords)
+        # Generate summary (model-based when available, extractive otherwise)
+        summary = generate_document_summary(document_text, document_type)
+        # Extract key information
+        key_info = extract_document_key_info(document_text)
+        # Check for signatures and dates
+        contains_signatures = detect_signatures(text_lower)
+        contains_dates = detect_dates(document_text)
+        # Assess authenticity
+        authenticity_assessment, authenticity_confidence = assess_document_authenticity(
+            document_text, contains_signatures, contains_dates, key_info
         )
+        # Calculate verification score
+        verification_score = calculate_verification_score(
+            confidence, document_confidence, authenticity_confidence,
+            contains_signatures, contains_dates, key_info
+        )
+        # Property related when keyword confidence is high enough or any
+        # document type was recognized
+        is_property_related = confidence > 0.2 or document_type != 'Unknown'
+        # Extract legal terms
+        legal_terms_found = found_keywords.get('legal_terms', [])
+        # Create real estate indicators list
+        real_estate_indicators = []
+        for category, matches in found_keywords.items():
+            if matches:
+                # First 3 matches per category, in keyword-list order
+                real_estate_indicators.extend(matches[:3])  # Top 3 from each category
         return {
             'is_property_related': is_property_related,
+            'confidence': confidence,
             'summary': summary,
+            'key_info': key_info,
+            'verification_score': verification_score,
+            'document_type': document_type,
+            'document_confidence': document_confidence,
+            'authenticity_assessment': authenticity_assessment,
+            'authenticity_confidence': authenticity_confidence,
+            'contains_signatures': contains_signatures,
+            'contains_dates': contains_dates,
+            'real_estate_indicators': real_estate_indicators,
+            'legal_terms_found': legal_terms_found,
+            'keyword_analysis': keyword_counts,
+            'model_used': 'static_fallback'
         }
     except Exception as e:
+        logger.error(f"Error in PDF content analysis: {str(e)}")
+        # Same keys as the success path (minus 'keyword_analysis') so callers
+        # can read the result without checking for failure first
         return {
             'is_property_related': False,
+            'confidence': 0.0,
+            'summary': f'Analysis error: {str(e)}',
+            'key_info': {},
             'verification_score': 0.0,
+            'document_type': 'Unknown',
+            'document_confidence': 0.0,
+            'authenticity_assessment': 'Unknown',
+            'authenticity_confidence': 0.0,
+            'contains_signatures': False,
+            'contains_dates': False,
+            'real_estate_indicators': [],
+            'legal_terms_found': [],
+            'model_used': 'static_fallback',
             'error': str(e)
         }
+def classify_document_type(text_lower, found_keywords):
+    """
+    Pick the document type whose keyword list best overlaps the text.
+    Args:
+        text_lower: Lower-cased document text to search.
+        found_keywords: Not used by the scoring; kept for the call signature.
+    Returns:
+        tuple: (document type name, confidence). The confidence is the
+        type's base confidence scaled by the share of its keywords found;
+        ('Unknown', 0.0) when no keyword of any type occurs.
+    """
+    # (type name, base confidence, keywords); on equal scores the earlier
+    # entry wins because only a strictly higher score replaces the leader
+    type_profiles = [
+        ('Property Title Deed', 0.9,
+         ['title', 'deed', 'ownership', 'property', 'owner']),
+        ('Mortgage Document', 0.85,
+         ['mortgage', 'loan', 'bank', 'lender', 'borrower', 'principal', 'interest']),
+        ('Lease Agreement', 0.8,
+         ['lease', 'rent', 'tenant', 'landlord', 'rental', 'agreement']),
+        ('Sale Contract', 0.8,
+         ['sale', 'purchase', 'buyer', 'seller', 'contract', 'agreement']),
+        ('Tax Assessment', 0.75,
+         ['tax', 'assessment', 'valuation', 'appraisal', 'property tax']),
+        ('Building Permit', 0.7,
+         ['permit', 'building', 'construction', 'approval', 'zoning']),
+        ('Property Survey', 0.7,
+         ['survey', 'boundary', 'measurement', 'plot', 'dimension']),
+        ('Insurance Document', 0.65,
+         ['insurance', 'policy', 'premium', 'coverage', 'claim']),
+    ]
+    winner, winner_score = 'Unknown', 0.0
+    for type_name, base_confidence, keywords in type_profiles:
+        hit_count = len([kw for kw in keywords if kw in text_lower])
+        if not hit_count:
+            continue
+        hit_ratio = hit_count / len(keywords)
+        score = base_confidence * hit_ratio
+        if score > winner_score:
+            winner, winner_score = type_name, score
+    return winner, winner_score
+def generate_document_summary(document_text, document_type):
+    """
+    Summarize a document, preferring the summarization model and falling
+    back to a short extractive summary.
+    Args:
+        document_text: Full document text.
+        document_type: Type label; its words are used to pick relevant
+            sentences in the extractive fallback ('Unknown' disables that).
+    Returns:
+        str: The model summary when the model is usable and returns
+        non-empty text; otherwise up to three selected sentences capped at
+        300 characters, or a fixed message when the text is too short or
+        summarization fails.
+    """
     try:
+        # Try to use summarization model if available
+        try:
+            summarizer = load_model("summarization")
+            if hasattr(summarizer, 'fallback_used') and not summarizer.fallback_used:
+                # Only the first 1000 characters are sent to the model
+                summary_result = summarizer(document_text[:1000], max_length=150, min_length=50)
+                if isinstance(summary_result, list) and len(summary_result) > 0:
+                    model_summary = summary_result[0].get('summary_text', '')
+                    # An empty model answer is not a usable summary; fall
+                    # through to the extractive path instead of returning ''
+                    if model_summary and model_summary.strip():
+                        return model_summary.strip()
+        except Exception as e:
+            logger.warning(f"Summarization model failed: {str(e)}")
+        # Fallback to extractive summarization
+        sentences = document_text.split('.')
+        sentences = [s.strip() for s in sentences if len(s.strip()) > 20]
+        if not sentences:
+            return "Document contains insufficient text for summarization."
+        # Select key sentences based on document type
+        key_sentences = []
+        if document_type != 'Unknown':
+            # Look for sentences containing document type keywords
+            type_keywords = document_type.lower().split()
+            for sentence in sentences:
+                if any(keyword in sentence.lower() for keyword in type_keywords):
+                    key_sentences.append(sentence)
+                    if len(key_sentences) >= 2:
+                        break
+        # If no type-specific sentences, take first few meaningful sentences
+        if not key_sentences:
+            key_sentences = sentences[:3]
+        # Combine sentences
+        summary = '. '.join(key_sentences) + '.'
+        # Truncate if too long
+        if len(summary) > 300:
+            summary = summary[:297] + '...'
+        return summary
     except Exception as e:
+        logger.error(f"Error generating summary: {str(e)}")
+        return "Summary generation failed."
+def extract_document_key_info(document_text):
+    """
+    Pull addresses, dates, amounts, phone numbers, basic property details
+    and candidate names out of the text with regular expressions.
+    Args:
+        document_text: Document text to scan.
+    Returns:
+        dict: Only the keys that matched: 'addresses' (up to 3), 'dates'
+        (up to 5), 'amounts' (up to 5), 'phone_numbers' (up to 3),
+        'bedrooms', 'bathrooms', 'square_feet', 'acres' (first match each)
+        and 'names' (up to 5). If a regex step raises, the error is logged
+        and whatever was collected so far is returned.
+    """
+    key_info = {}
     try:
+        # Extract addresses; the first pattern that matches anything wins
+        address_patterns = [
+            r'\b\d+\s+[A-Za-z\s]+(?:Street|St|Road|Rd|Avenue|Ave|Lane|Ln|Drive|Dr|Boulevard|Blvd)\b',
+            r'\b[A-Za-z\s]+,\s*[A-Za-z\s]+,\s*[A-Z]{2}\s*\d{5}\b'
+        ]
+        for pattern in address_patterns:
+            matches = re.findall(pattern, document_text, re.IGNORECASE)
+            if matches:
+                key_info['addresses'] = matches[:3]  # Top 3 addresses
+                break
+        # Extract dates
+        date_patterns = [
+            r'\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b',
+            r'\b\d{4}[/-]\d{1,2}[/-]\d{1,2}\b',
+            r'\b(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},?\s+\d{4}\b'
+        ]
+        dates = []
+        for pattern in date_patterns:
+            dates.extend(re.findall(pattern, document_text, re.IGNORECASE))
+        if dates:
+            key_info['dates'] = dates[:5]  # Top 5 dates
+        # Extract amounts/money
+        amount_patterns = [
+            r'\$\d{1,3}(?:,\d{3})*(?:\.\d{2})?',
+            r'₹\d{1,3}(?:,\d{3})*(?:\.\d{2})?',
+            # The unit must be a whole word and the number must start at a
+            # word boundary, so "5 rooms" is not read as "5 r" and
+            # "150000 rupees" is not cut down to "000 rupees"
+            r'\b\d+(?:,\d+)*(?:\.\d{1,2})?\s*(?:dollars?|rupees?|rs)\b',
+        ]
+        amounts = []
+        for pattern in amount_patterns:
+            amounts.extend(re.findall(pattern, document_text, re.IGNORECASE))
+        if amounts:
+            key_info['amounts'] = amounts[:5]  # Top 5 amounts
+        # Extract phone numbers (10 digits, optional '-' or '.' separators)
+        phone_pattern = r'\b\d{3}[-.]?\d{3}[-.]?\d{4}\b'
+        phones = re.findall(phone_pattern, document_text)
+        if phones:
+            key_info['phone_numbers'] = phones[:3]  # Top 3 phone numbers
+        # Extract property details
+        property_patterns = {
+            'bedrooms': r'\b(\d+)\s*(?:bedroom|bed|br)\b',
+            'bathrooms': r'\b(\d+)\s*(?:bathroom|bath|ba)\b',
+            'square_feet': r'\b(\d{1,3}(?:,\d{3})*)\s*(?:square\s*feet|sq\s*ft|sqft)\b',
+            'acres': r'\b(\d+(?:\.\d+)?)\s*acres?\b'
         }
+        for key, pattern in property_patterns.items():
+            matches = re.findall(pattern, document_text, re.IGNORECASE)
+            if matches:
+                key_info[key] = matches[0]  # First match
+        # Extract names: any two adjacent capitalized words, so ordinary
+        # title-cased phrases are captured as well as person names
+        name_pattern = r'\b[A-Z][a-z]+\s+[A-Z][a-z]+\b'
+        names = re.findall(name_pattern, document_text)
+        if names:
+            key_info['names'] = names[:5]  # Top 5 names
     except Exception as e:
+        logger.warning(f"Error extracting key info: {str(e)}")
+    return key_info
+def detect_signatures(text_lower):
+    """
+    Report whether the text mentions signing, witnessing, notarization or
+    a legal representative.
+    This inspects wording only; it does not detect a drawn or scanned
+    signature.
+    Args:
+        text_lower: Lower-cased document text.
+    Returns:
+        bool: True when any indicator occurs as a whole word.
+    """
+    # Whole-word matching keeps 'design' or 'assignment' from counting as
+    # 'sign', while 'undersigned' and 'countersigned' are listed explicitly
+    signature_pattern = (
+        r'\b(?:(?:under|counter)?sign(?:s|ed|er|ers|ing|ature|atures|atory|atories)?'
+        r'|witness(?:es|ed)?'
+        r'|notar(?:y|ies|ized|ised)'
+        r'|attorneys?|lawyers?|agents?)\b'
+    )
+    return re.search(signature_pattern, text_lower) is not None
+def detect_dates(document_text):
+    """
+    Tell whether the text contains at least one recognizable date.
+    Args:
+        document_text: Document text to scan.
+    Returns:
+        bool: True when a numeric date (e.g. 12/05/2021 or 2021-05-12) or
+        a month-name date (e.g. March 5, 2020) is found, case-insensitively.
+    """
+    short_numeric = r'\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b'
+    year_first = r'\b\d{4}[/-]\d{1,2}[/-]\d{1,2}\b'
+    month_name = r'\b(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},?\s+\d{4}\b'
+    return any(
+        re.search(date_regex, document_text, re.IGNORECASE) is not None
+        for date_regex in (short_numeric, year_first, month_name)
+    )
+def assess_document_authenticity(document_text, has_signatures, has_dates, key_info):
+    """
+    Score how complete a document looks and map the score to a label.
+    Args:
+        document_text: Document text; more than 500 characters earns 0.1.
+        has_signatures: Whether signature wording was detected (0.3).
+        has_dates: Whether a date was detected (0.2).
+        key_info: Extracted details; non-empty 'addresses' (0.2),
+            'amounts' (0.1) and 'names' (0.1) add to the score.
+    Returns:
+        tuple: (label, score) where the label is 'Authentic' from 0.7,
+        'Likely Authentic' from 0.4, 'Suspicious' from 0.2 and
+        'Potentially Fake' below that.
+    """
+    # (condition, weight) pairs, added in this order
+    weighted_checks = (
+        (has_signatures, 0.3),
+        (has_dates, 0.2),
+        (key_info.get('addresses'), 0.2),
+        (key_info.get('amounts'), 0.1),
+        (key_info.get('names'), 0.1),
+        (len(document_text) > 500, 0.1),
+    )
+    total = 0.0
+    for passed, weight in weighted_checks:
+        if passed:
+            total += weight
+    # Highest threshold first; the first one reached decides the label
+    label_floors = (
+        (0.7, 'Authentic'),
+        (0.4, 'Likely Authentic'),
+        (0.2, 'Suspicious'),
+    )
+    for floor, label in label_floors:
+        if total >= floor:
+            return label, total
+    return 'Potentially Fake', total
+def calculate_verification_score(confidence, document_confidence, authenticity_confidence, has_signatures, has_dates, key_info):
+    """
+    Blend the confidence values and presence flags into one score.
+    Args:
+        confidence: Keyword confidence (weight 0.3).
+        document_confidence: Document type confidence (weight 0.2).
+        authenticity_confidence: Authenticity score (weight 0.2).
+        has_signatures: Adds 0.1 when true.
+        has_dates: Adds 0.1 when true.
+        key_info: Non-empty 'addresses' and 'amounts' add 0.05 each.
+    Returns:
+        float: The weighted sum times 100, capped at 100.0.
+    """
+    weighted_inputs = (
+        (confidence, 0.3),
+        (document_confidence, 0.2),
+        (authenticity_confidence, 0.2),
+    )
+    fraction = 0.0
+    for value, weight in weighted_inputs:
+        fraction += value * weight
+    # Flat bonuses for features that were found at all
+    bonuses = (
+        (has_signatures, 0.1),
+        (has_dates, 0.1),
+        (key_info.get('addresses'), 0.05),
+        (key_info.get('amounts'), 0.05),
+    )
+    for present, bonus in bonuses:
+        if present:
+            fraction += bonus
+    return min(100.0, fraction * 100)
+def check_document_consistency(document_text, property_data):
+    """
+    Compare the document text with the submitted property data.
+    Args:
+        document_text: Document text to check.
+        property_data: Mapping that may hold 'address', 'property_type'
+            and 'sq_ft'; 'sq_ft' may be a number or text such as "1,200".
+    Returns:
+        dict: 'is_consistent' (score above 0.6), 'confidence' (score capped
+        at 1.0), 'issues' (list of mismatch messages) and 'model_used'.
+        The score starts at 0.5 and gains 0.2 for an address match and 0.1
+        each for a property type and size match. On an unexpected error
+        'is_consistent' is False and 'confidence' is 0.0.
+    """
+    try:
+        if not property_data:
+            return {
+                'is_consistent': True,
+                'confidence': 0.5,
+                'issues': [],
+                'model_used': 'static_fallback'
+            }
+        consistency_score = 0.5  # Base score
+        issues = []
+        # Check address consistency
+        if property_data.get('address'):
+            property_address = property_data['address'].lower()
+            doc_addresses = re.findall(r'\b\d+\s+[A-Za-z\s]+(?:Street|St|Road|Rd|Avenue|Ave)\b', document_text, re.IGNORECASE)
+            for doc_addr in doc_addresses:
+                if any(word in doc_addr.lower() for word in property_address.split()):
+                    consistency_score += 0.2
+                    break
+            else:
+                # Also reached when the document contains no street address
+                issues.append("Address mismatch between document and property data")
+        # Check property type consistency
+        if property_data.get('property_type'):
+            property_type = property_data['property_type'].lower()
+            if property_type in document_text.lower():
+                consistency_score += 0.1
+            else:
+                issues.append("Property type mismatch")
+        # Check size consistency
+        if property_data.get('sq_ft'):
+            # \d+ (not \d{1,3}) so ungrouped sizes such as "1200 sq ft" match
+            size_matches = re.findall(r'\b(\d+(?:,\d+)*)\s*(?:square\s*feet|sq\s*ft|sqft)\b', document_text, re.IGNORECASE)
+            if size_matches:
+                try:
+                    # Accept "1,200" and "1200.5" as well as plain numbers
+                    property_size = float(str(property_data['sq_ft']).replace(',', ''))
+                    doc_size = float(size_matches[0].replace(',', ''))
+                except ValueError:
+                    # An unparseable size is not evidence of a mismatch, so
+                    # skip this check and keep the address/type findings
+                    logger.warning(f"Could not parse property size: {property_data['sq_ft']}")
+                else:
+                    if abs(doc_size - property_size) < 100:  # Within 100 sq ft
+                        consistency_score += 0.1
+                    else:
+                        issues.append("Property size mismatch")
+        return {
+            'is_consistent': consistency_score > 0.6,
+            'confidence': min(1.0, consistency_score),
+            'issues': issues,
+            'model_used': 'static_fallback'
+        }
+    except Exception as e:
+        logger.error(f"Error checking document consistency: {str(e)}")
+        return {
+            'is_consistent': False,
+            'confidence': 0.0,
+            'issues': [f"Consistency check error: {str(e)}"],
+            'model_used': 'static_fallback'
+        }

models/performance_optimizer.py CHANGED Viewed

@@ -95,7 +95,7 @@ def optimize_model_loading():
     try:
         from .model_loader import load_model
-        # Pre-load models in background threads
         import concurrent.futures
         import threading
@@ -108,18 +108,16 @@ def optimize_model_loading():
                 logger.warning(f"Failed to pre-load model {model_name}: {str(e)}")
                 return None
-        # Load models in parallel
-        with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
             model_names = [
-                "zero-shot-classification",
-                "summarization",
-                "text-classification",
-                "text-generation"
             ]
             futures = {executor.submit(load_model_async, name): name for name in model_names}
-            for future in concurrent.futures.as_completed(futures, timeout=60):
                 model_name = futures[future]
                 try:
                     future.result()

     try:
         from .model_loader import load_model
+        # Pre-load only essential models in background threads
         import concurrent.futures
         import threading
                 logger.warning(f"Failed to pre-load model {model_name}: {str(e)}")
                 return None
+        # Load only essential models in parallel with timeout
+        with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:  # Reduced workers
             model_names = [
+                "zero-shot-classification",  # Most important
+                "summarization"  # Second most important
             ]
             futures = {executor.submit(load_model_async, name): name for name in model_names}
+            for future in concurrent.futures.as_completed(futures, timeout=30):  # 30 second timeout
                 model_name = futures[future]
                 try:
                     future.result()

models/property_relation.py CHANGED Viewed

@@ -14,10 +14,23 @@ def check_if_property_related(text):
             return {
                 'is_related': False,
                 'confidence': 0.0,
-                'error': 'No text provided'
             }
-        classifier = load_model("zero-shot-classification", "typeform/mobilebert-uncased-mnli")
         result = classifier(text[:1000], ["property-related", "non-property-related"])
         # Defensive: ensure result structure
         labels = result.get('labels', [])
         scores = result.get('scores', [])
@@ -26,17 +39,23 @@ def check_if_property_related(text):
             return {
                 'is_related': False,
                 'confidence': 0.0,
-                'error': 'Model output error'
             }
         is_related = labels[0] == "property-related"
         return {
             'is_related': is_related,
-            'confidence': float(scores[0]) if is_related else float(scores[1])
         }
     except Exception as e:
         logger.error(f"Error checking property relation: {str(e)}")
         return {
             'is_related': False,
             'confidence': 0.0,
-            'error': str(e)
         }

             return {
                 'is_related': False,
                 'confidence': 0.0,
+                'error': 'No text provided',
+                'model_used': 'static_fallback'
             }
+        try:
+            classifier = load_model("zero-shot-classification")  # Use standard model instead of typeform
+        except Exception as e:
+            logger.error(f"Error loading model in property relation: {str(e)}")
+            return {
+                'is_related': False,
+                'confidence': 0.0,
+                'error': f'Model loading error: {str(e)}',
+                'model_used': 'static_fallback'
+            }
         result = classifier(text[:1000], ["property-related", "non-property-related"])
         # Defensive: ensure result structure
         labels = result.get('labels', [])
         scores = result.get('scores', [])
             return {
                 'is_related': False,
                 'confidence': 0.0,
+                'error': 'Model output error',
+                'model_used': 'static_fallback'
             }
         is_related = labels[0] == "property-related"
+        model_used = getattr(classifier, 'fallback_model', 'primary_model')
         return {
             'is_related': is_related,
+            'confidence': float(scores[0]) if is_related else float(scores[1]),
+            'model_used': model_used
         }
     except Exception as e:
         logger.error(f"Error checking property relation: {str(e)}")
         return {
             'is_related': False,
             'confidence': 0.0,
+            'error': str(e),
+            'model_used': 'static_fallback'
         }

models/property_summary.py CHANGED Viewed

@@ -124,69 +124,85 @@ Property Summary:"""
     return prompt
 def generate_dynamic_summary_with_slm(data):
-    """Generate property summary using CPU-based Small Language Model"""
     try:
         # Validate and format data
         data = validate_and_format_data(data)
-        # Create the prompt
-        prompt = create_property_prompt(data)
-        # Try best SLMs in order
-        slm_models = ["TinyLlama/TinyLlama-1.1B-Chat-v1.0", "gpt2", "distilgpt2"]
-        for model_name in slm_models:
-            try:
-                # Try to load a text generation model
-                generator = load_model("text-generation", model_name)
-                # Generate the summary
-                if hasattr(generator, 'task_type') and generator.task_type == "text-generation":
-                    # Using fallback generator - it will handle the prompt parsing
-                    result = generator(prompt, max_length=512, do_sample=True, temperature=0.7)
-                    summary = result[0]['generated_text'] if result else ""
-                else:
-                    # Using actual model
-                    result = generator(
-                        prompt,
-                        max_length=512,
-                        do_sample=True,
-                        temperature=0.7,
-                        top_p=0.9,
-                        repetition_penalty=1.1
-                    )
-                    summary = result[0]['generated_text'] if result else ""
-                # Clean up the generated text
-                if summary:
-                    # Remove the prompt from the beginning if present
-                    if prompt in summary:
-                        summary = summary.replace(prompt, "").strip()
-                    # Clean up any remaining artifacts
-                    summary = re.sub(r'\n+', '\n', summary)
-                    summary = re.sub(r'\s+', ' ', summary)
-                    summary = summary.strip()
-                    # Ensure it's not too long
-                    if len(summary) > 512:
-                        summary = summary[:512].rsplit(' ', 1)[0] + "..."
-                    return summary
-                else:
-                    raise Exception("No text generated")
-            except Exception as model_error:
-                logger.warning(f"SLM model {model_name} failed: {str(model_error)}")
-                continue
-        # If all SLMs fail, use fallback
-        return generate_fallback_summary(data)
     except Exception as e:
         logger.error(f"Error in dynamic summary generation: {str(e)}")
-        return generate_fallback_summary(data)
-def generate_fallback_summary(data):
-    """Enhanced fallback summary generation when SLM fails"""
     try:
         data = validate_and_format_data(data)
@@ -223,80 +239,56 @@ def generate_fallback_summary(data):
         # Pricing information
         if data.get('market_value'):
-            price = format_price(data['market_value'])
-            summary_parts.append(f"Priced at {price}, this property offers excellent value for money in today's competitive market.")
-        # Year built information
-        if data.get('year_built') and data['year_built'] != 'N/A':
-            summary_parts.append(f"Built in {data['year_built']}, this property combines modern amenities with solid construction.")
-        # Amenities section
-        if data.get('amenities'):
-            amenities = data['amenities'][:5]  # Limit to 5 amenities
-            if amenities:
-                summary_parts.append(f"Residents can enjoy access to {', '.join(amenities)}.")
-        # Nearby landmarks
         landmarks = data.get('nearby_landmarks', '')
         if landmarks:
-            if isinstance(landmarks, str):
-                landmarks_list = [l.strip() for l in landmarks.split(',') if l.strip()][:3]
-                if landmarks_list:
-                    summary_parts.append(f"The property is conveniently located near {', '.join(landmarks_list)}.")
-        # Possession information
-        if data.get('possession_date') and data['possession_date'] != 'Immediate':
-            summary_parts.append(f"Ready for possession from {data['possession_date']}.")
-        # Property description
-        if data.get('property_description'):
-            desc = data['property_description'][:200] + "..." if len(data['property_description']) > 200 else data['property_description']
-            summary_parts.append(f"Property highlights: {desc}")
-        # Call to action
-        summary_parts.append("Don't miss this opportunity to own a piece of prime real estate. Contact us today for a detailed viewing and exclusive offers.")
-        # Combine all parts
-        summary = " ".join(summary_parts)
-        # Ensure it's around 512 words
-        words = summary.split()
-        if len(words) > 512:
-            summary = " ".join(words[:512]) + "..."
-        return summary
     except Exception as e:
-        logger.error(f"Error in fallback summary generation: {str(e)}")
-        return "Property summary unavailable. Please contact us for more details."
 def generate_property_summary(data):
-    """Main function to generate property summary using SLM"""
     try:
         # Validate input data
         if not data or not isinstance(data, dict):
             return "A beautiful property with excellent features and prime location. Contact us for detailed information and exclusive offers."
-        # Create a more robust fallback summary for any data
         try:
             # Use the new dynamic SLM-based approach
             summary = generate_dynamic_summary_with_slm(data)
             # Ensure summary is a proper string
             if not summary or not isinstance(summary, str):
-                summary = generate_fallback_summary(data)
             if not summary or not summary.strip():
-                summary = generate_fallback_summary(data)
             # Final fallback - always return something meaningful
             if not summary or not summary.strip():
-                summary = "A beautiful property with excellent features and prime location. Contact us for detailed information and exclusive offers."
             # Ensure it's a string and clean it up
             summary = str(summary).strip()
             if summary == '[object Object]' or summary == 'null' or summary == 'undefined':
-                summary = generate_fallback_summary(data)
             # If still no valid summary, create a basic one
             if not summary or len(summary) < 50:
@@ -305,12 +297,12 @@ def generate_property_summary(data):
             return summary
         except Exception as e:
-            logger.error(f"Error in summary generation: {str(e)}")
             return create_basic_summary(data)
     except Exception as e:
         logger.error(f"Error generating property summary: {str(e)}")
-        return "A beautiful property with excellent features and prime location. Contact us for detailed information and exclusive offers."
 def create_basic_summary(data):
     """Create a basic summary even for invalid data"""

     return prompt
 def generate_dynamic_summary_with_slm(data):
+    """Summarize a listing with the summarization model, else a template.
+
+    The listing is validated, rendered to plain text with
+    ``create_property_description_text`` and handed to the object returned
+    by ``load_model("summarization")``.  The model's text is returned only
+    when that object exposes a falsy ``fallback_used`` attribute and the
+    stripped ``summary_text`` is longer than 20 characters.  Every other
+    outcome, including any exception, returns
+    ``generate_enhanced_fallback_summary(data)``.
+    """
     try:
         # Validate and format data
         data = validate_and_format_data(data)
+        description = create_property_description_text(data)
+        try:
+            model = load_model("summarization")
+            # An object without a ``fallback_used`` flag is skipped, exactly
+            # like one whose flag is truthy.
+            if not getattr(model, 'fallback_used', True):
+                output = model(description, max_length=150, min_length=50, do_sample=False)
+                first = output[0] if isinstance(output, list) and output else {}
+                candidate = first.get('summary_text', '')
+                if candidate:
+                    cleaned = candidate.strip()
+                    if len(cleaned) > 20:
+                        return cleaned
+            # Model skipped, or its summary was empty or too short.
+            return generate_enhanced_fallback_summary(data)
+        except Exception as model_error:
+            logger.warning(f"Summarization model failed: {str(model_error)}")
+            return generate_enhanced_fallback_summary(data)
     except Exception as e:
         logger.error(f"Error in dynamic summary generation: {str(e)}")
+        return generate_enhanced_fallback_summary(data)
+def create_property_description_text(data):
+    """Render listing fields as plain sentences for the summarization model.
+
+    Args:
+        data: Listing dict.  Keys read: address, city, state, amenities,
+            nearby_landmarks, property_type, status, bedrooms, bathrooms,
+            sq_ft, market_value, year_built, parking_spaces.  Missing keys
+            are replaced by the placeholder wording used below.
+
+    Returns:
+        One string of space-joined sentences.  If building it fails, a
+        single generic sentence is returned instead; the error is logged.
+    """
+    try:
+        # Location: keep only the parts that are present.
+        location_parts = [str(data[key]) for key in ('address', 'city', 'state') if data.get(key)]
+        location = ', '.join(location_parts) if location_parts else 'Prime location'
+        # Amenities: at most five.  A comma-separated string is split first,
+        # otherwise slicing it would yield single characters.
+        amenities = data.get('amenities') or []
+        if isinstance(amenities, str):
+            amenities = [item.strip() for item in amenities.split(',') if item.strip()]
+        # str() keeps one non-string entry from discarding the whole description.
+        amenities_str = ', '.join(str(item) for item in amenities[:5]) or 'Modern amenities'
+        # Landmarks: at most three, parsed from a comma-separated string.
+        landmarks = data.get('nearby_landmarks', '')
+        landmarks_list = []
+        if isinstance(landmarks, str):
+            landmarks_list = [part.strip() for part in landmarks.split(',') if part.strip()]
+        # Input such as " , " parses to nothing; use the placeholder rather
+        # than emitting "The property is near ."
+        landmarks_str = ', '.join(landmarks_list[:3]) or 'Convenient location'
+        # Create comprehensive property description
+        description_parts = [
+            f"This is a {data.get('property_type', 'property')} located in {location}.",
+            f"The property is currently {data.get('status', 'available')} for sale.",
+            f"It features {data.get('bedrooms', '0')} bedrooms and {data.get('bathrooms', '0')} bathrooms.",
+            f"The total area is {data.get('sq_ft', '0')} square feet.",
+            f"The property is priced at {format_price(data.get('market_value', '0'))}.",
+            f"It includes amenities such as {amenities_str}.",
+            f"The property is near {landmarks_str}.",
+            f"It was built in {data.get('year_built', 'recent years')}.",
+            f"The property offers {data.get('parking_spaces', '0')} parking spaces.",
+            "This is an excellent investment opportunity in a prime location with modern facilities and strategic connectivity."
+        ]
+        return " ".join(description_parts)
+    except Exception as e:
+        logger.error(f"Error creating property description text: {str(e)}")
+        # ``data`` itself may be what failed (e.g. it is not a dict), so it is
+        # read defensively here instead of raising from the error handler.
+        safe = data if isinstance(data, dict) else {}
+        return f"This is a {safe.get('property_type', 'property')} located in {safe.get('city', 'prime location')} with excellent features and amenities."
+def generate_enhanced_fallback_summary(data):
+    """Enhanced fallback summary generation with better AI-like text"""
     try:
         data = validate_and_format_data(data)
         # Pricing information
         if data.get('market_value'):
+            price_str = format_price(data['market_value'])
+            summary_parts.append(f"Priced at {price_str}, this property offers excellent value for money and represents a sound investment opportunity.")
+        # Amenities and facilities
+        amenities = data.get('amenities', [])
+        if amenities:
+            amenities_str = ', '.join(amenities[:3])
+            summary_parts.append(f"The property includes modern amenities such as {amenities_str}.")
+        # Location benefits
         landmarks = data.get('nearby_landmarks', '')
         if landmarks:
+            summary_parts.append(f"Conveniently located near {landmarks}, this property offers easy access to essential facilities and transportation.")
+        # Closing statement
+        summary_parts.append("Perfect for families and investors alike, this property combines modern amenities with strategic location. Don't miss this opportunity to own a piece of prime real estate. Contact us today for a detailed viewing and exclusive offers.")
+        return " ".join(summary_parts)
     except Exception as e:
+        logger.error(f"Error in enhanced fallback summary: {str(e)}")
+        return create_basic_summary(data)
 def generate_property_summary(data):
+    """Main function to generate property summary using AI model"""
     try:
         # Validate input data
         if not data or not isinstance(data, dict):
             return "A beautiful property with excellent features and prime location. Contact us for detailed information and exclusive offers."
+        # Try to use AI model for summary generation
         try:
             # Use the new dynamic SLM-based approach
             summary = generate_dynamic_summary_with_slm(data)
             # Ensure summary is a proper string
             if not summary or not isinstance(summary, str):
+                summary = generate_enhanced_fallback_summary(data)
             if not summary or not summary.strip():
+                summary = generate_enhanced_fallback_summary(data)
             # Final fallback - always return something meaningful
             if not summary or not summary.strip():
+                summary = create_basic_summary(data)
             # Ensure it's a string and clean it up
             summary = str(summary).strip()
             if summary == '[object Object]' or summary == 'null' or summary == 'undefined':
+                summary = generate_enhanced_fallback_summary(data)
             # If still no valid summary, create a basic one
             if not summary or len(summary) < 50:
             return summary
         except Exception as e:
+            logger.error(f"Error in AI summary generation: {str(e)}")
             return create_basic_summary(data)
     except Exception as e:
         logger.error(f"Error generating property summary: {str(e)}")
+        return create_basic_summary(data)
 def create_basic_summary(data):
     """Create a basic summary even for invalid data"""

models/suggestions.py CHANGED Viewed

@@ -23,12 +23,13 @@ def generate_suggestions(text, data=None):
             'improvements': [],
             'warnings': [],
             'recommendations': [],
-            'confidence': 0.0
         }
         # Load model for analysis
         try:
-            classifier = load_model("zero-shot-classification", "typeform/mobilebert-uncased-mnli")
         except Exception as e:
             logger.error(f"Error loading model in suggestions: {str(e)}")
             suggestions['warnings'].append({'type': 'error', 'confidence': 0.0, 'details': {'title': 'Model Error', 'message': f'Model loading error: {str(e)}', 'priority': 'high'}})
@@ -50,111 +51,127 @@ def generate_suggestions(text, data=None):
         # Analyze text with context
         context = f"{text} property_data:{str(data) if data else ''}"
         try:
-            result = classifier(context, categories, multi_label=True)
         except Exception as e:
-            logger.error(f"Error in suggestions model inference: {str(e)}")
-            suggestions['warnings'].append({'type': 'error', 'confidence': 0.0, 'details': {'title': 'Model Error', 'message': f'Model inference error: {str(e)}', 'priority': 'high'}})
-            return suggestions
-        # Process results
-        for label, score in zip(result['labels'], result['scores']):
-            if score > 0.3:  # Only include high confidence suggestions
-                suggestion = {
-                    'type': label,
-                    'confidence': float(score),
-                    'details': generate_suggestion_details(label, text, data)
-                }
-                if 'improvement' in label or 'update' in label:
-                    suggestions['improvements'].append(suggestion)
-                elif 'warning' in label or 'issue' in label:
-                    suggestions['warnings'].append(suggestion)
-                else:
-                    suggestions['recommendations'].append(suggestion)
-        # Calculate overall confidence
-        if result['scores']:
-            suggestions['confidence'] = float(max(result['scores']))
         return suggestions
     except Exception as e:
         logger.error(f"Error generating suggestions: {str(e)}")
         return {
             'improvements': [],
-            'warnings': [{'type': 'error', 'confidence': 0.0, 'details': {'title': 'Error', 'message': f'Error generating suggestions: {str(e)}', 'priority': 'high'}}],
             'recommendations': [],
             'confidence': 0.0,
-            'error': str(e)
         }
 def generate_suggestion_details(suggestion_type, text, data):
-    """Generate detailed suggestions based on the type."""
     try:
-        details = {
-            'property description improvement': {
-                'title': 'Improve Property Description',
-                'message': 'Add more detailed information about the property features and amenities.',
-                'priority': 'medium'
-            },
-            'price adjustment needed': {
-                'title': 'Review Property Price',
-                'message': 'Consider adjusting the price based on market conditions and property specifications.',
-                'priority': 'high'
-            },
-            'documentation required': {
-                'title': 'Additional Documentation Needed',
-                'message': 'Please provide more property-related documents for verification.',
-                'priority': 'high'
-            },
-            'verification needed': {
-                'title': 'Property Verification Required',
-                'message': 'Additional verification steps are needed for property authenticity.',
-                'priority': 'high'
-            },
-            'legal compliance issue': {
-                'title': 'Legal Compliance Check',
-                'message': 'Review property legal documentation and compliance status.',
-                'priority': 'high'
-            },
-            'location verification needed': {
-                'title': 'Location Verification',
-                'message': 'Verify property location details and coordinates.',
-                'priority': 'medium'
-            },
-            'property specification update': {
-                'title': 'Update Property Specifications',
-                'message': 'Review and update property specifications for accuracy.',
-                'priority': 'medium'
-            },
-            'image quality improvement': {
-                'title': 'Improve Image Quality',
-                'message': 'Add more high-quality images of the property.',
-                'priority': 'low'
-            },
-            'market value adjustment': {
-                'title': 'Market Value Review',
-                'message': 'Review and adjust market value based on current market conditions.',
-                'priority': 'high'
-            },
-            'contact information update': {
-                'title': 'Update Contact Information',
-                'message': 'Ensure contact information is complete and up-to-date.',
-                'priority': 'low'
             }
-        }
-        return details.get(suggestion_type, {
-            'title': 'General Suggestion',
-            'message': 'Review property listing for improvements.',
-            'priority': 'medium'
-        })
     except Exception as e:
         logger.error(f"Error generating suggestion details: {str(e)}")
-        return {
-            'title': 'Error',
-            'message': 'Could not generate detailed suggestion.',
-            'priority': 'low'
-        }

             'improvements': [],
             'warnings': [],
             'recommendations': [],
+            'confidence': 0.0,
+            'model_used': 'static_fallback'
         }
         # Load model for analysis
         try:
+            classifier = load_model("zero-shot-classification")  # Use standard model instead of typeform
         except Exception as e:
             logger.error(f"Error loading model in suggestions: {str(e)}")
             suggestions['warnings'].append({'type': 'error', 'confidence': 0.0, 'details': {'title': 'Model Error', 'message': f'Model loading error: {str(e)}', 'priority': 'high'}})
         # Analyze text with context
         context = f"{text} property_data:{str(data) if data else ''}"
         try:
+            result = classifier(context[:1000], categories, multi_label=True)
+            # Process results and generate suggestions
+            for label, score in zip(result['labels'], result['scores']):
+                if score > 0.3:  # Only include if confidence is above 30%
+                    suggestion_details = generate_suggestion_details(label, text, data)
+                    if suggestion_details:
+                        if 'improvement' in label.lower():
+                            suggestions['improvements'].append(suggestion_details)
+                        elif 'warning' in label.lower() or 'issue' in label.lower():
+                            suggestions['warnings'].append(suggestion_details)
+                        else:
+                            suggestions['recommendations'].append(suggestion_details)
+            # Calculate overall confidence
+            if result['scores']:
+                suggestions['confidence'] = max(result['scores'])
+            suggestions['model_used'] = getattr(classifier, 'fallback_model', 'primary_model')
         except Exception as e:
+            logger.error(f"Error in suggestions analysis: {str(e)}")
+            suggestions['warnings'].append({'type': 'error', 'confidence': 0.0, 'details': {'title': 'Analysis Error', 'message': f'Analysis error: {str(e)}', 'priority': 'medium'}})
         return suggestions
     except Exception as e:
         logger.error(f"Error generating suggestions: {str(e)}")
         return {
             'improvements': [],
+            'warnings': [{'type': 'error', 'confidence': 0.0, 'details': {'title': 'System Error', 'message': f'System error: {str(e)}', 'priority': 'high'}}],
             'recommendations': [],
             'confidence': 0.0,
+            'model_used': 'static_fallback'
         }
 def generate_suggestion_details(suggestion_type, text, data):
+    """Return the canned detail payload matching a suggestion label.
+
+    The label is lower-cased and compared against a fixed, ordered list of
+    trigger phrases.  The first phrase contained in the label selects the
+    payload; a label containing none of them gets the general payload.
+    ``text`` and ``data`` are accepted but not read.
+
+    Returns a dict with ``type``, ``confidence`` and ``details`` keys, or
+    None when an error occurs (for example when ``suggestion_type`` has no
+    ``lower`` method); the error is logged.
+    """
     try:
+        label = suggestion_type.lower()
+        # Rows are (trigger phrase, type, confidence, title, message,
+        # priority, tips), tried top to bottom.  The last trigger is ''
+        # because the empty string is contained in every label, which makes
+        # that row the default.
+        templates = (
+            (
+                'description improvement', 'description_improvement', 0.8,
+                'Improve Property Description',
+                'Add more details about amenities, location benefits, and unique features.',
+                'medium',
+                [
+                    'Include nearby landmarks and transportation',
+                    'Describe interior features and finishes',
+                    'Mention parking and security features',
+                    'Add information about neighborhood'
+                ],
+            ),
+            (
+                'price adjustment', 'price_adjustment', 0.7,
+                'Review Property Price',
+                'Consider adjusting the price based on market conditions and property features.',
+                'high',
+                [
+                    'Compare with similar properties in the area',
+                    'Consider current market trends',
+                    'Factor in property condition and age',
+                    'Include all amenities in pricing'
+                ],
+            ),
+            (
+                'documentation required', 'documentation_required', 0.9,
+                'Additional Documentation Needed',
+                'Provide more documents to increase property verification.',
+                'high',
+                [
+                    'Upload property title documents',
+                    'Include recent utility bills',
+                    'Add property tax receipts',
+                    'Provide floor plan or layout'
+                ],
+            ),
+            (
+                'verification needed', 'verification_needed', 0.8,
+                'Property Verification Required',
+                'Additional verification steps needed for property authenticity.',
+                'high',
+                [
+                    'Verify property ownership',
+                    'Check for any legal disputes',
+                    'Confirm property dimensions',
+                    'Validate address details'
+                ],
+            ),
+            (
+                '', 'general_suggestion', 0.6,
+                'General Improvement',
+                'Consider improving overall property listing quality.',
+                'medium',
+                [
+                    'Add more high-quality images',
+                    'Include detailed specifications',
+                    'Provide contact information',
+                    'Update property status regularly'
+                ],
+            ),
+        )
+        for phrase, kind, confidence, title, message, priority, tips in templates:
+            if phrase not in label:
+                continue
+            return {
+                'type': kind,
+                'confidence': confidence,
+                'details': {
+                    'title': title,
+                    'message': message,
+                    'priority': priority,
+                    'suggestions': tips
+                }
             }
     except Exception as e:
         logger.error(f"Error generating suggestion details: {str(e)}")
+        return None

models/text_quality.py CHANGED Viewed

@@ -11,10 +11,11 @@ def assess_text_quality(text):
                 'score': 0,
                 'reasoning': 'Text too short.',
                 'is_ai_generated': False,
-                'quality_metrics': {}
             }
         try:
-            classifier = load_model("zero-shot-classification", "typeform/mobilebert-uncased-mnli")
         except Exception as e:
             logger.error(f"Error loading model in text quality: {str(e)}")
             return {
@@ -23,7 +24,8 @@ def assess_text_quality(text):
                 'reasoning': f'Model loading error: {str(e)}',
                 'is_ai_generated': False,
                 'quality_metrics': {},
-                'top_classifications': []
             }
         # Enhanced quality categories with more specific indicators
@@ -52,79 +54,59 @@ def assess_text_quality(text):
                     'confidence': float(score)
                 })
-        # AI generation detection with multiple models
-        ai_check = classifier(text[:1000], ["human-written", "AI-generated", "template-based", "authentic"])
-        is_ai_generated = (
-            (ai_check['labels'][0] == "AI-generated" and ai_check['scores'][0] > 0.6) or
-            (ai_check['labels'][0] == "template-based" and ai_check['scores'][0] > 0.7)
-        )
-        # Calculate quality metrics
-        quality_metrics = {
-            'detail_level': sum(score for label, score in zip(quality_result['labels'], quality_result['scores'])
-                              if label in ['detailed and informative', 'adequately detailed']),
-            'professionalism': sum(score for label, score in zip(quality_result['labels'], quality_result['scores'])
-                                 if label in ['professional listing', 'authentic description']),
-            'clarity': sum(score for label, score in zip(quality_result['labels'], quality_result['scores'])
-                         if label not in ['vague description', 'misleading content', 'spam-like content']),
-            'authenticity': 1.0 - sum(score for label, score in zip(quality_result['labels'], quality_result['scores'])
-                                    if label in ['template-based content', 'spam-like content'])
-        }
-        # Calculate overall score with weighted metrics
-        weights = {
-            'detail_level': 0.3,
-            'professionalism': 0.25,
-            'clarity': 0.25,
-            'authenticity': 0.2
-        }
-        score = sum(metric * weights[metric_name] for metric_name, metric in quality_metrics.items())
-        score = score * 100  # Convert to percentage
-        # Adjust score for AI-generated content
-        if is_ai_generated:
-            score = score * 0.7  # Reduce score by 30% for AI-generated content
-        # Generate detailed reasoning
-        reasoning_parts = []
-        if top_classifications:
-            primary_class = top_classifications[0]['classification']
-            reasoning_parts.append(f"Primary assessment: {primary_class}")
-        if quality_metrics['detail_level'] > 0.7:
-            reasoning_parts.append("Contains comprehensive details")
-        elif quality_metrics['detail_level'] > 0.4:
-            reasoning_parts.append("Contains adequate details")
         else:
-            reasoning_parts.append("Lacks important details")
-        if quality_metrics['professionalism'] > 0.7:
-            reasoning_parts.append("Professional listing style")
-        elif quality_metrics['professionalism'] < 0.4:
-            reasoning_parts.append("Amateur listing style")
-        if quality_metrics['clarity'] < 0.5:
-            reasoning_parts.append("Content clarity issues detected")
-        if is_ai_generated:
-            reasoning_parts.append("Content appears to be AI-generated")
         return {
-            'assessment': top_classifications[0]['classification'] if top_classifications else 'could not assess',
-            'score': int(score),
-            'reasoning': '. '.join(reasoning_parts),
             'is_ai_generated': is_ai_generated,
-            'quality_metrics': quality_metrics,
-            'top_classifications': top_classifications
         }
     except Exception as e:
-        logger.error(f"Error assessing text quality: {str(e)}")
         return {
-            'assessment': 'could not assess',
-            'score': 50,
-            'reasoning': 'Technical error.',
             'is_ai_generated': False,
             'quality_metrics': {},
-            'top_classifications': []
         }

                 'score': 0,
                 'reasoning': 'Text too short.',
                 'is_ai_generated': False,
+                'quality_metrics': {},
+                'model_used': 'static_fallback'
             }
         try:
+            classifier = load_model("zero-shot-classification")  # Use standard model instead of typeform
         except Exception as e:
             logger.error(f"Error loading model in text quality: {str(e)}")
             return {
                 'reasoning': f'Model loading error: {str(e)}',
                 'is_ai_generated': False,
                 'quality_metrics': {},
+                'top_classifications': [],
+                'model_used': 'static_fallback'
             }
         # Enhanced quality categories with more specific indicators
                     'confidence': float(score)
                 })
+        # Calculate overall quality score
+        positive_categories = ["detailed and informative", "adequately detailed", "professional listing", "authentic description"]
+        negative_categories = ["vague description", "misleading content", "amateur listing", "spam-like content", "template-based content"]
+        positive_score = sum(score for label, score in zip(quality_result['labels'], quality_result['scores'])
+                           if label in positive_categories)
+        negative_score = sum(score for label, score in zip(quality_result['labels'], quality_result['scores'])
+                           if label in negative_categories)
+        # Calculate final score (0-100)
+        quality_score = max(0, min(100, int((positive_score - negative_score + 1) * 50)))
+        # Determine assessment
+        if quality_score >= 80:
+            assessment = 'excellent'
+        elif quality_score >= 60:
+            assessment = 'good'
+        elif quality_score >= 40:
+            assessment = 'adequate'
+        elif quality_score >= 20:
+            assessment = 'poor'
         else:
+            assessment = 'very poor'
+        # Simple AI detection (basic heuristic)
+        is_ai_generated = len(text) > 500 and (
+            'beautiful' in text.lower() and 'excellent' in text.lower() and 'prime' in text.lower() or
+            text.count('.') > 10 and len(text.split()) > 100
+        )
         return {
+            'assessment': assessment,
+            'score': quality_score,
+            'reasoning': f'Quality score: {quality_score}/100 based on {len(top_classifications)} classifications.',
             'is_ai_generated': is_ai_generated,
+            'quality_metrics': {
+                'text_length': len(text),
+                'word_count': len(text.split()),
+                'sentence_count': text.count('.') + text.count('!') + text.count('?'),
+                'positive_score': positive_score,
+                'negative_score': negative_score
+            },
+            'top_classifications': top_classifications,
+            'model_used': getattr(classifier, 'fallback_model', 'primary_model')
         }
     except Exception as e:
+        logger.error(f"Error in text quality assessment: {str(e)}")
         return {
+            'assessment': 'error',
+            'score': 0,
+            'reasoning': f'Error: {str(e)}',
             'is_ai_generated': False,
             'quality_metrics': {},
+            'top_classifications': [],
+            'model_used': 'static_fallback'
         }

models/trust_score.py CHANGED Viewed

@@ -5,133 +5,63 @@ from .logging_config import logger
 def generate_trust_score(text, image_analysis, pdf_analysis):
     try:
-        try:
-            classifier = load_model("zero-shot-classification", "typeform/mobilebert-uncased-mnli")
-        except Exception as e:
-            logger.error(f"Error loading model in trust score: {str(e)}")
-            return 35, f"Model loading error: {str(e)}"
-        aspects = [
-            "complete information provided",
-            "verified location",
-            "consistent data",
-            "authentic documents",
-            "authentic images",
-            "reasonable pricing",
-            "verified ownership",
-            "proper documentation"
-        ]
-        try:
-            result = classifier(str(text)[:1000], aspects, multi_label=True)
-        except Exception as e:
-            logger.error(f"Error in trust score model inference: {str(e)}")
-            return 35, f"Model inference error: {str(e)}"
-        # More balanced weights
-        weights = {
-            "complete information provided": 0.20,
-            "verified location": 0.20,
-            "consistent data": 0.15,
-            "authentic documents": 0.15,
-            "authentic images": 0.10,
-            "reasonable pricing": 0.10,
-            "verified ownership": 0.05,
-            "proper documentation": 0.05
-        }
-        score = 0
         reasoning_parts = []
-        # More reasonable scoring for each aspect
-        for label, confidence in zip(result['labels'], result['scores']):
-            adjusted_confidence = confidence
-            # Document verification
-            if label == "authentic documents":
-                if not pdf_analysis or len(pdf_analysis) == 0:
-                    adjusted_confidence = 0.3  # Base score for no documents
-                else:
-                    doc_scores = [p.get('verification_score', 0) for p in pdf_analysis]
-                    adjusted_confidence = sum(doc_scores) / max(1, len(doc_scores))
-                    # Moderate penalty for low verification scores
-                    if any(score < 0.5 for score in doc_scores):
-                        adjusted_confidence *= 0.7
-                    # Small penalty for missing documents
-                    if len(doc_scores) < 2:
-                        adjusted_confidence *= 0.8
-            # Image verification
-            elif label == "authentic images":
-                if not image_analysis or len(image_analysis) == 0:
-                    adjusted_confidence = 0.3  # Base score for no images
-                else:
-                    img_scores = [i.get('authenticity_score', 0) for i in image_analysis]
-                    adjusted_confidence = sum(img_scores) / max(1, len(img_scores))
-                    # Moderate penalty for low authenticity scores
-                    if any(score < 0.6 for score in img_scores):
-                        adjusted_confidence *= 0.7
-                    # Small penalty for AI-generated images
-                    if any(i.get('is_ai_generated', False) for i in image_analysis):
-                        adjusted_confidence *= 0.8
-                    # Small penalty for non-property related images
-                    if any(not i.get('is_property_related', False) for i in image_analysis):
-                        adjusted_confidence *= 0.8
-            # Consistency check
-            elif label == "consistent data":
-                # Check for inconsistencies in the data
-                if "inconsistent" in text.lower() or "suspicious" in text.lower():
-                    adjusted_confidence *= 0.6
-                # Check for impossible values
-                if "impossible" in text.lower() or "invalid" in text.lower():
-                    adjusted_confidence *= 0.5
-                # Check for missing critical information
-                if "missing" in text.lower() or "not provided" in text.lower():
-                    adjusted_confidence *= 0.7
-            # Completeness check
-            elif label == "complete information provided":
-                # Check for missing critical information
-                if len(text) < 200 or "not provided" in text.lower() or "missing" in text.lower():
-                    adjusted_confidence *= 0.7
-                # Check for vague or generic descriptions
-                if "generic" in text.lower() or "vague" in text.lower():
-                    adjusted_confidence *= 0.8
-                # Check for suspiciously short descriptions
-                if len(text) < 100:
-                    adjusted_confidence *= 0.6
-            score += adjusted_confidence * weights.get(label, 0.1)
-            reasoning_parts.append(f"{label} ({adjusted_confidence:.0%})")
-        # Apply moderate penalties for suspicious patterns
-        if "suspicious" in text.lower() or "fraudulent" in text.lower():
-            score *= 0.7
-        # Apply moderate penalties for suspiciously low values
-        if "suspiciously low" in text.lower() or "unusually small" in text.lower():
-            score *= 0.8
-        # Apply moderate penalties for inconsistencies
-        if "inconsistent" in text.lower() or "mismatch" in text.lower():
-            score *= 0.8
-        # Apply moderate penalties for missing critical information
-        if "missing critical" in text.lower() or "incomplete" in text.lower():
-            score *= 0.8
-        # Ensure minimum score for any valid data
-        if score < 0.1:
-            score = 0.1  # Minimum 10% score for any data
-        # Ensure score is between 0 and 100
-        score = min(100, max(0, int(score * 100)))
-        # Ensure minimum score of 25% for any valid data
-        if score < 25:
-            score = 25
-        reasoning = f"Based on: {', '.join(reasoning_parts)}"
-        return score, reasoning
     except Exception as e:
-        logger.error(f"Error generating trust score: {str(e)}")
-        return 35, "Could not assess trust."

 def generate_trust_score(text, image_analysis, pdf_analysis):
     """Compute a heuristic trust score for a property listing.

     The score starts at a neutral 50 and is adjusted as follows:

     * +5 per distinct positive keyword found in ``text`` (capped at +20)
     * -10 per distinct negative keyword found in ``text`` (capped at -30)
     * +3 per analysed image (capped at +15)
     * +5 per analysed document (capped at +15)

     Keywords are matched as whole words, so "unverified" does not also
     count as "verified" and "incomplete" does not also count as "complete".
     The phrase "real estate" is ignored when looking for the keyword "real".

     Args:
         text: Listing text; it is converted with ``str()`` before scanning.
         image_analysis: Image analysis results. A list contributes its
             length; any other truthy value counts as a single image.
         pdf_analysis: Document analysis results, counted the same way.

     Returns:
         A ``(trust_score, reasoning)`` tuple where ``trust_score`` is a float
         in the range 0-100 and ``reasoning`` is a human-readable explanation.
         If anything fails, ``(35.0, "Trust analysis failed: ...")`` is
         returned instead of raising.
     """
     # Local import so this function does not depend on the module's import block.
     import re

     try:
         # Use a simpler approach to avoid timeouts
         trust_score = 50.0  # Start with neutral score
         reasoning_parts = []

         # Simple text-based trust indicators
         text_lower = str(text).lower()
         # "real estate" is domain vocabulary present in almost every listing;
         # mask it so it is not mistaken for the trust keyword "real".
         scan_text = re.sub(r"\breal\s+estate\b", " ", text_lower)

         positive_indicators = [
             'verified', 'authentic', 'genuine', 'real', 'legitimate',
             'complete', 'detailed', 'professional', 'official', 'certified'
         ]
         negative_indicators = [
             'fake', 'scam', 'fraud', 'suspicious', 'unverified',
             'incomplete', 'missing', 'unclear', 'doubtful', 'questionable'
         ]

         def count_indicators(indicators):
             # Each indicator counts at most once, and only as a whole word.
             return sum(
                 1 for indicator in indicators
                 if re.search(rf"\b{re.escape(indicator)}\b", scan_text)
             )

         positive_count = count_indicators(positive_indicators)
         negative_count = count_indicators(negative_indicators)

         # Adjust score based on indicators
         if positive_count > 0:
             trust_score += min(20, positive_count * 5)
             reasoning_parts.append(f"Found {positive_count} positive trust indicators")
         if negative_count > 0:
             trust_score -= min(30, negative_count * 10)
             reasoning_parts.append(f"Found {negative_count} negative trust indicators")

         # Image analysis contribution
         if image_analysis:
             image_count = len(image_analysis) if isinstance(image_analysis, list) else 1
             if image_count > 0:
                 trust_score += min(15, image_count * 3)
                 reasoning_parts.append(f"Property has {image_count} images")

         # PDF analysis contribution
         if pdf_analysis:
             pdf_count = len(pdf_analysis) if isinstance(pdf_analysis, list) else 1
             if pdf_count > 0:
                 trust_score += min(15, pdf_count * 5)
                 reasoning_parts.append(f"Property has {pdf_count} documents")

         # Clamp with float bounds so the result is always a float.
         trust_score = max(0.0, min(100.0, trust_score))

         # Create reasoning
         if reasoning_parts:
             reasoning = ". ".join(reasoning_parts) + "."
         else:
             reasoning = "Basic trust assessment completed."
         return trust_score, reasoning
     except Exception as e:
         # Deliberate best-effort: callers always receive a usable score.
         logger.error(f"Error in trust score generation: {str(e)}")
         return 35.0, f"Trust analysis failed: {str(e)}"

templates/index.html CHANGED Viewed

@@ -1951,18 +1951,36 @@
             const propertyForm = document.getElementById('propertyForm');
             const loadingIndicator = document.getElementById('loadingIndicator');
             const resultsContainer = document.getElementById('resultsContainer');
             loadingIndicator.style.display = 'block';
             resultsContainer.style.display = 'none';
             const formData = new FormData(propertyForm);
             // Add images and PDFs from preview arrays if needed
             fetch('/verify', {
                 method: 'POST',
                 body: formData
             })
-            .then(response => response.json())
             .then(data => {
                 loadingIndicator.style.display = 'none';
                 if (data.status === 'error' || data.error) {
                     showError(data.error || 'An error occurred. Please check your input and try again.');
                     return;
                 }
@@ -1970,6 +1988,7 @@
                 resultsContainer.style.display = 'block';
             })
             .catch(error => {
                 loadingIndicator.style.display = 'none';
                 showError('Server error: ' + (error.message || error));
             });
@@ -2444,45 +2463,292 @@
                 documentDiv.innerHTML = '';
                 if (data.document_analysis && data.document_analysis.pdf_count > 0) {
-                    documentDiv.innerHTML = `<p><strong>Documents Analyzed:</strong> ${data.document_analysis.pdf_count}</p>`;
-                    data.document_analysis.pdf_analysis.forEach((pdf, index) => {
-                        documentDiv.innerHTML += `
-                            <div class="pdf-preview">
-                                <p><strong>Document ${index + 1}</strong></p>
-                                <p><strong>Type:</strong> ${pdf.document_type.classification} (${Math.round(pdf.document_type.confidence * 100)}% confidence)</p>
-                                <p><strong>Authenticity:</strong> ${pdf.authenticity.assessment} (${Math.round(pdf.authenticity.confidence * 100)}% confidence)</p>
-                                <p><strong>Summary:</strong> ${pdf.summary}</p>
-                                <p><strong>Contains Signatures:</strong> ${pdf.contains_signatures ? 'Yes' : 'No'}</p>
-                                <p><strong>Contains Dates:</strong> ${pdf.contains_dates ? 'Yes' : 'No'}</p>
-                            </div>
-                        `;
-                    });
-                    // Update Document Chart
                     let authenticCount = 0;
                     let suspiciousCount = 0;
                     let incompleteCount = 0;
-                    data.document_analysis.pdf_analysis.forEach(pdf => {
-                        if (pdf.authenticity.assessment.includes('authentic')) {
                             authenticCount++;
-                        } else if (pdf.authenticity.assessment.includes('fraudulent')) {
                             suspiciousCount++;
                         } else {
                             incompleteCount++;
                         }
                     });
                     updateChart(documentChart, {
                         datasets: [{
-                            data: [
-                                authenticCount,
-                                suspiciousCount,
-                                incompleteCount
-                            ]
                         }]
                     });
                 } else {
                     documentDiv.innerHTML = '<p>No documents were uploaded for analysis.</p>';
                 }
@@ -2505,46 +2771,160 @@
                     nonRealEstateContainer.innerHTML = '<h4>Non-Real Estate Images</h4>';
                     let propertyRelatedCount = 0;
                     data.image_analysis.image_analysis.forEach((img, index) => {
                         if (img && img.is_property_related) {
                             propertyRelatedCount++;
                         }
                     });
                     imageAnalysisDiv.innerHTML = `
                         <div class="analysis-summary">
-                            <p><strong>Total Images Analyzed:</strong> ${data.image_analysis.image_count}</p>
-                            <p><strong>Property-Related Images:</strong> ${propertyRelatedCount} of ${data.image_analysis.image_count}</p>
                         </div>
                     `;
-                    // Display images in appropriate containers
                     data.images.forEach((imgData, index) => {
                         const imgAnalysis = data.image_analysis.image_analysis[index];
                         const galleryItem = document.createElement('div');
                         galleryItem.className = 'gallery-item';
-                        // Create image container with label
                         const imageContainer = document.createElement('div');
                         imageContainer.className = 'image-container';
-                        // Add the image
                         imageContainer.innerHTML = `
                             <img src="data:image/jpeg;base64,${imgData}" alt="Property Image ${index + 1}">
                             <div class="image-overlay">
-                                ${imgAnalysis && imgAnalysis.is_property_related ?
-                                    `<div class="image-label">${imgAnalysis.predicted_label || 'Property Image'}</div>` :
-                                    '<div class="image-label">Non-Property Image</div>'}
                             </div>
                         `;
-                        galleryItem.appendChild(imageContainer);
                         // Add to appropriate container based on classification
                         if (imgAnalysis && imgAnalysis.is_property_related) {
-                            realEstateContainer.appendChild(galleryItem);
                         } else {
-                            nonRealEstateContainer.appendChild(galleryItem);
                         }
                     });
@@ -2552,7 +2932,7 @@
                     imageGallery.appendChild(realEstateContainer);
                     imageGallery.appendChild(nonRealEstateContainer);
-                    // Add some CSS for the new image display
                     const style = document.createElement('style');
                     style.textContent = `
                         .image-section {
@@ -2561,29 +2941,159 @@
                         .image-section h4 {
                             margin-bottom: 15px;
                             color: var(--primary);
                         }
                         .image-container {
                             position: relative;
                             width: 100%;
                             height: 100%;
                         }
                         .image-overlay {
                             position: absolute;
                             bottom: 0;
                             left: 0;
                             right: 0;
-                            background: rgba(0, 0, 0, 0.7);
-                            padding: 8px;
                             color: white;
-                            text-align: center;
                         }
                         .image-label {
                             font-size: 0.9rem;
                             font-weight: 500;
                         }
-                        .gallery-item {
-                            position: relative;
-                            margin-bottom: 20px;
                         }
                     `;
                     document.head.appendChild(style);
@@ -2774,7 +3284,7 @@
                         pdf => pdf.verification_score || 0
                     );
                     docScore = verificationScores.length > 0 ?
-                        Math.round((verificationScores.reduce((a, b) => a + b, 0) / verificationScores.length) * 100) : 0;
                 }
                 updateScoreBar('documentBar', 'documentValue', docScore);

             const propertyForm = document.getElementById('propertyForm');
             const loadingIndicator = document.getElementById('loadingIndicator');
             const resultsContainer = document.getElementById('resultsContainer');
+            console.log("🚀 Starting form submission...");
             loadingIndicator.style.display = 'block';
             resultsContainer.style.display = 'none';
             const formData = new FormData(propertyForm);
+            // Debug: Log form data
+            console.log("📋 Form data being sent:");
+            for (let [key, value] of formData.entries()) {
+                console.log(`  ${key}: ${value}`);
+            }
+            console.log("🌐 Making request to /verify endpoint...");
             // Add images and PDFs from preview arrays if needed
             fetch('/verify', {
                 method: 'POST',
                 body: formData
             })
+            .then(response => {
+                console.log("📡 Response received:", response.status, response.statusText);
+                return response.json();
+            })
             .then(data => {
+                console.log("✅ Data received:", data);
                 loadingIndicator.style.display = 'none';
                 if (data.status === 'error' || data.error) {
+                    console.error("❌ Server error:", data.error);
                     showError(data.error || 'An error occurred. Please check your input and try again.');
                     return;
                 }
                 resultsContainer.style.display = 'block';
             })
             .catch(error => {
+                console.error("❌ Fetch error:", error);
                 loadingIndicator.style.display = 'none';
                 showError('Server error: ' + (error.message || error));
             });
                 documentDiv.innerHTML = '';
                 if (data.document_analysis && data.document_analysis.pdf_count > 0) {
+                    // Calculate summary statistics
+                    let totalVerificationScore = 0;
                     let authenticCount = 0;
                     let suspiciousCount = 0;
                     let incompleteCount = 0;
+                    let totalConfidence = 0;
+                    let totalDocumentConfidence = 0;
+                    let totalAuthenticityConfidence = 0;
+                    let documentsWithSignatures = 0;
+                    let documentsWithDates = 0;
+                    let propertyRelatedCount = 0;
+                    data.document_analysis.pdf_analysis.forEach((pdf, index) => {
+                        if (pdf) {
+                            totalVerificationScore += pdf.verification_score || 0;
+                            totalConfidence += pdf.confidence || 0;
+                            totalDocumentConfidence += pdf.document_confidence || 0;
+                            totalAuthenticityConfidence += pdf.authenticity_confidence || 0;
+                            if (pdf.contains_signatures) documentsWithSignatures++;
+                            if (pdf.contains_dates) documentsWithDates++;
+                            if (pdf.is_property_related) propertyRelatedCount++;
+                            if (pdf.authenticity_assessment && pdf.authenticity_assessment.toLowerCase().includes('authentic')) {
                             authenticCount++;
+                            } else if (pdf.authenticity_assessment && pdf.authenticity_assessment.toLowerCase().includes('suspicious')) {
                             suspiciousCount++;
                         } else {
                             incompleteCount++;
                         }
+                        }
                     });
+                    const avgVerificationScore = data.document_analysis.pdf_count > 0 ? (totalVerificationScore / data.document_analysis.pdf_count).toFixed(1) : 0;
+                    const avgConfidence = data.document_analysis.pdf_count > 0 ? (totalConfidence / data.document_analysis.pdf_count * 100).toFixed(1) : 0;
+                    const avgDocumentConfidence = data.document_analysis.pdf_count > 0 ? (totalDocumentConfidence / data.document_analysis.pdf_count * 100).toFixed(1) : 0;
+                    const avgAuthenticityConfidence = data.document_analysis.pdf_count > 0 ? (totalAuthenticityConfidence / data.document_analysis.pdf_count * 100).toFixed(1) : 0;
+                    // Create summary section
+                    documentDiv.innerHTML = `
+                        <div class="document-summary">
+                            <h4>Document Analysis Summary</h4>
+                            <div class="summary-grid">
+                                <div class="summary-item">
+                                    <h5>Total Documents</h5>
+                                    <p class="summary-value">${data.document_analysis.pdf_count}</p>
+                                </div>
+                                <div class="summary-item">
+                                    <h5>Property Related</h5>
+                                    <p class="summary-value">${propertyRelatedCount} of ${data.document_analysis.pdf_count}</p>
+                                </div>
+                                <div class="summary-item">
+                                    <h5>Avg Verification Score</h5>
+                                    <p class="summary-value">${avgVerificationScore}%</p>
+                                </div>
+                                <div class="summary-item">
+                                    <h5>Avg Confidence</h5>
+                                    <p class="summary-value">${avgConfidence}%</p>
+                                </div>
+                                <div class="summary-item">
+                                    <h5>With Signatures</h5>
+                                    <p class="summary-value">${documentsWithSignatures}</p>
+                                </div>
+                                <div class="summary-item">
+                                    <h5>With Dates</h5>
+                                    <p class="summary-value">${documentsWithDates}</p>
+                                </div>
+                            </div>
+                        </div>
+                    `;
+                    // Create detailed analysis for each document
+                    const documentsContainer = document.createElement('div');
+                    documentsContainer.className = 'documents-container';
+                    documentsContainer.innerHTML = '<h4>Detailed Document Analysis</h4>';
+                    data.document_analysis.pdf_analysis.forEach((pdf, index) => {
+                        if (pdf) {
+                            const documentCard = document.createElement('div');
+                            documentCard.className = 'document-card';
+                            // Create key information display
+                            let keyInfoHtml = '';
+                            if (pdf.key_info && Object.keys(pdf.key_info).length > 0) {
+                                keyInfoHtml = '<div class="key-info-section"><h6>Key Information:</h6><ul>';
+                                Object.entries(pdf.key_info).forEach(([key, value]) => {
+                                    if (Array.isArray(value)) {
+                                        keyInfoHtml += `<li><strong>${key}:</strong> ${value.slice(0, 3).join(', ')}${value.length > 3 ? '...' : ''}</li>`;
+                                    } else {
+                                        keyInfoHtml += `<li><strong>${key}:</strong> ${value}</li>`;
+                                    }
+                                });
+                                keyInfoHtml += '</ul></div>';
+                            }
+                            // Create real estate indicators display
+                            let indicatorsHtml = '';
+                            if (pdf.real_estate_indicators && pdf.real_estate_indicators.length > 0) {
+                                indicatorsHtml = '<div class="indicators-section"><h6>Real Estate Indicators:</h6><ul>';
+                                pdf.real_estate_indicators.slice(0, 5).forEach(indicator => {
+                                    indicatorsHtml += `<li>${indicator}</li>`;
+                                });
+                                if (pdf.real_estate_indicators.length > 5) {
+                                    indicatorsHtml += `<li>... and ${pdf.real_estate_indicators.length - 5} more</li>`;
+                                }
+                                indicatorsHtml += '</ul></div>';
+                            }
+                            // Create legal terms display
+                            let legalTermsHtml = '';
+                            if (pdf.legal_terms_found && pdf.legal_terms_found.length > 0) {
+                                legalTermsHtml = '<div class="legal-terms-section"><h6>Legal Terms Found:</h6><ul>';
+                                pdf.legal_terms_found.slice(0, 5).forEach(term => {
+                                    legalTermsHtml += `<li>${term}</li>`;
+                                });
+                                if (pdf.legal_terms_found.length > 5) {
+                                    legalTermsHtml += `<li>... and ${pdf.legal_terms_found.length - 5} more</li>`;
+                                }
+                                legalTermsHtml += '</ul></div>';
+                            }
+                            // Create keyword analysis display
+                            let keywordAnalysisHtml = '';
+                            if (pdf.keyword_analysis && Object.keys(pdf.keyword_analysis).length > 0) {
+                                keywordAnalysisHtml = '<div class="keyword-analysis-section"><h6>Keyword Analysis:</h6><ul>';
+                                Object.entries(pdf.keyword_analysis).forEach(([category, count]) => {
+                                    keywordAnalysisHtml += `<li><strong>${category}:</strong> ${count} matches</li>`;
+                                });
+                                keywordAnalysisHtml += '</ul></div>';
+                            }
+                            documentCard.innerHTML = `
+                                <div class="document-header">
+                                    <h5>Document ${index + 1}</h5>
+                                    <span class="status-badge ${pdf.is_property_related ? 'success' : 'warning'}">
+                                        ${pdf.is_property_related ? 'Property Related' : 'Non-Property'}
+                                    </span>
+                                </div>
+                                <div class="document-metrics">
+                                    <div class="metric">
+                                        <span class="metric-label">Document Type:</span>
+                                        <span class="metric-value">${typeof pdf.document_type === 'object' ? (pdf.document_type.classification || 'Unknown') : (pdf.document_type || 'Unknown')}</span>
+                                    </div>
+                                    <div class="metric">
+                                        <span class="metric-label">Type Confidence:</span>
+                                        <span class="metric-value">${Math.round((typeof pdf.document_type === 'object' ? (pdf.document_type.confidence || 0) : (pdf.document_confidence || 0)) * 100)}%</span>
+                                    </div>
+                                    <div class="metric">
+                                        <span class="metric-label">Authenticity:</span>
+                                        <span class="metric-value">${typeof pdf.authenticity === 'object' ? (pdf.authenticity.assessment || 'Unknown') : (pdf.authenticity_assessment || 'Unknown')}</span>
+                                    </div>
+                                    <div class="metric">
+                                        <span class="metric-label">Auth Confidence:</span>
+                                        <span class="metric-value">${Math.round((typeof pdf.authenticity === 'object' ? (pdf.authenticity.confidence || 0) : (pdf.authenticity_confidence || 0)) * 100)}%</span>
+                                    </div>
+                                    <div class="metric">
+                                        <span class="metric-label">Verification Score:</span>
+                                        <span class="metric-value">${Math.round(pdf.verification_score || 0)}%</span>
+                                    </div>
+                                    <div class="metric">
+                                        <span class="metric-label">Overall Confidence:</span>
+                                        <span class="metric-value">${Math.round((pdf.confidence || 0) * 100)}%</span>
+                                    </div>
+                                </div>
+                                <div class="document-details">
+                                    <div class="detail-section">
+                                        <h6>Summary:</h6>
+                                        <p>${pdf.summary || 'No summary available'}</p>
+                                    </div>
+                                    <div class="detail-section">
+                                        <h6>Document Features:</h6>
+                                        <ul>
+                                            <li><strong>Contains Signatures:</strong> ${pdf.contains_signatures ? 'Yes' : 'No'}</li>
+                                            <li><strong>Contains Dates:</strong> ${pdf.contains_dates ? 'Yes' : 'No'}</li>
+                                        </ul>
+                                    </div>
+                                    ${keyInfoHtml}
+                                    ${indicatorsHtml}
+                                    ${legalTermsHtml}
+                                    ${keywordAnalysisHtml}
+                                </div>
+                                <div class="model-info">
+                                    <small>Model: ${pdf.model_used || 'Static Analysis'}</small>
+                                </div>
+                            `;
+                            documentsContainer.appendChild(documentCard);
+                        }
+                    });
+                    documentDiv.appendChild(documentsContainer);
+                    // Update Document Chart
                     updateChart(documentChart, {
+                        labels: ['Authentic', 'Suspicious', 'Incomplete'],
                         datasets: [{
+                            data: [authenticCount, suspiciousCount, incompleteCount],
+                            backgroundColor: ['#28a745', '#ffc107', '#dc3545']
                         }]
                     });
+                    // Inject the CSS for the document-analysis cards by appending a new <style> element to <head>. NOTE(review): a fresh element is appended every time this branch executes - confirm it cannot run repeatedly on one page, or deduplicate
+                    const style = document.createElement('style');
+                    style.textContent = `
+                        .document-summary {
+                            margin-bottom: 30px;
+                        }
+                        .document-summary h4 {
+                            color: var(--primary);
+                            border-bottom: 2px solid var(--primary);
+                            padding-bottom: 5px;
+                            margin-bottom: 20px;
+                        }
+                        .documents-container {
+                            margin-top: 30px;
+                        }
+                        .documents-container h4 {
+                            color: var(--primary);
+                            margin-bottom: 20px;
+                        }
+                        .document-card {
+                            background: white;
+                            border-radius: var(--border-radius);
+                            box-shadow: var(--box-shadow);
+                            padding: 20px;
+                            margin-bottom: 20px;
+                        }
+                        .document-header {
+                            display: flex;
+                            justify-content: space-between;
+                            align-items: center;
+                            margin-bottom: 15px;
+                        }
+                        .document-header h5 {
+                            margin: 0;
+                            color: var(--dark);
+                        }
+                        .document-metrics {
+                            display: grid;
+                            grid-template-columns: repeat(2, 1fr);
+                            gap: 10px;
+                            margin-bottom: 20px;
+                        }
+                        .document-details {
+                            margin-bottom: 15px;
+                        }
+                        .detail-section {
+                            margin-bottom: 15px;
+                        }
+                        .detail-section h6 {
+                            color: var(--dark);
+                            margin-bottom: 8px;
+                        }
+                        .detail-section p {
+                            color: var(--gray);
+                            margin: 0;
+                            line-height: 1.5;
+                        }
+                        .detail-section ul {
+                            list-style: none;
+                            padding: 0;
+                            margin: 0;
+                        }
+                        .detail-section li {
+                            padding: 4px 0;
+                            color: var(--gray);
+                        }
+                        .key-info-section, .indicators-section, .legal-terms-section, .keyword-analysis-section {
+                            margin-bottom: 15px;
+                        }
+                        .key-info-section h6, .indicators-section h6, .legal-terms-section h6, .keyword-analysis-section h6 {
+                            color: var(--dark);
+                            margin-bottom: 8px;
+                        }
+                        .key-info-section ul, .indicators-section ul, .legal-terms-section ul, .keyword-analysis-section ul {
+                            list-style: none;
+                            padding: 0;
+                            margin: 0;
+                        }
+                        .key-info-section li, .indicators-section li, .legal-terms-section li, .keyword-analysis-section li {
+                            padding: 4px 0;
+                            font-size: 0.85rem;
+                            color: var(--gray);
+                        }
+                    `;
+                    document.head.appendChild(style);
                 } else {
                     documentDiv.innerHTML = '<p>No documents were uploaded for analysis.</p>';
                 }
                     // Reset the non-real-estate container to just its heading (assigning innerHTML replaces any previous content).
                     nonRealEstateContainer.innerHTML = '<h4>Non-Real Estate Images</h4>';
                     // Running totals accumulated over every per-image analysis entry.
                     let propertyRelatedCount = 0;
+                    let totalConfidence = 0;
+                    let totalRealEstateConfidence = 0;
+                    let totalAuthenticityScore = 0;
+                    let aiGeneratedCount = 0;
                     data.image_analysis.image_analysis.forEach((img, index) => {
                         if (img && img.is_property_related) {
                             propertyRelatedCount++;
                         }
                         // Falsy entries are skipped; a missing/falsy numeric field contributes 0 to its total.
+                        if (img) {
+                            totalConfidence += img.confidence || 0;
+                            totalRealEstateConfidence += img.real_estate_confidence || 0;
+                            totalAuthenticityScore += img.authenticity_score || 0;
+                            if (img.is_ai_generated) {
+                                aiGeneratedCount++;
+                            }
+                        }
                     });
                     // Averages scaled by 100 and formatted to one decimal (presumably the scores are 0..1 fractions - TODO confirm).
                     // The divisor is data.image_analysis.image_count, not the number of non-falsy entries counted above.
                     // Each value is a string from toFixed(1), or the number 0 when image_count is not greater than 0.
+                    const avgConfidence = data.image_analysis.image_count > 0 ? (totalConfidence / data.image_analysis.image_count * 100).toFixed(1) : 0;
+                    const avgRealEstateConfidence = data.image_analysis.image_count > 0 ? (totalRealEstateConfidence / data.image_analysis.image_count * 100).toFixed(1) : 0;
+                    const avgAuthenticityScore = data.image_analysis.image_count > 0 ? (totalAuthenticityScore / data.image_analysis.image_count * 100).toFixed(1) : 0;
                     // Render the summary grid (counts and averages computed above) into imageAnalysisDiv, replacing its content.
                     // image_model_used is joined with ', ' when truthy (so it is presumably an array - verify against the backend); otherwise 'Static Analysis' is shown.
                     imageAnalysisDiv.innerHTML = `
                         <div class="analysis-summary">
+                            <div class="summary-grid">
+                                <div class="summary-item">
+                                    <h5>Total Images Analyzed</h5>
+                                    <p class="summary-value">${data.image_analysis.image_count}</p>
+                                </div>
+                                <div class="summary-item">
+                                    <h5>Property-Related Images</h5>
+                                    <p class="summary-value">${propertyRelatedCount} of ${data.image_analysis.image_count}</p>
+                                </div>
+                                <div class="summary-item">
+                                    <h5>Average Confidence</h5>
+                                    <p class="summary-value">${avgConfidence}%</p>
+                                </div>
+                                <div class="summary-item">
+                                    <h5>Real Estate Confidence</h5>
+                                    <p class="summary-value">${avgRealEstateConfidence}%</p>
+                                </div>
+                                <div class="summary-item">
+                                    <h5>Authenticity Score</h5>
+                                    <p class="summary-value">${avgAuthenticityScore}%</p>
+                                </div>
+                                <div class="summary-item">
+                                    <h5>AI Generated</h5>
+                                    <p class="summary-value">${aiGeneratedCount} images</p>
+                                </div>
+                            </div>
+                            <div class="model-info">
+                                <p><strong>Model Used:</strong> ${data.image_analysis.image_model_used ? data.image_analysis.image_model_used.join(', ') : 'Static Analysis'}</p>
+                            </div>
                         </div>
                     `;
+                    // Build one analysis card per entry of data.images; each image is paired by position with data.image_analysis.image_analysis[index]
                     data.images.forEach((imgData, index) => {
                         const imgAnalysis = data.image_analysis.image_analysis[index];
                         const galleryItem = document.createElement('div');
                         galleryItem.className = 'gallery-item';
+                        // Card wrapper for this image. NOTE(review): galleryItem above is created but never used again in this callback - analysisCard is what gets appended
+                        const analysisCard = document.createElement('div');
+                        analysisCard.className = 'analysis-card';
+                        // Image plus an overlay with the predicted label (or 'Unknown') and the rounded confidence percentage. NOTE(review): the data URI always declares image/jpeg - confirm the backend re-encodes uploads, since .png files are accepted as well
                         const imageContainer = document.createElement('div');
                         imageContainer.className = 'image-container';
                         imageContainer.innerHTML = `
                             <img src="data:image/jpeg;base64,${imgData}" alt="Property Image ${index + 1}">
                             <div class="image-overlay">
+                                <div class="image-label">${imgAnalysis && imgAnalysis.predicted_label ? imgAnalysis.predicted_label : 'Unknown'}</div>
+                                <div class="confidence-badge">${imgAnalysis ? Math.round((imgAnalysis.confidence || 0) * 100) : 0}%</div>
                             </div>
                         `;
+                        // Details panel: filled with metrics when an analysis entry exists, otherwise a fallback message
+                        const analysisDetails = document.createElement('div');
+                        analysisDetails.className = 'analysis-details';
+                        if (imgAnalysis) {
+                            const isPropertyRelated = imgAnalysis.is_property_related ? 'Yes' : 'No'; // NOTE(review): not referenced again in this callback; the template below reads imgAnalysis.is_property_related directly
+                            const isAiGenerated = imgAnalysis.is_ai_generated ? 'Yes' : 'No';
+                            const authenticityScore = Math.round((imgAnalysis.authenticity_score || 0) * 100);
+                            const realEstateConfidence = Math.round((imgAnalysis.real_estate_confidence || 0) * 100);
+                            // List at most the first three entries of top_predictions. NOTE(review): pred.label (like the other analysis strings below) is interpolated into innerHTML without escaping - confirm these values are trusted
+                            let topPredictionsHtml = '';
+                            if (imgAnalysis.top_predictions && imgAnalysis.top_predictions.length > 0) {
+                                topPredictionsHtml = '<div class="top-predictions"><h6>Top Predictions:</h6><ul>';
+                                imgAnalysis.top_predictions.slice(0, 3).forEach(pred => {
+                                    const confidence = Math.round((pred.confidence || 0) * 100);
+                                    topPredictionsHtml += `<li>${pred.label} (${confidence}%)</li>`;
+                                });
+                                topPredictionsHtml += '</ul></div>';
+                            }
+                            // Optional image-quality section: resolution (or 'Unknown') and quality_score as a rounded percentage; left empty when image_quality is absent
+                            let qualityInfo = '';
+                            if (imgAnalysis.image_quality) {
+                                qualityInfo = `
+                                    <div class="quality-info">
+                                        <h6>Image Quality:</h6>
+                                        <p>Resolution: ${imgAnalysis.image_quality.resolution || 'Unknown'}</p>
+                                        <p>Quality Score: ${Math.round((imgAnalysis.image_quality.quality_score || 0) * 100)}%</p>
+                                    </div>
+                                `;
+                            }
+                            analysisDetails.innerHTML = `
+                                <div class="analysis-header">
+                                    <h5>Image Analysis #${index + 1}</h5>
+                                    <span class="status-badge ${imgAnalysis.is_property_related ? 'success' : 'warning'}">
+                                        ${imgAnalysis.is_property_related ? 'Real Estate' : 'Non-Real Estate'}
+                                    </span>
+                                </div>
+                                <div class="analysis-metrics">
+                                    <div class="metric">
+                                        <span class="metric-label">Confidence:</span>
+                                        <span class="metric-value">${Math.round((imgAnalysis.confidence || 0) * 100)}%</span>
+                                    </div>
+                                    <div class="metric">
+                                        <span class="metric-label">Real Estate Confidence:</span>
+                                        <span class="metric-value">${realEstateConfidence}%</span>
+                                    </div>
+                                    <div class="metric">
+                                        <span class="metric-label">Authenticity:</span>
+                                        <span class="metric-value">${authenticityScore}%</span>
+                                    </div>
+                                    <div class="metric">
+                                        <span class="metric-label">AI Generated:</span>
+                                        <span class="metric-value">${isAiGenerated}</span>
+                                    </div>
+                                </div>
+                                ${topPredictionsHtml}
+                                ${qualityInfo}
+                                <div class="model-info">
+                                    <small>Model: ${imgAnalysis.model_used || 'Static Analysis'}</small>
+                                </div>
+                            `;
+                        } else {
+                            analysisDetails.innerHTML = '<p>Analysis not available</p>';
+                        }
+                        analysisCard.appendChild(imageContainer);
+                        analysisCard.appendChild(analysisDetails);
                         // Route the card: property-related images go to realEstateContainer; everything else, including images with no analysis entry, goes to nonRealEstateContainer
                         if (imgAnalysis && imgAnalysis.is_property_related) {
+                            realEstateContainer.appendChild(analysisCard);
                         } else {
+                            nonRealEstateContainer.appendChild(analysisCard);
                         }
                     });
                     imageGallery.appendChild(realEstateContainer);
                     imageGallery.appendChild(nonRealEstateContainer);
+                    // Add enhanced CSS for the new image display
                     const style = document.createElement('style');
                     style.textContent = `
                         .image-section {
                         .image-section h4 {
                             margin-bottom: 15px;
                             color: var(--primary);
+                            border-bottom: 2px solid var(--primary);
+                            padding-bottom: 5px;
+                        }
+                        .summary-grid {
+                            display: grid;
+                            grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
+                            gap: 15px;
+                            margin-bottom: 20px;
+                        }
+                        .summary-item {
+                            background: #f8f9fa;
+                            padding: 15px;
+                            border-radius: var(--border-radius);
+                            text-align: center;
+                        }
+                        .summary-item h5 {
+                            font-size: 0.9rem;
+                            color: var(--gray);
+                            margin-bottom: 5px;
+                        }
+                        .summary-value {
+                            font-size: 1.2rem;
+                            font-weight: 600;
+                            color: var(--primary);
+                            margin: 0;
+                        }
+                        .analysis-card {
+                            background: white;
+                            border-radius: var(--border-radius);
+                            box-shadow: var(--box-shadow);
+                            overflow: hidden;
+                            margin-bottom: 20px;
                         }
                         .image-container {
                             position: relative;
+                            width: 100%;
+                            height: 200px;
+                            overflow: hidden;
+                        }
+                        .image-container img {
                             width: 100%;
                             height: 100%;
+                            object-fit: cover;
                         }
                         .image-overlay {
                             position: absolute;
                             bottom: 0;
                             left: 0;
                             right: 0;
+                            background: rgba(0, 0, 0, 0.8);
+                            padding: 10px;
                             color: white;
+                            display: flex;
+                            justify-content: space-between;
+                            align-items: center;
                         }
                         .image-label {
                             font-size: 0.9rem;
                             font-weight: 500;
                         }
+                        .confidence-badge {
+                            background: var(--primary);
+                            color: white;
+                            padding: 2px 8px;
+                            border-radius: 12px;
+                            font-size: 0.8rem;
+                            font-weight: 500;
+                        }
+                        .analysis-details {
+                            padding: 15px;
+                        }
+                        .analysis-header {
+                            display: flex;
+                            justify-content: space-between;
+                            align-items: center;
+                            margin-bottom: 15px;
+                        }
+                        .analysis-header h5 {
+                            margin: 0;
+                            color: var(--dark);
+                        }
+                        .status-badge {
+                            padding: 4px 12px;
+                            border-radius: 20px;
+                            font-size: 0.8rem;
+                            font-weight: 500;
+                        }
+                        .status-badge.success {
+                            background: #d4edda;
+                            color: #155724;
+                        }
+                        .status-badge.warning {
+                            background: #fff3cd;
+                            color: #856404;
+                        }
+                        .analysis-metrics {
+                            display: grid;
+                            grid-template-columns: repeat(2, 1fr);
+                            gap: 10px;
+                            margin-bottom: 15px;
+                        }
+                        .metric {
+                            display: flex;
+                            justify-content: space-between;
+                            padding: 8px;
+                            background: #f8f9fa;
+                            border-radius: 6px;
+                        }
+                        .metric-label {
+                            font-size: 0.9rem;
+                            color: var(--gray);
+                        }
+                        .metric-value {
+                            font-weight: 600;
+                            color: var(--dark);
+                        }
+                        .top-predictions {
+                            margin-bottom: 15px;
+                        }
+                        .top-predictions h6 {
+                            font-size: 0.9rem;
+                            color: var(--dark);
+                            margin-bottom: 8px;
+                        }
+                        .top-predictions ul {
+                            list-style: none;
+                            padding: 0;
+                            margin: 0;
+                        }
+                        .top-predictions li {
+                            padding: 4px 0;
+                            font-size: 0.85rem;
+                            color: var(--gray);
+                        }
+                        .quality-info {
+                            margin-bottom: 15px;
+                        }
+                        .quality-info h6 {
+                            font-size: 0.9rem;
+                            color: var(--dark);
+                            margin-bottom: 8px;
+                        }
+                        .quality-info p {
+                            font-size: 0.85rem;
+                            color: var(--gray);
+                            margin: 2px 0;
+                        }
+                        .model-info {
+                            text-align: right;
+                        }
+                        .model-info small {
+                            color: var(--gray);
+                            font-style: italic;
                         }
                     `;
                     document.head.appendChild(style);
                         pdf => pdf.verification_score || 0
                     );
                     docScore = verificationScores.length > 0 ?
+                        Math.round(verificationScores.reduce((a, b) => a + b, 0) / verificationScores.length) : 0;
                 }
                 updateScoreBar('documentBar', 'documentValue', docScore);