Spaces:

EGYADMIN
/

Wahbi-AI

Sleeping

File size: 68,443 Bytes

25d2b3e

#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
وحدة مقارنة المستندات المتقدمة لتحليل الفروقات بين نسخ المستندات
"""

import os
import sys
import json
import re
import difflib
import Levenshtein
from datetime import datetime
import numpy as np
import pandas as pd
import streamlit as st
import plotly.express as px
import plotly.graph_objects as go
from collections import Counter
from nltk.tokenize import sent_tokenize, word_tokenize
from rouge_score import rouge_scorer
from PyPDF2 import PdfReader
import io

# Make the directory two levels above this file importable so that shared
# project packages (such as `utils`) can be resolved
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../..")))

# Shared helper utilities
from utils.helpers import create_directory_if_not_exists, format_time, get_user_info


class DocumentComparator:
    """فئة مقارنة المستندات المتقدمة"""
    
    def __init__(self):
        """Prepare the report directory, NLTK resources and the ROUGE scorer.

        Side effects: creates ``data/document_comparison`` (two levels above
        this file) through ``create_directory_if_not_exists`` and may download
        NLTK data through ``_initialize_nltk``.
        """
        self.comparison_dir = os.path.join(os.path.dirname(__file__), '..', '..', 'data', 'document_comparison')
        create_directory_if_not_exists(self.comparison_dir)

        # Make sure the NLTK data used for sentence splitting is available
        self._initialize_nltk()

        class _UnicodeWordTokenizer:
            """Tokenizer for rouge_score that keeps non-ASCII (e.g. Arabic) words."""

            def tokenize(self, text):
                return re.findall(r'\w+', text.lower())

        # rouge_score's default tokenizer keeps only [a-z0-9] tokens, so Arabic
        # text would be reduced to nothing and every ROUGE metric would be 0.
        rouge_types = ['rouge1', 'rouge2', 'rougeL']
        try:
            self.rouge_scorer = rouge_scorer.RougeScorer(
                rouge_types, use_stemmer=False, tokenizer=_UnicodeWordTokenizer()
            )
        except TypeError:
            # Older rouge_score releases do not accept a `tokenizer` argument;
            # fall back to the default scorer rather than failing to start.
            self.rouge_scorer = rouge_scorer.RougeScorer(rouge_types, use_stemmer=False)
        
    def _initialize_nltk(self):
        """Ensure the NLTK resources used by the comparator are installed.

        Each required package is looked up under its real resource category and
        downloaded only when it is missing. Sentence tokenisation is then
        smoke-tested; if it still cannot find its model, the tokenizer data is
        downloaded and the user is informed through Streamlit.
        """
        import nltk
        from nltk.tokenize import sent_tokenize

        # nltk.data.find() needs the category prefix: only punkt lives under
        # tokenizers/, while stopwords and wordnet are corpora. Looking all of
        # them up under tokenizers/ would re-download the corpora on every run.
        required_resources = {
            'punkt': 'tokenizers/punkt',
            'stopwords': 'corpora/stopwords',
            'wordnet': 'corpora/wordnet',
        }
        for package, resource_path in required_resources.items():
            try:
                nltk.data.find(resource_path)
            except LookupError:
                print(f"تنزيل حزمة NLTK: {package}")
                nltk.download(package, quiet=True)

        # Verify that sentence splitting actually works with the installed data
        try:
            sent_tokenize("This is a test sentence.")
        except LookupError:
            # Newer NLTK releases load the sentence model from 'punkt_tab'
            # instead of 'punkt', so fetch both.
            results = [nltk.download(name, quiet=True) for name in ('punkt', 'punkt_tab')]
            if any(results):
                st.info("تم تنزيل حزمة NLTK punkt بنجاح للاستخدام في مقارنة المستندات.")
    
    def _preprocess_text(self, text):
        """معالجة النص قبل التحليل"""
        # إزالة الأرقام والرموز الخاصة والمسافات الزائدة
        text = re.sub(r'\s+', ' ', text)
        text = text.strip()
        return text
    
    def _segment_text(self, text):
        """تقسيم النص إلى فقرات وجمل"""
        # تقسيم النص إلى فقرات
        paragraphs = [p.strip() for p in text.split('\n') if p.strip()]
        
        # تقسيم كل فقرة إلى جمل
        sentences = []
        for paragraph in paragraphs:
            paragraph_sentences = sent_tokenize(paragraph)
            sentences.extend(paragraph_sentences)
        
        return paragraphs, sentences
    
    def _calculate_similarity(self, text1, text2):
        """Score how similar two texts are.

        Combines the character-level Levenshtein ratio with the mean of the
        ROUGE-1, ROUGE-2 and ROUGE-L F-measures produced by
        ``self.rouge_scorer``.

        Returns:
            dict: the individual metrics plus ``avg_rouge`` and
            ``combined_score`` (the mean of the Levenshtein ratio and
            ``avg_rouge``).
        """
        edit_ratio = Levenshtein.ratio(text1, text2)

        scores = self.rouge_scorer.score(text1, text2)
        f1 = {name: scores[name].fmeasure for name in ('rouge1', 'rouge2', 'rougeL')}
        rouge_mean = (f1['rouge1'] + f1['rouge2'] + f1['rougeL']) / 3

        return {
            'levenshtein_ratio': edit_ratio,
            'rouge1_f1': f1['rouge1'],
            'rouge2_f1': f1['rouge2'],
            'rougeL_f1': f1['rougeL'],
            'avg_rouge': rouge_mean,
            # Equal weight for the edit-distance and the n-gram overlap views
            'combined_score': (edit_ratio + rouge_mean) / 2
        }
    
    def _extract_text_from_pdf(self, pdf_file):
        """Extract the text of every page of a PDF.

        Args:
            pdf_file: Anything ``PdfReader`` accepts (passed through as is).

        Returns:
            str: The page texts, each followed by a newline. If reading fails
            the error is shown through Streamlit and whatever was extracted
            before the failure is returned (possibly an empty string).
        """
        page_texts = []
        try:
            for page in PdfReader(pdf_file).pages:
                page_texts.append(page.extract_text() + "\n")
        except Exception as e:
            st.error(f"خطأ في قراءة ملف PDF: {e}")

        # Joining after the try block keeps the pages read before any failure
        return "".join(page_texts)
    
    def get_document_diff(self, text1, text2, title1="المستند الأول", title2="المستند الثاني"):
        """حساب الفروقات بين نصين"""
        if not text1 or not text2:
            return {
                "title1": title1,
                "title2": title2,
                "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                "similarity": 0,
                "similarity_score": 0,
                "text_diffs": [],
                "summary": "أحد المستندات فارغ، لا يمكن إجراء المقارنة."
            }
        
        # معالجة النصوص
        preprocessed_text1 = self._preprocess_text(text1)
        preprocessed_text2 = self._preprocess_text(text2)
        
        # حساب نسبة التشابه الإجمالية
        similarity_metrics = self._calculate_similarity(preprocessed_text1, preprocessed_text2)
        similarity_score = similarity_metrics['combined_score']
        similarity_percentage = int(similarity_score * 100)
        
        # تقسيم النصوص إلى فقرات وجمل
        paragraphs1, sentences1 = self._segment_text(text1)
        paragraphs2, sentences2 = self._segment_text(text2)
        
        # تحديد الفروقات بين الجمل باستخدام difflib
        differ = difflib.Differ()
        sentence_diffs = []
        
        # مصفوفة التشابه بين الجمل
        similarity_matrix = np.zeros((len(sentences1), len(sentences2)))
        for i, s1 in enumerate(sentences1):
            for j, s2 in enumerate(sentences2):
                similarity_matrix[i, j] = Levenshtein.ratio(s1, s2)
        
        # تحديد أفضل مطابقة لكل جملة
        matched_sentences2 = set()  # تتبع الجمل المطابقة في المستند الثاني
        
        for i, s1 in enumerate(sentences1):
            if len(s1.split()) < 3:  # تجاهل الجمل القصيرة جداً
                continue
                
            best_match_idx = -1
            best_match_score = 0.7  # عتبة التشابه
            
            for j, s2 in enumerate(sentences2):
                if j in matched_sentences2:
                    continue  # تجاهل الجمل التي تم مطابقتها بالفعل
                
                if len(s2.split()) < 3:  # تجاهل الجمل القصيرة جداً
                    continue
                
                score = similarity_matrix[i, j]
                if score > best_match_score and score > 0.7:
                    best_match_score = score
                    best_match_idx = j
            
            if best_match_idx != -1:
                # وجدنا تطابق، تحديد الفروقات باستخدام difflib
                s2 = sentences2[best_match_idx]
                diff = list(differ.compare(s1.split(), s2.split()))
                
                # تحويل مخرجات difflib إلى تنسيق أسهل للاستخدام
                formatted_diff = []
                for token in diff:
                    if token.startswith('  '):  # متطابق
                        formatted_diff.append({'text': token[2:], 'status': 'same'})
                    elif token.startswith('- '):  # حذف
                        formatted_diff.append({'text': token[2:], 'status': 'removed'})
                    elif token.startswith('+ '):  # إضافة
                        formatted_diff.append({'text': token[2:], 'status': 'added'})
                
                sentence_diffs.append({
                    'doc1_idx': i,
                    'doc2_idx': best_match_idx,
                    'doc1_text': s1,
                    'doc2_text': s2,
                    'similarity': best_match_score,
                    'diff': formatted_diff
                })
                
                matched_sentences2.add(best_match_idx)
            else:
                # لم نجد تطابق، هذه الجملة غير موجودة في المستند الثاني
                sentence_diffs.append({
                    'doc1_idx': i,
                    'doc2_idx': -1,
                    'doc1_text': s1,
                    'doc2_text': "",
                    'similarity': 0,
                    'diff': [{'text': word, 'status': 'removed'} for word in s1.split()]
                })
        
        # تحديد الجمل الجديدة في المستند الثاني
        for j, s2 in enumerate(sentences2):
            if j not in matched_sentences2 and len(s2.split()) >= 3:
                sentence_diffs.append({
                    'doc1_idx': -1,
                    'doc2_idx': j,
                    'doc1_text': "",
                    'doc2_text': s2,
                    'similarity': 0,
                    'diff': [{'text': word, 'status': 'added'} for word in s2.split()]
                })
        
        # ترتيب الفروقات حسب الموقع في المستند الأول
        sentence_diffs.sort(key=lambda x: (x['doc1_idx'] if x['doc1_idx'] != -1 else float('inf'), x['doc2_idx'] if x['doc2_idx'] != -1 else float('inf')))
        
        # تحديد الفقرات المضافة والمحذوفة
        paragraph_diffs = []
        matched_paragraphs2 = set()
        
        for i, p1 in enumerate(paragraphs1):
            if len(p1.split()) < 5:  # تجاهل الفقرات القصيرة جداً
                continue
                
            best_match_idx = -1
            best_match_score = 0.6  # عتبة التشابه
            
            for j, p2 in enumerate(paragraphs2):
                if j in matched_paragraphs2:
                    continue
                
                if len(p2.split()) < 5:
                    continue
                
                score = Levenshtein.ratio(p1, p2)
                if score > best_match_score:
                    best_match_score = score
                    best_match_idx = j
            
            if best_match_idx != -1:
                # وجدنا تطابق
                p2 = paragraphs2[best_match_idx]
                paragraph_diffs.append({
                    'doc1_idx': i,
                    'doc2_idx': best_match_idx,
                    'doc1_text': p1,
                    'doc2_text': p2,
                    'similarity': best_match_score,
                    'status': 'modified' if best_match_score < 0.9 else 'same'
                })
                
                matched_paragraphs2.add(best_match_idx)
            else:
                # لم نجد تطابق، هذه الفقرة غير موجودة في المستند الثاني
                paragraph_diffs.append({
                    'doc1_idx': i,
                    'doc2_idx': -1,
                    'doc1_text': p1,
                    'doc2_text': "",
                    'similarity': 0,
                    'status': 'removed'
                })
        
        # تحديد الفقرات الجديدة في المستند الثاني
        for j, p2 in enumerate(paragraphs2):
            if j not in matched_paragraphs2 and len(p2.split()) >= 5:
                paragraph_diffs.append({
                    'doc1_idx': -1,
                    'doc2_idx': j,
                    'doc1_text': "",
                    'doc2_text': p2,
                    'similarity': 0,
                    'status': 'added'
                })
        
        # ترتيب الفروقات حسب الموقع
        paragraph_diffs.sort(key=lambda x: (x['doc1_idx'] if x['doc1_idx'] != -1 else float('inf'), x['doc2_idx'] if x['doc2_idx'] != -1 else float('inf')))
        
        # تحليل الفروقات للحصول على إحصائيات
        total_paragraphs = len(paragraphs1) + len(paragraphs2)
        removed_paragraphs = sum(1 for p in paragraph_diffs if p['status'] == 'removed')
        added_paragraphs = sum(1 for p in paragraph_diffs if p['status'] == 'added')
        modified_paragraphs = sum(1 for p in paragraph_diffs if p['status'] == 'modified')
        
        # تحليل الكلمات المضافة، المحذوفة والمتغيرة
        added_words = []
        removed_words = []
        modified_contexts = []
        
        for diff in sentence_diffs:
            for token in diff['diff']:
                if token['status'] == 'added':
                    added_words.append(token['text'])
                elif token['status'] == 'removed':
                    removed_words.append(token['text'])
                    
            # جمع السياقات المتغيرة للتحليل
            if diff['doc1_idx'] != -1 and diff['doc2_idx'] != -1 and diff['similarity'] < 0.9:
                modified_contexts.append({
                    'doc1_text': diff['doc1_text'],
                    'doc2_text': diff['doc2_text'],
                    'similarity': diff['similarity']
                })
        
        # إنشاء التقرير النهائي
        comparison_report = {
            "title1": title1,
            "title2": title2,
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "similarity": similarity_percentage,
            "similarity_metrics": similarity_metrics,
            "sentence_diffs": sentence_diffs,
            "paragraph_diffs": paragraph_diffs,
            "statistics": {
                "doc1_paragraphs": len(paragraphs1),
                "doc2_paragraphs": len(paragraphs2),
                "doc1_sentences": len(sentences1),
                "doc2_sentences": len(sentences2),
                "removed_paragraphs": removed_paragraphs,
                "added_paragraphs": added_paragraphs,
                "modified_paragraphs": modified_paragraphs,
                "removed_words_count": len(removed_words),
                "added_words_count": len(added_words),
                "top_removed_words": Counter(removed_words).most_common(10),
                "top_added_words": Counter(added_words).most_common(10)
            },
            "modified_contexts": modified_contexts[:10],  # أهم 10 سياقات متغيرة
            "summary": self._generate_comparison_summary(
                similarity_percentage,
                len(paragraphs1),
                len(paragraphs2),
                removed_paragraphs,
                added_paragraphs,
                modified_paragraphs,
                len(removed_words),
                len(added_words)
            )
        }
        
        # حفظ تقرير المقارنة
        self._save_comparison_report(comparison_report, title1, title2)
        
        return comparison_report
    
    def _generate_comparison_summary(self, similarity, p1_count, p2_count, removed_p, added_p, modified_p, removed_w, added_w):
        """إنشاء ملخص للمقارنة بين المستندين"""
        if similarity >= 90:
            similarity_description = "متطابقة بشكل كبير"
        elif similarity >= 70:
            similarity_description = "متشابهة"
        elif similarity >= 50:
            similarity_description = "متشابهة جزئياً"
        else:
            similarity_description = "مختلفة"
        
        summary = f"المستندان {similarity_description} بنسبة {similarity}%. "
        
        # وصف التغييرات في الفقرات
        if removed_p > 0 or added_p > 0 or modified_p > 0:
            changes = []
            if removed_p > 0:
                changes.append(f"تم حذف {removed_p} فقرة")
            if added_p > 0:
                changes.append(f"تم إضافة {added_p} فقرة")
            if modified_p > 0:
                changes.append(f"تم تعديل {modified_p} فقرة")
            
            summary += "التغييرات تشمل: " + "، ".join(changes) + ". "
        
        # وصف التغييرات في الكلمات
        if removed_w > 0 or added_w > 0:
            word_changes = []
            if removed_w > 0:
                word_changes.append(f"تم حذف {removed_w} كلمة")
            if added_w > 0:
                word_changes.append(f"تم إضافة {added_w} كلمة")
            
            summary += "على مستوى الكلمات: " + "، ".join(word_changes) + "."
        
        return summary
    
    def _save_comparison_report(self, report, title1, title2):
        """حفظ تقرير المقارنة"""
        # إنشاء اسم ملف فريد
        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
        filename = f"compare_{title1.replace(' ', '_')}_{title2.replace(' ', '_')}_{timestamp}.json"
        file_path = os.path.join(self.comparison_dir, filename)
        
        try:
            with open(file_path, 'w', encoding='utf-8') as f:
                json.dump(report, f, ensure_ascii=False, indent=2)
        except Exception as e:
            print(f"خطأ في حفظ تقرير المقارنة: {e}")
    
    def load_comparison_report(self, filename):
        """تحميل تقرير مقارنة محفوظ"""
        file_path = os.path.join(self.comparison_dir, filename)
        
        if not os.path.exists(file_path):
            return None
        
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                report = json.load(f)
                return report
        except Exception as e:
            print(f"خطأ في تحميل تقرير المقارنة: {e}")
            return None
    
    def get_comparison_reports(self):
        """الحصول على قائمة تقارير المقارنة المحفوظة"""
        reports = []
        
        for filename in os.listdir(self.comparison_dir):
            if filename.startswith("compare_") and filename.endswith(".json"):
                file_path = os.path.join(self.comparison_dir, filename)
                try:
                    with open(file_path, 'r', encoding='utf-8') as f:
                        report = json.load(f)
                        reports.append({
                            "filename": filename,
                            "title1": report.get("title1", "مستند 1"),
                            "title2": report.get("title2", "مستند 2"),
                            "timestamp": report.get("timestamp", ""),
                            "similarity": report.get("similarity", 0)
                        })
                except Exception as e:
                    print(f"خطأ في قراءة تقرير المقارنة {filename}: {e}")
        
        # ترتيب التقارير حسب التاريخ (الأحدث أولاً)
        reports.sort(key=lambda x: x["timestamp"], reverse=True)
        
        return reports
    
    def extract_key_differences(self, comparison_report):
        """استخراج الاختلافات الرئيسية من تقرير المقارنة"""
        if not comparison_report or "paragraph_diffs" not in comparison_report:
            return []
        
        key_differences = []
        
        # استخراج الفقرات المضافة
        added_paragraphs = [p for p in comparison_report["paragraph_diffs"] if p["status"] == "added"]
        if added_paragraphs:
            key_differences.append({
                "type": "added_paragraphs",
                "label": "فقرات مضافة",
                "count": len(added_paragraphs),
                "items": [p["doc2_text"] for p in added_paragraphs]
            })
        
        # استخراج الفقرات المحذوفة
        removed_paragraphs = [p for p in comparison_report["paragraph_diffs"] if p["status"] == "removed"]
        if removed_paragraphs:
            key_differences.append({
                "type": "removed_paragraphs",
                "label": "فقرات محذوفة",
                "count": len(removed_paragraphs),
                "items": [p["doc1_text"] for p in removed_paragraphs]
            })
        
        # استخراج الفقرات المعدلة
        modified_paragraphs = [p for p in comparison_report["paragraph_diffs"] if p["status"] == "modified"]
        if modified_paragraphs:
            modified_items = []
            for p in modified_paragraphs:
                modified_items.append({
                    "doc1_text": p["doc1_text"],
                    "doc2_text": p["doc2_text"],
                    "similarity": p["similarity"]
                })
            
            key_differences.append({
                "type": "modified_paragraphs",
                "label": "فقرات معدلة",
                "count": len(modified_paragraphs),
                "items": modified_items
            })
        
        # استخراج الكلمات الرئيسية المضافة والمحذوفة
        if "statistics" in comparison_report:
            stats = comparison_report["statistics"]
            
            if "top_added_words" in stats and stats["top_added_words"]:
                key_differences.append({
                    "type": "added_words",
                    "label": "الكلمات المضافة الأكثر تكراراً",
                    "count": stats["added_words_count"],
                    "items": stats["top_added_words"]
                })
            
            if "top_removed_words" in stats and stats["top_removed_words"]:
                key_differences.append({
                    "type": "removed_words",
                    "label": "الكلمات المحذوفة الأكثر تكراراً",
                    "count": stats["removed_words_count"],
                    "items": stats["top_removed_words"]
                })
        
        return key_differences
    
    def analyze_legal_changes(self, comparison_report):
        """تحليل التغييرات القانونية في المستندات"""
        if not comparison_report:
            return []
        
        # قائمة المصطلحات القانونية الهامة للبحث عنها
        legal_terms = {
            "payment": ["دفع", "سداد", "مستحقات", "مقابل", "رسوم", "تكلفة", "مبلغ", "أتعاب"],
            "deadlines": ["ميعاد", "موعد", "تاريخ", "أجل", "مدة", "فترة", "مهلة"],
            "liability": ["مسؤولية", "التزام", "تحمل", "تعويض", "ضمان", "كفالة"],
            "termination": ["إنهاء", "فسخ", "إلغاء", "إيقاف", "إنهاء العلاقة"],
            "dispute": ["نزاع", "خلاف", "منازعة", "اعتراض", "تحكيم", "قضاء", "محكمة"],
            "penalties": ["غرامة", "عقوبة", "شرط جزائي", "جزاء", "تعويض"],
            "conditions": ["شرط", "بند", "حالة", "اشتراط", "متطلب"],
            "rights": ["حق", "صلاحية", "امتياز", "منفعة", "ملكية", "تصرف"],
            "obligations": ["التزام", "واجب", "تعهد", "إلزام", "لازم"]
        }
        
        # البحث عن التغييرات المتعلقة بالمصطلحات القانونية
        legal_changes = []
        
        if "sentence_diffs" in comparison_report:
            for category, terms in legal_terms.items():
                category_changes = []
                
                for diff in comparison_report["sentence_diffs"]:
                    # فحص فقط الجمل المعدلة (المتطابقة جزئياً)
                    if diff["doc1_idx"] != -1 and diff["doc2_idx"] != -1 and diff["similarity"] < 0.9:
                        # فحص ما إذا كانت الجملة تحتوي على أي من المصطلحات القانونية
                        contains_term = False
                        for term in terms:
                            if term in diff["doc1_text"].lower() or term in diff["doc2_text"].lower():
                                contains_term = True
                                break
                        
                        if contains_term:
                            category_changes.append({
                                "doc1_text": diff["doc1_text"],
                                "doc2_text": diff["doc2_text"],
                                "similarity": diff["similarity"]
                            })
                
                if category_changes:
                    legal_category_name = {
                        "payment": "الدفع والمستحقات المالية",
                        "deadlines": "المواعيد والفترات الزمنية",
                        "liability": "المسؤولية والالتزامات",
                        "termination": "إنهاء العقد أو فسخه",
                        "dispute": "النزاعات والخلافات",
                        "penalties": "الغرامات والعقوبات",
                        "conditions": "الشروط والبنود",
                        "rights": "الحقوق والصلاحيات",
                        "obligations": "الالتزامات والواجبات"
                    }
                    
                    legal_changes.append({
                        "category": category,
                        "label": legal_category_name.get(category, category),
                        "count": len(category_changes),
                        "changes": category_changes
                    })
        
        # ترتيب التغييرات حسب الأهمية (عدد التغييرات)
        legal_changes.sort(key=lambda x: x["count"], reverse=True)
        
        return legal_changes
    
    def analyze_price_changes(self, text1, text2):
        """تحليل التغييرات في الأسعار بين نسختي المستند"""
        # البحث عن الأرقام متبوعة بعملة أو تعبيرات تدل على المبالغ
        price_pattern = r'(\d{1,3}(?:,\d{3})*(?:\.\d+)?)\s*(?:ريال|دولار|يورو|جنيه|درهم|دينار|SAR|USD|EUR|SR|$|€|£)'
        amount_pattern = r'مبلغ[\s\w]*?(\d{1,3}(?:,\d{3})*(?:\.\d+)?)'
        
        # استخراج الأسعار من كل نص
        prices1 = re.findall(price_pattern, text1)
        prices1.extend(re.findall(amount_pattern, text1))
        prices1 = [p.replace(',', '') for p in prices1]
        prices1 = [float(p) for p in prices1 if p]
        
        prices2 = re.findall(price_pattern, text2)
        prices2.extend(re.findall(amount_pattern, text2))
        prices2 = [p.replace(',', '') for p in prices2]
        prices2 = [float(p) for p in prices2 if p]
        
        # تحليل التغييرات
        price_diff = {
            "doc1_prices_count": len(prices1),
            "doc2_prices_count": len(prices2),
            "doc1_total": sum(prices1) if prices1 else 0,
            "doc2_total": sum(prices2) if prices2 else 0,
            "doc1_average": sum(prices1) / len(prices1) if prices1 else 0,
            "doc2_average": sum(prices2) / len(prices2) if prices2 else 0,
            "doc1_min": min(prices1) if prices1 else 0,
            "doc2_min": min(prices2) if prices2 else 0,
            "doc1_max": max(prices1) if prices1 else 0,
            "doc2_max": max(prices2) if prices2 else 0
        }
        
        # حساب التغيير في إجمالي الأسعار
        if price_diff["doc1_total"] > 0:
            price_diff["total_change_percentage"] = ((price_diff["doc2_total"] - price_diff["doc1_total"]) / price_diff["doc1_total"]) * 100
        else:
            price_diff["total_change_percentage"] = 0
        
        return price_diff
    
    def analyze_date_changes(self, text1, text2):
        """تحليل التغييرات في التواريخ بين نسختي المستند"""
        # البحث عن التواريخ بالصيغ المختلفة
        date_patterns = [
            r'\d{1,2}/\d{1,2}/\d{2,4}',  # DD/MM/YYYY or MM/DD/YYYY
            r'\d{1,2}-\d{1,2}-\d{2,4}',  # DD-MM-YYYY or MM-DD-YYYY
            r'\d{2,4}/\d{1,2}/\d{1,2}',  # YYYY/MM/DD
            r'\d{2,4}-\d{1,2}-\d{1,2}',  # YYYY-MM-DD
            r'\d{1,2}\s+(?:يناير|فبراير|مارس|أبريل|مايو|يونيو|يوليو|أغسطس|سبتمبر|أكتوبر|نوفمبر|ديسمبر)\s+\d{2,4}'  # DD شهر YYYY
        ]
        
        dates1 = []
        dates2 = []
        
        for pattern in date_patterns:
            dates1.extend(re.findall(pattern, text1))
            dates2.extend(re.findall(pattern, text2))
        
        # إنشاء تقرير التغييرات في التواريخ
        date_changes = {
            "doc1_dates_count": len(dates1),
            "doc2_dates_count": len(dates2),
            "doc1_dates": dates1[:10],  # أول 10 تواريخ فقط
            "doc2_dates": dates2[:10],
            "common_dates": list(set(dates1).intersection(set(dates2))),
            "removed_dates": list(set(dates1) - set(dates2)),
            "added_dates": list(set(dates2) - set(dates1))
        }
        
        return date_changes
    
    def render_document_comparison(self, text1, text2, title1="المستند الأول", title2="المستند الثاني"):
        """عرض مقارنة المستندات بالواجهة التفاعلية"""
        st.markdown("<h2 class='module-title'>مقارنة المستندات المتقدمة</h2>", unsafe_allow_html=True)
        
        if not text1 or not text2:
            st.warning("يرجى توفير نصوص المستندين للمقارنة")
            return
        
        with st.spinner("جاري تحليل ومقارنة المستندين..."):
            # إجراء المقارنة
            comparison_report = self.get_document_diff(text1, text2, title1, title2)
            
            # تحليل التغييرات القانونية
            legal_changes = self.analyze_legal_changes(comparison_report)
            
            # تحليل التغييرات في الأسعار والتواريخ
            price_changes = self.analyze_price_changes(text1, text2)
            date_changes = self.analyze_date_changes(text1, text2)
        
        # عرض ملخص المقارنة
        st.markdown("<h3>ملخص المقارنة</h3>", unsafe_allow_html=True)
        
        col1, col2, col3 = st.columns([1, 1, 1])
        
        with col1:
            similarity = comparison_report["similarity"]
            color = "#00b894" if similarity >= 80 else "#fdcb6e" if similarity >= 50 else "#d63031"
            
            st.markdown(f"""
            <div class="similarity-card">
                <div class="similarity-title">نسبة التشابه الإجمالية</div>
                <div class="similarity-score" style="color: {color};">{similarity}%</div>
                <div class="similarity-info">تم تحليل {comparison_report["statistics"]["doc1_paragraphs"]} فقرة في {title1} و {comparison_report["statistics"]["doc2_paragraphs"]} فقرة في {title2}</div>
            </div>
            """, unsafe_allow_html=True)
        
        with col2:
            st.markdown(f"""
            <div class="changes-card">
                <div class="changes-title">ملخص التغييرات</div>
                <div class="changes-list">
                    <div class="change-item">
                        <span class="change-label">فقرات محذوفة:</span>
                        <span class="change-value">{comparison_report["statistics"]["removed_paragraphs"]}</span>
                    </div>
                    <div class="change-item">
                        <span class="change-label">فقرات مضافة:</span>
                        <span class="change-value">{comparison_report["statistics"]["added_paragraphs"]}</span>
                    </div>
                    <div class="change-item">
                        <span class="change-label">فقرات معدلة:</span>
                        <span class="change-value">{comparison_report["statistics"]["modified_paragraphs"]}</span>
                    </div>
                </div>
            </div>
            """, unsafe_allow_html=True)
        
        with col3:
            st.markdown(f"""
            <div class="words-card">
                <div class="words-title">تغييرات الكلمات</div>
                <div class="words-list">
                    <div class="words-item">
                        <span class="words-label">كلمات محذوفة:</span>
                        <span class="words-value">{comparison_report["statistics"]["removed_words_count"]}</span>
                    </div>
                    <div class="words-item">
                        <span class="words-label">كلمات مضافة:</span>
                        <span class="words-value">{comparison_report["statistics"]["added_words_count"]}</span>
                    </div>
                </div>
            </div>
            """, unsafe_allow_html=True)
        
        # عرض ملخص نصي
        st.markdown(f"""
        <div class="text-summary">
            {comparison_report["summary"]}
        </div>
        """, unsafe_allow_html=True)
        
        # عرض تحليل التغييرات القانونية
        st.markdown("<h3>تحليل التغييرات القانونية</h3>", unsafe_allow_html=True)
        
        if legal_changes:
            tabs = st.tabs([change["label"] for change in legal_changes])
            
            for i, tab in enumerate(tabs):
                with tab:
                    st.markdown(f"**عدد التغييرات: {legal_changes[i]['count']}**")
                    
                    for j, change in enumerate(legal_changes[i]["changes"]):
                        col1, col2 = st.columns(2)
                        with col1:
                            st.markdown(f"**{title1}:**")
                            st.markdown(f"<div class='diff-text diff-old'>{change['doc1_text']}</div>", unsafe_allow_html=True)
                        with col2:
                            st.markdown(f"**{title2}:**")
                            st.markdown(f"<div class='diff-text diff-new'>{change['doc2_text']}</div>", unsafe_allow_html=True)
                        
                        if j < len(legal_changes[i]["changes"]) - 1:
                            st.markdown("---")
        else:
            st.info("لم يتم اكتشاف تغييرات قانونية هامة بين المستندين.")
        
        # عرض الرسوم البيانية للتغييرات
        st.markdown("<h3>رسوم بيانية للتغييرات</h3>", unsafe_allow_html=True)
        
        col1, col2 = st.columns(2)
        
        with col1:
            # رسم بياني لتوزيع أنواع التغييرات في الفقرات
            stats = comparison_report["statistics"]
            fig = px.pie(
                names=["فقرات متطابقة", "فقرات معدلة", "فقرات محذوفة", "فقرات مضافة"],
                values=[
                    stats["doc1_paragraphs"] - stats["removed_paragraphs"] - stats["modified_paragraphs"],
                    stats["modified_paragraphs"],
                    stats["removed_paragraphs"],
                    stats["added_paragraphs"]
                ],
                title="توزيع التغييرات في الفقرات",
                color_discrete_sequence=["#00b894", "#fdcb6e", "#d63031", "#0984e3"]
            )
            
            fig.update_layout(
                font=dict(family="Arial, sans-serif", size=14),
                height=350
            )
            
            st.plotly_chart(fig, use_container_width=True)
        
        with col2:
            # رسم بياني للكلمات المضافة والمحذوفة الأكثر تكراراً
            words_data = []
            
            for word, count in comparison_report["statistics"]["top_removed_words"]:
                if len(word) > 1:  # تجاهل الأحرف المفردة
                    words_data.append({"word": word, "count": count, "type": "محذوفة"})
            
            for word, count in comparison_report["statistics"]["top_added_words"]:
                if len(word) > 1:  # تجاهل الأحرف المفردة
                    words_data.append({"word": word, "count": count, "type": "مضافة"})
            
            if words_data:
                words_df = pd.DataFrame(words_data)
                
                fig = px.bar(
                    words_df,
                    x="word",
                    y="count",
                    color="type",
                    title="الكلمات المضافة والمحذوفة الأكثر تكراراً",
                    labels={"word": "الكلمة", "count": "عدد المرات", "type": "النوع"},
                    color_discrete_map={"محذوفة": "#d63031", "مضافة": "#0984e3"}
                )
                
                fig.update_layout(
                    font=dict(family="Arial, sans-serif", size=14),
                    height=350
                )
                
                st.plotly_chart(fig, use_container_width=True)
            else:
                st.info("لا توجد بيانات كافية للكلمات المضافة والمحذوفة.")
        
        # عرض تحليل الأسعار والتواريخ
        col1, col2 = st.columns(2)
        
        with col1:
            st.markdown("<h3>تحليل التغييرات في الأسعار</h3>", unsafe_allow_html=True)
            
            if price_changes["doc1_prices_count"] > 0 or price_changes["doc2_prices_count"] > 0:
                price_change_direction = "زيادة" if price_changes["total_change_percentage"] > 0 else "نقص"
                price_change_color = "#d63031" if price_changes["total_change_percentage"] > 0 else "#00b894"
                
                st.markdown(f"""
                <div class="price-analysis">
                    <div class="price-summary">تغيير في إجمالي الأسعار بنسبة <span style="color: {price_change_color}; font-weight: bold;">{abs(price_changes['total_change_percentage']):.2f}% ({price_change_direction})</span></div>
                    <div class="price-details">
                        <div class="price-row">
                            <div class="price-label"></div>
                            <div class="price-value-header">{title1}</div>
                            <div class="price-value-header">{title2}</div>
                        </div>
                        <div class="price-row">
                            <div class="price-label">عدد الأسعار:</div>
                            <div class="price-value">{price_changes['doc1_prices_count']}</div>
                            <div class="price-value">{price_changes['doc2_prices_count']}</div>
                        </div>
                        <div class="price-row">
                            <div class="price-label">الإجمالي:</div>
                            <div class="price-value">{price_changes['doc1_total']:,.2f}</div>
                            <div class="price-value">{price_changes['doc2_total']:,.2f}</div>
                        </div>
                        <div class="price-row">
                            <div class="price-label">المتوسط:</div>
                            <div class="price-value">{price_changes['doc1_average']:,.2f}</div>
                            <div class="price-value">{price_changes['doc2_average']:,.2f}</div>
                        </div>
                        <div class="price-row">
                            <div class="price-label">الحد الأدنى:</div>
                            <div class="price-value">{price_changes['doc1_min']:,.2f}</div>
                            <div class="price-value">{price_changes['doc2_min']:,.2f}</div>
                        </div>
                        <div class="price-row">
                            <div class="price-label">الحد الأقصى:</div>
                            <div class="price-value">{price_changes['doc1_max']:,.2f}</div>
                            <div class="price-value">{price_changes['doc2_max']:,.2f}</div>
                        </div>
                    </div>
                </div>
                """, unsafe_allow_html=True)
                
                # رسم بياني للأسعار
                if price_changes["doc1_prices_count"] > 0 and price_changes["doc2_prices_count"] > 0:
                    price_chart_data = [
                        {"document": title1, "metric": "الإجمالي", "value": price_changes["doc1_total"]},
                        {"document": title2, "metric": "الإجمالي", "value": price_changes["doc2_total"]},
                        {"document": title1, "metric": "المتوسط", "value": price_changes["doc1_average"]},
                        {"document": title2, "metric": "المتوسط", "value": price_changes["doc2_average"]},
                        {"document": title1, "metric": "الحد الأقصى", "value": price_changes["doc1_max"]},
                        {"document": title2, "metric": "الحد الأقصى", "value": price_changes["doc2_max"]}
                    ]
                    
                    price_df = pd.DataFrame(price_chart_data)
                    
                    fig = px.bar(
                        price_df,
                        x="metric",
                        y="value",
                        color="document",
                        barmode="group",
                        title="مقارنة الأسعار بين المستندين",
                        color_discrete_map={title1: "#0984e3", title2: "#00b894"}
                    )
                    
                    fig.update_layout(
                        font=dict(family="Arial, sans-serif", size=14),
                        height=350
                    )
                    
                    st.plotly_chart(fig, use_container_width=True)
            else:
                st.info("لم يتم اكتشاف أي أسعار في المستندين.")
        
        with col2:
            st.markdown("<h3>تحليل التغييرات في التواريخ</h3>", unsafe_allow_html=True)
            
            if date_changes["doc1_dates_count"] > 0 or date_changes["doc2_dates_count"] > 0:
                st.markdown(f"""
                <div class="date-analysis">
                    <div class="date-summary">تم اكتشاف {date_changes['doc1_dates_count']} تاريخ في {title1} و {date_changes['doc2_dates_count']} تاريخ في {title2}</div>
                    <div class="date-stats">
                        <div class="date-stat">
                            <span class="date-label">تواريخ مشتركة:</span>
                            <span class="date-value">{len(date_changes['common_dates'])}</span>
                        </div>
                        <div class="date-stat">
                            <span class="date-label">تواريخ محذوفة:</span>
                            <span class="date-value">{len(date_changes['removed_dates'])}</span>
                        </div>
                        <div class="date-stat">
                            <span class="date-label">تواريخ مضافة:</span>
                            <span class="date-value">{len(date_changes['added_dates'])}</span>
                        </div>
                    </div>
                </div>
                """, unsafe_allow_html=True)
                
                # عرض التواريخ المحذوفة والمضافة
                if date_changes["removed_dates"]:
                    st.markdown("**التواريخ المحذوفة:**")
                    for date in date_changes["removed_dates"][:10]:  # عرض أول 10 فقط إذا كان هناك الكثير
                        st.markdown(f"<div class='diff-text diff-old'>{date}</div>", unsafe_allow_html=True)
                
                if date_changes["added_dates"]:
                    st.markdown("**التواريخ المضافة:**")
                    for date in date_changes["added_dates"][:10]:  # عرض أول 10 فقط
                        st.markdown(f"<div class='diff-text diff-new'>{date}</div>", unsafe_allow_html=True)
                
                # رسم بياني للتواريخ
                date_chart_data = [
                    {"category": "تواريخ مشتركة", "count": len(date_changes["common_dates"])},
                    {"category": "تواريخ محذوفة", "count": len(date_changes["removed_dates"])},
                    {"category": "تواريخ مضافة", "count": len(date_changes["added_dates"])}
                ]
                
                date_df = pd.DataFrame(date_chart_data)
                
                fig = px.bar(
                    date_df,
                    x="category",
                    y="count",
                    title="توزيع التغييرات في التواريخ",
                    color="category",
                    color_discrete_map={
                        "تواريخ مشتركة": "#00b894",
                        "تواريخ محذوفة": "#d63031",
                        "تواريخ مضافة": "#0984e3"
                    }
                )
                
                fig.update_layout(
                    font=dict(family="Arial, sans-serif", size=14),
                    height=350
                )
                
                st.plotly_chart(fig, use_container_width=True)
            else:
                st.info("لم يتم اكتشاف أي تواريخ في المستندين.")
        
        # عرض العرض المرئي للتغييرات بين المستندين
        st.markdown("<h3>العرض المرئي للتغييرات</h3>", unsafe_allow_html=True)
        
        # إضافة خيار لتصفية الفروقات
        st.markdown("#### تصفية الفروقات حسب النوع")
        col1, col2, col3 = st.columns(3)
        
        with col1:
            show_added = st.checkbox("عرض الإضافات", value=True)
        with col2:
            show_removed = st.checkbox("عرض الحذف", value=True)
        with col3:
            show_modified = st.checkbox("عرض التعديلات", value=True)
        
        # تحديد الفروقات للعرض
        filtered_diffs = []
        
        for diff in comparison_report["paragraph_diffs"]:
            if diff["status"] == "added" and show_added:
                filtered_diffs.append(diff)
            elif diff["status"] == "removed" and show_removed:
                filtered_diffs.append(diff)
            elif diff["status"] == "modified" and show_modified:
                filtered_diffs.append(diff)
        
        # عرض الفروقات
        if filtered_diffs:
            for diff in filtered_diffs:
                if diff["status"] == "added":
                    st.markdown(f"""
                    <div class="diff-block diff-added">
                        <div class="diff-header">
                            <div class="diff-title">فقرة مضافة في {title2}</div>
                        </div>
                        <div class="diff-content">
                            {diff["doc2_text"]}
                        </div>
                    </div>
                    """, unsafe_allow_html=True)
                
                elif diff["status"] == "removed":
                    st.markdown(f"""
                    <div class="diff-block diff-removed">
                        <div class="diff-header">
                            <div class="diff-title">فقرة محذوفة من {title1}</div>
                        </div>
                        <div class="diff-content">
                            {diff["doc1_text"]}
                        </div>
                    </div>
                    """, unsafe_allow_html=True)
                
                elif diff["status"] == "modified":
                    similarity_percentage = int(diff["similarity"] * 100)
                    
                    st.markdown(f"""
                    <div class="diff-block diff-modified">
                        <div class="diff-header">
                            <div class="diff-title">فقرة معدلة (نسبة التشابه: {similarity_percentage}%)</div>
                        </div>
                        <div class="diff-content-container">
                            <div class="diff-content-old">
                                <div class="diff-subtitle">{title1}:</div>
                                {diff["doc1_text"]}
                            </div>
                            <div class="diff-content-new">
                                <div class="diff-subtitle">{title2}:</div>
                                {diff["doc2_text"]}
                            </div>
                        </div>
                    </div>
                    """, unsafe_allow_html=True)
        else:
            st.info("لا توجد فروقات تطابق معايير التصفية المحددة.")
        
        # إضافة CSS للتنسيق
        st.markdown("""
        <style>
        .module-title {
            color: #1E88E5;
            font-size: 1.8rem;
            font-weight: bold;
            margin-bottom: 1rem;
            text-align: center;
        }
        
        .similarity-card, .changes-card, .words-card {
            background-color: #fff;
            border-radius: 8px;
            padding: 1rem;
            box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);
            height: 100%;
            text-align: center;
        }
        
        .similarity-title, .changes-title, .words-title {
            font-weight: bold;
            font-size: 1rem;
            margin-bottom: 0.5rem;
            color: #333;
        }
        
        .similarity-score {
            font-size: 2.5rem;
            font-weight: bold;
            margin-bottom: 0.25rem;
        }
        
        .similarity-info {
            font-size: 0.8rem;
            color: #666;
        }
        
        .changes-list, .words-list {
            text-align: right;
        }
        
        .change-item, .words-item {
            display: flex;
            justify-content: space-between;
            margin-bottom: 0.5rem;
        }
        
        .change-label, .words-label {
            color: #555;
        }
        
        .change-value, .words-value {
            font-weight: bold;
            color: #333;
        }
        
        .text-summary {
            background-color: #f8f9fa;
            border-right: 4px solid #1E88E5;
            padding: 1rem;
            margin: 1rem 0;
            color: #444;
            font-size: 1rem;
            text-align: right;
        }
        
        .diff-text {
            padding: 0.5rem;
            border-radius: 4px;
            margin-bottom: 0.5rem;
            white-space: pre-wrap;
        }
        
        .diff-old {
            background-color: rgba(214, 48, 49, 0.1);
            border-right: 3px solid #d63031;
        }
        
        .diff-new {
            background-color: rgba(9, 132, 227, 0.1);
            border-right: 3px solid #0984e3;
        }
        
        .price-analysis, .date-analysis {
            background-color: #f8f9fa;
            border-radius: 8px;
            padding: 1rem;
            margin-bottom: 1rem;
        }
        
        .price-summary, .date-summary {
            font-size: 1rem;
            margin-bottom: 0.5rem;
            text-align: center;
        }
        
        .price-details {
            margin-top: 1rem;
        }
        
        .price-row {
            display: flex;
            justify-content: space-between;
            margin-bottom: 0.25rem;
            border-bottom: 1px solid #eee;
            padding-bottom: 0.25rem;
        }
        
        .price-label {
            flex: 1;
            text-align: right;
            font-weight: bold;
            color: #555;
        }
        
        .price-value-header {
            flex: 1;
            text-align: center;
            font-weight: bold;
            color: #333;
        }
        
        .price-value {
            flex: 1;
            text-align: center;
            color: #333;
        }
        
        .date-stats {
            display: flex;
            justify-content: space-around;
            margin-top: 0.5rem;
        }
        
        .date-stat {
            text-align: center;
        }
        
        .date-label {
            display: block;
            font-size: 0.9rem;
            color: #555;
        }
        
        .date-value {
            display: block;
            font-size: 1.2rem;
            font-weight: bold;
            color: #333;
        }
        
        .diff-block {
            background-color: #fff;
            border-radius: 8px;
            margin-bottom: 1rem;
            box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
            overflow: hidden;
        }
        
        .diff-header {
            padding: 0.5rem 1rem;
            border-bottom: 1px solid #eee;
        }
        
        .diff-title {
            font-weight: bold;
            color: #333;
        }
        
        .diff-content {
            padding: 1rem;
            white-space: pre-wrap;
            direction: rtl;
            text-align: right;
        }
        
        .diff-content-container {
            display: flex;
            flex-direction: column;
        }
        
        .diff-content-old, .diff-content-new {
            padding: 1rem;
            white-space: pre-wrap;
            direction: rtl;
            text-align: right;
        }
        
        .diff-content-old {
            background-color: rgba(214, 48, 49, 0.05);
            border-bottom: 1px solid #eee;
        }
        
        .diff-content-new {
            background-color: rgba(9, 132, 227, 0.05);
        }
        
        .diff-subtitle {
            font-weight: bold;
            margin-bottom: 0.5rem;
            color: #555;
        }
        
        .diff-added {
            border-right: 4px solid #0984e3;
        }
        
        .diff-removed {
            border-right: 4px solid #d63031;
        }
        
        .diff-modified {
            border-right: 4px solid #fdcb6e;
        }
        
        @media (min-width: 992px) {
            .diff-content-container {
                flex-direction: row;
            }
            
            .diff-content-old, .diff-content-new {
                flex: 1;
            }
            
            .diff-content-old {
                border-bottom: none;
                border-left: 1px solid #eee;
            }
        }
        </style>
        """, unsafe_allow_html=True)
    
    def render_advanced_comparison_tools(self):
        """Render the advanced document-comparison tools page.

        Builds three Streamlit tabs:

        1. Direct text comparison: two titled text areas whose contents are
           handed to ``render_document_comparison``.
        2. PDF comparison: two uploaded PDF files whose text is extracted with
           ``_extract_text_from_pdf`` and then compared the same way.
        3. Saved reports: a table of the reports returned by
           ``get_comparison_reports`` plus, for the selected report, its
           summary, the output of ``extract_key_differences`` and the output
           of ``analyze_legal_changes``.

        Text taken from saved reports is HTML-escaped before it is placed
        inside markup rendered with ``unsafe_allow_html=True``, so document
        content containing ``<``, ``>`` or ``&`` is shown literally instead of
        being interpreted as HTML.

        Returns:
            None. All output goes to the Streamlit page.
        """
        # Function-scope import keeps this method self-contained; the name is
        # only needed for the raw-HTML snippets rendered below.
        from html import escape

        def as_html(value):
            """Return ``value`` as text that is safe to embed in an HTML element."""
            return escape(str(value))

        st.markdown("<h2 class='module-title'>أدوات مقارنة المستندات المتقدمة</h2>", unsafe_allow_html=True)
        
        st.markdown("""
        <div class="module-description">
            استخدم هذه الأدوات لمقارنة مستندات العقود بشكل متقدم، واكتشاف التغييرات والفروقات بين نسخ المستندات المختلفة،
            مع تحليل التغييرات القانونية والمالية والتواريخ.
        </div>
        """, unsafe_allow_html=True)
        
        # One tab per tool.
        tabs = st.tabs([
            "مقارنة نصية مباشرة",
            "مقارنة ملفات PDF",
            "عرض تقارير المقارنة السابقة"
        ])
        
        # --- Tab 1: direct text comparison ---
        with tabs[0]:
            st.markdown("### مقارنة نصية مباشرة")
            
            col1, col2 = st.columns(2)
            
            with col1:
                title1 = st.text_input("عنوان المستند الأول", key="text_title1")
                text1 = st.text_area("نص المستند الأول", height=300, key="text_input1")
            
            with col2:
                title2 = st.text_input("عنوان المستند الثاني", key="text_title2")
                text2 = st.text_area("نص المستند الثاني", height=300, key="text_input2")
            
            if st.button("قارن النصوص", key="compare_text_btn"):
                if text1 and text2:
                    # Fall back to generic titles when the user left them blank.
                    self.render_document_comparison(
                        text1, 
                        text2, 
                        title1 or "المستند الأول", 
                        title2 or "المستند الثاني"
                    )
                else:
                    st.warning("يرجى إدخال نص المستندين للمقارنة")
        
        # --- Tab 2: PDF comparison ---
        with tabs[1]:
            st.markdown("### مقارنة ملفات PDF")
            
            col1, col2 = st.columns(2)
            
            with col1:
                title1_pdf = st.text_input("عنوان المستند الأول", key="pdf_title1")
                uploaded_file1 = st.file_uploader("تحميل المستند الأول (PDF)", type=["pdf"], key="pdf_upload1")
            
            with col2:
                title2_pdf = st.text_input("عنوان المستند الثاني", key="pdf_title2")
                uploaded_file2 = st.file_uploader("تحميل المستند الثاني (PDF)", type=["pdf"], key="pdf_upload2")
            
            if st.button("قارن ملفات PDF", key="compare_pdf_btn"):
                if uploaded_file1 is not None and uploaded_file2 is not None:
                    with st.spinner("جاري استخراج النصوص من ملفات PDF..."):
                        text1_pdf = self._extract_text_from_pdf(uploaded_file1)
                        text2_pdf = self._extract_text_from_pdf(uploaded_file2)
                        
                        if text1_pdf and text2_pdf:
                            # Use the uploaded file names when no title was typed.
                            self.render_document_comparison(
                                text1_pdf, 
                                text2_pdf, 
                                title1_pdf or uploaded_file1.name,
                                title2_pdf or uploaded_file2.name
                            )
                        else:
                            st.error("تعذر استخراج النص من ملفات PDF. يرجى التأكد من أن الملفات تحتوي على نصوص قابلة للاستخراج.")
                else:
                    st.warning("يرجى تحميل ملفي PDF للمقارنة")
        
        # --- Tab 3: previously saved comparison reports ---
        with tabs[2]:
            st.markdown("### تقارير المقارنة السابقة")
            
            reports = self.get_comparison_reports()
            
            if reports:
                # Overview table of every saved report.
                table_rows = [
                    {
                        "التاريخ": report["timestamp"],
                        "المستند الأول": report["title1"],
                        "المستند الثاني": report["title2"],
                        "نسبة التشابه": f"{report['similarity']}%",
                        "الملف": report["filename"]
                    }
                    for report in reports
                ]
                st.dataframe(pd.DataFrame(table_rows))
                
                # Select by position rather than by label: two reports can share
                # the same titles and timestamp, and a label lookup would then
                # always resolve to the first of them.
                report_labels = [
                    f"{r['title1']} و {r['title2']} ({r['timestamp']})" for r in reports
                ]
                report_index = st.selectbox(
                    "اختر تقريراً لعرضه",
                    options=list(range(len(reports))),
                    format_func=lambda idx: report_labels[idx]
                )
                
                if report_index is not None and st.button("عرض التقرير المحدد"):
                    selected_filename = reports[report_index]["filename"]
                    report_data = self.load_comparison_report(selected_filename)
                    
                    if report_data:
                        st.success("تم تحميل تقرير المقارنة بنجاح")
                        
                        # Report summary.
                        st.markdown("### ملخص تقرير المقارنة")
                        st.markdown(f"**نسبة التشابه:** {report_data['similarity']}%")
                        st.markdown(f"**تاريخ المقارنة:** {report_data['timestamp']}")
                        st.markdown(f"**ملخص التغييرات:** {report_data['summary']}")
                        
                        # Key differences.
                        key_differences = self.extract_key_differences(report_data)
                        
                        if key_differences:
                            st.markdown("### الاختلافات الرئيسية")
                            
                            for diff in key_differences:
                                st.markdown(f"#### {diff['label']} ({diff['count']})")
                                
                                if diff["type"] == "added_paragraphs":
                                    for item in diff["items"][:5]:  # first 5 only
                                        st.markdown(f"<div class='diff-text diff-new'>{as_html(item)}</div>", unsafe_allow_html=True)
                                
                                elif diff["type"] == "removed_paragraphs":
                                    for item in diff["items"][:5]:  # first 5 only
                                        st.markdown(f"<div class='diff-text diff-old'>{as_html(item)}</div>", unsafe_allow_html=True)
                                
                                elif diff["type"] == "modified_paragraphs":
                                    for item in diff["items"][:3]:  # first 3 only
                                        col1, col2 = st.columns(2)
                                        with col1:
                                            st.markdown(f"**{report_data['title1']}:**")
                                            st.markdown(f"<div class='diff-text diff-old'>{as_html(item['doc1_text'])}</div>", unsafe_allow_html=True)
                                        with col2:
                                            st.markdown(f"**{report_data['title2']}:**")
                                            st.markdown(f"<div class='diff-text diff-new'>{as_html(item['doc2_text'])}</div>", unsafe_allow_html=True)
                                
                                elif diff["type"] in ["added_words", "removed_words"]:
                                    # Word frequencies as a table; single
                                    # characters are ignored.
                                    word_rows = [
                                        {"الكلمة": word, "عدد المرات": count}
                                        for word, count in diff["items"]
                                        if len(word) > 1
                                    ]
                                    
                                    if word_rows:
                                        st.dataframe(pd.DataFrame(word_rows))
                        
                        # Legal-change analysis.
                        legal_changes = self.analyze_legal_changes(report_data)
                        
                        if legal_changes:
                            st.markdown("### تحليل التغييرات القانونية")
                            
                            for change in legal_changes[:3]:  # first 3 categories only
                                st.markdown(f"#### {change['label']} ({change['count']})")
                                
                                for item in change["changes"][:2]:  # first 2 examples only
                                    col1, col2 = st.columns(2)
                                    with col1:
                                        st.markdown(f"**{report_data['title1']}:**")
                                        st.markdown(f"<div class='diff-text diff-old'>{as_html(item['doc1_text'])}</div>", unsafe_allow_html=True)
                                    with col2:
                                        st.markdown(f"**{report_data['title2']}:**")
                                        st.markdown(f"<div class='diff-text diff-new'>{as_html(item['doc2_text'])}</div>", unsafe_allow_html=True)
                    else:
                        st.error("تعذر تحميل تقرير المقارنة")
            else:
                st.info("لا توجد تقارير مقارنة محفوظة")
        
        # CSS for the elements rendered above.
        st.markdown("""
        <style>
        .module-title {
            color: #1E88E5;
            font-size: 1.8rem;
            font-weight: bold;
            margin-bottom: 1rem;
            text-align: center;
        }
        
        .module-description {
            background-color: #f8f9fa;
            border-right: 4px solid #1E88E5;
            padding: 1rem;
            margin-bottom: 1.5rem;
            color: #444;
            font-size: 1rem;
            text-align: right;
        }
        
        .diff-text {
            padding: 0.5rem;
            border-radius: 4px;
            margin-bottom: 0.5rem;
            white-space: pre-wrap;
        }
        
        .diff-old {
            background-color: rgba(214, 48, 49, 0.1);
            border-right: 3px solid #d63031;
        }
        
        .diff-new {
            background-color: rgba(9, 132, 227, 0.1);
            border-right: 3px solid #0984e3;
        }
        </style>
        """, unsafe_allow_html=True)
    
    def render(self):
        """Render the application's main user interface.

        Entry point for the page: delegates to
        ``render_advanced_comparison_tools`` and returns nothing.
        """
        self.render_advanced_comparison_tools()