|
|
|
|
|
|
|
""" |
|
وحدة مقارنة المستندات المتقدمة لتحليل الفروقات بين نسخ المستندات |
|
""" |
|
|
|
import os |
|
import sys |
|
import json |
|
import re |
|
import difflib |
|
import Levenshtein |
|
from datetime import datetime |
|
import numpy as np |
|
import pandas as pd |
|
import streamlit as st |
|
import plotly.express as px |
|
import plotly.graph_objects as go |
|
from collections import Counter |
|
from nltk.tokenize import sent_tokenize, word_tokenize |
|
from rouge_score import rouge_scorer |
|
from PyPDF2 import PdfReader |
|
import io |
|
|
|
|
|
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))) |
|
|
|
|
|
from utils.helpers import create_directory_if_not_exists, format_time, get_user_info |
|
|
|
|
|
class DocumentComparator: |
|
"""فئة مقارنة المستندات المتقدمة""" |
|
|
|
def __init__(self):
    """Prepare the report directory, the NLTK resources and the ROUGE scorer.

    Side effects: creates ``<module dir>/../../data/document_comparison`` through
    ``create_directory_if_not_exists`` and runs ``_initialize_nltk``.
    """
    self.comparison_dir = os.path.join(os.path.dirname(__file__), '..', '..', 'data', 'document_comparison')
    create_directory_if_not_exists(self.comparison_dir)

    self._initialize_nltk()

    class _UnicodeWordTokenizer:
        """Lower-case word tokenizer that keeps letters and digits of any script.

        The default rouge_score tokenizer drops every character outside
        ``[a-z0-9]``, which leaves Arabic text with no tokens at all and makes
        every ROUGE score 0. For pure-ASCII input this tokenizer yields the
        same tokens as the default one (stemming is disabled here anyway).
        """

        def tokenize(self, text):
            return re.findall(r'[^\W_]+', text.lower())

    rouge_types = ['rouge1', 'rouge2', 'rougeL']
    try:
        self.rouge_scorer = rouge_scorer.RougeScorer(
            rouge_types, use_stemmer=False, tokenizer=_UnicodeWordTokenizer()
        )
    except TypeError:
        # Releases of rouge_score without the `tokenizer` argument: fall back
        # to the original construction rather than failing at start-up.
        self.rouge_scorer = rouge_scorer.RougeScorer(rouge_types, use_stemmer=False)
|
|
|
def _initialize_nltk(self):
    """Make sure the NLTK resources used by this module are installed.

    Each required package is looked up under its own NLTK data category and
    downloaded only when missing. Sentence tokenization is then smoke-tested;
    if that still raises ``LookupError`` the ``punkt`` package is downloaded
    again and the user is informed through Streamlit.
    """
    import nltk
    from nltk.tokenize import sent_tokenize

    # Resources live under different category folders. Looking all of them up
    # under 'tokenizers/' never finds the corpora, so they were re-downloaded
    # on every instantiation.
    # NOTE(review): recent NLTK releases may load sentence models from
    # 'punkt_tab' instead of 'punkt' - confirm against the installed version.
    resource_paths = {
        'punkt': 'tokenizers/punkt',
        'stopwords': 'corpora/stopwords',
        'wordnet': 'corpora/wordnet',
    }

    try:
        for package, resource in resource_paths.items():
            try:
                nltk.data.find(resource)
            except LookupError:
                print(f"تنزيل حزمة NLTK: {package}")
                nltk.download(package, quiet=True)

        # Smoke test: raises LookupError when the sentence model is unusable.
        sent_tokenize("This is a test sentence.")
    except LookupError:
        nltk.download('punkt', quiet=True)
        st.info("تم تنزيل حزمة NLTK punkt بنجاح للاستخدام في مقارنة المستندات.")
|
|
|
def _preprocess_text(self, text): |
|
"""معالجة النص قبل التحليل""" |
|
|
|
text = re.sub(r'\s+', ' ', text) |
|
text = text.strip() |
|
return text |
|
|
|
def _segment_text(self, text):
    """Split *text* into paragraphs and sentences.

    A paragraph is any non-blank line (split on ``\\n``) with surrounding
    whitespace removed. Sentences come from running ``sent_tokenize`` on each
    paragraph in order.

    Returns:
        A ``(paragraphs, sentences)`` tuple of two lists of strings.
    """
    paragraphs = []
    for raw_line in text.split('\n'):
        trimmed = raw_line.strip()
        if trimmed:
            paragraphs.append(trimmed)

    sentences = [sentence for block in paragraphs for sentence in sent_tokenize(block)]
    return paragraphs, sentences
|
|
|
def _calculate_similarity(self, text1, text2):
    """Score how similar two texts are.

    Combines the Levenshtein ratio with the mean F1 of ROUGE-1, ROUGE-2 and
    ROUGE-L (taken from ``self.rouge_scorer``), weighting both halves equally.

    Returns:
        A dict with ``levenshtein_ratio``, ``rouge1_f1``, ``rouge2_f1``,
        ``rougeL_f1``, ``avg_rouge`` and ``combined_score``.
    """
    edit_ratio = Levenshtein.ratio(text1, text2)

    scores = self.rouge_scorer.score(text1, text2)
    f1 = {name: scores[name].fmeasure for name in ('rouge1', 'rouge2', 'rougeL')}
    mean_rouge = (f1['rouge1'] + f1['rouge2'] + f1['rougeL']) / 3

    return {
        'levenshtein_ratio': edit_ratio,
        'rouge1_f1': f1['rouge1'],
        'rouge2_f1': f1['rouge2'],
        'rougeL_f1': f1['rougeL'],
        'avg_rouge': mean_rouge,
        'combined_score': (edit_ratio + mean_rouge) / 2
    }
|
|
|
def _extract_text_from_pdf(self, pdf_file):
    """Return the text of every page of *pdf_file*, one page per line group.

    Each page's text is followed by a newline. If reading fails, the error is
    shown through Streamlit and whatever was extracted before the failure is
    returned (an empty string when nothing was read).
    """
    page_texts = []
    try:
        reader = PdfReader(pdf_file)
        for page in reader.pages:
            page_texts.append(page.extract_text() + "\n")
    except Exception as e:
        st.error(f"خطأ في قراءة ملف PDF: {e}")

    return "".join(page_texts)
|
|
|
def get_document_diff(self, text1, text2, title1="المستند الأول", title2="المستند الثاني"):
    """Compare two document texts and build a full comparison report.

    Args:
        text1: Text of the first (older) document.
        text2: Text of the second (newer) document.
        title1: Display title of the first document.
        title2: Display title of the second document.

    Returns:
        A report dict. When either text is empty it only carries the titles,
        a timestamp, zero similarity and an explanatory summary. Otherwise it
        holds the overall similarity (percentage and raw metrics), sentence-
        and paragraph-level diffs, statistics, up to ten modified sentence
        pairs and a textual summary. A non-empty report is also written to
        disk through ``_save_comparison_report``.
    """
    if not text1 or not text2:
        return {
            "title1": title1,
            "title2": title2,
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "similarity": 0,
            "similarity_score": 0,
            "text_diffs": [],
            "summary": "أحد المستندات فارغ، لا يمكن إجراء المقارنة."
        }

    # Matched pairs scoring below this are reported as modified rather than same.
    modified_below = 0.9

    # Overall similarity is measured on whitespace-normalised text.
    similarity_metrics = self._calculate_similarity(
        self._preprocess_text(text1),
        self._preprocess_text(text2),
    )
    similarity_percentage = int(similarity_metrics['combined_score'] * 100)

    # Segmentation uses the raw text because newlines delimit paragraphs.
    paragraphs1, sentences1 = self._segment_text(text1)
    paragraphs2, sentences2 = self._segment_text(text2)

    sentence_diffs = self._build_sentence_diffs(sentences1, sentences2)
    paragraph_diffs = self._build_paragraph_diffs(paragraphs1, paragraphs2, modified_below)

    status_counts = Counter(entry['status'] for entry in paragraph_diffs)
    removed_paragraphs = status_counts['removed']
    added_paragraphs = status_counts['added']
    modified_paragraphs = status_counts['modified']

    removed_words = [
        token['text']
        for entry in sentence_diffs
        for token in entry['diff']
        if token['status'] == 'removed'
    ]
    added_words = [
        token['text']
        for entry in sentence_diffs
        for token in entry['diff']
        if token['status'] == 'added'
    ]

    # Sentence pairs present in both documents but noticeably reworded.
    modified_contexts = [
        {
            'doc1_text': entry['doc1_text'],
            'doc2_text': entry['doc2_text'],
            'similarity': entry['similarity']
        }
        for entry in sentence_diffs
        if entry['doc1_idx'] != -1 and entry['doc2_idx'] != -1 and entry['similarity'] < modified_below
    ]

    comparison_report = {
        "title1": title1,
        "title2": title2,
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "similarity": similarity_percentage,
        "similarity_metrics": similarity_metrics,
        "sentence_diffs": sentence_diffs,
        "paragraph_diffs": paragraph_diffs,
        "statistics": {
            "doc1_paragraphs": len(paragraphs1),
            "doc2_paragraphs": len(paragraphs2),
            "doc1_sentences": len(sentences1),
            "doc2_sentences": len(sentences2),
            "removed_paragraphs": removed_paragraphs,
            "added_paragraphs": added_paragraphs,
            "modified_paragraphs": modified_paragraphs,
            "removed_words_count": len(removed_words),
            "added_words_count": len(added_words),
            "top_removed_words": Counter(removed_words).most_common(10),
            "top_added_words": Counter(added_words).most_common(10)
        },
        "modified_contexts": modified_contexts[:10],
        "summary": self._generate_comparison_summary(
            similarity_percentage,
            len(paragraphs1),
            len(paragraphs2),
            removed_paragraphs,
            added_paragraphs,
            modified_paragraphs,
            len(removed_words),
            len(added_words)
        )
    }

    self._save_comparison_report(comparison_report, title1, title2)

    return comparison_report


def _greedy_match_segments(self, segments1, segments2, min_words, threshold):
    """Greedily pair segments of document 1 with segments of document 2.

    Segments with fewer than *min_words* whitespace-separated words are
    ignored on both sides. Each remaining segment of document 1, in order,
    takes the not-yet-taken segment of document 2 with the highest Levenshtein
    ratio strictly above *threshold* (the first one wins a tie).

    Returns:
        A list of ``(idx1, idx2, score)`` tuples: document-1 segments first in
        document order (``idx2 == -1`` and score 0 when unmatched), then the
        unmatched document-2 segments in document order with ``idx1 == -1``.
    """
    eligible2 = [j for j, segment in enumerate(segments2) if len(segment.split()) >= min_words]
    taken = set()
    matches = []

    for i, segment1 in enumerate(segments1):
        if len(segment1.split()) < min_words:
            continue

        best_idx, best_score = -1, threshold
        for j in eligible2:
            if j in taken:
                continue
            # Ratios are computed on demand, only for pairs that can still match.
            score = Levenshtein.ratio(segment1, segments2[j])
            if score > best_score:
                best_idx, best_score = j, score

        if best_idx == -1:
            matches.append((i, -1, 0))
        else:
            taken.add(best_idx)
            matches.append((i, best_idx, best_score))

    # Appending the leftovers last keeps the list ordered by document-1 index
    # and then by document-2 index, so no extra sort is needed.
    matches.extend((-1, j, 0) for j in eligible2 if j not in taken)
    return matches


def _build_sentence_diffs(self, sentences1, sentences2):
    """Build the sentence-level diff entries, including word-by-word diffs.

    Sentences need at least 3 words and a Levenshtein ratio above 0.7 to be
    paired. Unpaired sentences are reported as entirely removed or added.
    """
    differ = difflib.Differ()
    status_by_prefix = {'  ': 'same', '- ': 'removed', '+ ': 'added'}
    sentence_diffs = []

    for i, j, score in self._greedy_match_segments(sentences1, sentences2, min_words=3, threshold=0.7):
        s1 = sentences1[i] if i != -1 else ""
        s2 = sentences2[j] if j != -1 else ""

        if i != -1 and j != -1:
            # Differ also emits '? ' hint lines; they carry no word and are skipped.
            word_diff = [
                {'text': line[2:], 'status': status_by_prefix[line[:2]]}
                for line in differ.compare(s1.split(), s2.split())
                if line[:2] in status_by_prefix
            ]
        else:
            # One side is empty, so only one of these two lists has content.
            word_diff = [{'text': word, 'status': 'removed'} for word in s1.split()]
            word_diff += [{'text': word, 'status': 'added'} for word in s2.split()]

        sentence_diffs.append({
            'doc1_idx': i,
            'doc2_idx': j,
            'doc1_text': s1,
            'doc2_text': s2,
            'similarity': score,
            'diff': word_diff
        })

    return sentence_diffs


def _build_paragraph_diffs(self, paragraphs1, paragraphs2, modified_below=0.9):
    """Build the paragraph-level diff entries with their change status.

    Paragraphs need at least 5 words and a Levenshtein ratio above 0.6 to be
    paired. A pair is ``'modified'`` when its ratio is below *modified_below*,
    otherwise ``'same'``; unpaired paragraphs are ``'removed'`` or ``'added'``.
    """
    paragraph_diffs = []

    for i, j, score in self._greedy_match_segments(paragraphs1, paragraphs2, min_words=5, threshold=0.6):
        if j == -1:
            status = 'removed'
        elif i == -1:
            status = 'added'
        else:
            status = 'modified' if score < modified_below else 'same'

        paragraph_diffs.append({
            'doc1_idx': i,
            'doc2_idx': j,
            'doc1_text': paragraphs1[i] if i != -1 else "",
            'doc2_text': paragraphs2[j] if j != -1 else "",
            'similarity': score,
            'status': status
        })

    return paragraph_diffs
|
|
|
def _generate_comparison_summary(self, similarity, p1_count, p2_count, removed_p, added_p, modified_p, removed_w, added_w): |
|
"""إنشاء ملخص للمقارنة بين المستندين""" |
|
if similarity >= 90: |
|
similarity_description = "متطابقة بشكل كبير" |
|
elif similarity >= 70: |
|
similarity_description = "متشابهة" |
|
elif similarity >= 50: |
|
similarity_description = "متشابهة جزئياً" |
|
else: |
|
similarity_description = "مختلفة" |
|
|
|
summary = f"المستندان {similarity_description} بنسبة {similarity}%. " |
|
|
|
|
|
if removed_p > 0 or added_p > 0 or modified_p > 0: |
|
changes = [] |
|
if removed_p > 0: |
|
changes.append(f"تم حذف {removed_p} فقرة") |
|
if added_p > 0: |
|
changes.append(f"تم إضافة {added_p} فقرة") |
|
if modified_p > 0: |
|
changes.append(f"تم تعديل {modified_p} فقرة") |
|
|
|
summary += "التغييرات تشمل: " + "، ".join(changes) + ". " |
|
|
|
|
|
if removed_w > 0 or added_w > 0: |
|
word_changes = [] |
|
if removed_w > 0: |
|
word_changes.append(f"تم حذف {removed_w} كلمة") |
|
if added_w > 0: |
|
word_changes.append(f"تم إضافة {added_w} كلمة") |
|
|
|
summary += "على مستوى الكلمات: " + "، ".join(word_changes) + "." |
|
|
|
return summary |
|
|
|
def _save_comparison_report(self, report, title1, title2): |
|
"""حفظ تقرير المقارنة""" |
|
|
|
timestamp = datetime.now().strftime("%Y%m%d%H%M%S") |
|
filename = f"compare_{title1.replace(' ', '_')}_{title2.replace(' ', '_')}_{timestamp}.json" |
|
file_path = os.path.join(self.comparison_dir, filename) |
|
|
|
try: |
|
with open(file_path, 'w', encoding='utf-8') as f: |
|
json.dump(report, f, ensure_ascii=False, indent=2) |
|
except Exception as e: |
|
print(f"خطأ في حفظ تقرير المقارنة: {e}") |
|
|
|
def load_comparison_report(self, filename):
    """Load a saved comparison report from ``self.comparison_dir``.

    Args:
        filename: Name of the report file inside the report directory.

    Returns:
        The parsed report, or ``None`` when the file does not exist or cannot
        be read or parsed (a read or parse failure is also printed).
    """
    file_path = os.path.join(self.comparison_dir, filename)

    if os.path.exists(file_path):
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                return json.load(f)
        except Exception as e:
            print(f"خطأ في تحميل تقرير المقارنة: {e}")

    return None
|
|
|
def get_comparison_reports(self):
    """List the saved comparison reports, newest first.

    Only files named ``compare_*.json`` in ``self.comparison_dir`` are
    considered. Files that cannot be read or parsed are reported with
    ``print`` and left out.

    Returns:
        A list of dicts with ``filename``, ``title1``, ``title2``,
        ``timestamp`` and ``similarity``, sorted by timestamp descending.
    """
    summaries = []

    for filename in os.listdir(self.comparison_dir):
        if not (filename.startswith("compare_") and filename.endswith(".json")):
            continue

        try:
            with open(os.path.join(self.comparison_dir, filename), 'r', encoding='utf-8') as handle:
                data = json.load(handle)
            summaries.append({
                "filename": filename,
                "title1": data.get("title1", "مستند 1"),
                "title2": data.get("title2", "مستند 2"),
                "timestamp": data.get("timestamp", ""),
                "similarity": data.get("similarity", 0)
            })
        except Exception as e:
            print(f"خطأ في قراءة تقرير المقارنة {filename}: {e}")

    # Timestamps are "YYYY-MM-DD HH:MM:SS" strings, so text order is time order.
    return sorted(summaries, key=lambda item: item["timestamp"], reverse=True)
|
|
|
def extract_key_differences(self, comparison_report):
    """Condense a comparison report into its headline differences.

    Args:
        comparison_report: A report as produced by ``get_document_diff``.

    Returns:
        A list of sections, each with ``type``, ``label``, ``count`` and
        ``items``, in this order: added, removed and modified paragraphs, then
        the most frequent added and removed words. Empty sections are left
        out. Returns ``[]`` when the report is empty or has no
        ``paragraph_diffs``.
    """
    if not comparison_report or "paragraph_diffs" not in comparison_report:
        return []

    paragraph_diffs = comparison_report["paragraph_diffs"]

    # (status, section type, label, how one diff entry becomes a section item)
    paragraph_sections = (
        ("added", "added_paragraphs", "فقرات مضافة",
         lambda p: p["doc2_text"]),
        ("removed", "removed_paragraphs", "فقرات محذوفة",
         lambda p: p["doc1_text"]),
        ("modified", "modified_paragraphs", "فقرات معدلة",
         lambda p: {
             "doc1_text": p["doc1_text"],
             "doc2_text": p["doc2_text"],
             "similarity": p["similarity"]
         }),
    )

    key_differences = []
    for status, section_type, label, to_item in paragraph_sections:
        selected = [p for p in paragraph_diffs if p["status"] == status]
        if selected:
            key_differences.append({
                "type": section_type,
                "label": label,
                "count": len(selected),
                "items": [to_item(p) for p in selected]
            })

    if "statistics" in comparison_report:
        stats = comparison_report["statistics"]

        # (top-words key, total-count key, section type, label)
        word_sections = (
            ("top_added_words", "added_words_count", "added_words",
             "الكلمات المضافة الأكثر تكراراً"),
            ("top_removed_words", "removed_words_count", "removed_words",
             "الكلمات المحذوفة الأكثر تكراراً"),
        )
        for top_key, count_key, section_type, label in word_sections:
            if top_key in stats and stats[top_key]:
                key_differences.append({
                    "type": section_type,
                    "label": label,
                    "count": stats[count_key],
                    "items": stats[top_key]
                })

    return key_differences
|
|
|
def analyze_legal_changes(self, comparison_report):
    """Group modified sentences by the legal topic they mention.

    A sentence pair counts as modified when it exists in both documents and
    its similarity is below 0.9. It is filed under a category when any of
    that category's Arabic terms occurs as a substring of either version, so
    one pair may appear under several categories.

    Args:
        comparison_report: A report as produced by ``get_document_diff``.

    Returns:
        A list of dicts with ``category``, ``label``, ``count`` and
        ``changes``, ordered by ``count`` descending. Empty categories are
        omitted. Returns ``[]`` for an empty report.
    """
    if not comparison_report:
        return []

    legal_terms = {
        "payment": ["دفع", "سداد", "مستحقات", "مقابل", "رسوم", "تكلفة", "مبلغ", "أتعاب"],
        "deadlines": ["ميعاد", "موعد", "تاريخ", "أجل", "مدة", "فترة", "مهلة"],
        "liability": ["مسؤولية", "التزام", "تحمل", "تعويض", "ضمان", "كفالة"],
        "termination": ["إنهاء", "فسخ", "إلغاء", "إيقاف", "إنهاء العلاقة"],
        "dispute": ["نزاع", "خلاف", "منازعة", "اعتراض", "تحكيم", "قضاء", "محكمة"],
        "penalties": ["غرامة", "عقوبة", "شرط جزائي", "جزاء", "تعويض"],
        "conditions": ["شرط", "بند", "حالة", "اشتراط", "متطلب"],
        "rights": ["حق", "صلاحية", "امتياز", "منفعة", "ملكية", "تصرف"],
        "obligations": ["التزام", "واجب", "تعهد", "إلزام", "لازم"]
    }

    legal_category_name = {
        "payment": "الدفع والمستحقات المالية",
        "deadlines": "المواعيد والفترات الزمنية",
        "liability": "المسؤولية والالتزامات",
        "termination": "إنهاء العقد أو فسخه",
        "dispute": "النزاعات والخلافات",
        "penalties": "الغرامات والعقوبات",
        "conditions": "الشروط والبنود",
        "rights": "الحقوق والصلاحيات",
        "obligations": "الالتزامات والواجبات"
    }

    if "sentence_diffs" not in comparison_report:
        return []

    legal_changes = []
    for category, terms in legal_terms.items():
        category_changes = [
            {
                "doc1_text": diff["doc1_text"],
                "doc2_text": diff["doc2_text"],
                "similarity": diff["similarity"]
            }
            for diff in comparison_report["sentence_diffs"]
            if diff["doc1_idx"] != -1 and diff["doc2_idx"] != -1 and diff["similarity"] < 0.9
            and any(
                term in diff["doc1_text"].lower() or term in diff["doc2_text"].lower()
                for term in terms
            )
        ]

        if category_changes:
            legal_changes.append({
                "category": category,
                "label": legal_category_name.get(category, category),
                "count": len(category_changes),
                "changes": category_changes
            })

    # Stable sort: categories with equal counts keep their definition order.
    return sorted(legal_changes, key=lambda item: item["count"], reverse=True)
|
|
|
def analyze_price_changes(self, text1, text2):
    """Compare the monetary amounts mentioned in two document versions.

    An amount is a number (optionally with ``,`` thousands separators and a
    decimal part) that is either followed by a currency word, code or symbol,
    or preceded by the word "مبلغ". Each occurrence is counted once even when
    both rules match it.

    Args:
        text1: Text of the first document.
        text2: Text of the second document.

    Returns:
        A dict with count, total, average, min and max of the amounts per
        document (``doc1_*`` / ``doc2_*``) and ``total_change_percentage``,
        the relative change of the total (0 when document 1 has no amounts).
    """
    # Either a comma-grouped number or a plain run of digits. The former
    # `\d{1,3}` prefix truncated ungrouped numbers such as 1500 to 3 digits.
    number = r'((?:\d{1,3}(?:,\d{3})+|\d+)(?:\.\d+)?)'
    # `\$` must be escaped: a bare `$` is the end-of-text anchor, which made
    # any number at the end of the text count as a price and missed "100 $".
    price_pattern = re.compile(
        number + r'\s*(?:ريال|دولار|يورو|جنيه|درهم|دينار|SAR|USD|EUR|SR|\$|€|£)'
    )
    amount_pattern = re.compile(r'مبلغ[\s\w]*?' + number)

    def extract_prices(text):
        # Keyed by where the number starts, so "مبلغ 1,000 ريال" (matched by
        # both patterns) contributes a single amount.
        found = {}
        for pattern in (price_pattern, amount_pattern):
            for match in pattern.finditer(text):
                found.setdefault(match.start(1), match.group(1))
        return [float(found[position].replace(',', '')) for position in sorted(found)]

    prices1 = extract_prices(text1)
    prices2 = extract_prices(text2)

    price_diff = {
        "doc1_prices_count": len(prices1),
        "doc2_prices_count": len(prices2),
        "doc1_total": sum(prices1) if prices1 else 0,
        "doc2_total": sum(prices2) if prices2 else 0,
        "doc1_average": sum(prices1) / len(prices1) if prices1 else 0,
        "doc2_average": sum(prices2) / len(prices2) if prices2 else 0,
        "doc1_min": min(prices1) if prices1 else 0,
        "doc2_min": min(prices2) if prices2 else 0,
        "doc1_max": max(prices1) if prices1 else 0,
        "doc2_max": max(prices2) if prices2 else 0
    }

    if price_diff["doc1_total"] > 0:
        price_diff["total_change_percentage"] = (
            (price_diff["doc2_total"] - price_diff["doc1_total"]) / price_diff["doc1_total"]
        ) * 100
    else:
        # No baseline to compare against.
        price_diff["total_change_percentage"] = 0

    return price_diff
|
|
|
def analyze_date_changes(self, text1, text2):
    """Compare the dates mentioned in two document versions.

    Recognises numeric dates written day-first or year-first with ``/`` or
    ``-`` separators, and dates written as day, Arabic month name, year.

    Args:
        text1: Text of the first document.
        text2: Text of the second document.

    Returns:
        A dict with the number of dates per document, the first ten dates of
        each, and the lists of common, removed (only in document 1) and added
        (only in document 2) date strings.
    """
    date_patterns = [
        r'\d{1,2}/\d{1,2}/\d{2,4}',
        r'\d{1,2}-\d{1,2}-\d{2,4}',
        r'\d{2,4}/\d{1,2}/\d{1,2}',
        r'\d{2,4}-\d{1,2}-\d{1,2}',
        r'\d{1,2}\s+(?:يناير|فبراير|مارس|أبريل|مايو|يونيو|يوليو|أغسطس|سبتمبر|أكتوبر|نوفمبر|ديسمبر)\s+\d{2,4}'
    ]
    # Without digit boundaries a date such as 15/01/2024 also matched the
    # year-first pattern as the bogus fragment "15/01/20" (and 2024/01/15
    # matched the day-first one as "24/01/15").
    compiled_patterns = [
        re.compile(r'(?<!\d)' + pattern + r'(?!\d)') for pattern in date_patterns
    ]

    def find_dates(text):
        # An ambiguous date like 01/02/03 fits two patterns; the span check
        # keeps it from being counted twice.
        seen_spans = set()
        dates = []
        for pattern in compiled_patterns:
            for match in pattern.finditer(text):
                if match.span() not in seen_spans:
                    seen_spans.add(match.span())
                    dates.append(match.group())
        return dates

    dates1 = find_dates(text1)
    dates2 = find_dates(text2)

    unique1 = set(dates1)
    unique2 = set(dates2)

    return {
        "doc1_dates_count": len(dates1),
        "doc2_dates_count": len(dates2),
        "doc1_dates": dates1[:10],
        "doc2_dates": dates2[:10],
        "common_dates": list(unique1 & unique2),
        "removed_dates": list(unique1 - unique2),
        "added_dates": list(unique2 - unique1)
    }
|
|
|
def render_document_comparison(self, text1, text2, title1="المستند الأول", title2="المستند الثاني"): |
|
"""عرض مقارنة المستندات بالواجهة التفاعلية""" |
|
st.markdown("<h2 class='module-title'>مقارنة المستندات المتقدمة</h2>", unsafe_allow_html=True) |
|
|
|
if not text1 or not text2: |
|
st.warning("يرجى توفير نصوص المستندين للمقارنة") |
|
return |
|
|
|
with st.spinner("جاري تحليل ومقارنة المستندين..."): |
|
|
|
comparison_report = self.get_document_diff(text1, text2, title1, title2) |
|
|
|
|
|
legal_changes = self.analyze_legal_changes(comparison_report) |
|
|
|
|
|
price_changes = self.analyze_price_changes(text1, text2) |
|
date_changes = self.analyze_date_changes(text1, text2) |
|
|
|
|
|
st.markdown("<h3>ملخص المقارنة</h3>", unsafe_allow_html=True) |
|
|
|
col1, col2, col3 = st.columns([1, 1, 1]) |
|
|
|
with col1: |
|
similarity = comparison_report["similarity"] |
|
color = "#00b894" if similarity >= 80 else "#fdcb6e" if similarity >= 50 else "#d63031" |
|
|
|
st.markdown(f""" |
|
<div class="similarity-card"> |
|
<div class="similarity-title">نسبة التشابه الإجمالية</div> |
|
<div class="similarity-score" style="color: {color};">{similarity}%</div> |
|
<div class="similarity-info">تم تحليل {comparison_report["statistics"]["doc1_paragraphs"]} فقرة في {title1} و {comparison_report["statistics"]["doc2_paragraphs"]} فقرة في {title2}</div> |
|
</div> |
|
""", unsafe_allow_html=True) |
|
|
|
with col2: |
|
st.markdown(f""" |
|
<div class="changes-card"> |
|
<div class="changes-title">ملخص التغييرات</div> |
|
<div class="changes-list"> |
|
<div class="change-item"> |
|
<span class="change-label">فقرات محذوفة:</span> |
|
<span class="change-value">{comparison_report["statistics"]["removed_paragraphs"]}</span> |
|
</div> |
|
<div class="change-item"> |
|
<span class="change-label">فقرات مضافة:</span> |
|
<span class="change-value">{comparison_report["statistics"]["added_paragraphs"]}</span> |
|
</div> |
|
<div class="change-item"> |
|
<span class="change-label">فقرات معدلة:</span> |
|
<span class="change-value">{comparison_report["statistics"]["modified_paragraphs"]}</span> |
|
</div> |
|
</div> |
|
</div> |
|
""", unsafe_allow_html=True) |
|
|
|
with col3: |
|
st.markdown(f""" |
|
<div class="words-card"> |
|
<div class="words-title">تغييرات الكلمات</div> |
|
<div class="words-list"> |
|
<div class="words-item"> |
|
<span class="words-label">كلمات محذوفة:</span> |
|
<span class="words-value">{comparison_report["statistics"]["removed_words_count"]}</span> |
|
</div> |
|
<div class="words-item"> |
|
<span class="words-label">كلمات مضافة:</span> |
|
<span class="words-value">{comparison_report["statistics"]["added_words_count"]}</span> |
|
</div> |
|
</div> |
|
</div> |
|
""", unsafe_allow_html=True) |
|
|
|
|
|
st.markdown(f""" |
|
<div class="text-summary"> |
|
{comparison_report["summary"]} |
|
</div> |
|
""", unsafe_allow_html=True) |
|
|
|
|
|
st.markdown("<h3>تحليل التغييرات القانونية</h3>", unsafe_allow_html=True) |
|
|
|
if legal_changes: |
|
tabs = st.tabs([change["label"] for change in legal_changes]) |
|
|
|
for i, tab in enumerate(tabs): |
|
with tab: |
|
st.markdown(f"**عدد التغييرات: {legal_changes[i]['count']}**") |
|
|
|
for j, change in enumerate(legal_changes[i]["changes"]): |
|
col1, col2 = st.columns(2) |
|
with col1: |
|
st.markdown(f"**{title1}:**") |
|
st.markdown(f"<div class='diff-text diff-old'>{change['doc1_text']}</div>", unsafe_allow_html=True) |
|
with col2: |
|
st.markdown(f"**{title2}:**") |
|
st.markdown(f"<div class='diff-text diff-new'>{change['doc2_text']}</div>", unsafe_allow_html=True) |
|
|
|
if j < len(legal_changes[i]["changes"]) - 1: |
|
st.markdown("---") |
|
else: |
|
st.info("لم يتم اكتشاف تغييرات قانونية هامة بين المستندين.") |
|
|
|
|
|
st.markdown("<h3>رسوم بيانية للتغييرات</h3>", unsafe_allow_html=True) |
|
|
|
col1, col2 = st.columns(2) |
|
|
|
with col1: |
|
|
|
stats = comparison_report["statistics"] |
|
fig = px.pie( |
|
names=["فقرات متطابقة", "فقرات معدلة", "فقرات محذوفة", "فقرات مضافة"], |
|
values=[ |
|
stats["doc1_paragraphs"] - stats["removed_paragraphs"] - stats["modified_paragraphs"], |
|
stats["modified_paragraphs"], |
|
stats["removed_paragraphs"], |
|
stats["added_paragraphs"] |
|
], |
|
title="توزيع التغييرات في الفقرات", |
|
color_discrete_sequence=["#00b894", "#fdcb6e", "#d63031", "#0984e3"] |
|
) |
|
|
|
fig.update_layout( |
|
font=dict(family="Arial, sans-serif", size=14), |
|
height=350 |
|
) |
|
|
|
st.plotly_chart(fig, use_container_width=True) |
|
|
|
with col2: |
|
|
|
words_data = [] |
|
|
|
for word, count in comparison_report["statistics"]["top_removed_words"]: |
|
if len(word) > 1: |
|
words_data.append({"word": word, "count": count, "type": "محذوفة"}) |
|
|
|
for word, count in comparison_report["statistics"]["top_added_words"]: |
|
if len(word) > 1: |
|
words_data.append({"word": word, "count": count, "type": "مضافة"}) |
|
|
|
if words_data: |
|
words_df = pd.DataFrame(words_data) |
|
|
|
fig = px.bar( |
|
words_df, |
|
x="word", |
|
y="count", |
|
color="type", |
|
title="الكلمات المضافة والمحذوفة الأكثر تكراراً", |
|
labels={"word": "الكلمة", "count": "عدد المرات", "type": "النوع"}, |
|
color_discrete_map={"محذوفة": "#d63031", "مضافة": "#0984e3"} |
|
) |
|
|
|
fig.update_layout( |
|
font=dict(family="Arial, sans-serif", size=14), |
|
height=350 |
|
) |
|
|
|
st.plotly_chart(fig, use_container_width=True) |
|
else: |
|
st.info("لا توجد بيانات كافية للكلمات المضافة والمحذوفة.") |
|
|
|
|
|
col1, col2 = st.columns(2) |
|
|
|
with col1: |
|
st.markdown("<h3>تحليل التغييرات في الأسعار</h3>", unsafe_allow_html=True) |
|
|
|
if price_changes["doc1_prices_count"] > 0 or price_changes["doc2_prices_count"] > 0: |
|
price_change_direction = "زيادة" if price_changes["total_change_percentage"] > 0 else "نقص" |
|
price_change_color = "#d63031" if price_changes["total_change_percentage"] > 0 else "#00b894" |
|
|
|
st.markdown(f""" |
|
<div class="price-analysis"> |
|
<div class="price-summary">تغيير في إجمالي الأسعار بنسبة <span style="color: {price_change_color}; font-weight: bold;">{abs(price_changes['total_change_percentage']):.2f}% ({price_change_direction})</span></div> |
|
<div class="price-details"> |
|
<div class="price-row"> |
|
<div class="price-label"></div> |
|
<div class="price-value-header">{title1}</div> |
|
<div class="price-value-header">{title2}</div> |
|
</div> |
|
<div class="price-row"> |
|
<div class="price-label">عدد الأسعار:</div> |
|
<div class="price-value">{price_changes['doc1_prices_count']}</div> |
|
<div class="price-value">{price_changes['doc2_prices_count']}</div> |
|
</div> |
|
<div class="price-row"> |
|
<div class="price-label">الإجمالي:</div> |
|
<div class="price-value">{price_changes['doc1_total']:,.2f}</div> |
|
<div class="price-value">{price_changes['doc2_total']:,.2f}</div> |
|
</div> |
|
<div class="price-row"> |
|
<div class="price-label">المتوسط:</div> |
|
<div class="price-value">{price_changes['doc1_average']:,.2f}</div> |
|
<div class="price-value">{price_changes['doc2_average']:,.2f}</div> |
|
</div> |
|
<div class="price-row"> |
|
<div class="price-label">الحد الأدنى:</div> |
|
<div class="price-value">{price_changes['doc1_min']:,.2f}</div> |
|
<div class="price-value">{price_changes['doc2_min']:,.2f}</div> |
|
</div> |
|
<div class="price-row"> |
|
<div class="price-label">الحد الأقصى:</div> |
|
<div class="price-value">{price_changes['doc1_max']:,.2f}</div> |
|
<div class="price-value">{price_changes['doc2_max']:,.2f}</div> |
|
</div> |
|
</div> |
|
</div> |
|
""", unsafe_allow_html=True) |
|
|
|
|
|
if price_changes["doc1_prices_count"] > 0 and price_changes["doc2_prices_count"] > 0: |
|
price_chart_data = [ |
|
{"document": title1, "metric": "الإجمالي", "value": price_changes["doc1_total"]}, |
|
{"document": title2, "metric": "الإجمالي", "value": price_changes["doc2_total"]}, |
|
{"document": title1, "metric": "المتوسط", "value": price_changes["doc1_average"]}, |
|
{"document": title2, "metric": "المتوسط", "value": price_changes["doc2_average"]}, |
|
{"document": title1, "metric": "الحد الأقصى", "value": price_changes["doc1_max"]}, |
|
{"document": title2, "metric": "الحد الأقصى", "value": price_changes["doc2_max"]} |
|
] |
|
|
|
price_df = pd.DataFrame(price_chart_data) |
|
|
|
fig = px.bar( |
|
price_df, |
|
x="metric", |
|
y="value", |
|
color="document", |
|
barmode="group", |
|
title="مقارنة الأسعار بين المستندين", |
|
color_discrete_map={title1: "#0984e3", title2: "#00b894"} |
|
) |
|
|
|
fig.update_layout( |
|
font=dict(family="Arial, sans-serif", size=14), |
|
height=350 |
|
) |
|
|
|
st.plotly_chart(fig, use_container_width=True) |
|
else: |
|
st.info("لم يتم اكتشاف أي أسعار في المستندين.") |
|
|
|
with col2: |
|
st.markdown("<h3>تحليل التغييرات في التواريخ</h3>", unsafe_allow_html=True) |
|
|
|
if date_changes["doc1_dates_count"] > 0 or date_changes["doc2_dates_count"] > 0: |
|
st.markdown(f""" |
|
<div class="date-analysis"> |
|
<div class="date-summary">تم اكتشاف {date_changes['doc1_dates_count']} تاريخ في {title1} و {date_changes['doc2_dates_count']} تاريخ في {title2}</div> |
|
<div class="date-stats"> |
|
<div class="date-stat"> |
|
<span class="date-label">تواريخ مشتركة:</span> |
|
<span class="date-value">{len(date_changes['common_dates'])}</span> |
|
</div> |
|
<div class="date-stat"> |
|
<span class="date-label">تواريخ محذوفة:</span> |
|
<span class="date-value">{len(date_changes['removed_dates'])}</span> |
|
</div> |
|
<div class="date-stat"> |
|
<span class="date-label">تواريخ مضافة:</span> |
|
<span class="date-value">{len(date_changes['added_dates'])}</span> |
|
</div> |
|
</div> |
|
</div> |
|
""", unsafe_allow_html=True) |
|
|
|
|
|
if date_changes["removed_dates"]: |
|
st.markdown("**التواريخ المحذوفة:**") |
|
for date in date_changes["removed_dates"][:10]: |
|
st.markdown(f"<div class='diff-text diff-old'>{date}</div>", unsafe_allow_html=True) |
|
|
|
if date_changes["added_dates"]: |
|
st.markdown("**التواريخ المضافة:**") |
|
for date in date_changes["added_dates"][:10]: |
|
st.markdown(f"<div class='diff-text diff-new'>{date}</div>", unsafe_allow_html=True) |
|
|
|
|
|
date_chart_data = [ |
|
{"category": "تواريخ مشتركة", "count": len(date_changes["common_dates"])}, |
|
{"category": "تواريخ محذوفة", "count": len(date_changes["removed_dates"])}, |
|
{"category": "تواريخ مضافة", "count": len(date_changes["added_dates"])} |
|
] |
|
|
|
date_df = pd.DataFrame(date_chart_data) |
|
|
|
fig = px.bar( |
|
date_df, |
|
x="category", |
|
y="count", |
|
title="توزيع التغييرات في التواريخ", |
|
color="category", |
|
color_discrete_map={ |
|
"تواريخ مشتركة": "#00b894", |
|
"تواريخ محذوفة": "#d63031", |
|
"تواريخ مضافة": "#0984e3" |
|
} |
|
) |
|
|
|
fig.update_layout( |
|
font=dict(family="Arial, sans-serif", size=14), |
|
height=350 |
|
) |
|
|
|
st.plotly_chart(fig, use_container_width=True) |
|
else: |
|
st.info("لم يتم اكتشاف أي تواريخ في المستندين.") |
|
|
|
|
|
st.markdown("<h3>العرض المرئي للتغييرات</h3>", unsafe_allow_html=True) |
|
|
|
|
|
st.markdown("#### تصفية الفروقات حسب النوع") |
|
col1, col2, col3 = st.columns(3) |
|
|
|
with col1: |
|
show_added = st.checkbox("عرض الإضافات", value=True) |
|
with col2: |
|
show_removed = st.checkbox("عرض الحذف", value=True) |
|
with col3: |
|
show_modified = st.checkbox("عرض التعديلات", value=True) |
|
|
|
|
|
filtered_diffs = [] |
|
|
|
for diff in comparison_report["paragraph_diffs"]: |
|
if diff["status"] == "added" and show_added: |
|
filtered_diffs.append(diff) |
|
elif diff["status"] == "removed" and show_removed: |
|
filtered_diffs.append(diff) |
|
elif diff["status"] == "modified" and show_modified: |
|
filtered_diffs.append(diff) |
|
|
|
|
|
if filtered_diffs: |
|
for diff in filtered_diffs: |
|
if diff["status"] == "added": |
|
st.markdown(f""" |
|
<div class="diff-block diff-added"> |
|
<div class="diff-header"> |
|
<div class="diff-title">فقرة مضافة في {title2}</div> |
|
</div> |
|
<div class="diff-content"> |
|
{diff["doc2_text"]} |
|
</div> |
|
</div> |
|
""", unsafe_allow_html=True) |
|
|
|
elif diff["status"] == "removed": |
|
st.markdown(f""" |
|
<div class="diff-block diff-removed"> |
|
<div class="diff-header"> |
|
<div class="diff-title">فقرة محذوفة من {title1}</div> |
|
</div> |
|
<div class="diff-content"> |
|
{diff["doc1_text"]} |
|
</div> |
|
</div> |
|
""", unsafe_allow_html=True) |
|
|
|
elif diff["status"] == "modified": |
|
similarity_percentage = int(diff["similarity"] * 100) |
|
|
|
st.markdown(f""" |
|
<div class="diff-block diff-modified"> |
|
<div class="diff-header"> |
|
<div class="diff-title">فقرة معدلة (نسبة التشابه: {similarity_percentage}%)</div> |
|
</div> |
|
<div class="diff-content-container"> |
|
<div class="diff-content-old"> |
|
<div class="diff-subtitle">{title1}:</div> |
|
{diff["doc1_text"]} |
|
</div> |
|
<div class="diff-content-new"> |
|
<div class="diff-subtitle">{title2}:</div> |
|
{diff["doc2_text"]} |
|
</div> |
|
</div> |
|
</div> |
|
""", unsafe_allow_html=True) |
|
else: |
|
st.info("لا توجد فروقات تطابق معايير التصفية المحددة.") |
|
|
|
|
|
st.markdown(""" |
|
<style> |
|
.module-title { |
|
color: #1E88E5; |
|
font-size: 1.8rem; |
|
font-weight: bold; |
|
margin-bottom: 1rem; |
|
text-align: center; |
|
} |
|
|
|
.similarity-card, .changes-card, .words-card { |
|
background-color: #fff; |
|
border-radius: 8px; |
|
padding: 1rem; |
|
box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1); |
|
height: 100%; |
|
text-align: center; |
|
} |
|
|
|
.similarity-title, .changes-title, .words-title { |
|
font-weight: bold; |
|
font-size: 1rem; |
|
margin-bottom: 0.5rem; |
|
color: #333; |
|
} |
|
|
|
.similarity-score { |
|
font-size: 2.5rem; |
|
font-weight: bold; |
|
margin-bottom: 0.25rem; |
|
} |
|
|
|
.similarity-info { |
|
font-size: 0.8rem; |
|
color: #666; |
|
} |
|
|
|
.changes-list, .words-list { |
|
text-align: right; |
|
} |
|
|
|
.change-item, .words-item { |
|
display: flex; |
|
justify-content: space-between; |
|
margin-bottom: 0.5rem; |
|
} |
|
|
|
.change-label, .words-label { |
|
color: #555; |
|
} |
|
|
|
.change-value, .words-value { |
|
font-weight: bold; |
|
color: #333; |
|
} |
|
|
|
.text-summary { |
|
background-color: #f8f9fa; |
|
border-right: 4px solid #1E88E5; |
|
padding: 1rem; |
|
margin: 1rem 0; |
|
color: #444; |
|
font-size: 1rem; |
|
text-align: right; |
|
} |
|
|
|
.diff-text { |
|
padding: 0.5rem; |
|
border-radius: 4px; |
|
margin-bottom: 0.5rem; |
|
white-space: pre-wrap; |
|
} |
|
|
|
.diff-old { |
|
background-color: rgba(214, 48, 49, 0.1); |
|
border-right: 3px solid #d63031; |
|
} |
|
|
|
.diff-new { |
|
background-color: rgba(9, 132, 227, 0.1); |
|
border-right: 3px solid #0984e3; |
|
} |
|
|
|
.price-analysis, .date-analysis { |
|
background-color: #f8f9fa; |
|
border-radius: 8px; |
|
padding: 1rem; |
|
margin-bottom: 1rem; |
|
} |
|
|
|
.price-summary, .date-summary { |
|
font-size: 1rem; |
|
margin-bottom: 0.5rem; |
|
text-align: center; |
|
} |
|
|
|
.price-details { |
|
margin-top: 1rem; |
|
} |
|
|
|
.price-row { |
|
display: flex; |
|
justify-content: space-between; |
|
margin-bottom: 0.25rem; |
|
border-bottom: 1px solid #eee; |
|
padding-bottom: 0.25rem; |
|
} |
|
|
|
.price-label { |
|
flex: 1; |
|
text-align: right; |
|
font-weight: bold; |
|
color: #555; |
|
} |
|
|
|
.price-value-header { |
|
flex: 1; |
|
text-align: center; |
|
font-weight: bold; |
|
color: #333; |
|
} |
|
|
|
.price-value { |
|
flex: 1; |
|
text-align: center; |
|
color: #333; |
|
} |
|
|
|
.date-stats { |
|
display: flex; |
|
justify-content: space-around; |
|
margin-top: 0.5rem; |
|
} |
|
|
|
.date-stat { |
|
text-align: center; |
|
} |
|
|
|
.date-label { |
|
display: block; |
|
font-size: 0.9rem; |
|
color: #555; |
|
} |
|
|
|
.date-value { |
|
display: block; |
|
font-size: 1.2rem; |
|
font-weight: bold; |
|
color: #333; |
|
} |
|
|
|
.diff-block { |
|
background-color: #fff; |
|
border-radius: 8px; |
|
margin-bottom: 1rem; |
|
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1); |
|
overflow: hidden; |
|
} |
|
|
|
.diff-header { |
|
padding: 0.5rem 1rem; |
|
border-bottom: 1px solid #eee; |
|
} |
|
|
|
.diff-title { |
|
font-weight: bold; |
|
color: #333; |
|
} |
|
|
|
.diff-content { |
|
padding: 1rem; |
|
white-space: pre-wrap; |
|
direction: rtl; |
|
text-align: right; |
|
} |
|
|
|
.diff-content-container { |
|
display: flex; |
|
flex-direction: column; |
|
} |
|
|
|
.diff-content-old, .diff-content-new { |
|
padding: 1rem; |
|
white-space: pre-wrap; |
|
direction: rtl; |
|
text-align: right; |
|
} |
|
|
|
.diff-content-old { |
|
background-color: rgba(214, 48, 49, 0.05); |
|
border-bottom: 1px solid #eee; |
|
} |
|
|
|
.diff-content-new { |
|
background-color: rgba(9, 132, 227, 0.05); |
|
} |
|
|
|
.diff-subtitle { |
|
font-weight: bold; |
|
margin-bottom: 0.5rem; |
|
color: #555; |
|
} |
|
|
|
.diff-added { |
|
border-right: 4px solid #0984e3; |
|
} |
|
|
|
.diff-removed { |
|
border-right: 4px solid #d63031; |
|
} |
|
|
|
.diff-modified { |
|
border-right: 4px solid #fdcb6e; |
|
} |
|
|
|
@media (min-width: 992px) { |
|
.diff-content-container { |
|
flex-direction: row; |
|
} |
|
|
|
.diff-content-old, .diff-content-new { |
|
flex: 1; |
|
} |
|
|
|
.diff-content-old { |
|
border-bottom: none; |
|
border-left: 1px solid #eee; |
|
} |
|
} |
|
</style> |
|
""", unsafe_allow_html=True) |
|
|
|
def render_advanced_comparison_tools(self):
    """Render the advanced document-comparison tools page.

    Builds three Streamlit tabs:

    1. Direct text comparison: two titled text areas; when the button is
       pressed and both texts are non-empty they are passed to
       ``self.render_document_comparison``.
    2. PDF comparison: two PDF uploads whose text is extracted with
       ``self._extract_text_from_pdf`` and then compared the same way.
    3. Saved reports: lists the reports returned by
       ``self.get_comparison_reports`` and, on request, loads one with
       ``self.load_comparison_report`` and shows its summary, its key
       differences (``self.extract_key_differences``) and its legal
       changes (``self.analyze_legal_changes``).

    Paragraph text taken from a saved report is HTML-escaped before it is
    placed inside the raw ``<div>`` wrappers, so characters such as ``<``
    or ``&`` in a document are displayed literally instead of being parsed
    as markup.

    Returns:
        None. All output is written to the Streamlit page.
    """
    # Local import: keeps this method self-contained without touching the
    # file-level import block.
    import html

    def diff_html(css_class, text):
        """Wrap escaped document text in a styled ``diff-text`` div."""
        safe_text = html.escape(str(text), quote=False)
        return f"<div class='diff-text {css_class}'>{safe_text}</div>"

    def show_side_by_side(report, pair):
        """Show the old and new version of one paragraph in two columns."""
        old_col, new_col = st.columns(2)
        with old_col:
            st.markdown(f"**{report['title1']}:**")
            st.markdown(diff_html("diff-old", pair["doc1_text"]), unsafe_allow_html=True)
        with new_col:
            st.markdown(f"**{report['title2']}:**")
            st.markdown(diff_html("diff-new", pair["doc2_text"]), unsafe_allow_html=True)

    st.markdown("<h2 class='module-title'>أدوات مقارنة المستندات المتقدمة</h2>", unsafe_allow_html=True)

    st.markdown("""
    <div class="module-description">
        استخدم هذه الأدوات لمقارنة مستندات العقود بشكل متقدم، واكتشاف التغييرات والفروقات بين نسخ المستندات المختلفة،
        مع تحليل التغييرات القانونية والمالية والتواريخ.
    </div>
    """, unsafe_allow_html=True)

    text_tab, pdf_tab, reports_tab = st.tabs([
        "مقارنة نصية مباشرة",
        "مقارنة ملفات PDF",
        "عرض تقارير المقارنة السابقة"
    ])

    # --- Tab 1: compare two pasted texts --------------------------------
    with text_tab:
        st.markdown("### مقارنة نصية مباشرة")

        first_col, second_col = st.columns(2)

        with first_col:
            title1 = st.text_input("عنوان المستند الأول", key="text_title1")
            text1 = st.text_area("نص المستند الأول", height=300, key="text_input1")

        with second_col:
            title2 = st.text_input("عنوان المستند الثاني", key="text_title2")
            text2 = st.text_area("نص المستند الثاني", height=300, key="text_input2")

        if st.button("قارن النصوص", key="compare_text_btn"):
            if text1 and text2:
                # Fall back to generic titles when the user left them empty.
                self.render_document_comparison(
                    text1,
                    text2,
                    title1 or "المستند الأول",
                    title2 or "المستند الثاني"
                )
            else:
                st.warning("يرجى إدخال نص المستندين للمقارنة")

    # --- Tab 2: compare two uploaded PDF files --------------------------
    with pdf_tab:
        st.markdown("### مقارنة ملفات PDF")

        first_col, second_col = st.columns(2)

        with first_col:
            title1_pdf = st.text_input("عنوان المستند الأول", key="pdf_title1")
            uploaded_file1 = st.file_uploader("تحميل المستند الأول (PDF)", type=["pdf"], key="pdf_upload1")

        with second_col:
            title2_pdf = st.text_input("عنوان المستند الثاني", key="pdf_title2")
            uploaded_file2 = st.file_uploader("تحميل المستند الثاني (PDF)", type=["pdf"], key="pdf_upload2")

        if st.button("قارن ملفات PDF", key="compare_pdf_btn"):
            if uploaded_file1 is not None and uploaded_file2 is not None:
                with st.spinner("جاري استخراج النصوص من ملفات PDF..."):
                    text1_pdf = self._extract_text_from_pdf(uploaded_file1)
                    text2_pdf = self._extract_text_from_pdf(uploaded_file2)

                if text1_pdf and text2_pdf:
                    # Use the uploaded file names when no title was typed.
                    self.render_document_comparison(
                        text1_pdf,
                        text2_pdf,
                        title1_pdf or uploaded_file1.name,
                        title2_pdf or uploaded_file2.name
                    )
                else:
                    st.error("تعذر استخراج النص من ملفات PDF. يرجى التأكد من أن الملفات تحتوي على نصوص قابلة للاستخراج.")
            else:
                st.warning("يرجى تحميل ملفي PDF للمقارنة")

    # --- Tab 3: browse previously saved comparison reports --------------
    with reports_tab:
        st.markdown("### تقارير المقارنة السابقة")

        reports = self.get_comparison_reports()

        if reports:
            overview_rows = [
                {
                    "التاريخ": entry["timestamp"],
                    "المستند الأول": entry["title1"],
                    "المستند الثاني": entry["title2"],
                    "نسبة التشابه": f"{entry['similarity']}%",
                    "الملف": entry["filename"]
                }
                for entry in reports
            ]
            st.dataframe(pd.DataFrame(overview_rows))

            # Select by position rather than by label text: two reports
            # with the same titles and timestamp would otherwise be
            # indistinguishable and always resolve to the first one.
            labels = [
                f"{entry['title1']} و {entry['title2']} ({entry['timestamp']})"
                for entry in reports
            ]
            report_index = st.selectbox(
                "اختر تقريراً لعرضه",
                options=list(range(len(reports))),
                format_func=lambda position: labels[position]
            )

            if report_index is not None and st.button("عرض التقرير المحدد"):
                selected_filename = reports[report_index]["filename"]
                loaded_report = self.load_comparison_report(selected_filename)

                if loaded_report:
                    st.success("تم تحميل تقرير المقارنة بنجاح")

                    st.markdown("### ملخص تقرير المقارنة")
                    st.markdown(f"**نسبة التشابه:** {loaded_report['similarity']}%")
                    st.markdown(f"**تاريخ المقارنة:** {loaded_report['timestamp']}")
                    st.markdown(f"**ملخص التغييرات:** {loaded_report['summary']}")

                    key_differences = self.extract_key_differences(loaded_report)

                    if key_differences:
                        st.markdown("### الاختلافات الرئيسية")

                        for diff in key_differences:
                            st.markdown(f"#### {diff['label']} ({diff['count']})")
                            diff_type = diff["type"]

                            if diff_type == "added_paragraphs":
                                # Only the first five entries are shown.
                                for item in diff["items"][:5]:
                                    st.markdown(diff_html("diff-new", item), unsafe_allow_html=True)

                            elif diff_type == "removed_paragraphs":
                                for item in diff["items"][:5]:
                                    st.markdown(diff_html("diff-old", item), unsafe_allow_html=True)

                            elif diff_type == "modified_paragraphs":
                                # Only the first three pairs are shown.
                                for item in diff["items"][:3]:
                                    show_side_by_side(loaded_report, item)

                            elif diff_type in ["added_words", "removed_words"]:
                                # Single-character tokens are left out of the table.
                                word_rows = [
                                    {"الكلمة": word, "عدد المرات": count}
                                    for word, count in diff["items"]
                                    if len(word) > 1
                                ]
                                if word_rows:
                                    st.dataframe(pd.DataFrame(word_rows))

                    legal_changes = self.analyze_legal_changes(loaded_report)

                    if legal_changes:
                        st.markdown("### تحليل التغييرات القانونية")

                        # At most three categories, two examples each.
                        for change in legal_changes[:3]:
                            st.markdown(f"#### {change['label']} ({change['count']})")

                            for item in change["changes"][:2]:
                                show_side_by_side(loaded_report, item)
                else:
                    st.error("تعذر تحميل تقرير المقارنة")
        else:
            st.info("لا توجد تقارير مقارنة محفوظة")

    # Page-level CSS for the classes used above.
    st.markdown("""
    <style>
    .module-title {
        color: #1E88E5;
        font-size: 1.8rem;
        font-weight: bold;
        margin-bottom: 1rem;
        text-align: center;
    }

    .module-description {
        background-color: #f8f9fa;
        border-right: 4px solid #1E88E5;
        padding: 1rem;
        margin-bottom: 1.5rem;
        color: #444;
        font-size: 1rem;
        text-align: right;
    }

    .diff-text {
        padding: 0.5rem;
        border-radius: 4px;
        margin-bottom: 0.5rem;
        white-space: pre-wrap;
    }

    .diff-old {
        background-color: rgba(214, 48, 49, 0.1);
        border-right: 3px solid #d63031;
    }

    .diff-new {
        background-color: rgba(9, 132, 227, 0.1);
        border-right: 3px solid #0984e3;
    }
    </style>
    """, unsafe_allow_html=True)
|
|
|
def render(self):
    """Render the application's main user interface.

    Delegates to ``render_advanced_comparison_tools``, which draws the
    whole page.
    """
    show_tools = self.render_advanced_comparison_tools
    show_tools()