sohampawar1030 committed on
Commit
3135249
·
verified ·
1 Parent(s): b173c74

Update legal_document_analysis.py

Browse files
Files changed (1) hide show
  1. legal_document_analysis.py +642 -635
legal_document_analysis.py CHANGED
@@ -1,636 +1,643 @@
1
- import os
2
- import PyPDF2
3
- import streamlit as st
4
- from dotenv import load_dotenv
5
- from langchain_groq import ChatGroq
6
- from docx import Document
7
- import matplotlib.pyplot as plt
8
- import io
9
- import base64
10
- from email.mime.multipart import MIMEMultipart
11
- from email.mime.text import MIMEText
12
- from email.mime.application import MIMEApplication
13
- import smtplib
14
- from fpdf import FPDF
15
- import getpass
16
- import pandas as pd
17
- import seaborn as sns
18
-
19
# Pull settings from a local .env file into the process environment.
load_dotenv()

# Make sure a Groq API key is available, prompting on the terminal if not.
_groq_api_key = os.environ.get("GROQ_API_KEY")
if not _groq_api_key:
    _groq_api_key = getpass.getpass("Enter API key for Groq: ")
    os.environ["GROQ_API_KEY"] = _groq_api_key

# Shared chat model used by the summary, chatbot and suggestion helpers.
model = ChatGroq(model="llama-3.1-8b-instant", api_key=os.environ.get("GROQ_API_KEY"))

# Page-wide CSS overrides for the Streamlit UI.
_CUSTOM_CSS = """
    <style>
    .main {
        background-color: #f0f2f5;
    }
    .sidebar .sidebar-content {
        background-color: #ffffff;
    }
    h1 {
        color: #2C3E50;
    }
    h2 {
        color: #2980B9;
    }
    .stButton button {
        background-color: #2980B9;
        color: white;
        border: None;
        border-radius: 5px;
        padding: 10px;
    }
    </style>
    """
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
56
-
57
- # Function to read PDF content
58
def read_pdf(file):
    """Extract the text of every page of a PDF.

    Args:
        file: Path or binary file-like object accepted by ``PyPDF2.PdfReader``.

    Returns:
        str: The page texts concatenated in page order, without a separator.
    """
    reader = PyPDF2.PdfReader(file)
    # extract_text() may yield None or "" for pages without a text layer
    # (e.g. scanned images); `or ""` keeps such pages from raising TypeError.
    return "".join(page.extract_text() or "" for page in reader.pages)
64
-
65
- # Function to extract text from DOCX files
66
def extract_text_from_docx(file):
    """Return the text of a DOCX document, one paragraph per line."""
    lines = []
    for para in Document(file).paragraphs:
        lines.append(para.text)
    return "\n".join(lines)
70
-
71
- # Function to preprocess text
72
def preprocess_text(text):
    """Collapse every run of whitespace (newlines included) into one space."""
    # str.split() with no argument already splits on "\n", "\r" and spaces.
    words = text.split()
    return " ".join(words)
74
-
75
- # Function to chunk large text into smaller parts
76
def chunk_text(text, max_tokens=2000):
    """Split text into sentence-aligned chunks of at most ``max_tokens`` words.

    Sentences are delimited by ``". "`` and "tokens" are approximated by
    whitespace-separated words. A single sentence longer than ``max_tokens``
    is kept whole in its own chunk.

    Args:
        text: The text to split.
        max_tokens: Maximum number of words per chunk.

    Returns:
        list[str]: Non-empty chunks in document order; ``[]`` for blank text.
    """
    chunks = []
    current_chunk = []
    current_length = 0

    for sentence in text.split(". "):
        sentence_length = len(sentence.split())
        # Flush only when something has been collected; otherwise an oversized
        # first sentence would push an empty chunk to the model.
        if current_chunk and current_length + sentence_length > max_tokens:
            chunks.append(". ".join(current_chunk))
            current_chunk = []
            current_length = 0
        current_chunk.append(sentence)
        current_length += sentence_length

    if current_chunk:
        chunks.append(". ".join(current_chunk))

    # Drop blank chunks (e.g. from empty input) so callers never summarize "".
    return [chunk for chunk in chunks if chunk.strip()]
95
-
96
- # Function to generate summary for each chunk
97
def generate_summary(text):
    """Ask the chat model to summarize ``text``.

    Returns:
        The stripped summary, ``"No summary available."`` when the model
        returns nothing, or ``None`` after reporting an error in the UI.
    """
    prompt = f"Please summarize the following content:\n\n{text}"
    try:
        reply = model.invoke(prompt)
        raw = reply.content if hasattr(reply, 'content') else str(reply)
        if not raw:
            return "No summary available."
        return raw.strip()
    except Exception as e:
        st.error(f"Error generating summary: {str(e)}")
        return None
109
-
110
- # Function to summarize large texts
111
def summarize_large_text(text, chunk_limit=5000):
    """Summarize ``text`` chunk by chunk and join the partial summaries.

    Chunks whose summary is empty or failed (``None``) are left out.
    """
    pieces = chunk_text(text, max_tokens=chunk_limit)
    partial_summaries = (generate_summary(piece) for piece in pieces)
    return " ".join(part for part in partial_summaries if part)
119
-
120
- # Function to detect key clauses
121
def detect_key_clauses(text):
    """Find well-known clause keywords in ``text`` (case-insensitive).

    Args:
        text: The document text to scan.

    Returns:
        list[dict]: One entry per keyword found, with keys ``clause``
        (capitalized keyword), ``summary`` (generic description) and
        ``explanation`` (up to 50 characters before and 200 after the first
        occurrence).
    """
    key_clauses = [
        {"clause": "confidentiality", "summary": "Confidentiality clauses ensure that sensitive information remains protected."},
        {"clause": "liability", "summary": "Liability clauses outline the responsibility for damages or losses incurred."},
        {"clause": "termination", "summary": "Termination clauses specify the conditions under which a contract may be ended."},
        {"clause": "force majeure", "summary": "Force majeure clauses excuse parties from performance obligations due to unforeseen events."},
        {"clause": "governing law", "summary": "Governing law clauses specify which jurisdiction's laws will govern the contract."},
        {"clause": "dispute resolution", "summary": "Dispute resolution clauses specify how conflicts between parties will be resolved."},
        {"clause": "amendment", "summary": "Amendment clauses outline the process for changing the terms of the contract."},
        {"clause": "warranty", "summary": "Warranty clauses provide assurances regarding the quality or condition of goods or services."},
    ]

    lowered = text.lower()  # lower-case once instead of once per keyword
    detected_clauses = []
    for clause in key_clauses:
        clause_start = lowered.find(clause["clause"].lower())
        if clause_start == -1:
            continue
        # Clamp at 0: a negative start would wrap around to the end of the
        # text and produce an empty or unrelated context.
        context = text[max(0, clause_start - 50): clause_start + 200]
        explanation = f"The document mentions '{clause['clause']}' clause. Context: {context.strip()}..."
        detected_clauses.append({
            "clause": clause["clause"].capitalize(),
            "summary": clause["summary"],
            "explanation": explanation
        })

    return detected_clauses
146
-
147
- # Function to detect hidden obligations or dependencies
148
def detect_hidden_obligations_or_dependencies(text, summary):
    """Find phrases that hint at conditional or implicit obligations.

    Phrases are matched case-insensitively as whole words, so "if" no longer
    fires inside words such as "certificate". The document text is searched
    first; the summary is used only when the text has no match.

    Args:
        text: The document text.
        summary: The generated summary (``None`` is treated as empty).

    Returns:
        list[dict]: Entries with keys ``phrase``, ``summary`` and ``context``
        (up to 50 characters before and 200 after the first match, taken from
        whichever source contained it).
    """
    import re

    hidden_obligations = [
        {"phrase": "dependent upon", "summary": "This suggests that some action is conditional upon another."},
        {"phrase": "if", "summary": "This indicates that certain conditions must be met to fulfill the obligation."},
        {"phrase": "may be required", "summary": "Implies that the party could be obligated to perform an action under specific conditions."},
        {"phrase": "should", "summary": "Implies a recommendation or requirement, though not explicitly mandatory."},
        {"phrase": "obligated to", "summary": "Indicates a clear, binding duty to perform an action."},
    ]

    summary = summary or ""
    hidden_dependencies = []

    for item in hidden_obligations:
        pattern = re.compile(r"\b" + re.escape(item["phrase"]) + r"\b", re.IGNORECASE)
        source = text
        match = pattern.search(source)
        if match is None:
            # Previously a summary-only hit sliced `text` at index -1 and
            # returned unrelated context; quote the summary instead.
            source = summary
            match = pattern.search(source)
        if match is None:
            continue
        start = match.start()
        # Clamp at 0 so an early match does not wrap to the end of the string.
        context = source[max(0, start - 50): start + 200]
        hidden_dependencies.append({
            "phrase": item["phrase"],
            "summary": item["summary"],
            "context": context.strip()
        })

    return hidden_dependencies
170
-
171
- # Function to detect risks in the text
172
def detect_risks(text, summary):
    """Find risk-related phrases in the document text or its summary.

    Matching is a case-insensitive substring search. The document text is
    searched first; the summary is used only when the text has no match.

    Args:
        text: The document text.
        summary: The generated summary (``None`` is treated as empty).

    Returns:
        list[dict]: Entries with keys ``phrase``, ``summary``, ``context``
        (up to 50 characters before and 200 after the first match, taken from
        whichever source contained it) and ``risk_level``
        (``"High"``/``"Medium"``/``"Low"``).
    """
    risk_phrases = [
        {"phrase": "penalty", "summary": "This indicates financial or legal consequences.", "risk_level": "High"},
        {"phrase": "liability", "summary": "This suggests potential financial responsibility.", "risk_level": "Medium"},
        {"phrase": "default", "summary": "This can lead to serious legal consequences.", "risk_level": "High"},
        {"phrase": "breach", "summary": "This may expose the party to significant penalties.", "risk_level": "High"},
        {"phrase": "suspension", "summary": "This indicates risks of halting services.", "risk_level": "Medium"},
        {"phrase": "should", "summary": "This implies a recommendation, which may not be mandatory.", "risk_level": "Low"},
        {"phrase": "may be required", "summary": "This suggests that obligations could exist under certain conditions.", "risk_level": "Low"},
        {"phrase": "indemnify", "summary": "This entails a duty to compensate for harm or loss, indicating potential financial risk.", "risk_level": "High"},
        {"phrase": "termination for cause", "summary": "This indicates a risk of ending the contract due to specific failures.", "risk_level": "High"},
        {"phrase": "compliance", "summary": "Non-compliance with regulations can lead to legal penalties.", "risk_level": "High"},
    ]

    summary = summary or ""
    lowered_text = text.lower()      # lower-case once, not once per phrase
    lowered_summary = summary.lower()
    detected_risks = []

    for item in risk_phrases:
        phrase = item["phrase"].lower()
        source = text
        phrase_start = lowered_text.find(phrase)
        if phrase_start == -1:
            # Summary-only hit: quote the summary rather than slicing `text`
            # at index -1, which returned unrelated context.
            source = summary
            phrase_start = lowered_summary.find(phrase)
        if phrase_start == -1:
            continue
        # Clamp at 0 so an early match does not wrap to the end of the string.
        context = source[max(0, phrase_start - 50): phrase_start + 200]
        detected_risks.append({
            "phrase": item["phrase"],
            "summary": item["summary"],
            "context": context.strip(),
            "risk_level": item["risk_level"]
        })

    return detected_risks
200
-
201
- # Function to calculate overall risk score
202
def calculate_overall_risk_score(detected_risks):
    """Sum the weights of all detected risks (High=3, Medium=2, Low=1).

    Entries with any other ``risk_level`` contribute 0.
    """
    weights = {"High": 3, "Medium": 2, "Low": 1}
    total = 0
    for entry in detected_risks:
        total += weights.get(entry['risk_level'], 0)
    return total
210
-
211
- # Function to plot risk assessment matrix
212
def plot_risk_assessment_matrix(detected_risks):
    """Render a likelihood/impact scatter plot of the detected risks.

    Each risk is placed on the diagonal according to its level
    (Low=1, Medium=2, High=3) and labelled with its phrase. Risks with an
    unrecognized level are skipped.

    Args:
        detected_risks: Dicts with ``phrase`` and ``risk_level`` keys.

    Returns:
        str: The chart as a base64-encoded PNG.
    """
    level_scores = {'High': 3, 'Medium': 2, 'Low': 1}
    # Keep label and score together so a skipped (unknown-level) risk cannot
    # shift the annotation indices and raise IndexError.
    points = [
        (risk['phrase'], level_scores[risk['risk_level']])
        for risk in detected_risks
        if risk['risk_level'] in level_scores
    ]
    scores = [score for _, score in points]

    fig, ax = plt.subplots(figsize=(6, 6))
    try:
        ax.scatter(scores, scores, alpha=0.6)

        ax.set_xticks([1, 2, 3])
        ax.set_yticks([1, 2, 3])
        ax.set_xticklabels(['Low', 'Medium', 'High'])
        ax.set_yticklabels(['Low', 'Medium', 'High'])
        ax.set_xlabel('Likelihood')
        ax.set_ylabel('Impact')
        ax.set_title('Risk Assessment Matrix')

        for label, score in points:
            ax.annotate(label, (score, score))

        buf = io.BytesIO()
        fig.savefig(buf, format="png", bbox_inches='tight')
    finally:
        # Without this every Streamlit rerun leaves another open figure.
        plt.close(fig)

    return base64.b64encode(buf.getvalue()).decode('utf-8')
249
-
250
- # Function to plot risk level distribution pie chart
251
def plot_risk_level_distribution(detected_risks):
    """Render a pie chart of how many risks fall into each risk level.

    Args:
        detected_risks: Dicts with a ``risk_level`` key.

    Returns:
        str: The chart as a base64-encoded PNG. With no risks, the image
        contains a "No risks detected" note instead of a pie.
    """
    from collections import Counter

    # Counter keeps first-seen order, so slice order is deterministic
    # (iterating a set of labels was not).
    level_counts = Counter(risk['risk_level'] for risk in detected_risks)

    fig, ax = plt.subplots(figsize=(4, 3))
    try:
        if level_counts:
            ax.pie(list(level_counts.values()), labels=list(level_counts.keys()), autopct='%1.1f%%', startangle=90)
            ax.axis('equal')
        else:
            ax.text(0.5, 0.5, "No risks detected", ha='center', va='center')
            ax.axis('off')

        ax.set_title("Risk Level Distribution", fontsize=10)

        buf = io.BytesIO()
        fig.savefig(buf, format="png", bbox_inches='tight')
    finally:
        # Without this every Streamlit rerun leaves another open figure.
        plt.close(fig)

    return base64.b64encode(buf.getvalue()).decode('utf-8')
269
-
270
- # Function to plot risks by type bar chart
271
def plot_risks_by_type(detected_risks):
    """Render a bar chart counting detected risks per phrase.

    Args:
        detected_risks: Dicts with a ``phrase`` key.

    Returns:
        str: The chart as a base64-encoded PNG.
    """
    from collections import Counter

    # Single pass instead of list.count() per distinct phrase; first-seen
    # order keeps the bar order deterministic.
    phrase_counts = Counter(risk['phrase'] for risk in detected_risks)

    fig, ax = plt.subplots(figsize=(4, 3))
    try:
        ax.bar(list(phrase_counts.keys()), list(phrase_counts.values()), color='lightcoral')
        plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
        ax.set_title("Risks by Type", fontsize=10)
        ax.set_ylabel("Count")

        buf = io.BytesIO()
        fig.savefig(buf, format="png", bbox_inches='tight')
    finally:
        # Without this every Streamlit rerun leaves another open figure.
        plt.close(fig)

    return base64.b64encode(buf.getvalue()).decode('utf-8')
289
-
290
- # Function to plot stacked bar chart of risks by level
291
def plot_stacked_bar_chart(detected_risks):
    """Render a bar chart of risk counts for the High/Medium/Low levels.

    Levels with no risks are shown with a count of 0; risks with any other
    level are ignored.

    Args:
        detected_risks: Dicts with a ``risk_level`` key.

    Returns:
        str: The chart as a base64-encoded PNG.
    """
    risk_levels = ['High', 'Medium', 'Low']
    level_counts = {level: 0 for level in risk_levels}

    for risk in detected_risks:
        level = risk['risk_level']
        # Guard against unexpected levels, which used to raise KeyError.
        if level in level_counts:
            level_counts[level] += 1

    fig, ax = plt.subplots(figsize=(4, 3))
    try:
        ax.bar(list(level_counts.keys()), list(level_counts.values()), color=['#ff9999', '#66b3ff', '#99ff99'])
        ax.set_title("Stacked Bar Chart of Risks by Level", fontsize=10)
        ax.set_ylabel("Count")

        buf = io.BytesIO()
        fig.savefig(buf, format="png", bbox_inches='tight')
    finally:
        # Without this every Streamlit rerun leaves another open figure.
        plt.close(fig)

    return base64.b64encode(buf.getvalue()).decode('utf-8')
311
-
312
- # Function to plot risk heatmap
313
def plot_risk_heatmap(detected_risks):
    """Render a one-column heatmap of risk counts per risk level.

    Args:
        detected_risks: Dicts with a ``risk_level`` key.

    Returns:
        str: The chart as a base64-encoded PNG. With no risks, the image
        contains a "No risks detected" note instead of a heatmap.
    """
    from collections import Counter

    counts = Counter(risk['risk_level'] for risk in detected_risks)
    # Rows sorted by label, matching the order the former pivot_table produced.
    levels = sorted(counts)
    heatmap_data = pd.DataFrame(
        {'Count': [counts[level] for level in levels]},
        index=pd.Index(levels, name='Risk Level'),
    )

    fig, ax = plt.subplots(figsize=(4, 3))
    try:
        if heatmap_data.empty:
            # An empty frame cannot be drawn as a heatmap.
            ax.text(0.5, 0.5, "No risks detected", ha='center', va='center')
            ax.axis('off')
        else:
            sns.heatmap(heatmap_data, annot=True, cmap='YlGnBu', ax=ax)
        ax.set_title("Risk Heatmap")

        buf = io.BytesIO()
        fig.savefig(buf, format="png", bbox_inches='tight')
    finally:
        # Without this every Streamlit rerun leaves another open figure.
        plt.close(fig)

    return base64.b64encode(buf.getvalue()).decode('utf-8')
335
-
336
- # Function to convert base64 to image
337
def base64_to_image(data):
    """Decode a base64 string into an in-memory binary stream."""
    raw_bytes = base64.b64decode(data)
    stream = io.BytesIO(raw_bytes)
    return stream
339
-
340
- # Function to generate PDF document with improved aesthetics
341
def _ensure_room(pdf, height):
    """Start a new page when ``height`` mm would not fit above the bottom margin."""
    if pdf.get_y() + height > pdf.page_break_trigger:
        pdf.add_page()


def generate_pdf_analysis(document_text, summary, detected_clauses, hidden_obligations, detected_risks, risk_assessment_matrix, risk_level_distribution, risks_by_type, stacked_bar_chart, risk_heatmap):
    """Build the PDF report: title, summary, risk list and the five charts.

    Args:
        document_text: Full document text (accepted but not written to the report).
        summary: Executive summary text.
        detected_clauses: Key clauses (accepted but not written to the report).
        hidden_obligations: Hidden obligations (accepted but not written to the report).
        detected_risks: Dicts with ``phrase``, ``summary`` and ``risk_level``.
        risk_assessment_matrix: Base64-encoded PNG.
        risk_level_distribution: Base64-encoded PNG.
        risks_by_type: Base64-encoded PNG.
        stacked_bar_chart: Base64-encoded PNG.
        risk_heatmap: Base64-encoded PNG.

    Returns:
        FPDF: The populated document; the caller is responsible for output.
    """
    pdf = FPDF()
    pdf.add_page()

    # Set page borders (drawn on the first page only)
    pdf.set_draw_color(0, 0, 0)
    pdf.rect(5, 5, 200, 287)

    # Add Arial font; expects arial.ttf to be resolvable by FPDF at runtime
    pdf.add_font("Arial", "", "arial.ttf", uni=True)
    pdf.set_font("Arial", size=12)

    # Title
    pdf.set_font("Arial", 'B', 16)
    pdf.cell(0, 10, 'Legal Document Analysis Report', ln=True, align='C')
    pdf.ln(10)

    # Executive Summary
    pdf.set_font("Arial", 'B', 14)
    pdf.cell(0, 10, 'Executive Summary', ln=True)
    pdf.set_font("Arial", '', 12)
    pdf.multi_cell(0, 10, summary)
    pdf.ln(10)

    # Risks Section
    pdf.set_font("Arial", 'B', 14)
    pdf.cell(0, 10, 'Risk Analysis', ln=True)
    pdf.set_font("Arial", '', 12)
    for risk in detected_risks:
        # multi_cell wraps long entries that a single cell would push past the
        # right margin; reset x first so each entry starts at the left margin.
        pdf.set_x(pdf.l_margin)
        pdf.multi_cell(0, 10, f"{risk['phrase']}: {risk['summary']} (Risk Level: {risk['risk_level']})")
    pdf.ln(10)

    # Chart rows: both images share one top edge. The right-hand image used
    # to be placed 50 mm higher, on top of the preceding text.
    chart_height = 60
    chart_rows = (
        (risk_assessment_matrix, risk_level_distribution),
        (risks_by_type, stacked_bar_chart),
    )
    for left_chart, right_chart in chart_rows:
        _ensure_room(pdf, chart_height)
        top = pdf.get_y()
        # Only the height is fixed so each chart keeps its aspect ratio.
        pdf.image(base64_to_image(left_chart), x=10, y=top, h=chart_height)
        pdf.image(base64_to_image(right_chart), x=110, y=top, h=chart_height)
        # image() with an explicit y does not move the cursor; advance manually.
        pdf.set_y(top + chart_height + 5)

    heatmap_height = 100
    _ensure_room(pdf, heatmap_height)
    top = pdf.get_y()
    pdf.image(base64_to_image(risk_heatmap), x=10, y=top, h=heatmap_height)
    pdf.set_y(top + heatmap_height + 5)

    # Footer (last page only). Auto page break is switched off while writing
    # it: a cell placed inside the bottom margin would otherwise start a new
    # page and land at its top.
    bottom_margin = pdf.b_margin
    pdf.set_auto_page_break(False)
    pdf.set_y(-15)
    pdf.set_font("Arial", 'I', 8)
    pdf.cell(0, 10, f'Page {pdf.page_no()}', 0, 0, 'C')
    pdf.set_auto_page_break(True, bottom_margin)

    return pdf
391
-
392
- # Function to handle chatbot interaction
393
def chatbot_query(user_input):
    """Send a free-form question to the chat model and return its answer.

    Args:
        user_input: The user's question.

    Returns:
        str: The model's reply, or a message starting with ``"Error:"`` when
        the call fails or the reply has no text content.
    """
    try:
        # Same call style as generate_summary. Calling the model with a dict
        # and expecting a dict back never yielded an answer.
        response = model.invoke(user_input)
    except Exception as e:
        return f"Error: {str(e)}"

    answer = getattr(response, "content", None)
    if isinstance(answer, str) and answer.strip():
        return answer.strip()
    return "Error: Unexpected response format."
402
-
403
- # Function to generate suggestions for improvement
404
def generate_suggestions(text):
    """Suggest wording improvements based on keywords present in ``text``.

    Keywords are matched case-insensitively as whole words, so "may" does not
    fire on "payment" and "not" does not fire on "notice".

    Args:
        text: The document text.

    Returns:
        list[str]: Suggestions in a fixed order (shall, may, if, not).
    """
    import re

    words = set(re.findall(r"[a-z]+", text.lower()))
    suggestions = []

    if "shall" in words:
        suggestions.append("Consider replacing 'shall' with 'must' for clarity.")
    if "may" in words:
        suggestions.append("Clarify the conditions under which actions 'may' be taken.")
    if "if" in words and "then" not in words:
        suggestions.append("Ensure conditional statements are clear and complete.")
    if "not" in words:
        suggestions.append("Review negative clauses to ensure they are not overly restrictive.")

    return suggestions
417
-
418
- # Function to send feedback via email
419
def send_feedback(feedback_content):
    """Email user feedback to the configured feedback address.

    Reads ``SENDER_EMAIL``, ``FEEDBACK_EMAIL`` and ``EMAIL_PASS`` from the
    environment and sends through Gmail's SMTP server with STARTTLS.

    Args:
        feedback_content: Plain-text body of the message.

    Returns:
        bool: True when the message was handed to the server; False when the
        configuration is incomplete or sending failed (the reason is logged).
    """
    import logging

    logger = logging.getLogger(__name__)

    sender_email = os.getenv("SENDER_EMAIL")
    receiver_email = os.getenv("FEEDBACK_EMAIL")
    password = os.getenv("EMAIL_PASS")

    # Fail fast instead of opening a connection that cannot authenticate.
    if not (sender_email and receiver_email and password):
        logger.error("Feedback email not sent: SENDER_EMAIL, FEEDBACK_EMAIL or EMAIL_PASS is missing")
        return False

    msg = MIMEMultipart()
    msg['From'] = sender_email
    msg['To'] = receiver_email
    msg['Subject'] = "User Feedback on Legal Document Analysis"

    msg.attach(MIMEText(feedback_content, 'plain'))

    try:
        # The timeout keeps an unreachable server from hanging the app.
        with smtplib.SMTP('smtp.gmail.com', 587, timeout=30) as server:
            server.starttls()
            server.login(sender_email, password)
            server.send_message(msg)
        return True
    except Exception:
        # Still best-effort for the caller, but the cause is now recorded.
        logger.exception("Failed to send feedback email")
        return False
439
-
440
- # Function to send PDF via email
441
def send_pdf_via_email(pdf_buffer, recipient_email):
    """Email the generated PDF report as an attachment.

    Reads ``SENDER_EMAIL`` and ``EMAIL_PASS`` from the environment and sends
    through Gmail's SMTP server with STARTTLS.

    Args:
        pdf_buffer: ``io.BytesIO`` holding the PDF bytes.
        recipient_email: Destination address.

    Returns:
        bool: True when the message was handed to the server; False when the
        configuration is incomplete or sending failed (the reason is logged).
    """
    import logging

    logger = logging.getLogger(__name__)

    sender_email = os.getenv("SENDER_EMAIL")
    password = os.getenv("EMAIL_PASS")

    # Fail fast instead of opening a connection that cannot authenticate.
    if not (sender_email and password and recipient_email):
        logger.error("PDF email not sent: SENDER_EMAIL, EMAIL_PASS or the recipient address is missing")
        return False

    msg = MIMEMultipart()
    msg['From'] = sender_email
    msg['To'] = recipient_email
    msg['Subject'] = "Legal Document Analysis PDF"

    msg.attach(MIMEText("Please find the attached analysis of your legal document.", 'plain'))

    # Attach the PDF. getvalue() returns the whole buffer regardless of the
    # current read position, so no intermediate copy or seek is needed.
    part = MIMEApplication(pdf_buffer.getvalue(), Name='legal_document_analysis.pdf')
    part['Content-Disposition'] = 'attachment; filename="legal_document_analysis.pdf"'
    msg.attach(part)

    try:
        # The timeout keeps an unreachable server from hanging the app.
        with smtplib.SMTP('smtp.gmail.com', 587, timeout=30) as server:
            server.starttls()
            server.login(sender_email, password)
            server.send_message(msg)
        return True
    except Exception:
        # Still best-effort for the caller, but the cause is now recorded.
        logger.exception("Failed to send PDF email")
        return False
467
-
468
- # Function to simulate tracking updates in the document
469
def track_updates(document_text):
    """Return a fixed, simulated list of document updates.

    ``document_text`` is accepted but not inspected; the same three entries
    (each with ``update`` and ``suggestion`` keys) are returned on every call.
    """
    simulated = (
        ("Updated confidentiality clause.", "Consider specifying the duration of confidentiality."),
        ("Revised liability limits.", "Ensure the limits are realistic and compliant with regulations."),
        ("Clarified termination conditions.", "Check if all potential termination scenarios are covered."),
    )
    return [{"update": change, "suggestion": advice} for change, advice in simulated]
476
-
477
- # Function to get suggestion from Groq API based on the update
478
def get_update_suggestion(update):
    """Ask the chat model for improvement suggestions for a clause update.

    Args:
        update: Short description of the update.

    Returns:
        str: The model's suggestion, or ``"No suggestion available."`` when
        the call fails or returns no text.
    """
    prompt = f"Suggest improvements or updates for this legal clause: {update}"
    try:
        # Query the model directly. Going through generate_summary wrapped
        # this prompt in a "Please summarize..." instruction.
        response = model.invoke(prompt)
    except Exception as e:
        st.error(f"Error generating suggestion: {str(e)}")
        return "No suggestion available."

    content = getattr(response, "content", None)
    suggestion = content.strip() if isinstance(content, str) else ""
    return suggestion if suggestion else "No suggestion available."
482
-
483
- # Function to display feedback form
484
def display_feedback_form():
    """Render the feedback form and email the answers when it is submitted."""
    st.subheader("Feedback Form")
    feedback = st.text_area("Please provide your feedback or suggestions:")

    rating = st.radio("How would you rate the analysis?", ("Excellent", "Good", "Fair", "Poor"))
    recommendation = st.radio("Would you recommend this tool to others?", ("Yes", "No"))

    # Nothing more to do until the user presses the button.
    if not st.button("Submit Feedback"):
        return

    feedback_content = f"Feedback: {feedback}\nRating: {rating}\nRecommendation: {recommendation}"
    delivered = send_feedback(feedback_content)
    if delivered:
        st.success("Thank you for your feedback! It has been sent.")
    else:
        st.error("Failed to send feedback. Please try again later.")
497
-
498
- # Main function to display the legal analysis page
499
def display_legal_analysis_page():
    """Render the whole Streamlit page for analysing an uploaded document.

    Shows the upload widget and, once a PDF or DOCX is provided, seven tabs:
    document text, summary, key clauses, hidden obligations, risk analysis
    with charts, suggestions/chatbot/PDF export/feedback, and update tracker.
    """
    st.title("📜 Legal Document Analysis with Groq API")

    uploaded_file = st.file_uploader("Upload your legal document (PDF or DOCX)", type=["pdf", "docx"])
    if not uploaded_file:
        return

    # Compare the extension case-insensitively so "CONTRACT.PDF" is accepted.
    file_name = uploaded_file.name.lower()
    if file_name.endswith(".pdf"):
        document_text = preprocess_text(read_pdf(uploaded_file))
    elif file_name.endswith(".docx"):
        document_text = preprocess_text(extract_text_from_docx(uploaded_file))
    else:
        st.error("Unsupported file type!")
        return

    # Scanned or empty documents yield no text; stop before calling the model.
    if not document_text:
        st.warning("No extractable text was found in the uploaded document.")
        return

    tabs = st.tabs(["📄 Document Text", "🔍 Summary", "🔑 Key Clauses", "🔒 Hidden Obligations", "⚠ Risk Analysis", "💡 Suggestions & Chatbot", "🔄 Update Tracker"])

    with tabs[0]:
        st.subheader("Document Text")
        st.write(document_text)

    with tabs[1]:
        st.subheader("Summary")
        summary = summarize_large_text(document_text)
        st.write(summary)

    with tabs[2]:
        st.subheader("Key Clauses Identified")
        detected_clauses = detect_key_clauses(document_text)
        if detected_clauses:
            for clause in detected_clauses:
                with st.expander(clause['clause'], expanded=False):
                    st.write(f"*Summary:* {clause['summary']}")
                    st.write(f"*Context:* {clause['explanation']}")
        else:
            st.write("No key clauses detected.")

    with tabs[3]:
        st.subheader("Hidden Obligations and Dependencies")
        hidden_obligations = detect_hidden_obligations_or_dependencies(document_text, summary)
        if hidden_obligations:
            for obligation in hidden_obligations:
                st.write(f"{obligation['phrase']}: {obligation['summary']}")
                st.write(obligation['context'])
        else:
            st.write("No hidden obligations detected.")

    with tabs[4]:
        st.subheader("Risk Analysis")
        detected_risks = detect_risks(document_text, summary)
        overall_risk_score = calculate_overall_risk_score(detected_risks)

        st.write(f"*Overall Risk Score:* {overall_risk_score}")

        if detected_risks:
            for risk in detected_risks:
                with st.expander(risk['phrase'], expanded=False):
                    st.write(f"*Summary:* {risk['summary']} (Risk Level: {risk['risk_level']})")
                    # Show only the first sentence of the context.
                    short_context = risk['context'].strip().split('. ')[0] + '.'
                    st.write(f"*Context:* {short_context}")
        else:
            st.write("No risks detected.")

        # Generate all visualizations (also embedded in the PDF report below)
        risk_assessment_matrix = plot_risk_assessment_matrix(detected_risks)
        risk_level_distribution = plot_risk_level_distribution(detected_risks)
        risks_by_type = plot_risks_by_type(detected_risks)
        stacked_bar_chart = plot_stacked_bar_chart(detected_risks)
        risk_heatmap = plot_risk_heatmap(detected_risks)

        # Display the charts
        st.image(f"data:image/png;base64,{risk_assessment_matrix}", caption="Risk Assessment Matrix")
        st.image(f"data:image/png;base64,{risk_level_distribution}", caption="Risk Level Distribution")
        st.image(f"data:image/png;base64,{risks_by_type}", caption="Risks by Type")
        st.image(f"data:image/png;base64,{stacked_bar_chart}", caption="Stacked Bar Chart of Risks by Level")
        st.image(f"data:image/png;base64,{risk_heatmap}", caption="Risk Heatmap")

    with tabs[5]:
        st.subheader("Suggestions for Improvement")
        suggestions = generate_suggestions(document_text)
        for suggestion in suggestions:
            st.write(f"- {suggestion}")

        st.subheader("Chatbot for Analysis")
        user_input = st.text_input("Ask the chatbot about your document:")
        if st.button("Send"):
            if user_input:
                chatbot_response = chatbot_query(user_input)
                st.write("*Chatbot Response:*")
                st.write(chatbot_response)
            else:
                st.warning("Please enter a question.")

        # Download PDF Analysis Button
        st.subheader("Download Analysis as PDF")
        pdf_buffer = io.BytesIO()
        pdf = generate_pdf_analysis(document_text, summary, detected_clauses, hidden_obligations, detected_risks, risk_assessment_matrix, risk_level_distribution, risks_by_type, stacked_bar_chart, risk_heatmap)
        pdf.output(pdf_buffer, 'F')
        pdf_buffer.seek(0)

        # Add download button for PDF
        st.download_button(
            label="Download PDF Analysis",
            data=pdf_buffer,
            file_name="legal_document_analysis.pdf",
            mime="application/pdf"
        )

        # Input for recipient email
        recipient_email = st.text_input("Enter your email address to receive the PDF:")

        # Button to send PDF via email
        if st.button("Send PDF Analysis"):
            if recipient_email:
                if send_pdf_via_email(pdf_buffer, recipient_email):
                    st.success("PDF has been sent successfully!")
                else:
                    st.error("Failed to send PDF. Please try again.")
            else:
                st.warning("Please enter a valid email address.")

        # Feedback Form Section
        display_feedback_form()

    with tabs[6]:  # Update Tracker Tab
        st.subheader("Document Updates")
        updates = track_updates(document_text)
        if st.button("Show Updates"):
            if updates:
                for update in updates:
                    with st.expander(update['update'], expanded=False):
                        suggestion = get_update_suggestion(update['update'])
                        st.write(f"*Suggestion:* {suggestion}")
            else:
                st.write("No updates detected.")
633
-
634
- # Run the application
635
# Entry point: render the page when the file is executed as a script
# (which is how `streamlit run` executes it).
if __name__ == "__main__":
    display_legal_analysis_page()
 
1
+ import os
2
+ import PyPDF2
3
+ import streamlit as st
4
+ from dotenv import load_dotenv
5
+ from langchain_groq import ChatGroq
6
+ from docx import Document
7
+ import matplotlib.pyplot as plt
8
+ import io
9
+ import base64
10
+ from email.mime.multipart import MIMEMultipart
11
+ from email.mime.text import MIMEText
12
+ from email.mime.application import MIMEApplication
13
+ import smtplib
14
+ from fpdf import FPDF
15
+ import getpass
16
+ import pandas as pd
17
+ import seaborn as sns
18
+
19
from langchain_core.globals import set_verbose

# Load environment variables from .env file. This must run before the model
# is built, otherwise a key stored in .env is not visible yet.
load_dotenv()

# Turn off LangChain's global verbose output.
set_verbose(False)

# Check if the GROQ_API_KEY is in the environment variables
if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass.getpass("Enter API key for Groq: ")

# Initialize the model once, after the key is known. The previous revision
# opened a first ChatGroq(...) call without closing it (a syntax error) and
# then built the model a second time.
model = ChatGroq(
    model="llama-3.1-8b-instant",
    api_key=os.environ.get("GROQ_API_KEY"),
    verbose=False,
)

# Custom CSS for improved aesthetics
st.markdown(
    """
    <style>
    .main {
        background-color: #f0f2f5;
    }
    .sidebar .sidebar-content {
        background-color: #ffffff;
    }
    h1 {
        color: #2C3E50;
    }
    h2 {
        color: #2980B9;
    }
    .stButton button {
        background-color: #2980B9;
        color: white;
        border: None;
        border-radius: 5px;
        padding: 10px;
    }
    </style>
    """,
    unsafe_allow_html=True
)
63
+
64
+ # Function to read PDF content
65
def read_pdf(file):
    """Extract the text of every page of a PDF.

    Args:
        file: Path or binary file-like object accepted by ``PyPDF2.PdfReader``.

    Returns:
        str: The page texts concatenated in page order, without a separator.
    """
    reader = PyPDF2.PdfReader(file)
    # extract_text() may yield None or "" for pages without a text layer
    # (e.g. scanned images); `or ""` keeps such pages from raising TypeError.
    return "".join(page.extract_text() or "" for page in reader.pages)
71
+
72
+ # Function to extract text from DOCX files
73
def extract_text_from_docx(file):
    """Return the text of a DOCX document, one paragraph per line."""
    lines = []
    for para in Document(file).paragraphs:
        lines.append(para.text)
    return "\n".join(lines)
77
+
78
+ # Function to preprocess text
79
def preprocess_text(text):
    """Collapse every run of whitespace (newlines included) into one space."""
    # str.split() with no argument already splits on "\n", "\r" and spaces.
    words = text.split()
    return " ".join(words)
81
+
82
+ # Function to chunk large text into smaller parts
83
def chunk_text(text, max_tokens=2000):
    """Split text into sentence-aligned chunks of at most ``max_tokens`` words.

    Sentences are delimited by ``". "`` and "tokens" are approximated by
    whitespace-separated words. A single sentence longer than ``max_tokens``
    is kept whole in its own chunk.

    Args:
        text: The text to split.
        max_tokens: Maximum number of words per chunk.

    Returns:
        list[str]: Non-empty chunks in document order; ``[]`` for blank text.
    """
    chunks = []
    current_chunk = []
    current_length = 0

    for sentence in text.split(". "):
        sentence_length = len(sentence.split())
        # Flush only when something has been collected; otherwise an oversized
        # first sentence would push an empty chunk to the model.
        if current_chunk and current_length + sentence_length > max_tokens:
            chunks.append(". ".join(current_chunk))
            current_chunk = []
            current_length = 0
        current_chunk.append(sentence)
        current_length += sentence_length

    if current_chunk:
        chunks.append(". ".join(current_chunk))

    # Drop blank chunks (e.g. from empty input) so callers never summarize "".
    return [chunk for chunk in chunks if chunk.strip()]
102
+
103
+ # Function to generate summary for each chunk
104
def generate_summary(text):
    """Ask the chat model to summarize ``text``.

    Returns:
        The stripped summary, ``"No summary available."`` when the model
        returns nothing, or ``None`` after reporting an error in the UI.
    """
    prompt = f"Please summarize the following content:\n\n{text}"
    try:
        reply = model.invoke(prompt)
        raw = reply.content if hasattr(reply, 'content') else str(reply)
        if not raw:
            return "No summary available."
        return raw.strip()
    except Exception as e:
        st.error(f"Error generating summary: {str(e)}")
        return None
116
+
117
+ # Function to summarize large texts
118
def summarize_large_text(text, chunk_limit=5000):
    """Summarize ``text`` chunk by chunk and join the partial summaries.

    Chunks whose summary is empty or failed (``None``) are left out.
    """
    pieces = chunk_text(text, max_tokens=chunk_limit)
    partial_summaries = (generate_summary(piece) for piece in pieces)
    return " ".join(part for part in partial_summaries if part)
126
+
127
+ # Function to detect key clauses
128
def detect_key_clauses(text):
    """Find well-known clause keywords in ``text`` (case-insensitive).

    Args:
        text: The document text to scan.

    Returns:
        list[dict]: One entry per keyword found, with keys ``clause``
        (capitalized keyword), ``summary`` (generic description) and
        ``explanation`` (up to 50 characters before and 200 after the first
        occurrence).
    """
    key_clauses = [
        {"clause": "confidentiality", "summary": "Confidentiality clauses ensure that sensitive information remains protected."},
        {"clause": "liability", "summary": "Liability clauses outline the responsibility for damages or losses incurred."},
        {"clause": "termination", "summary": "Termination clauses specify the conditions under which a contract may be ended."},
        {"clause": "force majeure", "summary": "Force majeure clauses excuse parties from performance obligations due to unforeseen events."},
        {"clause": "governing law", "summary": "Governing law clauses specify which jurisdiction's laws will govern the contract."},
        {"clause": "dispute resolution", "summary": "Dispute resolution clauses specify how conflicts between parties will be resolved."},
        {"clause": "amendment", "summary": "Amendment clauses outline the process for changing the terms of the contract."},
        {"clause": "warranty", "summary": "Warranty clauses provide assurances regarding the quality or condition of goods or services."},
    ]

    lowered = text.lower()  # lower-case once instead of once per keyword
    detected_clauses = []
    for clause in key_clauses:
        clause_start = lowered.find(clause["clause"].lower())
        if clause_start == -1:
            continue
        # Clamp at 0: a negative start would wrap around to the end of the
        # text and produce an empty or unrelated context.
        context = text[max(0, clause_start - 50): clause_start + 200]
        explanation = f"The document mentions '{clause['clause']}' clause. Context: {context.strip()}..."
        detected_clauses.append({
            "clause": clause["clause"].capitalize(),
            "summary": clause["summary"],
            "explanation": explanation
        })

    return detected_clauses
153
+
154
+ # Function to detect hidden obligations or dependencies
155
def detect_hidden_obligations_or_dependencies(text, summary):
    """Find phrases that hint at conditional or implicit obligations.

    Phrases are matched case-insensitively as whole words, so "if" no longer
    fires inside words such as "certificate". The document text is searched
    first; the summary is used only when the text has no match.

    Args:
        text: The document text.
        summary: The generated summary (``None`` is treated as empty).

    Returns:
        list[dict]: Entries with keys ``phrase``, ``summary`` and ``context``
        (up to 50 characters before and 200 after the first match, taken from
        whichever source contained it).
    """
    import re

    hidden_obligations = [
        {"phrase": "dependent upon", "summary": "This suggests that some action is conditional upon another."},
        {"phrase": "if", "summary": "This indicates that certain conditions must be met to fulfill the obligation."},
        {"phrase": "may be required", "summary": "Implies that the party could be obligated to perform an action under specific conditions."},
        {"phrase": "should", "summary": "Implies a recommendation or requirement, though not explicitly mandatory."},
        {"phrase": "obligated to", "summary": "Indicates a clear, binding duty to perform an action."},
    ]

    summary = summary or ""
    hidden_dependencies = []

    for item in hidden_obligations:
        pattern = re.compile(r"\b" + re.escape(item["phrase"]) + r"\b", re.IGNORECASE)
        source = text
        match = pattern.search(source)
        if match is None:
            # Previously a summary-only hit sliced `text` at index -1 and
            # returned unrelated context; quote the summary instead.
            source = summary
            match = pattern.search(source)
        if match is None:
            continue
        start = match.start()
        # Clamp at 0 so an early match does not wrap to the end of the string.
        context = source[max(0, start - 50): start + 200]
        hidden_dependencies.append({
            "phrase": item["phrase"],
            "summary": item["summary"],
            "context": context.strip()
        })

    return hidden_dependencies
177
+
178
+ # Function to detect risks in the text
179
def detect_risks(text, summary):
    """Find risk-related phrases in the document text or its summary.

    Matching is a case-insensitive substring search. The document text is
    searched first; the summary is used only when the text has no match.

    Args:
        text: The document text.
        summary: The generated summary (``None`` is treated as empty).

    Returns:
        list[dict]: Entries with keys ``phrase``, ``summary``, ``context``
        (up to 50 characters before and 200 after the first match, taken from
        whichever source contained it) and ``risk_level``
        (``"High"``/``"Medium"``/``"Low"``).
    """
    risk_phrases = [
        {"phrase": "penalty", "summary": "This indicates financial or legal consequences.", "risk_level": "High"},
        {"phrase": "liability", "summary": "This suggests potential financial responsibility.", "risk_level": "Medium"},
        {"phrase": "default", "summary": "This can lead to serious legal consequences.", "risk_level": "High"},
        {"phrase": "breach", "summary": "This may expose the party to significant penalties.", "risk_level": "High"},
        {"phrase": "suspension", "summary": "This indicates risks of halting services.", "risk_level": "Medium"},
        {"phrase": "should", "summary": "This implies a recommendation, which may not be mandatory.", "risk_level": "Low"},
        {"phrase": "may be required", "summary": "This suggests that obligations could exist under certain conditions.", "risk_level": "Low"},
        {"phrase": "indemnify", "summary": "This entails a duty to compensate for harm or loss, indicating potential financial risk.", "risk_level": "High"},
        {"phrase": "termination for cause", "summary": "This indicates a risk of ending the contract due to specific failures.", "risk_level": "High"},
        {"phrase": "compliance", "summary": "Non-compliance with regulations can lead to legal penalties.", "risk_level": "High"},
    ]

    summary = summary or ""
    lowered_text = text.lower()      # lower-case once, not once per phrase
    lowered_summary = summary.lower()
    detected_risks = []

    for item in risk_phrases:
        phrase = item["phrase"].lower()
        source = text
        phrase_start = lowered_text.find(phrase)
        if phrase_start == -1:
            # Summary-only hit: quote the summary rather than slicing `text`
            # at index -1, which returned unrelated context.
            source = summary
            phrase_start = lowered_summary.find(phrase)
        if phrase_start == -1:
            continue
        # Clamp at 0 so an early match does not wrap to the end of the string.
        context = source[max(0, phrase_start - 50): phrase_start + 200]
        detected_risks.append({
            "phrase": item["phrase"],
            "summary": item["summary"],
            "context": context.strip(),
            "risk_level": item["risk_level"]
        })

    return detected_risks
207
+
208
+ # Function to calculate overall risk score
209
def calculate_overall_risk_score(detected_risks):
    """Add up a numeric weight for every detected risk.

    High counts 3, Medium 2 and Low 1; any other ``risk_level`` value
    contributes 0.

    Args:
        detected_risks: Iterable of dicts that each carry a ``risk_level`` key.

    Returns:
        The summed weight as an integer (0 for an empty input).
    """
    weights = {"High": 3, "Medium": 2, "Low": 1}
    score = 0
    for entry in detected_risks:
        score += weights.get(entry['risk_level'], 0)
    return score
217
+
218
+ # Function to plot risk assessment matrix
219
def plot_risk_assessment_matrix(detected_risks):
    """Render the risks as an annotated likelihood/impact scatter plot.

    A risk's level is used for both axes (High -> 3, Medium -> 2, Low -> 1).
    Risks with any other level are left off the chart; coordinates and labels
    are built together so they can never get out of step.

    Args:
        detected_risks: Iterable of dicts with ``risk_level`` and ``phrase``.

    Returns:
        The chart as a base64-encoded PNG string.
    """
    level_to_score = {"High": 3, "Medium": 2, "Low": 1}
    points = [
        (level_to_score[risk['risk_level']], risk['phrase'])
        for risk in detected_risks
        if risk['risk_level'] in level_to_score
    ]
    scores = [score for score, _ in points]

    fig, ax = plt.subplots(figsize=(6, 6))
    try:
        ax.scatter(scores, scores, alpha=0.6)

        ax.set_xticks([1, 2, 3])
        ax.set_yticks([1, 2, 3])
        ax.set_xticklabels(['Low', 'Medium', 'High'])
        ax.set_yticklabels(['Low', 'Medium', 'High'])
        ax.set_xlabel('Likelihood')
        ax.set_ylabel('Impact')
        ax.set_title('Risk Assessment Matrix')

        for score, phrase in points:
            ax.annotate(phrase, (score, score))

        buf = io.BytesIO()
        fig.savefig(buf, format="png", bbox_inches='tight')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)

    return base64.b64encode(buf.getvalue()).decode('utf-8')
256
+
257
+ # Function to plot risk level distribution pie chart
258
def plot_risk_level_distribution(detected_risks):
    """Render a pie chart of how many detected risks fall into each level.

    Wedges appear in the order each level is first seen in the input, so the
    chart is the same on every run for the same input.

    Args:
        detected_risks: Iterable of dicts that each carry a ``risk_level`` key.

    Returns:
        The chart as a base64-encoded PNG string.
    """
    from collections import Counter  # local import keeps the block self-contained

    level_counts = Counter(risk['risk_level'] for risk in detected_risks)

    fig, ax = plt.subplots(figsize=(4, 3))
    try:
        ax.pie(
            list(level_counts.values()),
            labels=list(level_counts.keys()),
            autopct='%1.1f%%',
            startangle=90,
        )
        ax.axis('equal')
        ax.set_title("Risk Level Distribution", fontsize=10)

        buf = io.BytesIO()
        fig.savefig(buf, format="png", bbox_inches='tight')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)

    return base64.b64encode(buf.getvalue()).decode('utf-8')
276
+
277
+ # Function to plot risks by type bar chart
278
def plot_risks_by_type(detected_risks):
    """Render a bar chart counting the detected risks per trigger phrase.

    Bars appear in the order each phrase is first seen in the input, so the
    chart is the same on every run for the same input.

    Args:
        detected_risks: Iterable of dicts that each carry a ``phrase`` key.

    Returns:
        The chart as a base64-encoded PNG string.
    """
    from collections import Counter  # local import keeps the block self-contained

    phrase_counts = Counter(risk['phrase'] for risk in detected_risks)

    fig, ax = plt.subplots(figsize=(4, 3))
    try:
        ax.bar(list(phrase_counts.keys()), list(phrase_counts.values()), color='lightcoral')
        # `fig` is still the current pyplot figure here, so this targets `ax`.
        plt.xticks(rotation=45, ha='right')
        ax.set_title("Risks by Type", fontsize=10)
        ax.set_ylabel("Count")

        buf = io.BytesIO()
        fig.savefig(buf, format="png", bbox_inches='tight')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)

    return base64.b64encode(buf.getvalue()).decode('utf-8')
296
+
297
+ # Function to plot stacked bar chart of risks by level
298
def plot_stacked_bar_chart(detected_risks):
    """Render a bar chart with the number of risks per level.

    The three bars are always High, Medium and Low, in that order, including
    levels with a count of zero. Risks carrying any other level are ignored
    instead of raising ``KeyError``.

    Args:
        detected_risks: Iterable of dicts that each carry a ``risk_level`` key.

    Returns:
        The chart as a base64-encoded PNG string.
    """
    level_counts = {level: 0 for level in ('High', 'Medium', 'Low')}
    for risk in detected_risks:
        level = risk['risk_level']
        if level in level_counts:
            level_counts[level] += 1

    fig, ax = plt.subplots(figsize=(4, 3))
    try:
        ax.bar(
            list(level_counts.keys()),
            list(level_counts.values()),
            color=['#ff9999', '#66b3ff', '#99ff99'],
        )
        ax.set_title("Stacked Bar Chart of Risks by Level", fontsize=10)
        ax.set_ylabel("Count")

        buf = io.BytesIO()
        fig.savefig(buf, format="png", bbox_inches='tight')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)

    return base64.b64encode(buf.getvalue()).decode('utf-8')
318
+
319
+ # Function to plot risk heatmap
320
def plot_risk_heatmap(detected_risks):
    """Render a one-column heatmap of the number of risks per level.

    When no risks were detected there is nothing to tabulate, so a
    placeholder image with the text "No risks detected" is produced rather
    than handing an empty table to seaborn.

    Args:
        detected_risks: Sequence of dicts that each carry a ``risk_level`` key.

    Returns:
        The chart as a base64-encoded PNG string.
    """
    levels = [risk['risk_level'] for risk in detected_risks]

    fig, ax = plt.subplots(figsize=(4, 3))
    try:
        if levels:
            df = pd.DataFrame({'Risk Level': levels, 'Count': [1] * len(levels)})
            # One row per level, single "Count" column holding the tally.
            heatmap_data = df.groupby('Risk Level')[['Count']].count()
            sns.heatmap(heatmap_data, annot=True, cmap='YlGnBu', ax=ax)
        else:
            ax.text(0.5, 0.5, "No risks detected", ha='center', va='center', transform=ax.transAxes)
            ax.set_axis_off()
        ax.set_title("Risk Heatmap")

        buf = io.BytesIO()
        fig.savefig(buf, format="png", bbox_inches='tight')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)

    return base64.b64encode(buf.getvalue()).decode('utf-8')
342
+
343
+ # Function to convert base64 to image
344
def base64_to_image(data):
    """Decode a base64 payload and wrap the bytes in an in-memory stream.

    Args:
        data: Base64-encoded content (``str`` or bytes-like).

    Returns:
        An ``io.BytesIO`` positioned at the start of the decoded bytes.
    """
    raw_bytes = base64.b64decode(data)
    stream = io.BytesIO(raw_bytes)
    return stream
346
+
347
+ # Function to generate PDF document with improved aesthetics
348
def generate_pdf_analysis(document_text, summary, detected_clauses, hidden_obligations, detected_risks, risk_assessment_matrix, risk_level_distribution, risks_by_type, stacked_bar_chart, risk_heatmap):
    """Assemble the PDF report: title, executive summary, risk list and charts.

    Args:
        document_text: Accepted for interface compatibility; not rendered.
        summary: Text printed under "Executive Summary".
        detected_clauses: Accepted for interface compatibility; not rendered.
        hidden_obligations: Accepted for interface compatibility; not rendered.
        detected_risks: Iterable of dicts with ``phrase``, ``summary`` and
            ``risk_level``; one paragraph is written per risk.
        risk_assessment_matrix: Base64-encoded PNG, left chart of row one.
        risk_level_distribution: Base64-encoded PNG, right chart of row one.
        risks_by_type: Base64-encoded PNG, left chart of row two.
        stacked_bar_chart: Base64-encoded PNG, right chart of row two.
        risk_heatmap: Base64-encoded PNG placed below the two chart rows.

    Returns:
        The populated ``FPDF`` object; writing it out is left to the caller.
    """
    pdf = FPDF()
    pdf.add_page()

    # Set page borders
    pdf.set_draw_color(0, 0, 0)
    pdf.rect(5, 5, 200, 287)

    # Add Arial font
    # NOTE(review): this needs "arial.ttf" to be resolvable at runtime and only
    # registers the regular style, while bold/italic are requested below --
    # confirm how the installed fpdf version resolves those styles.
    pdf.add_font("Arial", "", "arial.ttf", uni=True)
    pdf.set_font("Arial", size=12)

    # Title
    pdf.set_font("Arial", 'B', 16)
    pdf.cell(0, 10, 'Legal Document Analysis Report', ln=True, align='C')
    pdf.ln(10)

    # Executive Summary
    pdf.set_font("Arial", 'B', 14)
    pdf.cell(0, 10, 'Executive Summary', ln=True)
    pdf.set_font("Arial", '', 12)
    pdf.multi_cell(0, 10, summary)
    pdf.ln(10)

    # Risks Section
    pdf.set_font("Arial", 'B', 14)
    pdf.cell(0, 10, 'Risk Analysis', ln=True)
    pdf.set_font("Arial", '', 12)
    for risk in detected_risks:
        # multi_cell wraps long entries; a plain cell would run past the
        # right-hand page border.
        pdf.multi_cell(0, 10, f"{risk['phrase']}: {risk['summary']} (Risk Level: {risk['risk_level']})")
    pdf.ln(10)

    # Add visualizations for risks.
    # image() does not move the cursor when an explicit y is passed, so both
    # charts of a row share one captured y to sit side by side.
    # NOTE(review): the fixed 60-unit row advance and the full-width heatmap
    # may not fit tall charts or a long risk list on one page -- confirm the
    # layout with real data.
    row_y = pdf.get_y()
    pdf.image(base64_to_image(risk_assessment_matrix), x=10, y=row_y, w=90)
    pdf.image(base64_to_image(risk_level_distribution), x=110, y=row_y, w=90)
    pdf.ln(60)

    row_y = pdf.get_y()
    pdf.image(base64_to_image(risks_by_type), x=10, y=row_y, w=90)
    pdf.image(base64_to_image(stacked_bar_chart), x=110, y=row_y, w=90)
    pdf.ln(60)

    pdf.image(base64_to_image(risk_heatmap), x=10, y=pdf.get_y(), w=190)  # Fit image to width
    pdf.ln(10)

    # Footer
    pdf.set_y(-15)
    pdf.set_font("Arial", 'I', 8)
    pdf.cell(0, 10, f'Page {pdf.page_no()}', 0, 0, 'C')

    return pdf
398
+
399
+ # Function to handle chatbot interaction
400
def chatbot_query(user_input):
    """Send a user question to the chat model and return its reply as text.

    The module-level ``model`` is called through ``invoke``; the reply text is
    read from the returned message's ``content`` attribute. A dict carrying a
    ``text`` key, or a plain string, is also accepted as a reply.

    Args:
        user_input: The question typed by the user.

    Returns:
        The model's answer, or a string starting with "Error:" when the call
        fails or the reply has an unrecognised shape.
    """
    try:
        response = model.invoke(user_input)
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        return f"Error: {str(e)}"

    content = getattr(response, "content", None)
    if isinstance(content, str):
        return content
    if isinstance(response, dict) and 'text' in response:
        return response['text']
    if isinstance(response, str):
        return response
    return "Error: Unexpected response format."
409
+
410
+ # Function to generate suggestions for improvement
411
def generate_suggestions(text):
    """Produce wording suggestions based on keywords found in the text.

    Keywords are matched case-insensitively as whole words, so "may" does not
    fire on "payment" and "not" does not fire on "notice".

    Args:
        text: Document text to inspect (``None`` is treated as empty).

    Returns:
        A list of suggestion strings in a fixed order: "shall", "may",
        "if" without "then", "not". Empty when no keyword is present.
    """
    import re  # local import so the block does not rely on a file-level one

    words = set(re.findall(r"[a-z]+", (text or "").lower()))
    suggestions = []

    if "shall" in words:
        suggestions.append("Consider replacing 'shall' with 'must' for clarity.")
    if "may" in words:
        suggestions.append("Clarify the conditions under which actions 'may' be taken.")
    if "if" in words and "then" not in words:
        suggestions.append("Ensure conditional statements are clear and complete.")
    if "not" in words:
        suggestions.append("Review negative clauses to ensure they are not overly restrictive.")

    return suggestions
424
+
425
+ # Function to send feedback via email
426
def send_feedback(feedback_content):
    """E-mail user feedback through Gmail's SMTP server.

    Sender, recipient and password are read from the ``SENDER_EMAIL``,
    ``FEEDBACK_EMAIL`` and ``EMAIL_PASS`` environment variables.

    Args:
        feedback_content: Plain-text body of the feedback message.

    Returns:
        ``True`` when the message was handed to the server; ``False`` when
        the configuration is incomplete or sending failed. Failures are
        logged, never raised.
    """
    import logging  # local import so the block does not rely on a file-level one

    logger = logging.getLogger(__name__)

    sender_email = os.getenv("SENDER_EMAIL")
    receiver_email = os.getenv("FEEDBACK_EMAIL")
    password = os.getenv("EMAIL_PASS")
    if not (sender_email and receiver_email and password):
        logger.error("Feedback e-mail not sent: SENDER_EMAIL, FEEDBACK_EMAIL or EMAIL_PASS is not set.")
        return False

    msg = MIMEMultipart()
    msg['From'] = sender_email
    msg['To'] = receiver_email
    msg['Subject'] = "User Feedback on Legal Document Analysis"

    msg.attach(MIMEText(feedback_content, 'plain'))

    try:
        # The timeout keeps an unreachable server from blocking the caller.
        with smtplib.SMTP('smtp.gmail.com', 587, timeout=10) as server:
            server.starttls()
            server.login(sender_email, password)
            server.send_message(msg)
    except Exception:
        # Best-effort by contract: record the cause, report failure to the caller.
        logger.exception("Sending the feedback e-mail failed.")
        return False
    return True
446
+
447
+ # Function to send PDF via email
448
def send_pdf_via_email(pdf_buffer, recipient_email):
    """E-mail the generated PDF as an attachment through Gmail's SMTP server.

    Sender and password are read from the ``SENDER_EMAIL`` and ``EMAIL_PASS``
    environment variables.

    Args:
        pdf_buffer: In-memory buffer holding the PDF; its full content is
            read with ``getvalue()`` regardless of the current position.
        recipient_email: Destination address entered by the user.

    Returns:
        ``True`` when the message was handed to the server; ``False`` when
        the recipient is empty or contains a line break, the configuration is
        incomplete, or sending failed. Failures are logged, never raised.
    """
    import logging  # local import so the block does not rely on a file-level one

    logger = logging.getLogger(__name__)

    # The address comes straight from a text box: refuse empty values and
    # line breaks, which have no place in a header value.
    if not recipient_email or "\r" in recipient_email or "\n" in recipient_email:
        logger.error("PDF e-mail not sent: recipient address is empty or malformed.")
        return False

    sender_email = os.getenv("SENDER_EMAIL")
    password = os.getenv("EMAIL_PASS")
    if not (sender_email and password):
        logger.error("PDF e-mail not sent: SENDER_EMAIL or EMAIL_PASS is not set.")
        return False

    msg = MIMEMultipart()
    msg['From'] = sender_email
    msg['To'] = recipient_email
    msg['Subject'] = "Legal Document Analysis PDF"

    msg.attach(MIMEText("Please find the attached analysis of your legal document.", 'plain'))

    # Attach the PDF
    part = MIMEApplication(pdf_buffer.getvalue(), Name='legal_document_analysis.pdf')
    part['Content-Disposition'] = 'attachment; filename="legal_document_analysis.pdf"'
    msg.attach(part)

    try:
        # The timeout keeps an unreachable server from blocking the caller.
        with smtplib.SMTP('smtp.gmail.com', 587, timeout=10) as server:
            server.starttls()
            server.login(sender_email, password)
            server.send_message(msg)
    except Exception:
        # Best-effort by contract: record the cause, report failure to the caller.
        logger.exception("Sending the PDF e-mail failed.")
        return False
    return True
474
+
475
+ # Function to simulate tracking updates in the document
476
def track_updates(document_text):
    """Return a fixed, simulated list of document updates.

    Args:
        document_text: Not inspected; the result is the same for any input.

    Returns:
        A list of three dicts, each with an ``update`` description and a
        matching ``suggestion``.
    """
    simulated = (
        ("Updated confidentiality clause.", "Consider specifying the duration of confidentiality."),
        ("Revised liability limits.", "Ensure the limits are realistic and compliant with regulations."),
        ("Clarified termination conditions.", "Check if all potential termination scenarios are covered."),
    )
    return [{"update": change, "suggestion": advice} for change, advice in simulated]
483
+
484
+ # Function to get suggestion from Groq API based on the update
485
def get_update_suggestion(update):
    """Ask ``generate_summary`` for improvement ideas on one clause update.

    Args:
        update: Description of the update to get a suggestion for.

    Returns:
        Whatever ``generate_summary`` returns for the prompt, or
        "No suggestion available." when that result is empty or falsy.
    """
    request = f"Suggest improvements or updates for this legal clause: {update}"
    return generate_summary(request) or "No suggestion available."
489
+
490
+ # Function to display feedback form
491
def display_feedback_form():
    """Draw the feedback form and e-mail the answers when it is submitted.

    Shows a free-text box and two radio questions. Pressing "Submit Feedback"
    passes the combined answers to ``send_feedback`` and displays a success
    or error message depending on its result.
    """
    st.subheader("Feedback Form")
    comments = st.text_area("Please provide your feedback or suggestions:")

    rating = st.radio("How would you rate the analysis?", ("Excellent", "Good", "Fair", "Poor"))
    would_recommend = st.radio("Would you recommend this tool to others?", ("Yes", "No"))

    # Nothing more to do until the user presses the button.
    if not st.button("Submit Feedback"):
        return

    message = f"Feedback: {comments}\nRating: {rating}\nRecommendation: {would_recommend}"
    delivered = send_feedback(message)
    if delivered:
        st.success("Thank you for your feedback! It has been sent.")
    else:
        st.error("Failed to send feedback. Please try again later.")
504
+
505
+ # Main function to display the legal analysis page
506
def display_legal_analysis_page():
    """Render the Streamlit page: upload a document and browse its analysis.

    After a PDF or DOCX file is uploaded, its text is extracted and shown
    across seven tabs: raw text, summary, key clauses, hidden obligations,
    risk analysis with charts, suggestions/chatbot (which also offers the PDF
    report for download or e-mail and the feedback form), and an update
    tracker. Nothing beyond the title and uploader is drawn until a file is
    provided.

    The file extension is compared case-insensitively so that names such as
    ``CONTRACT.PDF`` are accepted.

    NOTE(review): the emoji in the title and tab labels look mis-encoded in
    this copy of the file; they are reproduced verbatim -- confirm against the
    intended labels.
    NOTE(review): the PDF download/e-mail section and the feedback form are
    assumed to belong to the "Suggestions & Chatbot" tab -- confirm.
    """
    st.title("πŸ“œ Legal Document Analysis with Groq API")

    uploaded_file = st.file_uploader("Upload your legal document (PDF or DOCX)", type=["pdf", "docx"])
    if not uploaded_file:
        return

    file_name = uploaded_file.name.lower()
    if file_name.endswith(".pdf"):
        document_text = preprocess_text(read_pdf(uploaded_file))
    elif file_name.endswith(".docx"):
        document_text = preprocess_text(extract_text_from_docx(uploaded_file))
    else:
        st.error("Unsupported file type!")
        return

    tabs = st.tabs(["πŸ“„ Document Text", "πŸ” Summary", "πŸ”‘ Key Clauses", "πŸ”’ Hidden Obligations", "⚠ Risk Analysis", "πŸ’‘ Suggestions & Chatbot", "πŸ”„ Update Tracker"])

    with tabs[0]:
        st.subheader("Document Text")
        st.write(document_text)

    with tabs[1]:
        st.subheader("Summary")
        summary = summarize_large_text(document_text)
        st.write(summary)

    with tabs[2]:
        st.subheader("Key Clauses Identified")
        detected_clauses = detect_key_clauses(document_text)
        if detected_clauses:
            for clause in detected_clauses:
                with st.expander(clause['clause'], expanded=False):
                    st.write(f"*Summary:* {clause['summary']}")
                    st.write(f"*Context:* {clause['explanation']}")
        else:
            st.write("No key clauses detected.")

    with tabs[3]:
        st.subheader("Hidden Obligations and Dependencies")
        hidden_obligations = detect_hidden_obligations_or_dependencies(document_text, summary)
        if hidden_obligations:
            for obligation in hidden_obligations:
                st.write(f"{obligation['phrase']}: {obligation['summary']}")
                st.write(obligation['context'])
        else:
            st.write("No hidden obligations detected.")

    with tabs[4]:
        st.subheader("Risk Analysis")
        detected_risks = detect_risks(document_text, summary)
        overall_risk_score = calculate_overall_risk_score(detected_risks)

        st.write(f"*Overall Risk Score:* {overall_risk_score}")

        if detected_risks:
            for risk in detected_risks:
                with st.expander(risk['phrase'], expanded=False):
                    st.write(f"*Summary:* {risk['summary']} (Risk Level: {risk['risk_level']})")
                    # Show only the first sentence of the context snippet.
                    short_context = risk['context'].strip().split('. ')[0] + '.'
                    st.write(f"*Context:* {short_context}")
        else:
            st.write("No risks detected.")

        # Generate all visualizations (also needed for the PDF report below,
        # so they are built whether or not any risk was found).
        risk_assessment_matrix = plot_risk_assessment_matrix(detected_risks)
        risk_level_distribution = plot_risk_level_distribution(detected_risks)
        risks_by_type = plot_risks_by_type(detected_risks)
        stacked_bar_chart = plot_stacked_bar_chart(detected_risks)
        risk_heatmap = plot_risk_heatmap(detected_risks)

        # Display the charts
        st.image(f"data:image/png;base64,{risk_assessment_matrix}", caption="Risk Assessment Matrix")
        st.image(f"data:image/png;base64,{risk_level_distribution}", caption="Risk Level Distribution")
        st.image(f"data:image/png;base64,{risks_by_type}", caption="Risks by Type")
        st.image(f"data:image/png;base64,{stacked_bar_chart}", caption="Stacked Bar Chart of Risks by Level")
        st.image(f"data:image/png;base64,{risk_heatmap}", caption="Risk Heatmap")

    with tabs[5]:
        st.subheader("Suggestions for Improvement")
        suggestions = generate_suggestions(document_text)
        for suggestion in suggestions:
            st.write(f"- {suggestion}")

        st.subheader("Chatbot for Analysis")
        user_input = st.text_input("Ask the chatbot about your document:")
        if st.button("Send"):
            if user_input:
                chatbot_response = chatbot_query(user_input)
                st.write("*Chatbot Response:*")
                st.write(chatbot_response)
            else:
                st.warning("Please enter a question.")

        # Download PDF Analysis Button
        st.subheader("Download Analysis as PDF")
        pdf_buffer = io.BytesIO()
        pdf = generate_pdf_analysis(document_text, summary, detected_clauses, hidden_obligations, detected_risks, risk_assessment_matrix, risk_level_distribution, risks_by_type, stacked_bar_chart, risk_heatmap)
        pdf.output(pdf_buffer, 'F')
        pdf_buffer.seek(0)

        # Add download button for PDF
        st.download_button(
            label="Download PDF Analysis",
            data=pdf_buffer,
            file_name="legal_document_analysis.pdf",
            mime="application/pdf"
        )

        # Input for recipient email
        recipient_email = st.text_input("Enter your email address to receive the PDF:")

        # Button to send PDF via email
        if st.button("Send PDF Analysis"):
            if recipient_email:
                if send_pdf_via_email(pdf_buffer, recipient_email):
                    st.success("PDF has been sent successfully!")
                else:
                    st.error("Failed to send PDF. Please try again.")
            else:
                st.warning("Please enter a valid email address.")

        # Feedback Form Section
        display_feedback_form()

    with tabs[6]:  # Update Tracker Tab
        st.subheader("Document Updates")
        updates = track_updates(document_text)
        if st.button("Show Updates"):
            if updates:
                for update in updates:
                    with st.expander(update['update'], expanded=False):
                        suggestion = get_update_suggestion(update['update'])
                        st.write(f"*Suggestion:* {suggestion}")
            else:
                st.write("No updates detected.")
640
+
641
+ # Run the application
642
if __name__ == "__main__":
    # Draw the page only when this file is executed as the entry script.
    display_legal_analysis_page()