sohampawar1030 committed on
Commit
15d4aba
·
verified ·
1 Parent(s): 838659c

Update legal_document_analysis.py

Browse files
Files changed (1) hide show
  1. legal_document_analysis.py +121 -56
legal_document_analysis.py CHANGED
@@ -6,7 +6,7 @@ from langchain_groq import ChatGroq
6
  from docx import Document
7
  import matplotlib.pyplot as plt
8
  import io
9
- import tempfile
10
  from email.mime.multipart import MIMEMultipart
11
  from email.mime.text import MIMEText
12
  from email.mime.application import MIMEApplication
@@ -15,6 +15,8 @@ from fpdf import FPDF
15
  import getpass
16
  import pandas as pd
17
  import seaborn as sns
 
 
18
 
19
  # Load environment variables from .env file
20
  load_dotenv()
@@ -238,11 +240,14 @@ def plot_risk_assessment_matrix(detected_risks):
238
  for i in range(len(detected_risks)):
239
  ax.annotate(detected_risks[i]['phrase'], (likelihood[i], impact[i]))
240
 
241
- # Save to a temporary file
242
- with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmpfile:
243
- plt.savefig(tmpfile.name, format="png", bbox_inches='tight')
244
- plt.close()
245
- return tmpfile.name # Return the file path
 
 
 
246
 
247
  # Function to plot risk level distribution pie chart
248
  def plot_risk_level_distribution(detected_risks):
@@ -255,11 +260,14 @@ def plot_risk_level_distribution(detected_risks):
255
 
256
  plt.title("Risk Level Distribution", fontsize=10)
257
 
258
- # Save to a temporary file
259
- with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmpfile:
260
- plt.savefig(tmpfile.name, format="png", bbox_inches='tight')
261
- plt.close()
262
- return tmpfile.name # Return the file path
 
 
 
263
 
264
  # Function to plot risks by type bar chart
265
  def plot_risks_by_type(detected_risks):
@@ -272,11 +280,14 @@ def plot_risks_by_type(detected_risks):
272
  ax.set_title("Risks by Type", fontsize=10)
273
  ax.set_ylabel("Count")
274
 
275
- # Save to a temporary file
276
- with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmpfile:
277
- plt.savefig(tmpfile.name, format="png", bbox_inches='tight')
278
- plt.close()
279
- return tmpfile.name # Return the file path
 
 
 
280
 
281
  # Function to plot stacked bar chart of risks by level
282
  def plot_stacked_bar_chart(detected_risks):
@@ -291,11 +302,14 @@ def plot_stacked_bar_chart(detected_risks):
291
  ax.set_title("Stacked Bar Chart of Risks by Level", fontsize=10)
292
  ax.set_ylabel("Count")
293
 
294
- # Save to a temporary file
295
- with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmpfile:
296
- plt.savefig(tmpfile.name, format="png", bbox_inches='tight')
297
- plt.close()
298
- return tmpfile.name # Return the file path
 
 
 
299
 
300
  # Function to plot risk heatmap
301
  def plot_risk_heatmap(detected_risks):
@@ -312,14 +326,21 @@ def plot_risk_heatmap(detected_risks):
312
  sns.heatmap(heatmap_data.pivot_table(index='Risk Level', values='Count'), annot=True, cmap='YlGnBu', ax=ax)
313
  ax.set_title("Risk Heatmap")
314
 
315
- # Save to a temporary file
316
- with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmpfile:
317
- plt.savefig(tmpfile.name, format="png", bbox_inches='tight')
318
- plt.close()
319
- return tmpfile.name # Return the file path
 
 
 
 
 
 
 
320
 
321
  # Function to generate PDF document with improved aesthetics
322
- def generate_pdf_analysis(document_text, summary, detected_clauses, hidden_obligations, detected_risks, risk_assessment_matrix_path, risk_level_distribution_path, risks_by_type_path, stacked_bar_chart_path, risk_heatmap_path):
323
  pdf = FPDF()
324
  pdf.add_page()
325
 
@@ -352,15 +373,15 @@ def generate_pdf_analysis(document_text, summary, detected_clauses, hidden_oblig
352
  pdf.ln(10)
353
 
354
  # Add visualizations for risks
355
- pdf.image(risk_assessment_matrix_path, x=10, y=pdf.get_y(), w=90)
356
- pdf.image(risk_level_distribution_path, x=110, y=pdf.get_y()-50, w=90) # Position next to the first image
357
  pdf.ln(60)
358
 
359
- pdf.image(risks_by_type_path, x=10, y=pdf.get_y(), w=90)
360
- pdf.image(stacked_bar_chart_path, x=110, y=pdf.get_y()-50, w=90) # Position next to the previous image
361
  pdf.ln(60)
362
 
363
- pdf.image(risk_heatmap_path, x=10, y=pdf.get_y(), w=190) # Fit image to width
364
  pdf.ln(10)
365
 
366
  # Footer
@@ -368,11 +389,7 @@ def generate_pdf_analysis(document_text, summary, detected_clauses, hidden_oblig
368
  pdf.set_font("Arial", 'I', 8)
369
  pdf.cell(0, 10, f'Page {pdf.page_no()}', 0, 0, 'C')
370
 
371
- # Save the PDF to a temporary file
372
- pdf_file_path = tempfile.mktemp(suffix=".pdf")
373
- pdf.output(pdf_file_path, 'F')
374
-
375
- return pdf_file_path # Return the path to the saved PDF
376
 
377
  # Function to handle chatbot interaction
378
  def chatbot_query(user_input):
@@ -435,8 +452,11 @@ def send_pdf_via_email(pdf_buffer, recipient_email):
435
  msg.attach(MIMEText("Please find the attached analysis of your legal document.", 'plain'))
436
 
437
  # Attach the PDF
438
- pdf_attachment = io.BytesIO(pdf_buffer.getvalue())
 
 
439
  pdf_attachment.seek(0)
 
440
  part = MIMEApplication(pdf_attachment.read(), Name='legal_document_analysis.pdf')
441
  part['Content-Disposition'] = 'attachment; filename="legal_document_analysis.pdf"'
442
  msg.attach(part)
@@ -494,7 +514,7 @@ def display_legal_analysis_page():
494
  st.error("Unsupported file type!")
495
  return
496
 
497
- tabs = st.tabs(["πŸ“„ Document Text", "πŸ” Summary", "πŸ”‘ Key Clauses", "πŸ”’ Hidden Obligations", "⚠ Risk Analysis", "πŸ’‘ Suggestions & Chatbot", "πŸ”„ Update Tracker"])
498
 
499
  with tabs[0]:
500
  st.subheader("Document Text")
@@ -513,7 +533,6 @@ def display_legal_analysis_page():
513
  with st.expander(clause['clause'], expanded=False):
514
  st.write(f"*Summary:* {clause['summary']}")
515
  st.write(f"*Context:* {clause['explanation']}")
516
-
517
  else:
518
  st.write("No key clauses detected.")
519
 
@@ -544,18 +563,18 @@ def display_legal_analysis_page():
544
  st.write("No risks detected.")
545
 
546
  # Generate all visualizations
547
- risk_assessment_matrix_path = plot_risk_assessment_matrix(detected_risks)
548
- risk_level_distribution_path = plot_risk_level_distribution(detected_risks)
549
- risks_by_type_path = plot_risks_by_type(detected_risks)
550
- stacked_bar_chart_path = plot_stacked_bar_chart(detected_risks)
551
- risk_heatmap_path = plot_risk_heatmap(detected_risks)
552
 
553
  # Display the charts
554
- st.image(risk_assessment_matrix_path, caption="Risk Assessment Matrix")
555
- st.image(risk_level_distribution_path, caption="Risk Level Distribution")
556
- st.image(risks_by_type_path, caption="Risks by Type")
557
- st.image(stacked_bar_chart_path, caption="Stacked Bar Chart of Risks by Level")
558
- st.image(risk_heatmap_path, caption="Risk Heatmap")
559
 
560
  with tabs[5]:
561
  st.subheader("Suggestions for Improvement")
@@ -575,13 +594,9 @@ def display_legal_analysis_page():
575
 
576
  # Download PDF Analysis Button
577
  st.subheader("Download Analysis as PDF")
578
- pdf_file_path = generate_pdf_analysis(document_text, summary, detected_clauses, hidden_obligations, detected_risks, risk_assessment_matrix_path, risk_level_distribution_path, risks_by_type_path, stacked_bar_chart_path, risk_heatmap_path)
579
-
580
- # Read PDF into BytesIO for download
581
  pdf_buffer = io.BytesIO()
582
- with open(pdf_file_path, 'rb') as f:
583
- pdf_buffer.write(f.read())
584
-
585
  pdf_buffer.seek(0)
586
 
587
  # Add download button for PDF
@@ -617,9 +632,59 @@ def display_legal_analysis_page():
617
  with st.expander(update['update'], expanded=False):
618
  suggestion = get_update_suggestion(update['update'])
619
  st.write(f"*Suggestion:* {suggestion}")
 
 
 
620
  else:
621
  st.write("No updates detected.")
622
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
623
  # Run the application
624
  if __name__ == "__main__":
625
  display_legal_analysis_page()
 
6
  from docx import Document
7
  import matplotlib.pyplot as plt
8
  import io
9
+ import base64
10
  from email.mime.multipart import MIMEMultipart
11
  from email.mime.text import MIMEText
12
  from email.mime.application import MIMEApplication
 
15
  import getpass
16
  import pandas as pd
17
  import seaborn as sns
18
+ import requests
19
+ from bs4 import BeautifulSoup
20
 
21
  # Load environment variables from .env file
22
  load_dotenv()
 
240
  for i in range(len(detected_risks)):
241
  ax.annotate(detected_risks[i]['phrase'], (likelihood[i], impact[i]))
242
 
243
+ buf = io.BytesIO()
244
+ plt.savefig(buf, format="png", bbox_inches='tight')
245
+ buf.seek(0)
246
+
247
+ img_str = base64.b64encode(buf.read()).decode('utf-8')
248
+ buf.close()
249
+
250
+ return img_str
251
 
252
  # Function to plot risk level distribution pie chart
253
  def plot_risk_level_distribution(detected_risks):
 
260
 
261
  plt.title("Risk Level Distribution", fontsize=10)
262
 
263
+ buf = io.BytesIO()
264
+ plt.savefig(buf, format="png", bbox_inches='tight')
265
+ buf.seek(0)
266
+
267
+ img_str = base64.b64encode(buf.read()).decode('utf-8')
268
+ buf.close()
269
+
270
+ return img_str
271
 
272
  # Function to plot risks by type bar chart
273
  def plot_risks_by_type(detected_risks):
 
280
  ax.set_title("Risks by Type", fontsize=10)
281
  ax.set_ylabel("Count")
282
 
283
+ buf = io.BytesIO()
284
+ plt.savefig(buf, format="png", bbox_inches='tight')
285
+ buf.seek(0)
286
+
287
+ img_str = base64.b64encode(buf.read()).decode('utf-8')
288
+ buf.close()
289
+
290
+ return img_str
291
 
292
  # Function to plot stacked bar chart of risks by level
293
  def plot_stacked_bar_chart(detected_risks):
 
302
  ax.set_title("Stacked Bar Chart of Risks by Level", fontsize=10)
303
  ax.set_ylabel("Count")
304
 
305
+ buf = io.BytesIO()
306
+ plt.savefig(buf, format="png", bbox_inches='tight')
307
+ buf.seek(0)
308
+
309
+ img_str = base64.b64encode(buf.read()).decode('utf-8')
310
+ buf.close()
311
+
312
+ return img_str
313
 
314
  # Function to plot risk heatmap
315
  def plot_risk_heatmap(detected_risks):
 
326
  sns.heatmap(heatmap_data.pivot_table(index='Risk Level', values='Count'), annot=True, cmap='YlGnBu', ax=ax)
327
  ax.set_title("Risk Heatmap")
328
 
329
+ buf = io.BytesIO()
330
+ plt.savefig(buf, format="png", bbox_inches='tight')
331
+ buf.seek(0)
332
+
333
+ img_str = base64.b64encode(buf.read()).decode('utf-8')
334
+ buf.close()
335
+
336
+ return img_str
337
+
338
+ # Function to convert base64 to image
339
def base64_to_image(data: "str | bytes") -> io.BytesIO:
    """Decode a base64-encoded image into an in-memory binary stream.

    Args:
        data: Base64 text (or ASCII bytes) such as the strings returned by
            the ``plot_*`` helpers in this file.

    Returns:
        A ``BytesIO`` positioned at offset 0 that holds the decoded bytes.

    Raises:
        binascii.Error: If ``data`` is not correctly padded base64.
    """
    decoded = base64.b64decode(data)
    return io.BytesIO(decoded)
341
 
342
  # Function to generate PDF document with improved aesthetics
343
+ def generate_pdf_analysis(document_text, summary, detected_clauses, hidden_obligations, detected_risks, risk_assessment_matrix, risk_level_distribution, risks_by_type, stacked_bar_chart, risk_heatmap):
344
  pdf = FPDF()
345
  pdf.add_page()
346
 
 
373
  pdf.ln(10)
374
 
375
  # Add visualizations for risks
376
+ pdf.image(base64_to_image(risk_assessment_matrix), x=10, y=pdf.get_y(), w=90)
377
+ pdf.image(base64_to_image(risk_level_distribution), x=110, y=pdf.get_y()-50, w=90) # Position next to the first image
378
  pdf.ln(60)
379
 
380
+ pdf.image(base64_to_image(risks_by_type), x=10, y=pdf.get_y(), w=90)
381
+ pdf.image(base64_to_image(stacked_bar_chart), x=110, y=pdf.get_y()-50, w=90) # Position next to the previous image
382
  pdf.ln(60)
383
 
384
+ pdf.image(base64_to_image(risk_heatmap), x=10, y=pdf.get_y(), w=190) # Fit image to width
385
  pdf.ln(10)
386
 
387
  # Footer
 
389
  pdf.set_font("Arial", 'I', 8)
390
  pdf.cell(0, 10, f'Page {pdf.page_no()}', 0, 0, 'C')
391
 
392
+ return pdf
 
 
 
 
393
 
394
  # Function to handle chatbot interaction
395
  def chatbot_query(user_input):
 
452
  msg.attach(MIMEText("Please find the attached analysis of your legal document.", 'plain'))
453
 
454
  # Attach the PDF
455
+ pdf_attachment = io.BytesIO()
456
+ pdf_buffer.seek(0)
457
+ pdf_attachment.write(pdf_buffer.read())
458
  pdf_attachment.seek(0)
459
+
460
  part = MIMEApplication(pdf_attachment.read(), Name='legal_document_analysis.pdf')
461
  part['Content-Disposition'] = 'attachment; filename="legal_document_analysis.pdf"'
462
  msg.attach(part)
 
514
  st.error("Unsupported file type!")
515
  return
516
 
517
+ tabs = st.tabs(["πŸ“„ Document Text", "πŸ” Summary", "πŸ”‘ Key Clauses", "πŸ”’ Hidden Obligations", "⚠ Risk Analysis", "πŸ’‘ Suggestions & Chatbot", "πŸ”„ Update Tracker", "πŸ“œ GDPR Updates"])
518
 
519
  with tabs[0]:
520
  st.subheader("Document Text")
 
533
  with st.expander(clause['clause'], expanded=False):
534
  st.write(f"*Summary:* {clause['summary']}")
535
  st.write(f"*Context:* {clause['explanation']}")
 
536
  else:
537
  st.write("No key clauses detected.")
538
 
 
563
  st.write("No risks detected.")
564
 
565
  # Generate all visualizations
566
+ risk_assessment_matrix = plot_risk_assessment_matrix(detected_risks)
567
+ risk_level_distribution = plot_risk_level_distribution(detected_risks)
568
+ risks_by_type = plot_risks_by_type(detected_risks)
569
+ stacked_bar_chart = plot_stacked_bar_chart(detected_risks)
570
+ risk_heatmap = plot_risk_heatmap(detected_risks)
571
 
572
  # Display the charts
573
+ st.image(f"data:image/png;base64,{risk_assessment_matrix}", caption="Risk Assessment Matrix")
574
+ st.image(f"data:image/png;base64,{risk_level_distribution}", caption="Risk Level Distribution")
575
+ st.image(f"data:image/png;base64,{risks_by_type}", caption="Risks by Type")
576
+ st.image(f"data:image/png;base64,{stacked_bar_chart}", caption="Stacked Bar Chart of Risks by Level")
577
+ st.image(f"data:image/png;base64,{risk_heatmap}", caption="Risk Heatmap")
578
 
579
  with tabs[5]:
580
  st.subheader("Suggestions for Improvement")
 
594
 
595
  # Download PDF Analysis Button
596
  st.subheader("Download Analysis as PDF")
 
 
 
597
  pdf_buffer = io.BytesIO()
598
+ pdf = generate_pdf_analysis(document_text, summary, detected_clauses, hidden_obligations, detected_risks, risk_assessment_matrix, risk_level_distribution, risks_by_type, stacked_bar_chart, risk_heatmap)
599
+ pdf.output(pdf_buffer, 'F')
 
600
  pdf_buffer.seek(0)
601
 
602
  # Add download button for PDF
 
632
  with st.expander(update['update'], expanded=False):
633
  suggestion = get_update_suggestion(update['update'])
634
  st.write(f"*Suggestion:* {suggestion}")
635
+ # Additional functionality
636
+ if st.button(f"Mark '{update['update']}' as addressed"):
637
+ st.success(f"'{update['update']}' has been marked as addressed.")
638
  else:
639
  st.write("No updates detected.")
640
 
641
+ with tabs[7]: # GDPR Updates Tab
642
+ st.subheader("GDPR Website Updates")
643
+ if st.button("Fetch Live Recitals"):
644
+ with st.spinner("Fetching updates..."):
645
+ recitals = fetch_gdpr_recitals()
646
+ if recitals:
647
+ for number, details in recitals.items():
648
+ st.markdown(f"*Recital {number}: {details['title']}*")
649
+ st.write(details['content'])
650
+ else:
651
+ st.write("No recitals found.")
652
+
653
+ # Function to fetch live recitals from the GDPR website
654
def fetch_gdpr_recitals(max_recitals=3, timeout=10):
    """Fetch the first few GDPR recitals from gdpr-info.eu.

    The recital index page is downloaded first; for each of the first
    ``max_recitals`` entries found on it, the linked recital page is
    downloaded and its text extracted.

    Args:
        max_recitals: How many index entries to follow (default 3, the
            limit the page was originally written with).
        timeout: Seconds to wait on each HTTP request before giving up, so
            an unresponsive site cannot hang the app indefinitely.

    Returns:
        A dict mapping the recital number (str) to
        ``{'title': str, 'content': str}``. Empty when the index page
        cannot be retrieved; entries that fail individually are skipped.
    """
    url = "https://gdpr-info.eu/recitals/"

    # A connection error or timeout is reported the same way as a non-200
    # reply: tell the user and return nothing.
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        st.error("Failed to fetch data from the GDPR website.")
        return {}

    if response.status_code != 200:
        st.error("Failed to fetch data from the GDPR website.")
        return {}

    soup = BeautifulSoup(response.content, 'html.parser')
    recitals = {}

    # Each recital is listed on the index page inside a <div class="artikel">.
    for article in soup.find_all('div', class_='artikel')[:max_recitals]:
        anchor = article.find('a')
        number_tag = article.find('span', class_='nummer')
        title_tag = article.find('span', class_='titel')

        # find() returns None when the markup is missing; skip malformed
        # entries instead of raising on the attribute access below.
        if anchor is None or number_tag is None or title_tag is None:
            continue
        link = anchor.get('href')
        if not link:
            continue

        number = number_tag.text.strip('()')
        title = title_tag.text.strip()

        # Fetch the content of this recital; a failure only drops this entry.
        try:
            rec_response = requests.get(link, timeout=timeout)
        except requests.RequestException:
            print(f"Failed to fetch recital {number} from {link}")
            continue

        if rec_response.status_code != 200:
            print(f"Failed to fetch recital {number} from {link}")
            continue

        rec_soup = BeautifulSoup(rec_response.content, 'html.parser')
        body = rec_soup.find('div', class_='entry-content')
        if body is None:
            print(f"Failed to fetch recital {number} from {link}")
            continue

        recitals[number] = {
            'title': title,
            'content': body.get_text(strip=True),
        }

    return recitals
687
+
688
  # Run the application
689
  if __name__ == "__main__":
690
  display_legal_analysis_page()