sohampawar1030 committed on
Commit
94f0703
·
verified ·
1 Parent(s): d2608e3

Update legal_document_analysis.py

Browse files
Files changed (1) hide show
  1. legal_document_analysis.py +43 -61
legal_document_analysis.py CHANGED
@@ -7,6 +7,7 @@ from docx import Document
7
  import matplotlib.pyplot as plt
8
  import io
9
  import base64
 
10
  from email.mime.multipart import MIMEMultipart
11
  from email.mime.text import MIMEText
12
  from email.mime.application import MIMEApplication
@@ -238,14 +239,11 @@ def plot_risk_assessment_matrix(detected_risks):
238
  for i in range(len(detected_risks)):
239
  ax.annotate(detected_risks[i]['phrase'], (likelihood[i], impact[i]))
240
 
241
- buf = io.BytesIO()
242
- plt.savefig(buf, format="png", bbox_inches='tight')
243
- buf.seek(0)
244
-
245
- img_str = base64.b64encode(buf.read()).decode('utf-8')
246
- buf.close()
247
-
248
- return img_str
249
 
250
  # Function to plot risk level distribution pie chart
251
  def plot_risk_level_distribution(detected_risks):
@@ -258,14 +256,11 @@ def plot_risk_level_distribution(detected_risks):
258
 
259
  plt.title("Risk Level Distribution", fontsize=10)
260
 
261
- buf = io.BytesIO()
262
- plt.savefig(buf, format="png", bbox_inches='tight')
263
- buf.seek(0)
264
-
265
- img_str = base64.b64encode(buf.read()).decode('utf-8')
266
- buf.close()
267
-
268
- return img_str
269
 
270
  # Function to plot risks by type bar chart
271
  def plot_risks_by_type(detected_risks):
@@ -278,14 +273,11 @@ def plot_risks_by_type(detected_risks):
278
  ax.set_title("Risks by Type", fontsize=10)
279
  ax.set_ylabel("Count")
280
 
281
- buf = io.BytesIO()
282
- plt.savefig(buf, format="png", bbox_inches='tight')
283
- buf.seek(0)
284
-
285
- img_str = base64.b64encode(buf.read()).decode('utf-8')
286
- buf.close()
287
-
288
- return img_str
289
 
290
  # Function to plot stacked bar chart of risks by level
291
  def plot_stacked_bar_chart(detected_risks):
@@ -300,14 +292,11 @@ def plot_stacked_bar_chart(detected_risks):
300
  ax.set_title("Stacked Bar Chart of Risks by Level", fontsize=10)
301
  ax.set_ylabel("Count")
302
 
303
- buf = io.BytesIO()
304
- plt.savefig(buf, format="png", bbox_inches='tight')
305
- buf.seek(0)
306
-
307
- img_str = base64.b64encode(buf.read()).decode('utf-8')
308
- buf.close()
309
-
310
- return img_str
311
 
312
  # Function to plot risk heatmap
313
  def plot_risk_heatmap(detected_risks):
@@ -324,21 +313,14 @@ def plot_risk_heatmap(detected_risks):
324
  sns.heatmap(heatmap_data.pivot_table(index='Risk Level', values='Count'), annot=True, cmap='YlGnBu', ax=ax)
325
  ax.set_title("Risk Heatmap")
326
 
327
- buf = io.BytesIO()
328
- plt.savefig(buf, format="png", bbox_inches='tight')
329
- buf.seek(0)
330
-
331
- img_str = base64.b64encode(buf.read()).decode('utf-8')
332
- buf.close()
333
-
334
- return img_str
335
-
336
- # Function to convert base64 to image
337
- def base64_to_image(data):
338
- return io.BytesIO(base64.b64decode(data))
339
 
340
  # Function to generate PDF document with improved aesthetics
341
- def generate_pdf_analysis(document_text, summary, detected_clauses, hidden_obligations, detected_risks, risk_assessment_matrix, risk_level_distribution, risks_by_type, stacked_bar_chart, risk_heatmap):
342
  pdf = FPDF()
343
  pdf.add_page()
344
 
@@ -371,15 +353,15 @@ def generate_pdf_analysis(document_text, summary, detected_clauses, hidden_oblig
371
  pdf.ln(10)
372
 
373
  # Add visualizations for risks
374
- pdf.image(base64_to_image(risk_assessment_matrix), x=10, y=pdf.get_y(), w=90)
375
- pdf.image(base64_to_image(risk_level_distribution), x=110, y=pdf.get_y()-50, w=90) # Position next to the first image
376
  pdf.ln(60)
377
 
378
- pdf.image(base64_to_image(risks_by_type), x=10, y=pdf.get_y(), w=90)
379
- pdf.image(base64_to_image(stacked_bar_chart), x=110, y=pdf.get_y()-50, w=90) # Position next to the previous image
380
  pdf.ln(60)
381
 
382
- pdf.image(base64_to_image(risk_heatmap), x=10, y=pdf.get_y(), w=190) # Fit image to width
383
  pdf.ln(10)
384
 
385
  # Footer
@@ -559,18 +541,18 @@ def display_legal_analysis_page():
559
  st.write("No risks detected.")
560
 
561
  # Generate all visualizations
562
- risk_assessment_matrix = plot_risk_assessment_matrix(detected_risks)
563
- risk_level_distribution = plot_risk_level_distribution(detected_risks)
564
- risks_by_type = plot_risks_by_type(detected_risks)
565
- stacked_bar_chart = plot_stacked_bar_chart(detected_risks)
566
- risk_heatmap = plot_risk_heatmap(detected_risks)
567
 
568
  # Display the charts
569
- st.image(f"data:image/png;base64,{risk_assessment_matrix}", caption="Risk Assessment Matrix")
570
- st.image(f"data:image/png;base64,{risk_level_distribution}", caption="Risk Level Distribution")
571
- st.image(f"data:image/png;base64,{risks_by_type}", caption="Risks by Type")
572
- st.image(f"data:image/png;base64,{stacked_bar_chart}", caption="Stacked Bar Chart of Risks by Level")
573
- st.image(f"data:image/png;base64,{risk_heatmap}", caption="Risk Heatmap")
574
 
575
  with tabs[5]:
576
  st.subheader("Suggestions for Improvement")
@@ -591,7 +573,7 @@ def display_legal_analysis_page():
591
  # Download PDF Analysis Button
592
  st.subheader("Download Analysis as PDF")
593
  pdf_buffer = io.BytesIO()
594
- pdf = generate_pdf_analysis(document_text, summary, detected_clauses, hidden_obligations, detected_risks, risk_assessment_matrix, risk_level_distribution, risks_by_type, stacked_bar_chart, risk_heatmap)
595
  pdf.output(pdf_buffer, 'F')
596
  pdf_buffer.seek(0)
597
 
 
7
  import matplotlib.pyplot as plt
8
  import io
9
  import base64
10
+ import tempfile
11
  from email.mime.multipart import MIMEMultipart
12
  from email.mime.text import MIMEText
13
  from email.mime.application import MIMEApplication
 
239
  for i in range(len(detected_risks)):
240
  ax.annotate(detected_risks[i]['phrase'], (likelihood[i], impact[i]))
241
 
242
+ # Save to a temporary file
243
+ with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmpfile:
244
+ plt.savefig(tmpfile.name, format="png", bbox_inches='tight')
245
+ plt.close()
246
+ return tmpfile.name # Return the file path
 
 
 
247
 
248
  # Function to plot risk level distribution pie chart
249
  def plot_risk_level_distribution(detected_risks):
 
256
 
257
  plt.title("Risk Level Distribution", fontsize=10)
258
 
259
+ # Save to a temporary file
260
+ with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmpfile:
261
+ plt.savefig(tmpfile.name, format="png", bbox_inches='tight')
262
+ plt.close()
263
+ return tmpfile.name # Return the file path
 
 
 
264
 
265
  # Function to plot risks by type bar chart
266
  def plot_risks_by_type(detected_risks):
 
273
  ax.set_title("Risks by Type", fontsize=10)
274
  ax.set_ylabel("Count")
275
 
276
+ # Save to a temporary file
277
+ with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmpfile:
278
+ plt.savefig(tmpfile.name, format="png", bbox_inches='tight')
279
+ plt.close()
280
+ return tmpfile.name # Return the file path
 
 
 
281
 
282
  # Function to plot stacked bar chart of risks by level
283
  def plot_stacked_bar_chart(detected_risks):
 
292
  ax.set_title("Stacked Bar Chart of Risks by Level", fontsize=10)
293
  ax.set_ylabel("Count")
294
 
295
+ # Save to a temporary file
296
+ with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmpfile:
297
+ plt.savefig(tmpfile.name, format="png", bbox_inches='tight')
298
+ plt.close()
299
+ return tmpfile.name # Return the file path
 
 
 
300
 
301
  # Function to plot risk heatmap
302
  def plot_risk_heatmap(detected_risks):
 
313
  sns.heatmap(heatmap_data.pivot_table(index='Risk Level', values='Count'), annot=True, cmap='YlGnBu', ax=ax)
314
  ax.set_title("Risk Heatmap")
315
 
316
+ # Save to a temporary file
317
+ with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmpfile:
318
+ plt.savefig(tmpfile.name, format="png", bbox_inches='tight')
319
+ plt.close()
320
+ return tmpfile.name # Return the file path
 
 
 
 
 
 
 
321
 
322
  # Function to generate PDF document with improved aesthetics
323
+ def generate_pdf_analysis(document_text, summary, detected_clauses, hidden_obligations, detected_risks, risk_assessment_matrix_path, risk_level_distribution_path, risks_by_type_path, stacked_bar_chart_path, risk_heatmap_path):
324
  pdf = FPDF()
325
  pdf.add_page()
326
 
 
353
  pdf.ln(10)
354
 
355
  # Add visualizations for risks
356
+ pdf.image(risk_assessment_matrix_path, x=10, y=pdf.get_y(), w=90)
357
+ pdf.image(risk_level_distribution_path, x=110, y=pdf.get_y()-50, w=90) # Position next to the first image
358
  pdf.ln(60)
359
 
360
+ pdf.image(risks_by_type_path, x=10, y=pdf.get_y(), w=90)
361
+ pdf.image(stacked_bar_chart_path, x=110, y=pdf.get_y()-50, w=90) # Position next to the previous image
362
  pdf.ln(60)
363
 
364
+ pdf.image(risk_heatmap_path, x=10, y=pdf.get_y(), w=190) # Fit image to width
365
  pdf.ln(10)
366
 
367
  # Footer
 
541
  st.write("No risks detected.")
542
 
543
  # Generate all visualizations
544
+ risk_assessment_matrix_path = plot_risk_assessment_matrix(detected_risks)
545
+ risk_level_distribution_path = plot_risk_level_distribution(detected_risks)
546
+ risks_by_type_path = plot_risks_by_type(detected_risks)
547
+ stacked_bar_chart_path = plot_stacked_bar_chart(detected_risks)
548
+ risk_heatmap_path = plot_risk_heatmap(detected_risks)
549
 
550
  # Display the charts
551
+ st.image(risk_assessment_matrix_path, caption="Risk Assessment Matrix")
552
+ st.image(risk_level_distribution_path, caption="Risk Level Distribution")
553
+ st.image(risks_by_type_path, caption="Risks by Type")
554
+ st.image(stacked_bar_chart_path, caption="Stacked Bar Chart of Risks by Level")
555
+ st.image(risk_heatmap_path, caption="Risk Heatmap")
556
 
557
  with tabs[5]:
558
  st.subheader("Suggestions for Improvement")
 
573
  # Download PDF Analysis Button
574
  st.subheader("Download Analysis as PDF")
575
  pdf_buffer = io.BytesIO()
576
+ pdf = generate_pdf_analysis(document_text, summary, detected_clauses, hidden_obligations, detected_risks, risk_assessment_matrix_path, risk_level_distribution_path, risks_by_type_path, stacked_bar_chart_path, risk_heatmap_path)
577
  pdf.output(pdf_buffer, 'F')
578
  pdf_buffer.seek(0)
579