Update legal_document_analysis.py
Browse files
- legal_document_analysis.py +43 -61
legal_document_analysis.py
CHANGED
@@ -7,6 +7,7 @@ from docx import Document
|
|
7 |
import matplotlib.pyplot as plt
|
8 |
import io
|
9 |
import base64
|
|
|
10 |
from email.mime.multipart import MIMEMultipart
|
11 |
from email.mime.text import MIMEText
|
12 |
from email.mime.application import MIMEApplication
|
@@ -238,14 +239,11 @@ def plot_risk_assessment_matrix(detected_risks):
|
|
238 |
for i in range(len(detected_risks)):
|
239 |
ax.annotate(detected_risks[i]['phrase'], (likelihood[i], impact[i]))
|
240 |
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
buf.close()
|
247 |
-
|
248 |
-
return img_str
|
249 |
|
250 |
# Function to plot risk level distribution pie chart
|
251 |
def plot_risk_level_distribution(detected_risks):
|
@@ -258,14 +256,11 @@ def plot_risk_level_distribution(detected_risks):
|
|
258 |
|
259 |
plt.title("Risk Level Distribution", fontsize=10)
|
260 |
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
buf.close()
|
267 |
-
|
268 |
-
return img_str
|
269 |
|
270 |
# Function to plot risks by type bar chart
|
271 |
def plot_risks_by_type(detected_risks):
|
@@ -278,14 +273,11 @@ def plot_risks_by_type(detected_risks):
|
|
278 |
ax.set_title("Risks by Type", fontsize=10)
|
279 |
ax.set_ylabel("Count")
|
280 |
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
buf.close()
|
287 |
-
|
288 |
-
return img_str
|
289 |
|
290 |
# Function to plot stacked bar chart of risks by level
|
291 |
def plot_stacked_bar_chart(detected_risks):
|
@@ -300,14 +292,11 @@ def plot_stacked_bar_chart(detected_risks):
|
|
300 |
ax.set_title("Stacked Bar Chart of Risks by Level", fontsize=10)
|
301 |
ax.set_ylabel("Count")
|
302 |
|
303 |
-
|
304 |
-
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
buf.close()
|
309 |
-
|
310 |
-
return img_str
|
311 |
|
312 |
# Function to plot risk heatmap
|
313 |
def plot_risk_heatmap(detected_risks):
|
@@ -324,21 +313,14 @@ def plot_risk_heatmap(detected_risks):
|
|
324 |
sns.heatmap(heatmap_data.pivot_table(index='Risk Level', values='Count'), annot=True, cmap='YlGnBu', ax=ax)
|
325 |
ax.set_title("Risk Heatmap")
|
326 |
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
buf.close()
|
333 |
-
|
334 |
-
return img_str
|
335 |
-
|
336 |
-
# Function to convert base64 to image
|
337 |
-
def base64_to_image(data):
|
338 |
-
return io.BytesIO(base64.b64decode(data))
|
339 |
|
340 |
# Function to generate PDF document with improved aesthetics
|
341 |
-
def generate_pdf_analysis(document_text, summary, detected_clauses, hidden_obligations, detected_risks,
|
342 |
pdf = FPDF()
|
343 |
pdf.add_page()
|
344 |
|
@@ -371,15 +353,15 @@ def generate_pdf_analysis(document_text, summary, detected_clauses, hidden_oblig
|
|
371 |
pdf.ln(10)
|
372 |
|
373 |
# Add visualizations for risks
|
374 |
-
pdf.image(
|
375 |
-
pdf.image(
|
376 |
pdf.ln(60)
|
377 |
|
378 |
-
pdf.image(
|
379 |
-
pdf.image(
|
380 |
pdf.ln(60)
|
381 |
|
382 |
-
pdf.image(
|
383 |
pdf.ln(10)
|
384 |
|
385 |
# Footer
|
@@ -559,18 +541,18 @@ def display_legal_analysis_page():
|
|
559 |
st.write("No risks detected.")
|
560 |
|
561 |
# Generate all visualizations
|
562 |
-
|
563 |
-
|
564 |
-
|
565 |
-
|
566 |
-
|
567 |
|
568 |
# Display the charts
|
569 |
-
st.image(
|
570 |
-
st.image(
|
571 |
-
st.image(
|
572 |
-
st.image(
|
573 |
-
st.image(
|
574 |
|
575 |
with tabs[5]:
|
576 |
st.subheader("Suggestions for Improvement")
|
@@ -591,7 +573,7 @@ def display_legal_analysis_page():
|
|
591 |
# Download PDF Analysis Button
|
592 |
st.subheader("Download Analysis as PDF")
|
593 |
pdf_buffer = io.BytesIO()
|
594 |
-
pdf = generate_pdf_analysis(document_text, summary, detected_clauses, hidden_obligations, detected_risks,
|
595 |
pdf.output(pdf_buffer, 'F')
|
596 |
pdf_buffer.seek(0)
|
597 |
|
|
|
7 |
import matplotlib.pyplot as plt
|
8 |
import io
|
9 |
import base64
|
10 |
+
import tempfile
|
11 |
from email.mime.multipart import MIMEMultipart
|
12 |
from email.mime.text import MIMEText
|
13 |
from email.mime.application import MIMEApplication
|
|
|
239 |
for i in range(len(detected_risks)):
|
240 |
ax.annotate(detected_risks[i]['phrase'], (likelihood[i], impact[i]))
|
241 |
|
242 |
+
# Save to a temporary file
|
243 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmpfile:
|
244 |
+
plt.savefig(tmpfile.name, format="png", bbox_inches='tight')
|
245 |
+
plt.close()
|
246 |
+
return tmpfile.name # Return the file path
|
|
|
|
|
|
|
247 |
|
248 |
# Function to plot risk level distribution pie chart
|
249 |
def plot_risk_level_distribution(detected_risks):
|
|
|
256 |
|
257 |
plt.title("Risk Level Distribution", fontsize=10)
|
258 |
|
259 |
+
# Save to a temporary file
|
260 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmpfile:
|
261 |
+
plt.savefig(tmpfile.name, format="png", bbox_inches='tight')
|
262 |
+
plt.close()
|
263 |
+
return tmpfile.name # Return the file path
|
|
|
|
|
|
|
264 |
|
265 |
# Function to plot risks by type bar chart
|
266 |
def plot_risks_by_type(detected_risks):
|
|
|
273 |
ax.set_title("Risks by Type", fontsize=10)
|
274 |
ax.set_ylabel("Count")
|
275 |
|
276 |
+
# Save to a temporary file
|
277 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmpfile:
|
278 |
+
plt.savefig(tmpfile.name, format="png", bbox_inches='tight')
|
279 |
+
plt.close()
|
280 |
+
return tmpfile.name # Return the file path
|
|
|
|
|
|
|
281 |
|
282 |
# Function to plot stacked bar chart of risks by level
|
283 |
def plot_stacked_bar_chart(detected_risks):
|
|
|
292 |
ax.set_title("Stacked Bar Chart of Risks by Level", fontsize=10)
|
293 |
ax.set_ylabel("Count")
|
294 |
|
295 |
+
# Save to a temporary file
|
296 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmpfile:
|
297 |
+
plt.savefig(tmpfile.name, format="png", bbox_inches='tight')
|
298 |
+
plt.close()
|
299 |
+
return tmpfile.name # Return the file path
|
|
|
|
|
|
|
300 |
|
301 |
# Function to plot risk heatmap
|
302 |
def plot_risk_heatmap(detected_risks):
|
|
|
313 |
sns.heatmap(heatmap_data.pivot_table(index='Risk Level', values='Count'), annot=True, cmap='YlGnBu', ax=ax)
|
314 |
ax.set_title("Risk Heatmap")
|
315 |
|
316 |
+
# Save to a temporary file
|
317 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmpfile:
|
318 |
+
plt.savefig(tmpfile.name, format="png", bbox_inches='tight')
|
319 |
+
plt.close()
|
320 |
+
return tmpfile.name # Return the file path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
321 |
|
322 |
# Function to generate PDF document with improved aesthetics
|
323 |
+
def generate_pdf_analysis(document_text, summary, detected_clauses, hidden_obligations, detected_risks, risk_assessment_matrix_path, risk_level_distribution_path, risks_by_type_path, stacked_bar_chart_path, risk_heatmap_path):
|
324 |
pdf = FPDF()
|
325 |
pdf.add_page()
|
326 |
|
|
|
353 |
pdf.ln(10)
|
354 |
|
355 |
# Add visualizations for risks
|
356 |
+
pdf.image(risk_assessment_matrix_path, x=10, y=pdf.get_y(), w=90)
|
357 |
+
pdf.image(risk_level_distribution_path, x=110, y=pdf.get_y()-50, w=90) # Position next to the first image
|
358 |
pdf.ln(60)
|
359 |
|
360 |
+
pdf.image(risks_by_type_path, x=10, y=pdf.get_y(), w=90)
|
361 |
+
pdf.image(stacked_bar_chart_path, x=110, y=pdf.get_y()-50, w=90) # Position next to the previous image
|
362 |
pdf.ln(60)
|
363 |
|
364 |
+
pdf.image(risk_heatmap_path, x=10, y=pdf.get_y(), w=190) # Fit image to width
|
365 |
pdf.ln(10)
|
366 |
|
367 |
# Footer
|
|
|
541 |
st.write("No risks detected.")
|
542 |
|
543 |
# Generate all visualizations
|
544 |
+
risk_assessment_matrix_path = plot_risk_assessment_matrix(detected_risks)
|
545 |
+
risk_level_distribution_path = plot_risk_level_distribution(detected_risks)
|
546 |
+
risks_by_type_path = plot_risks_by_type(detected_risks)
|
547 |
+
stacked_bar_chart_path = plot_stacked_bar_chart(detected_risks)
|
548 |
+
risk_heatmap_path = plot_risk_heatmap(detected_risks)
|
549 |
|
550 |
# Display the charts
|
551 |
+
st.image(risk_assessment_matrix_path, caption="Risk Assessment Matrix")
|
552 |
+
st.image(risk_level_distribution_path, caption="Risk Level Distribution")
|
553 |
+
st.image(risks_by_type_path, caption="Risks by Type")
|
554 |
+
st.image(stacked_bar_chart_path, caption="Stacked Bar Chart of Risks by Level")
|
555 |
+
st.image(risk_heatmap_path, caption="Risk Heatmap")
|
556 |
|
557 |
with tabs[5]:
|
558 |
st.subheader("Suggestions for Improvement")
|
|
|
573 |
# Download PDF Analysis Button
|
574 |
st.subheader("Download Analysis as PDF")
|
575 |
pdf_buffer = io.BytesIO()
|
576 |
+
pdf = generate_pdf_analysis(document_text, summary, detected_clauses, hidden_obligations, detected_risks, risk_assessment_matrix_path, risk_level_distribution_path, risks_by_type_path, stacked_bar_chart_path, risk_heatmap_path)
|
577 |
pdf.output(pdf_buffer, 'F')
|
578 |
pdf_buffer.seek(0)
|
579 |
|