Spaces:

sohampawar1030
/

legal_document_summarization_final

Sleeping

App Files Files Community

sohampawar1030 committed on Jan 23

Commit

15d4aba

verified ·

1 Parent(s): 838659c

Update legal_document_analysis.py

Browse files

Files changed (1) hide show

legal_document_analysis.py +121 -56

legal_document_analysis.py CHANGED Viewed

@@ -6,7 +6,7 @@ from langchain_groq import ChatGroq
 from docx import Document
 import matplotlib.pyplot as plt
 import io
-import tempfile
 from email.mime.multipart import MIMEMultipart
 from email.mime.text import MIMEText
 from email.mime.application import MIMEApplication
@@ -15,6 +15,8 @@ from fpdf import FPDF
 import getpass
 import pandas as pd
 import seaborn as sns
 # Load environment variables from .env file
 load_dotenv()
@@ -238,11 +240,14 @@ def plot_risk_assessment_matrix(detected_risks):
     for i in range(len(detected_risks)):
         ax.annotate(detected_risks[i]['phrase'], (likelihood[i], impact[i]))
-    # Save to a temporary file
-    with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmpfile:
-        plt.savefig(tmpfile.name, format="png", bbox_inches='tight')
-        plt.close()
-        return tmpfile.name  # Return the file path
 # Function to plot risk level distribution pie chart
 def plot_risk_level_distribution(detected_risks):
@@ -255,11 +260,14 @@ def plot_risk_level_distribution(detected_risks):
     plt.title("Risk Level Distribution", fontsize=10)
-    # Save to a temporary file
-    with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmpfile:
-        plt.savefig(tmpfile.name, format="png", bbox_inches='tight')
-        plt.close()
-        return tmpfile.name  # Return the file path
 # Function to plot risks by type bar chart
 def plot_risks_by_type(detected_risks):
@@ -272,11 +280,14 @@ def plot_risks_by_type(detected_risks):
     ax.set_title("Risks by Type", fontsize=10)
     ax.set_ylabel("Count")
-    # Save to a temporary file
-    with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmpfile:
-        plt.savefig(tmpfile.name, format="png", bbox_inches='tight')
-        plt.close()
-        return tmpfile.name  # Return the file path
 # Function to plot stacked bar chart of risks by level
 def plot_stacked_bar_chart(detected_risks):
@@ -291,11 +302,14 @@ def plot_stacked_bar_chart(detected_risks):
     ax.set_title("Stacked Bar Chart of Risks by Level", fontsize=10)
     ax.set_ylabel("Count")
-    # Save to a temporary file
-    with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmpfile:
-        plt.savefig(tmpfile.name, format="png", bbox_inches='tight')
-        plt.close()
-        return tmpfile.name  # Return the file path
 # Function to plot risk heatmap
 def plot_risk_heatmap(detected_risks):
@@ -312,14 +326,21 @@ def plot_risk_heatmap(detected_risks):
     sns.heatmap(heatmap_data.pivot_table(index='Risk Level', values='Count'), annot=True, cmap='YlGnBu', ax=ax)
     ax.set_title("Risk Heatmap")
-    # Save to a temporary file
-    with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmpfile:
-        plt.savefig(tmpfile.name, format="png", bbox_inches='tight')
-        plt.close()
-        return tmpfile.name  # Return the file path
 # Function to generate PDF document with improved aesthetics
-def generate_pdf_analysis(document_text, summary, detected_clauses, hidden_obligations, detected_risks, risk_assessment_matrix_path, risk_level_distribution_path, risks_by_type_path, stacked_bar_chart_path, risk_heatmap_path):
     pdf = FPDF()
     pdf.add_page()
@@ -352,15 +373,15 @@ def generate_pdf_analysis(document_text, summary, detected_clauses, hidden_oblig
     pdf.ln(10)
     # Add visualizations for risks
-    pdf.image(risk_assessment_matrix_path, x=10, y=pdf.get_y(), w=90)
-    pdf.image(risk_level_distribution_path, x=110, y=pdf.get_y()-50, w=90)  # Position next to the first image
     pdf.ln(60)
-    pdf.image(risks_by_type_path, x=10, y=pdf.get_y(), w=90)
-    pdf.image(stacked_bar_chart_path, x=110, y=pdf.get_y()-50, w=90)  # Position next to the previous image
     pdf.ln(60)
-    pdf.image(risk_heatmap_path, x=10, y=pdf.get_y(), w=190)  # Fit image to width
     pdf.ln(10)
     # Footer
@@ -368,11 +389,7 @@ def generate_pdf_analysis(document_text, summary, detected_clauses, hidden_oblig
     pdf.set_font("Arial", 'I', 8)
     pdf.cell(0, 10, f'Page {pdf.page_no()}', 0, 0, 'C')
-    # Save the PDF to a temporary file
-    pdf_file_path = tempfile.mktemp(suffix=".pdf")
-    pdf.output(pdf_file_path, 'F')
-    return pdf_file_path  # Return the path to the saved PDF
 # Function to handle chatbot interaction
 def chatbot_query(user_input):
@@ -435,8 +452,11 @@ def send_pdf_via_email(pdf_buffer, recipient_email):
     msg.attach(MIMEText("Please find the attached analysis of your legal document.", 'plain'))
     # Attach the PDF
-    pdf_attachment = io.BytesIO(pdf_buffer.getvalue())
     pdf_attachment.seek(0)
     part = MIMEApplication(pdf_attachment.read(), Name='legal_document_analysis.pdf')
     part['Content-Disposition'] = 'attachment; filename="legal_document_analysis.pdf"'
     msg.attach(part)
@@ -494,7 +514,7 @@ def display_legal_analysis_page():
             st.error("Unsupported file type!")
             return
-        tabs = st.tabs(["📄 Document Text", "🔍 Summary", "🔑 Key Clauses", "🔒 Hidden Obligations", "⚠ Risk Analysis", "💡 Suggestions & Chatbot", "🔄 Update Tracker"])
         with tabs[0]:
             st.subheader("Document Text")
@@ -513,7 +533,6 @@ def display_legal_analysis_page():
                     with st.expander(clause['clause'], expanded=False):
                         st.write(f"*Summary:* {clause['summary']}")
                         st.write(f"*Context:* {clause['explanation']}")
             else:
                 st.write("No key clauses detected.")
@@ -544,18 +563,18 @@ def display_legal_analysis_page():
                 st.write("No risks detected.")
             # Generate all visualizations
-            risk_assessment_matrix_path = plot_risk_assessment_matrix(detected_risks)
-            risk_level_distribution_path = plot_risk_level_distribution(detected_risks)
-            risks_by_type_path = plot_risks_by_type(detected_risks)
-            stacked_bar_chart_path = plot_stacked_bar_chart(detected_risks)
-            risk_heatmap_path = plot_risk_heatmap(detected_risks)
             # Display the charts
-            st.image(risk_assessment_matrix_path, caption="Risk Assessment Matrix")
-            st.image(risk_level_distribution_path, caption="Risk Level Distribution")
-            st.image(risks_by_type_path, caption="Risks by Type")
-            st.image(stacked_bar_chart_path, caption="Stacked Bar Chart of Risks by Level")
-            st.image(risk_heatmap_path, caption="Risk Heatmap")
         with tabs[5]:
             st.subheader("Suggestions for Improvement")
@@ -575,13 +594,9 @@ def display_legal_analysis_page():
             # Download PDF Analysis Button
             st.subheader("Download Analysis as PDF")
-            pdf_file_path = generate_pdf_analysis(document_text, summary, detected_clauses, hidden_obligations, detected_risks, risk_assessment_matrix_path, risk_level_distribution_path, risks_by_type_path, stacked_bar_chart_path, risk_heatmap_path)
-            # Read PDF into BytesIO for download
             pdf_buffer = io.BytesIO()
-            with open(pdf_file_path, 'rb') as f:
-                pdf_buffer.write(f.read())
             pdf_buffer.seek(0)
             # Add download button for PDF
@@ -617,9 +632,59 @@ def display_legal_analysis_page():
                         with st.expander(update['update'], expanded=False):
                             suggestion = get_update_suggestion(update['update'])
                             st.write(f"*Suggestion:* {suggestion}")
                 else:
                     st.write("No updates detected.")
 # Run the application
 if __name__ == "__main__":
     display_legal_analysis_page()

 from docx import Document
 import matplotlib.pyplot as plt
 import io
+import base64
 from email.mime.multipart import MIMEMultipart
 from email.mime.text import MIMEText
 from email.mime.application import MIMEApplication
 import getpass
 import pandas as pd
 import seaborn as sns
+import requests
+from bs4 import BeautifulSoup
 # Load environment variables from .env file
 load_dotenv()
     for i in range(len(detected_risks)):
         ax.annotate(detected_risks[i]['phrase'], (likelihood[i], impact[i]))
+    buf = io.BytesIO()
+    plt.savefig(buf, format="png", bbox_inches='tight')
+    buf.seek(0)
+    img_str = base64.b64encode(buf.read()).decode('utf-8')
+    buf.close()
+    return img_str
 # Function to plot risk level distribution pie chart
 def plot_risk_level_distribution(detected_risks):
     plt.title("Risk Level Distribution", fontsize=10)
+    buf = io.BytesIO()
+    plt.savefig(buf, format="png", bbox_inches='tight')
+    buf.seek(0)
+    img_str = base64.b64encode(buf.read()).decode('utf-8')
+    buf.close()
+    return img_str
 # Function to plot risks by type bar chart
 def plot_risks_by_type(detected_risks):
     ax.set_title("Risks by Type", fontsize=10)
     ax.set_ylabel("Count")
+    buf = io.BytesIO()
+    plt.savefig(buf, format="png", bbox_inches='tight')
+    buf.seek(0)
+    img_str = base64.b64encode(buf.read()).decode('utf-8')
+    buf.close()
+    return img_str
 # Function to plot stacked bar chart of risks by level
 def plot_stacked_bar_chart(detected_risks):
     ax.set_title("Stacked Bar Chart of Risks by Level", fontsize=10)
     ax.set_ylabel("Count")
+    buf = io.BytesIO()
+    plt.savefig(buf, format="png", bbox_inches='tight')
+    buf.seek(0)
+    img_str = base64.b64encode(buf.read()).decode('utf-8')
+    buf.close()
+    return img_str
 # Function to plot risk heatmap
 def plot_risk_heatmap(detected_risks):
     sns.heatmap(heatmap_data.pivot_table(index='Risk Level', values='Count'), annot=True, cmap='YlGnBu', ax=ax)
     ax.set_title("Risk Heatmap")
+    buf = io.BytesIO()
+    plt.savefig(buf, format="png", bbox_inches='tight')
+    buf.seek(0)
+    img_str = base64.b64encode(buf.read()).decode('utf-8')
+    buf.close()
+    return img_str
+# Function to convert a base64-encoded image string back into an in-memory binary stream
+def base64_to_image(data):
+    """Decode the base64 payload `data` and return the decoded bytes wrapped in an io.BytesIO."""
+    return io.BytesIO(base64.b64decode(data))
 # Function to generate PDF document with improved aesthetics
+def generate_pdf_analysis(document_text, summary, detected_clauses, hidden_obligations, detected_risks, risk_assessment_matrix, risk_level_distribution, risks_by_type, stacked_bar_chart, risk_heatmap):
     pdf = FPDF()
     pdf.add_page()
     pdf.ln(10)
     # Add visualizations for risks
+    pdf.image(base64_to_image(risk_assessment_matrix), x=10, y=pdf.get_y(), w=90)
+    pdf.image(base64_to_image(risk_level_distribution), x=110, y=pdf.get_y()-50, w=90)  # Position next to the first image
     pdf.ln(60)
+    pdf.image(base64_to_image(risks_by_type), x=10, y=pdf.get_y(), w=90)
+    pdf.image(base64_to_image(stacked_bar_chart), x=110, y=pdf.get_y()-50, w=90)  # Position next to the previous image
     pdf.ln(60)
+    pdf.image(base64_to_image(risk_heatmap), x=10, y=pdf.get_y(), w=190)  # Fit image to width
     pdf.ln(10)
     # Footer
     pdf.set_font("Arial", 'I', 8)
     pdf.cell(0, 10, f'Page {pdf.page_no()}', 0, 0, 'C')
+    return pdf
 # Function to handle chatbot interaction
 def chatbot_query(user_input):
     msg.attach(MIMEText("Please find the attached analysis of your legal document.", 'plain'))
     # Attach the PDF
+    pdf_attachment = io.BytesIO()
+    pdf_buffer.seek(0)
+    pdf_attachment.write(pdf_buffer.read())
     pdf_attachment.seek(0)
     part = MIMEApplication(pdf_attachment.read(), Name='legal_document_analysis.pdf')
     part['Content-Disposition'] = 'attachment; filename="legal_document_analysis.pdf"'
     msg.attach(part)
             st.error("Unsupported file type!")
             return
+        tabs = st.tabs(["📄 Document Text", "🔍 Summary", "🔑 Key Clauses", "🔒 Hidden Obligations", "⚠ Risk Analysis", "💡 Suggestions & Chatbot", "🔄 Update Tracker", "📜 GDPR Updates"])
         with tabs[0]:
             st.subheader("Document Text")
                     with st.expander(clause['clause'], expanded=False):
                         st.write(f"*Summary:* {clause['summary']}")
                         st.write(f"*Context:* {clause['explanation']}")
             else:
                 st.write("No key clauses detected.")
                 st.write("No risks detected.")
             # Generate all visualizations
+            risk_assessment_matrix = plot_risk_assessment_matrix(detected_risks)
+            risk_level_distribution = plot_risk_level_distribution(detected_risks)
+            risks_by_type = plot_risks_by_type(detected_risks)
+            stacked_bar_chart = plot_stacked_bar_chart(detected_risks)
+            risk_heatmap = plot_risk_heatmap(detected_risks)
             # Display the charts
+            st.image(f"data:image/png;base64,{risk_assessment_matrix}", caption="Risk Assessment Matrix")
+            st.image(f"data:image/png;base64,{risk_level_distribution}", caption="Risk Level Distribution")
+            st.image(f"data:image/png;base64,{risks_by_type}", caption="Risks by Type")
+            st.image(f"data:image/png;base64,{stacked_bar_chart}", caption="Stacked Bar Chart of Risks by Level")
+            st.image(f"data:image/png;base64,{risk_heatmap}", caption="Risk Heatmap")
         with tabs[5]:
             st.subheader("Suggestions for Improvement")
             # Download PDF Analysis Button
             st.subheader("Download Analysis as PDF")
             pdf_buffer = io.BytesIO()
+            pdf = generate_pdf_analysis(document_text, summary, detected_clauses, hidden_obligations, detected_risks, risk_assessment_matrix, risk_level_distribution, risks_by_type, stacked_bar_chart, risk_heatmap)
+            pdf.output(pdf_buffer, 'F')
             pdf_buffer.seek(0)
             # Add download button for PDF
                         with st.expander(update['update'], expanded=False):
                             suggestion = get_update_suggestion(update['update'])
                             st.write(f"*Suggestion:* {suggestion}")
+                            # Additional functionality
+                            if st.button(f"Mark '{update['update']}' as addressed"):
+                                st.success(f"'{update['update']}' has been marked as addressed.")
                 else:
                     st.write("No updates detected.")
+        with tabs[7]:  # GDPR Updates Tab
+            st.subheader("GDPR Website Updates")
+            if st.button("Fetch Live Recitals"):
+                with st.spinner("Fetching updates..."):
+                    recitals = fetch_gdpr_recitals()
+                    if recitals:
+                        for number, details in recitals.items():
+                            st.markdown(f"*Recital {number}: {details['title']}*")
+                            st.write(details['content'])
+                    else:
+                        st.write("No recitals found.")
+# Function to fetch live recitals from the GDPR website
+def fetch_gdpr_recitals():
+    url = "https://gdpr-info.eu/recitals/"
+    response = requests.get(url)
+    # Check if the request was successful
+    if response.status_code != 200:
+        st.error("Failed to fetch data from the GDPR website.")
+        return {}
+    soup = BeautifulSoup(response.content, 'html.parser')
+    recitals = {}
+    # Locate all recital links
+    articles = soup.find_all('div', class_='artikel')
+    # Extract each recital's link and title
+    for i, article in enumerate(articles):
+        if i >= 3:  # Limit to the first 3 recitals
+            break
+        link = article.find('a')['href']
+        number = article.find('span', class_='nummer').text.strip('()')
+        title = article.find('span', class_='titel').text.strip()
+        # Fetch the content of each recital
+        rec_response = requests.get(link)
+        if rec_response.status_code == 200:
+            rec_soup = BeautifulSoup(rec_response.content, 'html.parser')
+            content = rec_soup.find('div', class_='entry-content').get_text(strip=True)
+            recitals[number] = {'title': title, 'content': content}
+        else:
+            print(f"Failed to fetch recital {number} from {link}")
+    return recitals
 # Run the application
 if __name__ == "__main__":
     display_legal_analysis_page()