"""Streamlit app that analyses an uploaded legal document.

The app extracts text from a PDF or DOCX upload, summarises it through the
Groq chat-completions API, flags well-known clause names, conditional
phrases and risk phrases, charts the findings, and offers the whole analysis
as a downloadable DOCX report.
"""

import base64
import concurrent.futures
import hashlib
import io
import os
import re
from collections import Counter

import matplotlib.pyplot as plt
import streamlit as st
from docx import Document
from groq import Groq
from PyPDF2 import PdfReader
from tiktoken import Encoding, get_encoding

# Groq API client initialization.
# SECURITY: the key is read from the GROQ_API_KEY environment variable instead
# of being embedded in the source. Any key that was ever committed to this
# file should be treated as leaked and revoked.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Number of characters kept before / after a matched phrase as its context.
_CONTEXT_BEFORE = 50
_CONTEXT_AFTER = 200

# Exact markers produced by summarize_text() when a request fails. Checking
# for these (rather than the bare word "Error") avoids misreading a genuine
# summary that happens to contain the word "Error".
_SUMMARY_ERROR_MARKERS = (
    "Error generating summary:",
    "Error: Received an empty or invalid response from Groq API.",
)

# Control characters that are not allowed in XML text nodes.
_XML_ILLEGAL_CHARS = re.compile(r"[\x00-\x08\x0b\x0c\x0e-\x1f]")

_KEY_CLAUSES = (
    {"clause": "confidentiality", "summary": "Confidentiality clauses ensure that sensitive information remains protected."},
    {"clause": "liability", "summary": "Liability clauses outline the responsibility for damages or losses incurred."},
    {"clause": "termination", "summary": "Termination clauses specify the conditions under which a contract may be ended."},
    {"clause": "force majeure", "summary": "Force majeure clauses excuse parties from performance obligations due to unforeseen events."},
    {"clause": "governing law", "summary": "Governing law clauses specify which jurisdiction's laws will govern the contract."},
    {"clause": "dispute resolution", "summary": "Dispute resolution clauses specify how conflicts between parties will be resolved."},
    {"clause": "amendment", "summary": "Amendment clauses outline the process for changing the terms of the contract."},
    {"clause": "warranty", "summary": "Warranty clauses provide assurances regarding the quality or condition of goods or services."},
)

_HIDDEN_OBLIGATION_PHRASES = (
    {"phrase": "dependent upon", "summary": "This suggests that some action is conditional upon another."},
    {"phrase": "if", "summary": "This indicates that certain conditions must be met to fulfill the obligation."},
    {"phrase": "may be required", "summary": "Implies that the party could be obligated to perform an action under specific conditions."},
    {"phrase": "should", "summary": "Implies a recommendation or requirement, though not explicitly mandatory."},
    {"phrase": "obligated to", "summary": "Indicates a clear, binding duty to perform an action."},
)

_RISK_PHRASES = (
    {"phrase": "penalty", "summary": "Penalty clauses may impose financial or legal consequences on the parties involved."},
    {"phrase": "liability", "summary": "Liability clauses may indicate potential financial responsibility or legal risks."},
    {"phrase": "default", "summary": "Default clauses can expose parties to consequences for failure to perform obligations."},
    {"phrase": "breach", "summary": "Breach of contract can lead to serious legal consequences including financial penalties."},
    {"phrase": "suspension", "summary": "Suspension clauses may indicate risks of halting services or operations in case of non-compliance."},
)


def extract_text_from_pdf(file):
    """Return the concatenated text of every page of the PDF in *file*.

    Pages for which the extractor yields nothing contribute an empty string
    instead of breaking the concatenation.
    """
    reader = PdfReader(file)
    return "".join(page.extract_text() or "" for page in reader.pages)


def extract_text_from_docx(file):
    """Return the paragraphs of the DOCX in *file*, joined by newlines."""
    doc = Document(file)
    return "\n".join(paragraph.text for paragraph in doc.paragraphs)


def preprocess_text(text: str) -> str:
    """Collapse every run of whitespace (including newlines) to one space."""
    # str.split() with no argument already splits on "\n" and "\r".
    return " ".join(text.split())


def get_default_encoding() -> Encoding:
    """Return the tiktoken ``cl100k_base`` encoding used for token counts."""
    return get_encoding("cl100k_base")


def split_into_chunks(text: str, token_limit: int = 5500) -> list:
    """Split *text* on whitespace into chunks of at most *token_limit* tokens.

    Token counts are measured per word (with a trailing space) using the
    default encoding. A single word that is larger than *token_limit* is
    emitted as its own chunk; no empty chunks are produced.
    """
    encoding = get_default_encoding()
    chunks = []
    current_chunk = []
    current_tokens = 0
    for word in text.split():
        # disallowed_special=() makes text that looks like a special token
        # (e.g. "<|endoftext|>") count as ordinary text instead of raising.
        word_tokens = len(encoding.encode(word + " ", disallowed_special=()))
        # Only flush when there is something to flush, otherwise an oversized
        # first word would emit an empty chunk.
        if current_chunk and current_tokens + word_tokens > token_limit:
            chunks.append(" ".join(current_chunk))
            current_chunk = []
            current_tokens = 0
        current_chunk.append(word)
        current_tokens += word_tokens
    if current_chunk:
        chunks.append(" ".join(current_chunk))
    return chunks


def summarize_text(text: str) -> str:
    """Ask the Groq chat model for a concise summary of *text*.

    Returns the summary, or a string starting with ``"Error"`` when the
    request fails or the response carries no content. Failures are reported
    in-band (not raised) because callers concatenate the per-chunk results.
    """
    try:
        response = client.chat.completions.create(
            messages=[{
                "role": "user",
                "content": f"Summarize the following legal document in a concise manner: {text}",
            }],
            model="llama-3.1-8b-instant",
            stream=False,
        )
        if response and response.choices:
            content = response.choices[0].message.content
            # A missing message body would otherwise reach " ".join() as None.
            if content:
                return content
        return "Error: Received an empty or invalid response from Groq API."
    except Exception as e:  # API boundary: surface any failure as text.
        return f"Error generating summary: {e}"


def summarize_large_text(text: str, chunk_limit: int = 5000) -> str:
    """Summarise *text* chunk by chunk and join the partial summaries.

    *chunk_limit* is the per-chunk token budget handed to
    ``split_into_chunks``. An empty *text* yields an empty string.
    """
    chunks = split_into_chunks(text, token_limit=chunk_limit)
    return " ".join(summarize_text(chunk) for chunk in chunks)


def _summary_failed(summary: str) -> bool:
    """Return True when *summary* contains a summarize_text() error marker."""
    return any(marker in summary for marker in _SUMMARY_ERROR_MARKERS)


def _find_context(phrase: str, *sources: str):
    """Return the text surrounding the first match of *phrase*, or ``None``.

    *sources* are searched in order and the first one containing *phrase*
    supplies the context. Matching is case-insensitive and must begin at a
    word boundary, so "if" does not match inside "specific" and
    "termination" does not match inside "determination"; suffixes such as
    "breaches" still match "breach". The window is clamped at the start of
    the source so an early match cannot produce a negative slice index.
    """
    pattern = re.compile(r"\b" + re.escape(phrase), re.IGNORECASE)
    for source in sources:
        match = pattern.search(source)
        if match:
            start = max(0, match.start() - _CONTEXT_BEFORE)
            end = match.start() + _CONTEXT_AFTER
            return source[start:end].strip()
    return None


def _detect_phrases(catalogue, text: str, summary: str) -> list:
    """Return one ``{phrase, summary, context}`` dict per matched entry.

    Each catalogue entry is looked up in *text* first and in *summary* as a
    fallback; the context comes from whichever of the two matched.
    """
    findings = []
    for item in catalogue:
        context = _find_context(item["phrase"], text, summary)
        if context is not None:
            findings.append({
                "phrase": item["phrase"],
                "summary": item["summary"],
                "context": context,
            })
    return findings


def detect_key_clauses(text: str) -> list:
    """Detect well-known clause names mentioned in *text*.

    Returns a list of dicts with the keys ``clause`` (capitalised name),
    ``summary`` (generic description) and ``explanation`` (a sentence quoting
    the text around the first mention). Each clause appears at most once.
    """
    detected_clauses = []
    for clause in _KEY_CLAUSES:
        context = _find_context(clause["clause"], text)
        if context is None:
            continue
        explanation = f"The document mentions '{clause['clause']}' clause. Context: {context}..."
        detected_clauses.append({
            "clause": clause["clause"].capitalize(),
            "summary": clause["summary"],
            "explanation": explanation,
        })
    return detected_clauses


def detect_hidden_obligations_or_dependencies(text: str, summary: str) -> list:
    """Detect conditional / obligation phrases in *text* or *summary*.

    Returns a list of dicts with the keys ``phrase``, ``summary`` and
    ``context``.
    """
    return _detect_phrases(_HIDDEN_OBLIGATION_PHRASES, text, summary)


def detect_risks(text: str, summary: str) -> list:
    """Detect risk-related phrases in *text* or *summary*.

    Returns a list of dicts with the keys ``phrase``, ``summary`` and
    ``context``.
    """
    return _detect_phrases(_RISK_PHRASES, text, summary)


def plot_risk_pie_chart(detected_clauses, hidden_obligations, detected_risks) -> str:
    """Render the share of each finding category as a base64-encoded PNG.

    Categories with no findings are left out of the pie. When there are no
    findings at all a placeholder message is drawn instead, because a pie
    cannot be computed from all-zero sizes.
    """
    categories = [
        ('Detected Key Clauses', len(detected_clauses), '#ff9999'),
        ('Hidden Obligations or Dependencies', len(hidden_obligations), '#66b3ff'),
        ('Detected Risks', len(detected_risks), '#99ff99'),
    ]
    non_empty = [entry for entry in categories if entry[1] > 0]

    buf = io.BytesIO()
    fig, ax = plt.subplots()
    try:
        if non_empty:
            labels, sizes, colors = zip(*non_empty)
            ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90,
                   colors=colors, wedgeprops={'edgecolor': 'black'})
            ax.axis('equal')  # Equal aspect ratio draws the pie as a circle.
        else:
            ax.text(0.5, 0.5, "No findings to chart", ha="center",
                    va="center", transform=ax.transAxes)
            ax.axis('off')
        # Save this specific figure rather than pyplot's "current" one.
        fig.savefig(buf, format="png")
    finally:
        # Figures stay registered with pyplot until closed; the script is
        # re-run on every interaction, so close to avoid accumulating them.
        plt.close(fig)
    return base64.b64encode(buf.getvalue()).decode('utf-8')


def _xml_safe(value) -> str:
    """Return *value* as text without control characters XML cannot hold."""
    return _XML_ILLEGAL_CHARS.sub("", str(value))


def _add_findings_section(doc, heading, items, fields, empty_message):
    """Append a level-2 section listing *items* to *doc*.

    *fields* is a sequence of ``(label, key)`` pairs; for every item one
    paragraph ``"<label>: <item[key]>"`` is written per pair. When *items* is
    empty, *empty_message* is written instead.
    """
    doc.add_heading(heading, level=2)
    if not items:
        doc.add_paragraph(empty_message)
        return
    for item in items:
        for label, key in fields:
            doc.add_paragraph(_xml_safe(f"{label}: {item[key]}"))


def generate_analysis_document(document_text, summary, detected_clauses,
                               hidden_obligations, detected_risks):
    """Build and return a python-docx ``Document`` holding the full analysis.

    The report contains the extracted text, the summary, and one section each
    for key clauses, hidden obligations and risks. Text is stripped of control
    characters first, since extracted PDF text may contain characters that the
    underlying XML writer is expected to reject.
    """
    doc = Document()
    doc.add_heading('Legal Document Analysis', level=1)

    doc.add_heading('Extracted Document Text', level=2)
    doc.add_paragraph(_xml_safe(document_text))

    doc.add_heading('Summary', level=2)
    doc.add_paragraph(_xml_safe(summary))

    _add_findings_section(
        doc, 'Key Clauses', detected_clauses,
        (("Clause", "clause"), ("Summary", "summary"), ("Explanation", "explanation")),
        "No key clauses detected.",
    )
    _add_findings_section(
        doc, 'Hidden Obligations or Dependencies', hidden_obligations,
        (("Phrase", "phrase"), ("Summary", "summary"), ("Context", "context")),
        "No hidden obligations detected.",
    )
    _add_findings_section(
        doc, 'Risks', detected_risks,
        (("Risk Phrase", "phrase"), ("Summary", "summary"), ("Context", "context")),
        "No risks detected.",
    )
    return doc


def _get_summary(document_text: str) -> str:
    """Return the summary of *document_text*, reusing a per-session cache.

    Streamlit re-runs the script on every widget interaction; without the
    cache each button click would re-send the whole document to the API.
    Failed summaries are not cached so a later re-run can retry.
    """
    digest = hashlib.sha256(document_text.encode("utf-8", errors="replace")).hexdigest()
    cache_key = f"summary_{digest}"
    if cache_key not in st.session_state:
        summary = summarize_large_text(document_text)
        if _summary_failed(summary):
            return summary
        st.session_state[cache_key] = summary
    return st.session_state[cache_key]


def display_legal_analysis_page():
    """Render the Streamlit page: upload, analysis tabs and report download."""
    st.title("Legal Document Analysis with Groq API")
    uploaded_file = st.file_uploader("Upload your legal document (PDF or DOCX)", type=["pdf", "docx"])
    if not uploaded_file:
        return

    # Compare case-insensitively so "CONTRACT.PDF" is accepted too.
    filename = uploaded_file.name.lower()
    if filename.endswith(".pdf"):
        raw_text = extract_text_from_pdf(uploaded_file)
    elif filename.endswith(".docx"):
        raw_text = extract_text_from_docx(uploaded_file)
    else:
        st.error("Unsupported file type!")
        return

    document_text = preprocess_text(raw_text)
    if not document_text:
        # e.g. a scanned PDF without a text layer: nothing to analyse.
        st.warning("No extractable text was found in the uploaded document.")
        return

    tabs = st.tabs(["Document Text", "Summary", "Key Clauses", "Hidden Obligations or Dependencies", "Risk Analysis"])

    with tabs[0]:
        st.subheader("Extracted Legal Document Text")
        st.text_area("Document Text", document_text, height=300)

    with tabs[1]:
        st.subheader("Quick Summary")
        summary = _get_summary(document_text)
        if _summary_failed(summary):
            st.warning("Summary generation failed.")
            summary = "Summary not available."
        st.write(summary)

    with tabs[2]:
        st.subheader("Detected Key Clauses")
        detected_clauses = detect_key_clauses(document_text)
        if not detected_clauses:
            st.write("No key clauses detected.")
        else:
            # Count occurrences of each detected clause for the bar chart.
            clause_counts = Counter(clause['clause'] for clause in detected_clauses)
            fig, ax = plt.subplots()
            try:
                ax.bar(list(clause_counts.keys()), list(clause_counts.values()), color='skyblue')
                # Rotate x-axis labels for better visibility (on this axes,
                # not on pyplot's global "current" axes).
                plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
                ax.set_title("Detected Key Clauses Visualization")
                ax.set_xlabel("Clause")
                ax.set_ylabel("Count")
                st.pyplot(fig)
            finally:
                plt.close(fig)

            # Display details of each clause on demand.
            for clause in detected_clauses:
                if st.button(f"Show Explanation for {clause['clause']} Clause"):
                    st.write(f"**Clause: {clause['clause']}**")
                    st.write(f"Summary: {clause['summary']}\nExplanation: {clause['explanation']}")

    with tabs[3]:
        st.subheader("Detected Hidden Obligations or Dependencies")
        hidden_obligations = detect_hidden_obligations_or_dependencies(document_text, summary)
        if not hidden_obligations:
            st.write("No hidden obligations or dependencies detected.")
        else:
            for item in hidden_obligations:
                st.write(f"**Phrase: {item['phrase']}**")
                st.write(f"Summary: {item['summary']}\nContext: {item['context']}")

    with tabs[4]:
        st.subheader("Risk Analysis & Visualization")
        # detected_clauses and hidden_obligations were computed for the
        # previous tabs from the same inputs, so they are reused here.
        detected_risks = detect_risks(document_text, summary)

        # Generate and display the pie chart.
        img_str = plot_risk_pie_chart(detected_clauses, hidden_obligations, detected_risks)
        st.image(f"data:image/png;base64,{img_str}", use_column_width=True)

        # Display the detected risks after the visualization.
        st.write("### Detected Risks:")
        for risk in detected_risks:
            st.write(f"**{risk['phrase']}**: {risk['summary']}")

        # Show the other categories after the risks.
        st.write("### Detected Key Clauses:")
        for clause in detected_clauses:
            st.write(f"**{clause['clause']}**: {clause['explanation']}")

        st.write("### Hidden Obligations or Dependencies:")
        for obligation in hidden_obligations:
            st.write(f"**{obligation['phrase']}**: {obligation['summary']}")

    # Generate the full analysis document for download.
    analysis_doc = generate_analysis_document(document_text, summary, detected_clauses, hidden_obligations, detected_risks)
    with st.expander("Download Analysis"):
        # Build the report in memory: a fixed on-disk path would be shared
        # (and overwritten) by every concurrent session of the app.
        report = io.BytesIO()
        analysis_doc.save(report)
        st.download_button(
            "Download Analysis",
            data=report.getvalue(),
            file_name="analysis_report.docx",
            mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        )


if __name__ == "__main__":
    display_legal_analysis_page()