"""Streamlit app that checks task descriptions against the terms of a contract.

The user uploads a contract (DOCX) and a spreadsheet of tasks (XLSX). The
contract text is sent to the OpenAI chat API to extract its terms as JSON, and
each task row is then sent, together with those terms, for a compliance
analysis. Both JSON documents are displayed and offered for download.
"""

import hashlib
import json
import os
from concurrent.futures import ThreadPoolExecutor, as_completed

import openai
import pandas as pd
import streamlit as st
from docx import Document
from dotenv import load_dotenv

# Load the OpenAI API key from environment variables
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
openai.api_key = api_key

# Columns that main() reads from every row of the uploaded spreadsheet.
REQUIRED_TASK_COLUMNS = ("Task Description", "Amount")

# Upper bound on concurrent OpenAI requests; adjust as needed.
MAX_WORKERS = 10

# st.session_state key under which the last successful analysis is stored.
_CACHE_KEY = "_compliance_analysis_cache"


def _chat_completion(system_prompt, user_prompt):
    """Send one system + user exchange to the chat model and return the reply text."""
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        max_tokens=4096,
        n=1,
        stop=None,
        temperature=0.1,
    )
    return response.choices[0].message["content"]


def _parse_json_response(raw_text):
    """Parse a JSON value out of a model reply.

    The reply is first parsed as-is. If that fails (for example because the
    JSON is wrapped in a Markdown code fence or surrounded by prose), the first
    position at which a JSON object or array can be decoded is used instead,
    and any text after that value is ignored.

    Raises:
        json.JSONDecodeError: if no JSON object or array can be decoded; the
            error from the initial whole-text parse is the one raised.
    """
    text = raw_text.strip()
    try:
        return json.loads(text)
    except json.JSONDecodeError as exc:
        first_error = exc

    decoder = json.JSONDecoder()
    for start, char in enumerate(text):
        if char not in "{[":
            continue
        try:
            value, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            continue
        return value
    raise first_error


def extract_text_from_docx(docx_path):
    """Return the text of the document's paragraphs joined by newlines.

    Args:
        docx_path: whatever ``docx.Document`` accepts; main() passes the
            uploaded file object.

    NOTE(review): only ``doc.paragraphs`` is read, so text held in tables is
    presumably not included — confirm against the python-docx documentation.
    """
    doc = Document(docx_path)
    return "\n".join(para.text for para in doc.paragraphs)


def extract_terms_from_contract(contract_text):
    """Ask the chat model to extract the contract's key terms.

    Args:
        contract_text: plain text of the contract.

    Returns:
        The model's reply as a string. The prompt asks for JSON, but the reply
        is returned unparsed; callers must parse (and validate) it themselves.
    """
    prompt = (
        "You are an AI tasked with analyzing a contract and extracting key terms and constraints. The contract contains "
        "various sections and subsections with terms related to budget constraints, types of allowable work, timelines, "
        "penalties, responsibilities, and other conditions for work execution. Your job is to extract these key terms and "
        "structure them in a clear JSON format, reflecting the hierarchy of sections and subsections. "
        "Ensure to capture all important constraints and conditions specified in the contract text. If a section or subsection "
        "contains multiple terms, list them all.\n\n"
        "Contract text:\n"
        f"{contract_text}\n\n"
        "Provide the extracted terms in JSON format."
    )
    return _chat_completion(
        "You are an AI specialized in extracting structured data from text documents.",
        prompt,
    )


def analyze_task_compliance(task_description, cost_estimate, contract_terms):
    """Ask the chat model whether a task violates the contract terms.

    Args:
        task_description: text describing the task.
        cost_estimate: the task's cost; interpolated into the prompt as-is.
        contract_terms: JSON-serialisable contract terms (embedded in the
            prompt via ``json.dumps``).

    Returns:
        The model's compliance analysis, parsed from JSON.

    Raises:
        json.JSONDecodeError: if the reply contains no decodable JSON.
    """
    prompt = (
        "You are an AI tasked with analyzing a task description and its associated cost estimate for compliance with contract conditions. "
        "Below are the key terms and constraints extracted from the contract, followed by a task description and its cost estimate. "
        "Your job is to analyze the task description and specify if it violates any conditions from the contract. "
        "If there are violations, list the reasons for each violation.\n\n"
        f"Contract terms:\n{json.dumps(contract_terms, indent=4)}\n\n"
        f"Task description:\n{task_description}\n"
        f"Cost estimate:\n{cost_estimate}\n\n"
        "Provide the compliance analysis in a clear JSON format."
    )
    response_content = _chat_completion(
        "You are an AI specialized in analyzing text for compliance with specified conditions.",
        prompt,
    )
    # Models frequently wrap JSON in code fences or prose, so parse tolerantly.
    return _parse_json_response(response_content)


def _files_digest(*uploaded_files):
    """Return a SHA-256 hex digest identifying the contents of the uploads."""
    digest = hashlib.sha256()
    for uploaded in uploaded_files:
        data = uploaded.getvalue()
        # Prefix each file with its length so the file boundary is unambiguous.
        digest.update(str(len(data)).encode("ascii") + b":")
        digest.update(data)
    return digest.hexdigest()


def _analyze_tasks(tasks_df, contract_terms):
    """Analyze every task row concurrently.

    Returns:
        ``(results, errors)``: ``results`` holds the analyses of the tasks that
        succeeded, in spreadsheet row order; ``errors`` holds one message per
        failed task.
    """
    results_by_position = {}
    errors = []
    task_rows = zip(tasks_df["Task Description"], tasks_df["Amount"])

    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Remember each future's row position so output order is deterministic
        # and failures can be attributed to a specific task.
        future_positions = {
            executor.submit(
                analyze_task_compliance, description, amount, contract_terms
            ): position
            for position, (description, amount) in enumerate(task_rows)
        }
        for future in as_completed(future_positions):
            position = future_positions[future]
            try:
                results_by_position[position] = future.result()
            except Exception as e:  # boundary: report the failure, keep going
                errors.append(
                    f"An error occurred while analyzing task {position + 1}: {e}"
                )

    results = [results_by_position[pos] for pos in sorted(results_by_position)]
    return results, errors


def _run_analysis(docx_file, xlsx_file):
    """Run the full pipeline for one pair of uploads.

    Problems are reported through ``st.error``.

    Returns:
        ``(contract_terms, compliance_results, complete)``, where ``complete``
        is False if any task failed, or ``None`` if the analysis could not be
        carried out at all.
    """
    # Validate the spreadsheet first so a malformed upload costs no API calls.
    tasks_df = pd.read_excel(xlsx_file)
    missing = [col for col in REQUIRED_TASK_COLUMNS if col not in tasks_df.columns]
    if missing:
        st.error(
            "The task spreadsheet is missing required column(s): "
            + ", ".join(missing)
        )
        return None

    contract_text = extract_text_from_docx(docx_file)
    extracted_terms_json = extract_terms_from_contract(contract_text)
    try:
        contract_terms = _parse_json_response(extracted_terms_json)
    except json.JSONDecodeError as e:
        st.error(f"JSON decoding error: {e}")
        return None

    compliance_results, errors = _analyze_tasks(tasks_df, contract_terms)
    for message in errors:
        st.error(message)
    return contract_terms, compliance_results, not errors


def main():
    """Render the app: upload widgets, analysis, results and download buttons."""
    st.title("Contract Compliance Analyzer")

    # File upload buttons in the same row
    upload_left, upload_right = st.columns(2)
    with upload_left:
        docx_file = st.file_uploader("Upload Contract Document (DOCX)", type="docx")
    with upload_right:
        xlsx_file = st.file_uploader("Upload Task Descriptions (XLSX)", type="xlsx")

    if not (docx_file and xlsx_file):
        return

    if not api_key:
        st.error("OPENAI_API_KEY is not set; cannot contact the OpenAI API.")
        return

    # Streamlit re-executes this script on every interaction (including a
    # click on a download button). Reuse the previous analysis while the
    # uploads are unchanged so the OpenAI calls are not repeated.
    digest = _files_digest(docx_file, xlsx_file)
    cached = st.session_state.get(_CACHE_KEY)
    if cached is not None and cached["digest"] == digest:
        contract_terms = cached["contract_terms"]
        compliance_results = cached["compliance_results"]
    else:
        analysis = _run_analysis(docx_file, xlsx_file)
        if analysis is None:
            return
        contract_terms, compliance_results, complete = analysis
        # Only cache complete runs so that a partial failure is retried.
        if complete:
            st.session_state[_CACHE_KEY] = {
                "digest": digest,
                "contract_terms": contract_terms,
                "compliance_results": compliance_results,
            }

    terms_col, results_col = st.columns(2)
    with terms_col:
        st.write("Extracted Contract Terms:")
        st.json(contract_terms)
        # Download button for contract terms
        st.download_button(
            label="Download Contract Terms",
            data=json.dumps(contract_terms, indent=4),
            file_name="contract_terms.json",
            mime="application/json",
        )

    with results_col:
        st.write("Compliance Results:")
        st.json(compliance_results)
        # Download button for compliance results
        st.download_button(
            label="Download Compliance Results",
            data=json.dumps(compliance_results, indent=4),
            file_name="compliance_results.json",
            mime="application/json",
        )


if __name__ == "__main__":
    main()