| # import streamlit as st | |
| # import os | |
| # import openai | |
| # import json | |
| # import pandas as pd | |
| # from docx import Document | |
| # from concurrent.futures import ThreadPoolExecutor, as_completed | |
| # from dotenv import load_dotenv | |
| # import time | |
| # # Load the OpenAI API key from environment variables | |
| # load_dotenv() | |
| # api_key = os.getenv("OPENAI_API_KEY") | |
| # openai.api_key = api_key | |
| # # Streamlit app layout | |
| # st.set_page_config(layout="wide") | |
| # # Add custom CSS for center alignment | |
| # st.markdown(""" | |
| # <style> | |
| # .centered-title { | |
| # text-align: center; | |
| # font-size: 2.5em; | |
| # margin-top: 0; | |
| # } | |
| # </style> | |
| # """, unsafe_allow_html=True) | |
| # def extract_text_from_docx(docx_path): | |
| # doc = Document(docx_path) | |
| # return "\n".join([para.text for para in doc.paragraphs]) | |
| # def extract_terms_from_contract(contract_text): | |
| # prompt = ( | |
| # "You are an AI tasked with analyzing a contract and extracting key terms and constraints. The contract contains " | |
| # "various sections and subsections with terms related to budget constraints, types of allowable work, timelines, " | |
| # "penalties, responsibilities, and other conditions for work execution. Your job is to extract these key terms and " | |
| # "structure them in a clear JSON format, reflecting the hierarchy of sections and subsections. " | |
| # "Ensure to capture all important constraints and conditions specified in the contract text. If a section or subsection " | |
| # "contains multiple terms, list them all.\n\n" | |
| # "Contract text:\n" | |
| # f"{contract_text}\n\n" | |
| # "Provide the extracted terms in JSON format." | |
| # ) | |
| # retries = 2 | |
| # wait_time = 1 | |
| # for i in range(retries): | |
| # try: | |
| # response = openai.ChatCompletion.create( | |
| # model="gpt-4", | |
| # messages=[ | |
| # {"role": "system", "content": "You are an AI specialized in extracting structured data from text documents."}, | |
| # {"role": "user", "content": prompt}, | |
| # ], | |
| # max_tokens=4096, | |
| # n=1, | |
| # stop=None, | |
| # temperature=0.1, | |
| # ) | |
| # return response.choices[0].message["content"] | |
| # except openai.error.RateLimitError: | |
| # if i < retries - 1: | |
| # st.warning(f"Rate limit exceeded. Retrying in {wait_time} seconds...") | |
| # time.sleep(wait_time) | |
| # wait_time *= 2 # Exponential backoff | |
| # else: | |
| # st.error("Rate limit exceeded. Please try again later.") | |
| # return None | |
| # def analyze_task_compliance(task_description, cost_estimate, contract_terms): | |
| # prompt = ( | |
| # "You are an AI tasked with analyzing a task description and its associated cost estimate for compliance with contract conditions. " | |
| # "Below are the key terms and constraints extracted from the contract, followed by a task description and its cost estimate. " | |
| # "Your job is to analyze the task description and specify if it violates any conditions from the contract. " | |
| # "If there are violations, list the reasons for each violation.\n\n" | |
| # f"Contract terms:\n{json.dumps(contract_terms, indent=4)}\n\n" | |
| # f"Task description:\n{task_description}\n" | |
| # f"Cost estimate:\n{cost_estimate}\n\n" | |
| # "Provide the compliance analysis in a clear JSON format." | |
| # ) | |
| # retries = 5 | |
| # wait_time = 1 | |
| # for i in range(retries): | |
| # try: | |
| # response = openai.ChatCompletion.create( | |
| # model="gpt-4", | |
| # messages=[ | |
| # {"role": "system", "content": "You are an AI specialized in analyzing text for compliance with specified conditions."}, | |
| # {"role": "user", "content": prompt}, | |
| # ], | |
| # max_tokens=4096, | |
| # n=1, | |
| # stop=None, | |
| # temperature=0.1, | |
| # stream=True, | |
| # ) | |
| # compliance_analysis = "" | |
| # for chunk in response: | |
| # chunk_text = chunk['choices'][0]['delta'].get('content', '') | |
| # compliance_analysis += chunk_text | |
| # st.write(chunk_text) | |
| # st.json(chunk_text) | |
| # return json.loads(compliance_analysis) | |
| # except openai.error.RateLimitError: | |
| # if i < retries - 1: | |
| # st.warning(f"Rate limit exceeded. Retrying in {wait_time} seconds...") | |
| # time.sleep(wait_time) | |
| # wait_time *= 2 # Exponential backoff | |
| # else: | |
| # st.error("Rate limit exceeded. Please try again later.") | |
| # return None | |
| # def main(): | |
| # st.markdown("<h1 class='centered-title'>Contract Compliance Analyzer</h1>", unsafe_allow_html=True) | |
| # # File upload buttons one after another | |
| # st.sidebar.file_uploader("Upload Contract Document (DOCX)", type="docx", key="docx_file") | |
| # st.sidebar.file_uploader("Upload Task Descriptions (XLSX or CSV)", type=["xlsx", "csv"], key="data_file") | |
| # submit_button = st.sidebar.button("Submit") | |
| # docx_file = st.session_state.get("docx_file") | |
| # data_file = st.session_state.get("data_file") | |
| # if submit_button and docx_file and data_file: | |
| # # Clear previous information | |
| # st.session_state.clear() | |
| # # Extract contract text and terms | |
| # contract_text = extract_text_from_docx(docx_file) | |
| # extracted_terms_json = extract_terms_from_contract(contract_text) | |
| # if extracted_terms_json is None: | |
| # return | |
| # try: | |
| # contract_terms = json.loads(extracted_terms_json) | |
| # except json.JSONDecodeError as e: | |
| # st.error(f"JSON decoding error: {e}") | |
| # return | |
| # # Read task descriptions and cost estimates from XLSX or CSV | |
| # if data_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": | |
| # tasks_df = pd.read_excel(data_file) | |
| # else: | |
| # tasks_df = pd.read_csv(data_file) | |
| # compliance_results = [] | |
| # futures = [] | |
| # # Use ThreadPoolExecutor to analyze tasks concurrently | |
| # with ThreadPoolExecutor(max_workers=10) as executor: # Adjust max_workers as needed | |
| # for _, row in tasks_df.iterrows(): | |
| # task_description = row['Task Description'] | |
| # cost_estimate = row['Amount'] | |
| # futures.append(executor.submit(analyze_task_compliance, task_description, cost_estimate, contract_terms)) | |
| # for future in as_completed(futures): | |
| # try: | |
| # result = future.result() | |
| # if result is not None: | |
| # compliance_results.append(result) | |
| # except Exception as e: | |
| # st.error(f"An error occurred: {e}") | |
| # col1, col2 = st.columns(2) | |
| # with col1: | |
| # st.write("Extracted Contract Terms:") | |
| # st.json(contract_terms) | |
| # # Download button for contract terms | |
| # st.download_button( | |
| # label="Download Contract Terms", | |
| # data=json.dumps(contract_terms, indent=4), | |
| # file_name="contract_terms.json", | |
| # mime="application/json" | |
| # ) | |
| # with col2: | |
| # st.write("Compliance Results:") | |
| # st.json(compliance_results) | |
| # # Download button for compliance results | |
| # compliance_results_json = json.dumps(compliance_results, indent=4) | |
| # st.download_button( | |
| # label="Download Compliance Results", | |
| # data=compliance_results_json, | |
| # file_name="compliance_results.json", | |
| # mime="application/json" | |
| # ) | |
| # if __name__ == "__main__": | |
| # main() | |
| # import streamlit as st | |
| # import os | |
| # import openai | |
| # import json | |
| # import pandas as pd | |
| # from docx import Document | |
| # from dotenv import load_dotenv | |
| # import time | |
| # # Load the OpenAI API key from environment variables | |
| # load_dotenv() | |
| # api_key = os.getenv("OPENAI_API_KEY") | |
| # openai.api_key = api_key | |
| # # Streamlit app layout | |
| # st.set_page_config(layout="wide") | |
| # # Add custom CSS for center alignment | |
| # st.markdown(""" | |
| # <style> | |
| # .centered-title { | |
| # text-align: center; | |
| # font-size: 2.5em; | |
| # margin-top: 0; | |
| # } | |
| # </style> | |
| # """, unsafe_allow_html=True) | |
| # def extract_text_from_docx(docx_path): | |
| # doc = Document(docx_path) | |
| # return "\n".join([para.text for para in doc.paragraphs]) | |
| # def extract_terms_from_contract(contract_text): | |
| # prompt = ( | |
| # "You are an AI tasked with analyzing a contract and extracting key terms and constraints. The contract contains " | |
| # "various sections and subsections with terms related to budget constraints, types of allowable work, timelines, " | |
| # "penalties, responsibilities, and other conditions for work execution. Your job is to extract these key terms and " | |
| # "structure them in a clear JSON format, reflecting the hierarchy of sections and subsections. " | |
| # "Ensure to capture all important constraints and conditions specified in the contract text. If a section or subsection " | |
| # "contains multiple terms, list them all.\n\n" | |
| # "Contract text:\n" | |
| # f"{contract_text}\n\n" | |
| # "Provide the extracted terms in JSON format." | |
| # ) | |
| # try: | |
| # response = openai.ChatCompletion.create( | |
| # model="gpt-4", | |
| # messages=[ | |
| # {"role": "system", "content": "You are an AI specialized in extracting structured data from text documents."}, | |
| # {"role": "user", "content": prompt}, | |
| # ], | |
| # max_tokens=4096, | |
| # n=1, | |
| # stop=None, | |
| # temperature=0.1, | |
| # ) | |
| # return response.choices[0].message["content"] | |
| # except openai.error.OpenAIError as e: | |
| # st.error(f"Error extracting terms from contract: {e}") | |
| # return None | |
| # def analyze_task_compliance(task_description, cost_estimate, contract_text): | |
| # prompt = ( | |
| # "You are an AI tasked with analyzing a task description and its associated cost estimate for compliance with contract conditions. " | |
| # "Below are the key terms and constraints extracted from the contract, followed by a task description and its cost estimate. " | |
| # "Your job is to analyze the task description and specify if it violates any conditions from the contract. " | |
| # "If there are violations, list the reasons for each violation.\n\n" | |
| # f"Contract terms:\n{contract_text}\n\n" | |
| # f"Task description:\n{task_description}\n" | |
| # f"Cost estimate:\n{cost_estimate}\n\n" | |
| # "Provide the compliance analysis in a clear JSON format." | |
| # ) | |
| # try: | |
| # response = openai.ChatCompletion.create( | |
| # model="gpt-4", | |
| # messages=[ | |
| # {"role": "system", "content": "You are an AI specialized in analyzing text for compliance with specified conditions."}, | |
| # {"role": "user", "content": prompt}, | |
| # ], | |
| # max_tokens=4096, | |
| # n=1, | |
| # stop=None, | |
| # temperature=0.1, | |
| # ) | |
| # return json.loads(response.choices[0].message["content"]) | |
| # except openai.error.OpenAIError as e: | |
| # st.error(f"Error analyzing task compliance: {e}") | |
| # return None | |
| # def main(): | |
| # start = time.time() | |
| # st.markdown("<h1 class='centered-title'>Contract Compliance Analyzer</h1>", unsafe_allow_html=True) | |
| # # File upload buttons one after another | |
| # st.sidebar.file_uploader("Upload Contract Document (DOCX)", type="docx", key="docx_file") | |
| # st.sidebar.file_uploader("Upload Task Descriptions (XLSX or CSV)", type=["xlsx", "csv"], key="data_file") | |
| # submit_button = st.sidebar.button("Submit") | |
| # docx_file = st.session_state.get("docx_file") | |
| # data_file = st.session_state.get("data_file") | |
| # if submit_button and docx_file and data_file: | |
| # # Clear previous information | |
| # st.session_state.clear() | |
| # # Extract contract text and terms | |
| # contract_text = extract_text_from_docx(docx_file) | |
| # extracted_terms_json = extract_terms_from_contract(contract_text) | |
| # if extracted_terms_json is None: | |
| # return | |
| # try: | |
| # contract_terms = json.loads(extracted_terms_json) | |
| # except json.JSONDecodeError as e: | |
| # st.error(f"JSON decoding error: {e}") | |
| # return | |
| # # Introducing an 8-second delay before analyzing task compliance | |
| # time.sleep(8) | |
| # # Read task descriptions and cost estimates from XLSX or CSV | |
| # if data_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": | |
| # tasks_df = pd.read_excel(data_file) | |
| # else: | |
| # tasks_df = pd.read_csv(data_file) | |
| # compliance_results = [] | |
| # # Process tasks sequentially | |
| # for _, row in tasks_df.iterrows(): | |
| # task_description = row['Task Description'] | |
| # cost_estimate = row['Amount'] | |
| # result = analyze_task_compliance(task_description, cost_estimate, contract_text) | |
| # if result is not None: | |
| # compliance_results.append(result) | |
| # col1, col2 = st.columns(2) | |
| # with col1: | |
| # st.write("Extracted Contract Terms:") | |
| # st.json(contract_terms) | |
| # # Download button for contract terms | |
| # st.download_button( | |
| # label="Download Contract Terms", | |
| # data=json.dumps(contract_terms, indent=4), | |
| # file_name="contract_terms.json", | |
| # mime="application/json" | |
| # ) | |
| # with col2: | |
| # st.write("Compliance Results:") | |
| # st.json(compliance_results) | |
| # # Download button for compliance results | |
| # compliance_results_json = json.dumps(compliance_results, indent=4) | |
| # st.download_button( | |
| # label="Download Compliance Results", | |
| # data=compliance_results_json, | |
| # file_name="compliance_results.json", | |
| # mime="application/json" | |
| # ) | |
| # end = time.time() | |
| # print("Total Time: ", end-start) | |
| # if __name__ == "__main__": | |
| # main() | |
| import streamlit as st | |
| import os | |
| import openai | |
| import json | |
| import pandas as pd | |
| from docx import Document | |
| from dotenv import load_dotenv | |
| import time | |
| import retrying | |
# Read OPENAI_API_KEY from the environment (load_dotenv() runs first) and hand
# it to the OpenAI client. ``api_key`` stays available as a module-level name.
load_dotenv()
openai.api_key = api_key = os.getenv("OPENAI_API_KEY")

# Page configuration: wide layout.
st.set_page_config(layout="wide")

# Stylesheet for the ``centered-title`` class used by the page heading.
_CENTERED_TITLE_CSS = """
<style>
.centered-title {
text-align: center;
font-size: 2.5em;
margin-top: 0;
}
</style>
"""
st.markdown(_CENTERED_TITLE_CSS, unsafe_allow_html=True)
def extract_text_from_docx(docx_path):
    """Return the text of every paragraph in a DOCX document, joined by newlines.

    Args:
        docx_path: Passed unchanged to ``Document``; ``main`` supplies the
            uploaded contract file.

    Returns:
        A single string with one line per entry of ``doc.paragraphs``.
    """
    paragraphs = Document(docx_path).paragraphs
    return "\n".join(paragraph.text for paragraph in paragraphs)
def extract_terms_from_contract(contract_text):
    """Ask GPT-4 to extract the contract's key terms and return its raw reply.

    Args:
        contract_text: Contract text embedded verbatim in the prompt.

    Returns:
        The message content of the first completion choice (not parsed here;
        the caller runs ``json.loads`` on it), or ``None`` when the request
        raises ``openai.error.OpenAIError``. In that case the error is also
        shown with ``st.error``.
    """
    task_brief = (
        "You are an AI tasked with analyzing a contract and extracting key terms and constraints. The contract contains "
        "various sections and subsections with terms related to budget constraints, types of allowable work, timelines, "
        "penalties, responsibilities, and other conditions for work execution. Your job is to extract these key terms and "
        "structure them in a clear JSON format, reflecting the hierarchy of sections and subsections. "
        "Ensure to capture all important constraints and conditions specified in the contract text. If a section or subsection "
        "contains multiple terms, list them all.\n\n"
    )
    prompt = f"{task_brief}Contract text:\n{contract_text}\n\nProvide the extracted terms in JSON format."

    system_message = {
        "role": "system",
        "content": "You are an AI specialized in extracting structured data from text documents.",
    }
    user_message = {"role": "user", "content": prompt}
    request_options = {
        "model": "gpt-4",
        "messages": [system_message, user_message],
        "max_tokens": 4096,
        "n": 1,
        "stop": None,
        "temperature": 0.1,
    }

    try:
        completion = openai.ChatCompletion.create(**request_options)
        return completion.choices[0].message["content"]
    except openai.error.OpenAIError as api_error:
        st.error(f"Error extracting terms from contract: {api_error}")
        return None
def analyze_task_compliance(task_description, cost_estimate, contract_text,
                            max_retries=3, initial_wait=1.0):
    """Ask GPT-4 whether a task and its cost estimate comply with the contract.

    Rate-limit errors are retried with exponential backoff. Any other OpenAI
    error, or a reply that is not valid JSON, is reported with ``st.error``
    and yields ``None`` instead of raising, so one bad task does not abort the
    caller's loop.

    Args:
        task_description: Task text embedded in the prompt.
        cost_estimate: Cost value embedded in the prompt.
        contract_text: Text placed under "Contract terms:" in the prompt
            (``main`` passes the full contract text).
        max_retries: Total number of attempts when the API reports a rate
            limit; values below 1 are treated as 1.
        initial_wait: Seconds to sleep before the second attempt; the wait
            doubles after every further rate-limit error.

    Returns:
        The parsed JSON reply, or ``None`` on failure.
    """
    prompt = (
        "You are an AI tasked with analyzing a task description and its associated cost estimate for compliance with contract conditions. "
        "Below are the key terms and constraints extracted from the contract, followed by a task description and its cost estimate. "
        "Your job is to analyze the task description and specify if it violates any conditions from the contract. "
        "If there are violations, list the reasons for each violation.\n\n"
        f"Contract terms:\n{contract_text}\n\n"
        f"Task description:\n{task_description}\n"
        f"Cost estimate:\n{cost_estimate}\n\n"
        "Provide the compliance analysis in a clear JSON format."
    )
    messages = [
        {"role": "system", "content": "You are an AI specialized in analyzing text for compliance with specified conditions."},
        {"role": "user", "content": prompt},
    ]

    attempts = max(1, max_retries)
    wait_time = initial_wait
    response = None
    for attempt in range(1, attempts + 1):
        try:
            response = openai.ChatCompletion.create(
                model="gpt-4",
                messages=messages,
                max_tokens=4096,
                n=1,
                stop=None,
                temperature=0.1,
            )
            break
        except openai.error.RateLimitError as e:
            # RateLimitError must be handled before the generic OpenAIError
            # clause below, otherwise it would never be retried.
            if attempt == attempts:
                st.error(f"Error analyzing task compliance: {e}")
                return None
            st.warning(f"Rate limit exceeded. Retrying in {wait_time} seconds...")
            time.sleep(wait_time)
            wait_time *= 2  # Exponential backoff
        except openai.error.OpenAIError as e:
            st.error(f"Error analyzing task compliance: {e}")
            return None

    content = response.choices[0].message["content"]
    try:
        return json.loads(content)
    except (json.JSONDecodeError, TypeError) as e:
        # The model is only *asked* for JSON; a reply with extra prose or no
        # content at all must not crash the whole run.
        st.error(f"Error analyzing task compliance: response was not valid JSON ({e})")
        return None
_XLSX_MIME_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
_REQUIRED_TASK_COLUMNS = ("Task Description", "Amount")


def _load_tasks(data_file):
    """Read the uploaded task sheet: Excel when it looks like .xlsx, CSV otherwise."""
    looks_like_xlsx = (
        data_file.type == _XLSX_MIME_TYPE
        # Fall back to the file name in case the reported MIME type differs.
        or str(getattr(data_file, "name", "")).lower().endswith(".xlsx")
    )
    if looks_like_xlsx:
        return pd.read_excel(data_file)
    return pd.read_csv(data_file)


def main():
    """Render the Contract Compliance Analyzer page and process a submission.

    The sidebar collects a DOCX contract and an XLSX/CSV task sheet. When
    "Submit" is pressed with both files present, the task sheet is validated,
    the contract terms are extracted with GPT-4, every task row is checked
    against the contract text, and both results are shown with download
    buttons. Problems are reported with ``st.warning`` / ``st.error`` and end
    the run early.
    """
    start = time.time()
    st.markdown("<h1 class='centered-title'>Contract Compliance Analyzer</h1>", unsafe_allow_html=True)

    # File upload widgets, one after another; their values are read back
    # through the session-state keys given here.
    st.sidebar.file_uploader("Upload Contract Document (DOCX)", type="docx", key="docx_file")
    st.sidebar.file_uploader("Upload Task Descriptions (XLSX or CSV)", type=["xlsx", "csv"], key="data_file")
    submit_button = st.sidebar.button("Submit")
    docx_file = st.session_state.get("docx_file")
    data_file = st.session_state.get("data_file")

    if not submit_button:
        return
    if not (docx_file and data_file):
        # Previously a submit with a missing file silently did nothing.
        st.warning("Please upload both a contract document and a task file before submitting.")
        return

    # Clear previous information.
    # NOTE(review): this also removes the "docx_file"/"data_file" widget keys;
    # confirm that resetting the uploaders on the next rerun is intended.
    st.session_state.clear()

    # Read and validate the task sheet first so a malformed file is rejected
    # before any OpenAI request is made.
    tasks_df = _load_tasks(data_file)
    missing_columns = [name for name in _REQUIRED_TASK_COLUMNS if name not in tasks_df.columns]
    if missing_columns:
        st.error(f"Task file is missing required column(s): {', '.join(missing_columns)}")
        return

    # Extract contract text and terms.
    contract_text = extract_text_from_docx(docx_file)
    extracted_terms_json = extract_terms_from_contract(contract_text)
    if extracted_terms_json is None:
        return
    try:
        contract_terms = json.loads(extracted_terms_json)
    except json.JSONDecodeError as e:
        st.error(f"JSON decoding error: {e}")
        return

    # Process tasks sequentially; tasks whose analysis failed (None) are
    # left out of the results.
    compliance_results = []
    for task_description, cost_estimate in zip(tasks_df["Task Description"], tasks_df["Amount"]):
        result = analyze_task_compliance(task_description, cost_estimate, contract_text)
        if result is not None:
            compliance_results.append(result)

    col1, col2 = st.columns(2)
    with col1:
        st.write("Extracted Contract Terms:")
        st.json(contract_terms)
        # Download button for contract terms
        st.download_button(
            label="Download Contract Terms",
            data=json.dumps(contract_terms, indent=4),
            file_name="contract_terms.json",
            mime="application/json",
        )
    with col2:
        st.write("Compliance Results:")
        st.json(compliance_results)
        # Download button for compliance results
        compliance_results_json = json.dumps(compliance_results, indent=4)
        st.download_button(
            label="Download Compliance Results",
            data=compliance_results_json,
            file_name="compliance_results.json",
            mime="application/json",
        )

    # Wall-clock time for a completed submission, written to stdout.
    end = time.time()
    print("Total Time: ", end - start)


if __name__ == "__main__":
    main()