# import streamlit as st # import os # import openai # import json # import pandas as pd # from docx import Document # from concurrent.futures import ThreadPoolExecutor, as_completed # from dotenv import load_dotenv # import time # # Load the OpenAI API key from environment variables # load_dotenv() # api_key = os.getenv("OPENAI_API_KEY") # openai.api_key = api_key # # Streamlit app layout # st.set_page_config(layout="wide") # # Add custom CSS for center alignment # st.markdown(""" # # """, unsafe_allow_html=True) # def extract_text_from_docx(docx_path): # doc = Document(docx_path) # return "\n".join([para.text for para in doc.paragraphs]) # def extract_terms_from_contract(contract_text): # prompt = ( # "You are an AI tasked with analyzing a contract and extracting key terms and constraints. The contract contains " # "various sections and subsections with terms related to budget constraints, types of allowable work, timelines, " # "penalties, responsibilities, and other conditions for work execution. Your job is to extract these key terms and " # "structure them in a clear JSON format, reflecting the hierarchy of sections and subsections. " # "Ensure to capture all important constraints and conditions specified in the contract text. If a section or subsection " # "contains multiple terms, list them all.\n\n" # "Contract text:\n" # f"{contract_text}\n\n" # "Provide the extracted terms in JSON format." # ) # retries = 2 # wait_time = 1 # for i in range(retries): # try: # response = openai.ChatCompletion.create( # model="gpt-4", # messages=[ # {"role": "system", "content": "You are an AI specialized in extracting structured data from text documents."}, # {"role": "user", "content": prompt}, # ], # max_tokens=4096, # n=1, # stop=None, # temperature=0.1, # ) # return response.choices[0].message["content"] # except openai.error.RateLimitError: # if i < retries - 1: # st.warning(f"Rate limit exceeded. 
Retrying in {wait_time} seconds...") # time.sleep(wait_time) # wait_time *= 2 # Exponential backoff # else: # st.error("Rate limit exceeded. Please try again later.") # return None # def analyze_task_compliance(task_description, cost_estimate, contract_terms): # prompt = ( # "You are an AI tasked with analyzing a task description and its associated cost estimate for compliance with contract conditions. " # "Below are the key terms and constraints extracted from the contract, followed by a task description and its cost estimate. " # "Your job is to analyze the task description and specify if it violates any conditions from the contract. " # "If there are violations, list the reasons for each violation.\n\n" # f"Contract terms:\n{json.dumps(contract_terms, indent=4)}\n\n" # f"Task description:\n{task_description}\n" # f"Cost estimate:\n{cost_estimate}\n\n" # "Provide the compliance analysis in a clear JSON format." # ) # retries = 5 # wait_time = 1 # for i in range(retries): # try: # response = openai.ChatCompletion.create( # model="gpt-4", # messages=[ # {"role": "system", "content": "You are an AI specialized in analyzing text for compliance with specified conditions."}, # {"role": "user", "content": prompt}, # ], # max_tokens=4096, # n=1, # stop=None, # temperature=0.1, # stream=True, # ) # compliance_analysis = "" # for chunk in response: # chunk_text = chunk['choices'][0]['delta'].get('content', '') # compliance_analysis += chunk_text # st.write(chunk_text) # st.json(chunk_text) # return json.loads(compliance_analysis) # except openai.error.RateLimitError: # if i < retries - 1: # st.warning(f"Rate limit exceeded. Retrying in {wait_time} seconds...") # time.sleep(wait_time) # wait_time *= 2 # Exponential backoff # else: # st.error("Rate limit exceeded. Please try again later.") # return None # def main(): # st.markdown("

Contract Compliance Analyzer

", unsafe_allow_html=True) # # File upload buttons one after another # st.sidebar.file_uploader("Upload Contract Document (DOCX)", type="docx", key="docx_file") # st.sidebar.file_uploader("Upload Task Descriptions (XLSX or CSV)", type=["xlsx", "csv"], key="data_file") # submit_button = st.sidebar.button("Submit") # docx_file = st.session_state.get("docx_file") # data_file = st.session_state.get("data_file") # if submit_button and docx_file and data_file: # # Clear previous information # st.session_state.clear() # # Extract contract text and terms # contract_text = extract_text_from_docx(docx_file) # extracted_terms_json = extract_terms_from_contract(contract_text) # if extracted_terms_json is None: # return # try: # contract_terms = json.loads(extracted_terms_json) # except json.JSONDecodeError as e: # st.error(f"JSON decoding error: {e}") # return # # Read task descriptions and cost estimates from XLSX or CSV # if data_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": # tasks_df = pd.read_excel(data_file) # else: # tasks_df = pd.read_csv(data_file) # compliance_results = [] # futures = [] # # Use ThreadPoolExecutor to analyze tasks concurrently # with ThreadPoolExecutor(max_workers=10) as executor: # Adjust max_workers as needed # for _, row in tasks_df.iterrows(): # task_description = row['Task Description'] # cost_estimate = row['Amount'] # futures.append(executor.submit(analyze_task_compliance, task_description, cost_estimate, contract_terms)) # for future in as_completed(futures): # try: # result = future.result() # if result is not None: # compliance_results.append(result) # except Exception as e: # st.error(f"An error occurred: {e}") # col1, col2 = st.columns(2) # with col1: # st.write("Extracted Contract Terms:") # st.json(contract_terms) # # Download button for contract terms # st.download_button( # label="Download Contract Terms", # data=json.dumps(contract_terms, indent=4), # file_name="contract_terms.json", # 
mime="application/json" # ) # with col2: # st.write("Compliance Results:") # st.json(compliance_results) # # Download button for compliance results # compliance_results_json = json.dumps(compliance_results, indent=4) # st.download_button( # label="Download Compliance Results", # data=compliance_results_json, # file_name="compliance_results.json", # mime="application/json" # ) # if __name__ == "__main__": # main() # import streamlit as st # import os # import openai # import json # import pandas as pd # from docx import Document # from dotenv import load_dotenv # import time # # Load the OpenAI API key from environment variables # load_dotenv() # api_key = os.getenv("OPENAI_API_KEY") # openai.api_key = api_key # # Streamlit app layout # st.set_page_config(layout="wide") # # Add custom CSS for center alignment # st.markdown(""" # # """, unsafe_allow_html=True) # def extract_text_from_docx(docx_path): # doc = Document(docx_path) # return "\n".join([para.text for para in doc.paragraphs]) # def extract_terms_from_contract(contract_text): # prompt = ( # "You are an AI tasked with analyzing a contract and extracting key terms and constraints. The contract contains " # "various sections and subsections with terms related to budget constraints, types of allowable work, timelines, " # "penalties, responsibilities, and other conditions for work execution. Your job is to extract these key terms and " # "structure them in a clear JSON format, reflecting the hierarchy of sections and subsections. " # "Ensure to capture all important constraints and conditions specified in the contract text. If a section or subsection " # "contains multiple terms, list them all.\n\n" # "Contract text:\n" # f"{contract_text}\n\n" # "Provide the extracted terms in JSON format." 
# ) # try: # response = openai.ChatCompletion.create( # model="gpt-4", # messages=[ # {"role": "system", "content": "You are an AI specialized in extracting structured data from text documents."}, # {"role": "user", "content": prompt}, # ], # max_tokens=4096, # n=1, # stop=None, # temperature=0.1, # ) # return response.choices[0].message["content"] # except openai.error.OpenAIError as e: # st.error(f"Error extracting terms from contract: {e}") # return None # def analyze_task_compliance(task_description, cost_estimate, contract_text): # prompt = ( # "You are an AI tasked with analyzing a task description and its associated cost estimate for compliance with contract conditions. " # "Below are the key terms and constraints extracted from the contract, followed by a task description and its cost estimate. " # "Your job is to analyze the task description and specify if it violates any conditions from the contract. " # "If there are violations, list the reasons for each violation.\n\n" # f"Contract terms:\n{contract_text}\n\n" # f"Task description:\n{task_description}\n" # f"Cost estimate:\n{cost_estimate}\n\n" # "Provide the compliance analysis in a clear JSON format." # ) # try: # response = openai.ChatCompletion.create( # model="gpt-4", # messages=[ # {"role": "system", "content": "You are an AI specialized in analyzing text for compliance with specified conditions."}, # {"role": "user", "content": prompt}, # ], # max_tokens=4096, # n=1, # stop=None, # temperature=0.1, # ) # return json.loads(response.choices[0].message["content"]) # except openai.error.OpenAIError as e: # st.error(f"Error analyzing task compliance: {e}") # return None # def main(): # start = time.time() # st.markdown("

Contract Compliance Analyzer

", unsafe_allow_html=True) # # File upload buttons one after another # st.sidebar.file_uploader("Upload Contract Document (DOCX)", type="docx", key="docx_file") # st.sidebar.file_uploader("Upload Task Descriptions (XLSX or CSV)", type=["xlsx", "csv"], key="data_file") # submit_button = st.sidebar.button("Submit") # docx_file = st.session_state.get("docx_file") # data_file = st.session_state.get("data_file") # if submit_button and docx_file and data_file: # # Clear previous information # st.session_state.clear() # # Extract contract text and terms # contract_text = extract_text_from_docx(docx_file) # extracted_terms_json = extract_terms_from_contract(contract_text) # if extracted_terms_json is None: # return # try: # contract_terms = json.loads(extracted_terms_json) # except json.JSONDecodeError as e: # st.error(f"JSON decoding error: {e}") # return # # Introducing a 1-second delay before analyzing task compliance # time.sleep(8) # # Read task descriptions and cost estimates from XLSX or CSV # if data_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": # tasks_df = pd.read_excel(data_file) # else: # tasks_df = pd.read_csv(data_file) # compliance_results = [] # # Process tasks sequentially # for _, row in tasks_df.iterrows(): # task_description = row['Task Description'] # cost_estimate = row['Amount'] # result = analyze_task_compliance(task_description, cost_estimate, contract_text) # if result is not None: # compliance_results.append(result) # col1, col2 = st.columns(2) # with col1: # st.write("Extracted Contract Terms:") # st.json(contract_terms) # # Download button for contract terms # st.download_button( # label="Download Contract Terms", # data=json.dumps(contract_terms, indent=4), # file_name="contract_terms.json", # mime="application/json" # ) # with col2: # st.write("Compliance Results:") # st.json(compliance_results) # # Download button for compliance results # compliance_results_json = json.dumps(compliance_results, indent=4) # 
# (tail of the earlier, commented-out revision of this script)
# st.download_button(
#     label="Download Compliance Results",
#     data=compliance_results_json,
#     file_name="compliance_results.json",
#     mime="application/json"
# )
# end = time.time()
# print("Total Time: ", end-start)
# if __name__ == "__main__":
#     main()

# Contract Compliance Analyzer: a Streamlit app that sends a DOCX contract and
# a spreadsheet of tasks to the OpenAI chat API and shows the JSON replies.

import json
import os
import time

import openai
import pandas as pd
import retrying
import streamlit as st
from docx import Document
from dotenv import load_dotenv

# Load the OpenAI API key from environment variables
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
openai.api_key = api_key

# Streamlit app layout
st.set_page_config(layout="wide")

# Add custom CSS for center alignment
# NOTE(review): the string below contains no CSS; the markup looks like it was
# lost at some point -- restore the intended <style> block if so.
st.markdown(""" """, unsafe_allow_html=True)

# Columns every task spreadsheet must provide.
REQUIRED_TASK_COLUMNS = ("Task Description", "Amount")

# MIME type Streamlit reports for uploaded .xlsx files.
XLSX_MIME_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"

# OpenAI failures that are worth retrying after a pause; anything else
# (bad request, authentication, ...) will not succeed on a second attempt.
_TRANSIENT_OPENAI_ERRORS = (
    openai.error.RateLimitError,
    openai.error.APIConnectionError,
    openai.error.ServiceUnavailableError,
    openai.error.Timeout,
)


def _is_transient_openai_error(exc):
    """Return True when *exc* is an OpenAI error that may succeed on retry."""
    return isinstance(exc, _TRANSIENT_OPENAI_ERRORS)


# Retry with exponential backoff, but only for transient API errors. The
# decorator must wrap the raw API call: once an exception has been caught and
# turned into a ``None`` return there is nothing left for it to retry.
@retrying.retry(
    retry_on_exception=_is_transient_openai_error,
    wait_exponential_multiplier=1000,
    wait_exponential_max=10000,
    stop_max_attempt_number=5,
)
def _chat_completion(system_prompt, user_prompt):
    """Send one system+user exchange to GPT-4 and return the reply text.

    Raises:
        openai.error.OpenAIError: if the request fails with a non-transient
            error, or still fails after the final retry.
    """
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        max_tokens=4096,
        n=1,
        stop=None,
        temperature=0.1,
    )
    return response.choices[0].message["content"]


def _parse_json_reply(reply):
    """Parse a model reply as JSON, tolerating a surrounding Markdown fence.

    Raises:
        json.JSONDecodeError: if the (unfenced) reply is not valid JSON.
    """
    text = reply.strip()
    if text.startswith("```"):
        # Drop the opening fence line ("```" or "```json") ...
        first_newline = text.find("\n")
        text = text[first_newline + 1:] if first_newline != -1 else ""
        # ... and the closing fence, if present.
        text = text.rstrip()
        if text.endswith("```"):
            text = text[:-3]
    return json.loads(text)


def extract_text_from_docx(docx_path):
    """Return the text of every paragraph in a DOCX, joined by newlines.

    Args:
        docx_path: anything ``docx.Document`` accepts; ``main`` passes the
            uploaded file object.
    """
    doc = Document(docx_path)
    return "\n".join(para.text for para in doc.paragraphs)


def extract_terms_from_contract(contract_text):
    """Ask the model to extract the contract's key terms as JSON.

    Args:
        contract_text: full plain text of the contract.

    Returns:
        The model's raw reply string (expected to be JSON), or ``None`` if the
        API request failed; the failure is shown with ``st.error``.
    """
    prompt = (
        "You are an AI tasked with analyzing a contract and extracting key terms and constraints. The contract contains "
        "various sections and subsections with terms related to budget constraints, types of allowable work, timelines, "
        "penalties, responsibilities, and other conditions for work execution. Your job is to extract these key terms and "
        "structure them in a clear JSON format, reflecting the hierarchy of sections and subsections. "
        "Ensure to capture all important constraints and conditions specified in the contract text. If a section or subsection "
        "contains multiple terms, list them all.\n\n"
        "Contract text:\n"
        f"{contract_text}\n\n"
        "Provide the extracted terms in JSON format."
    )
    try:
        return _chat_completion(
            "You are an AI specialized in extracting structured data from text documents.",
            prompt,
        )
    except openai.error.OpenAIError as e:
        st.error(f"Error extracting terms from contract: {e}")
        return None


def analyze_task_compliance(task_description, cost_estimate, contract_text):
    """Ask the model whether one task complies with the contract.

    Args:
        task_description: description of the task to check.
        cost_estimate: the task's cost estimate (interpolated into the prompt).
        contract_text: contract text placed under the prompt's
            "Contract terms" heading.

    Returns:
        The model's reply parsed from JSON, or ``None`` if the request failed
        or the reply was not valid JSON. Either failure is reported with
        ``st.error`` so a single bad task does not abort the whole run.
    """
    prompt = (
        "You are an AI tasked with analyzing a task description and its associated cost estimate for compliance with contract conditions. "
        "Below are the key terms and constraints extracted from the contract, followed by a task description and its cost estimate. "
        "Your job is to analyze the task description and specify if it violates any conditions from the contract. "
        "If there are violations, list the reasons for each violation.\n\n"
        f"Contract terms:\n{contract_text}\n\n"
        f"Task description:\n{task_description}\n"
        f"Cost estimate:\n{cost_estimate}\n\n"
        "Provide the compliance analysis in a clear JSON format."
    )
    try:
        reply = _chat_completion(
            "You are an AI specialized in analyzing text for compliance with specified conditions.",
            prompt,
        )
    except openai.error.OpenAIError as e:
        st.error(f"Error analyzing task compliance: {e}")
        return None

    try:
        return _parse_json_reply(reply)
    except json.JSONDecodeError as e:
        st.error(f"Could not parse compliance analysis as JSON: {e}")
        return None


def _load_tasks(data_file):
    """Read the uploaded task spreadsheet (XLSX by MIME type, otherwise CSV)."""
    if data_file.type == XLSX_MIME_TYPE:
        return pd.read_excel(data_file)
    return pd.read_csv(data_file)


def _render_results(contract_terms, compliance_results):
    """Show terms and compliance results side by side, each with a download."""
    col1, col2 = st.columns(2)

    with col1:
        st.write("Extracted Contract Terms:")
        st.json(contract_terms)
        # Download button for contract terms
        st.download_button(
            label="Download Contract Terms",
            data=json.dumps(contract_terms, indent=4),
            file_name="contract_terms.json",
            mime="application/json",
        )

    with col2:
        st.write("Compliance Results:")
        st.json(compliance_results)
        # Download button for compliance results
        st.download_button(
            label="Download Compliance Results",
            data=json.dumps(compliance_results, indent=4),
            file_name="compliance_results.json",
            mime="application/json",
        )


def _run_analysis(docx_file, data_file):
    """Extract contract terms, analyze every task row, and render the output."""
    # Extract contract text and terms
    contract_text = extract_text_from_docx(docx_file)
    extracted_terms_json = extract_terms_from_contract(contract_text)
    if extracted_terms_json is None:
        return

    try:
        contract_terms = _parse_json_reply(extracted_terms_json)
    except json.JSONDecodeError as e:
        st.error(f"JSON decoding error: {e}")
        return

    # Read task descriptions and cost estimates from XLSX or CSV
    tasks_df = _load_tasks(data_file)
    missing = [col for col in REQUIRED_TASK_COLUMNS if col not in tasks_df.columns]
    if missing:
        # Fail with a readable message instead of a KeyError mid-loop.
        st.error(f"Task file is missing required column(s): {', '.join(missing)}")
        return

    # Process tasks sequentially; tasks whose analysis failed are skipped.
    compliance_results = []
    for task_description, cost_estimate in zip(
        tasks_df["Task Description"], tasks_df["Amount"]
    ):
        result = analyze_task_compliance(task_description, cost_estimate, contract_text)
        if result is not None:
            compliance_results.append(result)

    _render_results(contract_terms, compliance_results)


def main():
    """Render the page and, on Submit, run the compliance analysis."""
    start = time.time()

    # NOTE(review): unsafe_allow_html is set but the title carries no HTML;
    # the heading markup looks like it was lost -- restore it if so.
    st.markdown("\n\nContract Compliance Analyzer\n\n", unsafe_allow_html=True)

    # File upload buttons one after another
    st.sidebar.file_uploader("Upload Contract Document (DOCX)", type="docx", key="docx_file")
    st.sidebar.file_uploader("Upload Task Descriptions (XLSX or CSV)", type=["xlsx", "csv"], key="data_file")
    submit_button = st.sidebar.button("Submit")

    docx_file = st.session_state.get("docx_file")
    data_file = st.session_state.get("data_file")

    if submit_button:
        if docx_file and data_file:
            # Clear previous information
            # NOTE(review): this wipes every session_state key, including the
            # two uploader keys read just above -- confirm that is intended.
            st.session_state.clear()
            _run_analysis(docx_file, data_file)
        else:
            st.warning("Please upload both a contract document and a task file before submitting.")

    end = time.time()
    print("Total Time: ", end-start)


if __name__ == "__main__":
    main()