# import streamlit as st | |
# import os | |
# import openai | |
# import json | |
# import pandas as pd | |
# from docx import Document | |
# from concurrent.futures import ThreadPoolExecutor, as_completed | |
# from dotenv import load_dotenv | |
# import time | |
# # Load the OpenAI API key from environment variables | |
# load_dotenv() | |
# api_key = os.getenv("OPENAI_API_KEY") | |
# openai.api_key = api_key | |
# # Streamlit app layout | |
# st.set_page_config(layout="wide") | |
# # Add custom CSS for center alignment | |
# st.markdown(""" | |
# <style> | |
# .centered-title { | |
# text-align: center; | |
# font-size: 2.5em; | |
# margin-top: 0; | |
# } | |
# </style> | |
# """, unsafe_allow_html=True) | |
# def extract_text_from_docx(docx_path): | |
# doc = Document(docx_path) | |
# return "\n".join([para.text for para in doc.paragraphs]) | |
# def extract_terms_from_contract(contract_text): | |
# prompt = ( | |
# "You are an AI tasked with analyzing a contract and extracting key terms and constraints. The contract contains " | |
# "various sections and subsections with terms related to budget constraints, types of allowable work, timelines, " | |
# "penalties, responsibilities, and other conditions for work execution. Your job is to extract these key terms and " | |
# "structure them in a clear JSON format, reflecting the hierarchy of sections and subsections. " | |
# "Ensure to capture all important constraints and conditions specified in the contract text. If a section or subsection " | |
# "contains multiple terms, list them all.\n\n" | |
# "Contract text:\n" | |
# f"{contract_text}\n\n" | |
# "Provide the extracted terms in JSON format." | |
# ) | |
# retries = 2 | |
# wait_time = 1 | |
# for i in range(retries): | |
# try: | |
# response = openai.ChatCompletion.create( | |
# model="gpt-4", | |
# messages=[ | |
# {"role": "system", "content": "You are an AI specialized in extracting structured data from text documents."}, | |
# {"role": "user", "content": prompt}, | |
# ], | |
# max_tokens=4096, | |
# n=1, | |
# stop=None, | |
# temperature=0.1, | |
# ) | |
# return response.choices[0].message["content"] | |
# except openai.error.RateLimitError: | |
# if i < retries - 1: | |
# st.warning(f"Rate limit exceeded. Retrying in {wait_time} seconds...") | |
# time.sleep(wait_time) | |
# wait_time *= 2 # Exponential backoff | |
# else: | |
# st.error("Rate limit exceeded. Please try again later.") | |
# return None | |
# def analyze_task_compliance(task_description, cost_estimate, contract_terms): | |
# prompt = ( | |
# "You are an AI tasked with analyzing a task description and its associated cost estimate for compliance with contract conditions. " | |
# "Below are the key terms and constraints extracted from the contract, followed by a task description and its cost estimate. " | |
# "Your job is to analyze the task description and specify if it violates any conditions from the contract. " | |
# "If there are violations, list the reasons for each violation.\n\n" | |
# f"Contract terms:\n{json.dumps(contract_terms, indent=4)}\n\n" | |
# f"Task description:\n{task_description}\n" | |
# f"Cost estimate:\n{cost_estimate}\n\n" | |
# "Provide the compliance analysis in a clear JSON format." | |
# ) | |
# retries = 5 | |
# wait_time = 1 | |
# for i in range(retries): | |
# try: | |
# response = openai.ChatCompletion.create( | |
# model="gpt-4", | |
# messages=[ | |
# {"role": "system", "content": "You are an AI specialized in analyzing text for compliance with specified conditions."}, | |
# {"role": "user", "content": prompt}, | |
# ], | |
# max_tokens=4096, | |
# n=1, | |
# stop=None, | |
# temperature=0.1, | |
# stream=True, | |
# ) | |
# compliance_analysis = "" | |
# for chunk in response: | |
# chunk_text = chunk['choices'][0]['delta'].get('content', '') | |
# compliance_analysis += chunk_text | |
# st.write(chunk_text) | |
# st.json(chunk_text) | |
# return json.loads(compliance_analysis) | |
# except openai.error.RateLimitError: | |
# if i < retries - 1: | |
# st.warning(f"Rate limit exceeded. Retrying in {wait_time} seconds...") | |
# time.sleep(wait_time) | |
# wait_time *= 2 # Exponential backoff | |
# else: | |
# st.error("Rate limit exceeded. Please try again later.") | |
# return None | |
# def main(): | |
# st.markdown("<h1 class='centered-title'>Contract Compliance Analyzer</h1>", unsafe_allow_html=True) | |
# # File upload buttons one after another | |
# st.sidebar.file_uploader("Upload Contract Document (DOCX)", type="docx", key="docx_file") | |
# st.sidebar.file_uploader("Upload Task Descriptions (XLSX or CSV)", type=["xlsx", "csv"], key="data_file") | |
# submit_button = st.sidebar.button("Submit") | |
# docx_file = st.session_state.get("docx_file") | |
# data_file = st.session_state.get("data_file") | |
# if submit_button and docx_file and data_file: | |
# # Clear previous information | |
# st.session_state.clear() | |
# # Extract contract text and terms | |
# contract_text = extract_text_from_docx(docx_file) | |
# extracted_terms_json = extract_terms_from_contract(contract_text) | |
# if extracted_terms_json is None: | |
# return | |
# try: | |
# contract_terms = json.loads(extracted_terms_json) | |
# except json.JSONDecodeError as e: | |
# st.error(f"JSON decoding error: {e}") | |
# return | |
# # Read task descriptions and cost estimates from XLSX or CSV | |
# if data_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": | |
# tasks_df = pd.read_excel(data_file) | |
# else: | |
# tasks_df = pd.read_csv(data_file) | |
# compliance_results = [] | |
# futures = [] | |
# # Use ThreadPoolExecutor to analyze tasks concurrently | |
# with ThreadPoolExecutor(max_workers=10) as executor: # Adjust max_workers as needed | |
# for _, row in tasks_df.iterrows(): | |
# task_description = row['Task Description'] | |
# cost_estimate = row['Amount'] | |
# futures.append(executor.submit(analyze_task_compliance, task_description, cost_estimate, contract_terms)) | |
# for future in as_completed(futures): | |
# try: | |
# result = future.result() | |
# if result is not None: | |
# compliance_results.append(result) | |
# except Exception as e: | |
# st.error(f"An error occurred: {e}") | |
# col1, col2 = st.columns(2) | |
# with col1: | |
# st.write("Extracted Contract Terms:") | |
# st.json(contract_terms) | |
# # Download button for contract terms | |
# st.download_button( | |
# label="Download Contract Terms", | |
# data=json.dumps(contract_terms, indent=4), | |
# file_name="contract_terms.json", | |
# mime="application/json" | |
# ) | |
# with col2: | |
# st.write("Compliance Results:") | |
# st.json(compliance_results) | |
# # Download button for compliance results | |
# compliance_results_json = json.dumps(compliance_results, indent=4) | |
# st.download_button( | |
# label="Download Compliance Results", | |
# data=compliance_results_json, | |
# file_name="compliance_results.json", | |
# mime="application/json" | |
# ) | |
# if __name__ == "__main__": | |
# main() | |
# import streamlit as st | |
# import os | |
# import openai | |
# import json | |
# import pandas as pd | |
# from docx import Document | |
# from dotenv import load_dotenv | |
# import time | |
# # Load the OpenAI API key from environment variables | |
# load_dotenv() | |
# api_key = os.getenv("OPENAI_API_KEY") | |
# openai.api_key = api_key | |
# # Streamlit app layout | |
# st.set_page_config(layout="wide") | |
# # Add custom CSS for center alignment | |
# st.markdown(""" | |
# <style> | |
# .centered-title { | |
# text-align: center; | |
# font-size: 2.5em; | |
# margin-top: 0; | |
# } | |
# </style> | |
# """, unsafe_allow_html=True) | |
# def extract_text_from_docx(docx_path): | |
# doc = Document(docx_path) | |
# return "\n".join([para.text for para in doc.paragraphs]) | |
# def extract_terms_from_contract(contract_text): | |
# prompt = ( | |
# "You are an AI tasked with analyzing a contract and extracting key terms and constraints. The contract contains " | |
# "various sections and subsections with terms related to budget constraints, types of allowable work, timelines, " | |
# "penalties, responsibilities, and other conditions for work execution. Your job is to extract these key terms and " | |
# "structure them in a clear JSON format, reflecting the hierarchy of sections and subsections. " | |
# "Ensure to capture all important constraints and conditions specified in the contract text. If a section or subsection " | |
# "contains multiple terms, list them all.\n\n" | |
# "Contract text:\n" | |
# f"{contract_text}\n\n" | |
# "Provide the extracted terms in JSON format." | |
# ) | |
# try: | |
# response = openai.ChatCompletion.create( | |
# model="gpt-4", | |
# messages=[ | |
# {"role": "system", "content": "You are an AI specialized in extracting structured data from text documents."}, | |
# {"role": "user", "content": prompt}, | |
# ], | |
# max_tokens=4096, | |
# n=1, | |
# stop=None, | |
# temperature=0.1, | |
# ) | |
# return response.choices[0].message["content"] | |
# except openai.error.OpenAIError as e: | |
# st.error(f"Error extracting terms from contract: {e}") | |
# return None | |
# def analyze_task_compliance(task_description, cost_estimate, contract_text): | |
# prompt = ( | |
# "You are an AI tasked with analyzing a task description and its associated cost estimate for compliance with contract conditions. " | |
# "Below are the key terms and constraints extracted from the contract, followed by a task description and its cost estimate. " | |
# "Your job is to analyze the task description and specify if it violates any conditions from the contract. " | |
# "If there are violations, list the reasons for each violation.\n\n" | |
# f"Contract terms:\n{contract_text}\n\n" | |
# f"Task description:\n{task_description}\n" | |
# f"Cost estimate:\n{cost_estimate}\n\n" | |
# "Provide the compliance analysis in a clear JSON format." | |
# ) | |
# try: | |
# response = openai.ChatCompletion.create( | |
# model="gpt-4", | |
# messages=[ | |
# {"role": "system", "content": "You are an AI specialized in analyzing text for compliance with specified conditions."}, | |
# {"role": "user", "content": prompt}, | |
# ], | |
# max_tokens=4096, | |
# n=1, | |
# stop=None, | |
# temperature=0.1, | |
# ) | |
# return json.loads(response.choices[0].message["content"]) | |
# except openai.error.OpenAIError as e: | |
# st.error(f"Error analyzing task compliance: {e}") | |
# return None | |
# def main(): | |
# start = time.time() | |
# st.markdown("<h1 class='centered-title'>Contract Compliance Analyzer</h1>", unsafe_allow_html=True) | |
# # File upload buttons one after another | |
# st.sidebar.file_uploader("Upload Contract Document (DOCX)", type="docx", key="docx_file") | |
# st.sidebar.file_uploader("Upload Task Descriptions (XLSX or CSV)", type=["xlsx", "csv"], key="data_file") | |
# submit_button = st.sidebar.button("Submit") | |
# docx_file = st.session_state.get("docx_file") | |
# data_file = st.session_state.get("data_file") | |
# if submit_button and docx_file and data_file: | |
# # Clear previous information | |
# st.session_state.clear() | |
# # Extract contract text and terms | |
# contract_text = extract_text_from_docx(docx_file) | |
# extracted_terms_json = extract_terms_from_contract(contract_text) | |
# if extracted_terms_json is None: | |
# return | |
# try: | |
# contract_terms = json.loads(extracted_terms_json) | |
# except json.JSONDecodeError as e: | |
# st.error(f"JSON decoding error: {e}") | |
# return | |
# # Introducing a 1-second delay before analyzing task compliance | |
# time.sleep(8) | |
# # Read task descriptions and cost estimates from XLSX or CSV | |
# if data_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": | |
# tasks_df = pd.read_excel(data_file) | |
# else: | |
# tasks_df = pd.read_csv(data_file) | |
# compliance_results = [] | |
# # Process tasks sequentially | |
# for _, row in tasks_df.iterrows(): | |
# task_description = row['Task Description'] | |
# cost_estimate = row['Amount'] | |
# result = analyze_task_compliance(task_description, cost_estimate, contract_text) | |
# if result is not None: | |
# compliance_results.append(result) | |
# col1, col2 = st.columns(2) | |
# with col1: | |
# st.write("Extracted Contract Terms:") | |
# st.json(contract_terms) | |
# # Download button for contract terms | |
# st.download_button( | |
# label="Download Contract Terms", | |
# data=json.dumps(contract_terms, indent=4), | |
# file_name="contract_terms.json", | |
# mime="application/json" | |
# ) | |
# with col2: | |
# st.write("Compliance Results:") | |
# st.json(compliance_results) | |
# # Download button for compliance results | |
# compliance_results_json = json.dumps(compliance_results, indent=4) | |
# st.download_button( | |
# label="Download Compliance Results", | |
# data=compliance_results_json, | |
# file_name="compliance_results.json", | |
# mime="application/json" | |
# ) | |
# end = time.time() | |
# print("Total Time: ", end-start) | |
# if __name__ == "__main__": | |
# main() | |
import streamlit as st | |
import os | |
import openai | |
import json | |
import pandas as pd | |
from docx import Document | |
from dotenv import load_dotenv | |
import time | |
import retrying | |
# Read the OpenAI API key from the environment (after load_dotenv() has run)
# and hand it to the openai module.
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
openai.api_key = api_key

# Use the wide page layout for the two-column results view.
st.set_page_config(layout="wide")

# Inject the CSS class used by the page title so it renders centered.
st.markdown(
    """
<style>
.centered-title {
text-align: center;
font-size: 2.5em;
margin-top: 0;
}
</style>
""",
    unsafe_allow_html=True,
)
def extract_text_from_docx(docx_path):
    """Return the paragraph text of a DOCX document as a single string.

    Args:
        docx_path: Value handed straight to ``Document``; ``main`` passes the
            uploaded file object here.

    Returns:
        str: The text of each paragraph in document order, separated by
        newline characters. Only ``doc.paragraphs`` is read.
    """
    document = Document(docx_path)
    paragraph_texts = (paragraph.text for paragraph in document.paragraphs)
    return "\n".join(paragraph_texts)
def extract_terms_from_contract(contract_text):
    """Ask GPT-4 to extract the contract's key terms as JSON text.

    Args:
        contract_text: Plain text of the contract to analyze.

    Returns:
        The raw message content returned by the model (expected, but not
        verified here, to be JSON), or ``None`` if the OpenAI call raised an
        ``openai.error.OpenAIError`` (the error is shown via ``st.error``).
    """
    task_instructions = (
        "You are an AI tasked with analyzing a contract and extracting key terms and constraints. The contract contains "
        "various sections and subsections with terms related to budget constraints, types of allowable work, timelines, "
        "penalties, responsibilities, and other conditions for work execution. Your job is to extract these key terms and "
        "structure them in a clear JSON format, reflecting the hierarchy of sections and subsections. "
        "Ensure to capture all important constraints and conditions specified in the contract text. If a section or subsection "
        "contains multiple terms, list them all.\n\n"
    )
    prompt = (
        f"{task_instructions}"
        "Contract text:\n"
        f"{contract_text}\n\n"
        "Provide the extracted terms in JSON format."
    )

    system_message = {
        "role": "system",
        "content": "You are an AI specialized in extracting structured data from text documents.",
    }
    user_message = {"role": "user", "content": prompt}

    try:
        completion = openai.ChatCompletion.create(
            model="gpt-4",
            messages=[system_message, user_message],
            max_tokens=4096,
            n=1,
            stop=None,
            temperature=0.1,
        )
        # Only the first (and, with n=1, only) choice is used.
        return completion.choices[0].message["content"]
    except openai.error.OpenAIError as err:
        st.error(f"Error extracting terms from contract: {err}")
        return None
# Rate-limit errors are retried inside the function with exponential backoff.
def analyze_task_compliance(task_description, cost_estimate, contract_text):
    """Ask GPT-4 whether a task and its cost estimate comply with the contract.

    Args:
        task_description: Free-text description of the task.
        cost_estimate: Cost estimate associated with the task.
        contract_text: Contract text (or extracted terms) to check against;
            it is inserted into the prompt under "Contract terms".

    Returns:
        The model's reply parsed from JSON, or ``None`` when the request
        failed (after retries for rate limits) or the reply was not valid
        JSON. Every failure is reported through ``st.error``; nothing is
        raised to the caller for these cases.
    """
    prompt = (
        "You are an AI tasked with analyzing a task description and its associated cost estimate for compliance with contract conditions. "
        "Below are the key terms and constraints extracted from the contract, followed by a task description and its cost estimate. "
        "Your job is to analyze the task description and specify if it violates any conditions from the contract. "
        "If there are violations, list the reasons for each violation.\n\n"
        f"Contract terms:\n{contract_text}\n\n"
        f"Task description:\n{task_description}\n"
        f"Cost estimate:\n{cost_estimate}\n\n"
        "Provide the compliance analysis in a clear JSON format."
    )
    messages = [
        {"role": "system", "content": "You are an AI specialized in analyzing text for compliance with specified conditions."},
        {"role": "user", "content": prompt},
    ]

    max_attempts = 5
    wait_time = 1  # seconds; doubled after every rate-limited attempt
    for attempt in range(1, max_attempts + 1):
        try:
            response = openai.ChatCompletion.create(
                model="gpt-4",
                messages=messages,
                max_tokens=4096,
                n=1,
                stop=None,
                temperature=0.1,
            )
        except openai.error.RateLimitError as e:
            # Must precede the OpenAIError handler: RateLimitError is a subclass.
            if attempt == max_attempts:
                st.error(f"Error analyzing task compliance: {e}")
                return None
            st.warning(f"Rate limit exceeded. Retrying in {wait_time} seconds...")
            time.sleep(wait_time)
            wait_time *= 2
            continue
        except openai.error.OpenAIError as e:
            st.error(f"Error analyzing task compliance: {e}")
            return None

        content = response.choices[0].message["content"]
        try:
            return json.loads(content)
        except json.JSONDecodeError as e:
            # The model is only asked for JSON, not forced to produce it; a
            # malformed reply should skip this task, not crash the whole run.
            st.error(f"Error analyzing task compliance: response was not valid JSON ({e})")
            return None
    return None
def main():
    """Render the Contract Compliance Analyzer page and run an analysis on submit.

    The sidebar offers a DOCX contract uploader, an XLSX/CSV task uploader and
    a Submit button. When all three are provided, the contract text is
    extracted, its key terms are obtained with ``extract_terms_from_contract``
    and every task row is checked with ``analyze_task_compliance``. The terms
    and the compliance results are shown side by side, each with a JSON
    download button.

    The task file must contain the columns ``Task Description`` and
    ``Amount``; if either is missing an error is displayed and the run stops.
    The elapsed time is printed to stdout only when an analysis completed.
    """
    start = time.time()
    st.markdown("<h1 class='centered-title'>Contract Compliance Analyzer</h1>", unsafe_allow_html=True)

    # File upload buttons one after another
    st.sidebar.file_uploader("Upload Contract Document (DOCX)", type="docx", key="docx_file")
    st.sidebar.file_uploader("Upload Task Descriptions (XLSX or CSV)", type=["xlsx", "csv"], key="data_file")
    submit_button = st.sidebar.button("Submit")

    docx_file = st.session_state.get("docx_file")
    data_file = st.session_state.get("data_file")
    if not (submit_button and docx_file and data_file):
        return

    # Clear previous information
    st.session_state.clear()

    # Extract contract text and terms
    contract_text = extract_text_from_docx(docx_file)
    extracted_terms_json = extract_terms_from_contract(contract_text)
    if extracted_terms_json is None:
        return
    try:
        contract_terms = json.loads(extracted_terms_json)
    except json.JSONDecodeError as e:
        st.error(f"JSON decoding error: {e}")
        return

    # Read task descriptions and cost estimates from XLSX or CSV. The MIME
    # type is reported by the browser, so the file name is accepted as well.
    xlsx_mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    file_name = getattr(data_file, "name", "") or ""
    if data_file.type == xlsx_mime or file_name.lower().endswith(".xlsx"):
        tasks_df = pd.read_excel(data_file)
    else:
        tasks_df = pd.read_csv(data_file)

    # Fail with a readable message instead of a KeyError inside the loop.
    required_columns = ("Task Description", "Amount")
    missing_columns = [name for name in required_columns if name not in tasks_df.columns]
    if missing_columns:
        st.error(f"Task file is missing required column(s): {', '.join(missing_columns)}")
        return

    # Process tasks sequentially; tasks whose analysis failed are skipped.
    compliance_results = []
    for _, row in tasks_df.iterrows():
        task_description = row['Task Description']
        cost_estimate = row['Amount']
        result = analyze_task_compliance(task_description, cost_estimate, contract_text)
        if result is not None:
            compliance_results.append(result)

    col1, col2 = st.columns(2)
    with col1:
        st.write("Extracted Contract Terms:")
        st.json(contract_terms)
        # Download button for contract terms
        st.download_button(
            label="Download Contract Terms",
            data=json.dumps(contract_terms, indent=4),
            file_name="contract_terms.json",
            mime="application/json"
        )
    with col2:
        st.write("Compliance Results:")
        st.json(compliance_results)
        # Download button for compliance results
        compliance_results_json = json.dumps(compliance_results, indent=4)
        st.download_button(
            label="Download Compliance Results",
            data=compliance_results_json,
            file_name="compliance_results.json",
            mime="application/json"
        )

    end = time.time()
    print("Total Time: ", end-start)
# Build the page only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()