Bikas0's picture
Update the code
f4f51e9
raw
history blame
21.2 kB
# import streamlit as st
# import os
# import openai
# import json
# import pandas as pd
# from docx import Document
# from concurrent.futures import ThreadPoolExecutor, as_completed
# from dotenv import load_dotenv
# import time
# # Load the OpenAI API key from environment variables
# load_dotenv()
# api_key = os.getenv("OPENAI_API_KEY")
# openai.api_key = api_key
# # Streamlit app layout
# st.set_page_config(layout="wide")
# # Add custom CSS for center alignment
# st.markdown("""
# <style>
# .centered-title {
# text-align: center;
# font-size: 2.5em;
# margin-top: 0;
# }
# </style>
# """, unsafe_allow_html=True)
# def extract_text_from_docx(docx_path):
# doc = Document(docx_path)
# return "\n".join([para.text for para in doc.paragraphs])
# def extract_terms_from_contract(contract_text):
# prompt = (
# "You are an AI tasked with analyzing a contract and extracting key terms and constraints. The contract contains "
# "various sections and subsections with terms related to budget constraints, types of allowable work, timelines, "
# "penalties, responsibilities, and other conditions for work execution. Your job is to extract these key terms and "
# "structure them in a clear JSON format, reflecting the hierarchy of sections and subsections. "
# "Ensure to capture all important constraints and conditions specified in the contract text. If a section or subsection "
# "contains multiple terms, list them all.\n\n"
# "Contract text:\n"
# f"{contract_text}\n\n"
# "Provide the extracted terms in JSON format."
# )
# retries = 2
# wait_time = 1
# for i in range(retries):
# try:
# response = openai.ChatCompletion.create(
# model="gpt-4",
# messages=[
# {"role": "system", "content": "You are an AI specialized in extracting structured data from text documents."},
# {"role": "user", "content": prompt},
# ],
# max_tokens=4096,
# n=1,
# stop=None,
# temperature=0.1,
# )
# return response.choices[0].message["content"]
# except openai.error.RateLimitError:
# if i < retries - 1:
# st.warning(f"Rate limit exceeded. Retrying in {wait_time} seconds...")
# time.sleep(wait_time)
# wait_time *= 2 # Exponential backoff
# else:
# st.error("Rate limit exceeded. Please try again later.")
# return None
# def analyze_task_compliance(task_description, cost_estimate, contract_terms):
# prompt = (
# "You are an AI tasked with analyzing a task description and its associated cost estimate for compliance with contract conditions. "
# "Below are the key terms and constraints extracted from the contract, followed by a task description and its cost estimate. "
# "Your job is to analyze the task description and specify if it violates any conditions from the contract. "
# "If there are violations, list the reasons for each violation.\n\n"
# f"Contract terms:\n{json.dumps(contract_terms, indent=4)}\n\n"
# f"Task description:\n{task_description}\n"
# f"Cost estimate:\n{cost_estimate}\n\n"
# "Provide the compliance analysis in a clear JSON format."
# )
# retries = 5
# wait_time = 1
# for i in range(retries):
# try:
# response = openai.ChatCompletion.create(
# model="gpt-4",
# messages=[
# {"role": "system", "content": "You are an AI specialized in analyzing text for compliance with specified conditions."},
# {"role": "user", "content": prompt},
# ],
# max_tokens=4096,
# n=1,
# stop=None,
# temperature=0.1,
# stream=True,
# )
# compliance_analysis = ""
# for chunk in response:
# chunk_text = chunk['choices'][0]['delta'].get('content', '')
# compliance_analysis += chunk_text
# st.write(chunk_text)
# st.json(chunk_text)
# return json.loads(compliance_analysis)
# except openai.error.RateLimitError:
# if i < retries - 1:
# st.warning(f"Rate limit exceeded. Retrying in {wait_time} seconds...")
# time.sleep(wait_time)
# wait_time *= 2 # Exponential backoff
# else:
# st.error("Rate limit exceeded. Please try again later.")
# return None
# def main():
# st.markdown("<h1 class='centered-title'>Contract Compliance Analyzer</h1>", unsafe_allow_html=True)
# # File upload buttons one after another
# st.sidebar.file_uploader("Upload Contract Document (DOCX)", type="docx", key="docx_file")
# st.sidebar.file_uploader("Upload Task Descriptions (XLSX or CSV)", type=["xlsx", "csv"], key="data_file")
# submit_button = st.sidebar.button("Submit")
# docx_file = st.session_state.get("docx_file")
# data_file = st.session_state.get("data_file")
# if submit_button and docx_file and data_file:
# # Clear previous information
# st.session_state.clear()
# # Extract contract text and terms
# contract_text = extract_text_from_docx(docx_file)
# extracted_terms_json = extract_terms_from_contract(contract_text)
# if extracted_terms_json is None:
# return
# try:
# contract_terms = json.loads(extracted_terms_json)
# except json.JSONDecodeError as e:
# st.error(f"JSON decoding error: {e}")
# return
# # Read task descriptions and cost estimates from XLSX or CSV
# if data_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
# tasks_df = pd.read_excel(data_file)
# else:
# tasks_df = pd.read_csv(data_file)
# compliance_results = []
# futures = []
# # Use ThreadPoolExecutor to analyze tasks concurrently
# with ThreadPoolExecutor(max_workers=10) as executor: # Adjust max_workers as needed
# for _, row in tasks_df.iterrows():
# task_description = row['Task Description']
# cost_estimate = row['Amount']
# futures.append(executor.submit(analyze_task_compliance, task_description, cost_estimate, contract_terms))
# for future in as_completed(futures):
# try:
# result = future.result()
# if result is not None:
# compliance_results.append(result)
# except Exception as e:
# st.error(f"An error occurred: {e}")
# col1, col2 = st.columns(2)
# with col1:
# st.write("Extracted Contract Terms:")
# st.json(contract_terms)
# # Download button for contract terms
# st.download_button(
# label="Download Contract Terms",
# data=json.dumps(contract_terms, indent=4),
# file_name="contract_terms.json",
# mime="application/json"
# )
# with col2:
# st.write("Compliance Results:")
# st.json(compliance_results)
# # Download button for compliance results
# compliance_results_json = json.dumps(compliance_results, indent=4)
# st.download_button(
# label="Download Compliance Results",
# data=compliance_results_json,
# file_name="compliance_results.json",
# mime="application/json"
# )
# if __name__ == "__main__":
# main()
# import streamlit as st
# import os
# import openai
# import json
# import pandas as pd
# from docx import Document
# from dotenv import load_dotenv
# import time
# # Load the OpenAI API key from environment variables
# load_dotenv()
# api_key = os.getenv("OPENAI_API_KEY")
# openai.api_key = api_key
# # Streamlit app layout
# st.set_page_config(layout="wide")
# # Add custom CSS for center alignment
# st.markdown("""
# <style>
# .centered-title {
# text-align: center;
# font-size: 2.5em;
# margin-top: 0;
# }
# </style>
# """, unsafe_allow_html=True)
# def extract_text_from_docx(docx_path):
# doc = Document(docx_path)
# return "\n".join([para.text for para in doc.paragraphs])
# def extract_terms_from_contract(contract_text):
# prompt = (
# "You are an AI tasked with analyzing a contract and extracting key terms and constraints. The contract contains "
# "various sections and subsections with terms related to budget constraints, types of allowable work, timelines, "
# "penalties, responsibilities, and other conditions for work execution. Your job is to extract these key terms and "
# "structure them in a clear JSON format, reflecting the hierarchy of sections and subsections. "
# "Ensure to capture all important constraints and conditions specified in the contract text. If a section or subsection "
# "contains multiple terms, list them all.\n\n"
# "Contract text:\n"
# f"{contract_text}\n\n"
# "Provide the extracted terms in JSON format."
# )
# try:
# response = openai.ChatCompletion.create(
# model="gpt-4",
# messages=[
# {"role": "system", "content": "You are an AI specialized in extracting structured data from text documents."},
# {"role": "user", "content": prompt},
# ],
# max_tokens=4096,
# n=1,
# stop=None,
# temperature=0.1,
# )
# return response.choices[0].message["content"]
# except openai.error.OpenAIError as e:
# st.error(f"Error extracting terms from contract: {e}")
# return None
# def analyze_task_compliance(task_description, cost_estimate, contract_text):
# prompt = (
# "You are an AI tasked with analyzing a task description and its associated cost estimate for compliance with contract conditions. "
# "Below are the key terms and constraints extracted from the contract, followed by a task description and its cost estimate. "
# "Your job is to analyze the task description and specify if it violates any conditions from the contract. "
# "If there are violations, list the reasons for each violation.\n\n"
# f"Contract terms:\n{contract_text}\n\n"
# f"Task description:\n{task_description}\n"
# f"Cost estimate:\n{cost_estimate}\n\n"
# "Provide the compliance analysis in a clear JSON format."
# )
# try:
# response = openai.ChatCompletion.create(
# model="gpt-4",
# messages=[
# {"role": "system", "content": "You are an AI specialized in analyzing text for compliance with specified conditions."},
# {"role": "user", "content": prompt},
# ],
# max_tokens=4096,
# n=1,
# stop=None,
# temperature=0.1,
# )
# return json.loads(response.choices[0].message["content"])
# except openai.error.OpenAIError as e:
# st.error(f"Error analyzing task compliance: {e}")
# return None
# def main():
# start = time.time()
# st.markdown("<h1 class='centered-title'>Contract Compliance Analyzer</h1>", unsafe_allow_html=True)
# # File upload buttons one after another
# st.sidebar.file_uploader("Upload Contract Document (DOCX)", type="docx", key="docx_file")
# st.sidebar.file_uploader("Upload Task Descriptions (XLSX or CSV)", type=["xlsx", "csv"], key="data_file")
# submit_button = st.sidebar.button("Submit")
# docx_file = st.session_state.get("docx_file")
# data_file = st.session_state.get("data_file")
# if submit_button and docx_file and data_file:
# # Clear previous information
# st.session_state.clear()
# # Extract contract text and terms
# contract_text = extract_text_from_docx(docx_file)
# extracted_terms_json = extract_terms_from_contract(contract_text)
# if extracted_terms_json is None:
# return
# try:
# contract_terms = json.loads(extracted_terms_json)
# except json.JSONDecodeError as e:
# st.error(f"JSON decoding error: {e}")
# return
# # Introducing an 8-second delay before analyzing task compliance
# time.sleep(8)
# # Read task descriptions and cost estimates from XLSX or CSV
# if data_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
# tasks_df = pd.read_excel(data_file)
# else:
# tasks_df = pd.read_csv(data_file)
# compliance_results = []
# # Process tasks sequentially
# for _, row in tasks_df.iterrows():
# task_description = row['Task Description']
# cost_estimate = row['Amount']
# result = analyze_task_compliance(task_description, cost_estimate, contract_text)
# if result is not None:
# compliance_results.append(result)
# col1, col2 = st.columns(2)
# with col1:
# st.write("Extracted Contract Terms:")
# st.json(contract_terms)
# # Download button for contract terms
# st.download_button(
# label="Download Contract Terms",
# data=json.dumps(contract_terms, indent=4),
# file_name="contract_terms.json",
# mime="application/json"
# )
# with col2:
# st.write("Compliance Results:")
# st.json(compliance_results)
# # Download button for compliance results
# compliance_results_json = json.dumps(compliance_results, indent=4)
# st.download_button(
# label="Download Compliance Results",
# data=compliance_results_json,
# file_name="compliance_results.json",
# mime="application/json"
# )
# end = time.time()
# print("Total Time: ", end-start)
# if __name__ == "__main__":
# main()
import streamlit as st
import os
import openai
import json
import pandas as pd
from docx import Document
from dotenv import load_dotenv
import time
import retrying
# Load the OpenAI API key from environment variables.
# load_dotenv() runs first so that OPENAI_API_KEY may also come from a .env file.
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")  # None when the variable is not set
openai.api_key = api_key
# Streamlit app layout: use the wide page layout.
# NOTE(review): Streamlit documents set_page_config as needing to run before
# other st.* commands — keep it ahead of the st.markdown call below.
st.set_page_config(layout="wide")
# Add custom CSS for center alignment: defines the `centered-title` class that
# main() applies to the page heading.
st.markdown("""
<style>
.centered-title {
text-align: center;
font-size: 2.5em;
margin-top: 0;
}
</style>
""", unsafe_allow_html=True)
def extract_text_from_docx(docx_path):
    """Return the text of every paragraph in a DOCX document, one per line.

    Args:
        docx_path: Whatever ``docx.Document`` accepts as its source; ``main``
            passes the uploaded file object.

    Returns:
        The paragraph texts joined with newline characters.
    """
    paragraphs = Document(docx_path).paragraphs
    return "\n".join(paragraph.text for paragraph in paragraphs)
def extract_terms_from_contract(contract_text):
    """Ask the chat model to extract the contract's key terms as JSON text.

    Args:
        contract_text: Plain text of the contract.

    Returns:
        The model's reply as a string (not parsed here; the caller decodes
        it), or ``None`` when the OpenAI call raises ``OpenAIError`` — in that
        case the error is shown in the Streamlit page.
    """
    instructions = (
        "You are an AI tasked with analyzing a contract and extracting key terms and constraints. The contract contains "
        "various sections and subsections with terms related to budget constraints, types of allowable work, timelines, "
        "penalties, responsibilities, and other conditions for work execution. Your job is to extract these key terms and "
        "structure them in a clear JSON format, reflecting the hierarchy of sections and subsections. "
        "Ensure to capture all important constraints and conditions specified in the contract text. If a section or subsection "
        "contains multiple terms, list them all.\n\n"
    )
    user_prompt = (
        f"{instructions}"
        "Contract text:\n"
        f"{contract_text}\n\n"
        "Provide the extracted terms in JSON format."
    )
    system_message = {
        "role": "system",
        "content": "You are an AI specialized in extracting structured data from text documents.",
    }
    user_message = {"role": "user", "content": user_prompt}
    request_options = dict(
        model="gpt-4",
        messages=[system_message, user_message],
        max_tokens=4096,
        n=1,
        stop=None,
        temperature=0.1,
    )

    try:
        completion = openai.ChatCompletion.create(**request_options)
        return completion.choices[0].message["content"]
    except openai.error.OpenAIError as api_error:
        st.error(f"Error extracting terms from contract: {api_error}")
    return None
def _is_retryable_analysis_error(exc):
    """Return True for failures where another attempt may succeed.

    Transient API problems (rate limiting, connectivity, server-side errors)
    and a reply that is not valid JSON are retried; anything else, such as an
    authentication or invalid-request error, is raised immediately.
    """
    transient_errors = (
        openai.error.RateLimitError,
        openai.error.APIConnectionError,
        openai.error.ServiceUnavailableError,
        openai.error.Timeout,
        openai.error.APIError,
        json.JSONDecodeError,
    )
    return isinstance(exc, transient_errors)


# Retry with exponential backoff. The retry wraps the raw request (which lets
# exceptions propagate) rather than the public function: a function that
# swallows its own errors never gives the decorator anything to retry.
@retrying.retry(
    wait_exponential_multiplier=1000,
    wait_exponential_max=10000,
    stop_max_attempt_number=5,
    retry_on_exception=_is_retryable_analysis_error,
)
def _request_compliance_analysis(prompt):
    """Send one compliance prompt to the chat model and return the parsed JSON reply."""
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are an AI specialized in analyzing text for compliance with specified conditions."},
            {"role": "user", "content": prompt},
        ],
        max_tokens=4096,
        n=1,
        stop=None,
        temperature=0.1,
    )
    return json.loads(response.choices[0].message["content"])


def analyze_task_compliance(task_description, cost_estimate, contract_text):
    """Check one task and its cost estimate against the contract.

    Args:
        task_description: Description of the task to evaluate.
        cost_estimate: Cost associated with the task.
        contract_text: Contract content placed in the prompt under
            "Contract terms".

    Returns:
        The model's compliance analysis decoded from JSON, or ``None`` when
        the request still fails after the retries (API error or a reply that
        is not valid JSON). Failures are reported once in the Streamlit page.
    """
    prompt = (
        "You are an AI tasked with analyzing a task description and its associated cost estimate for compliance with contract conditions. "
        "Below are the key terms and constraints extracted from the contract, followed by a task description and its cost estimate. "
        "Your job is to analyze the task description and specify if it violates any conditions from the contract. "
        "If there are violations, list the reasons for each violation.\n\n"
        f"Contract terms:\n{contract_text}\n\n"
        f"Task description:\n{task_description}\n"
        f"Cost estimate:\n{cost_estimate}\n\n"
        "Provide the compliance analysis in a clear JSON format."
    )
    try:
        return _request_compliance_analysis(prompt)
    except openai.error.OpenAIError as e:
        st.error(f"Error analyzing task compliance: {e}")
    except json.JSONDecodeError as e:
        # The model answered, but not with parseable JSON, on every attempt.
        st.error(f"Error analyzing task compliance: response was not valid JSON ({e})")
    return None
def main():
    """Render the analyzer page and, on submit, run the compliance analysis.

    The sidebar collects a DOCX contract and an XLSX/CSV task list. When the
    Submit button is pressed with both files present, the contract terms are
    extracted, every task row is analyzed, and both results are displayed
    side by side with JSON download buttons. The task file must contain the
    columns ``Task Description`` and ``Amount``; otherwise an error is shown
    and nothing is analyzed.
    """
    start = time.time()
    st.markdown("<h1 class='centered-title'>Contract Compliance Analyzer</h1>", unsafe_allow_html=True)
    # File upload buttons one after another
    st.sidebar.file_uploader("Upload Contract Document (DOCX)", type="docx", key="docx_file")
    st.sidebar.file_uploader("Upload Task Descriptions (XLSX or CSV)", type=["xlsx", "csv"], key="data_file")
    submit_button = st.sidebar.button("Submit")
    docx_file = st.session_state.get("docx_file")
    data_file = st.session_state.get("data_file")
    if not (submit_button and docx_file and data_file):
        return

    # Clear previous information (the uploads are already held in the two
    # local variables above, so they remain usable below).
    st.session_state.clear()

    # Extract contract text and terms
    contract_text = extract_text_from_docx(docx_file)
    extracted_terms_json = extract_terms_from_contract(contract_text)
    if extracted_terms_json is None:
        return
    try:
        contract_terms = json.loads(extracted_terms_json)
    except json.JSONDecodeError as e:
        st.error(f"JSON decoding error: {e}")
        return

    # Read task descriptions and cost estimates from XLSX or CSV. The file
    # extension is checked as well as the MIME type because the reported MIME
    # type of an .xlsx upload is not always the spreadsheet one.
    is_excel = (
        data_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
        or data_file.name.lower().endswith(".xlsx")
    )
    tasks_df = pd.read_excel(data_file) if is_excel else pd.read_csv(data_file)

    # Fail with a readable message instead of a KeyError inside the loop.
    missing_columns = [
        column for column in ("Task Description", "Amount") if column not in tasks_df.columns
    ]
    if missing_columns:
        st.error(f"Task file is missing required column(s): {', '.join(missing_columns)}")
        return

    compliance_results = []
    # Process tasks sequentially
    for _, row in tasks_df.iterrows():
        task_description = row['Task Description']
        cost_estimate = row['Amount']
        result = analyze_task_compliance(task_description, cost_estimate, contract_text)
        if result is not None:
            compliance_results.append(result)

    col1, col2 = st.columns(2)
    with col1:
        st.write("Extracted Contract Terms:")
        st.json(contract_terms)
        # Download button for contract terms
        st.download_button(
            label="Download Contract Terms",
            data=json.dumps(contract_terms, indent=4),
            file_name="contract_terms.json",
            mime="application/json"
        )
    with col2:
        st.write("Compliance Results:")
        st.json(compliance_results)
        # Download button for compliance results
        compliance_results_json = json.dumps(compliance_results, indent=4)
        st.download_button(
            label="Download Compliance Results",
            data=compliance_results_json,
            file_name="compliance_results.json",
            mime="application/json"
        )

    # NOTE(review): timing is reported only after a completed analysis;
    # confirm this matches the intended placement of the measurement.
    end = time.time()
    print("Total Time: ", end-start)
# Run the app when this file is executed as a script.
if __name__ == "__main__":
    main()