Spaces:

Bikas0
/

Contract-Conditions-Extraction-and-Verification

Sleeping

App Files Files Community

Contract-Conditions-Extraction-and-Verification / app.py

Bikas0

Update the code

f4f51e9 over 1 year ago

raw

history blame

21.2 kB

	# import streamlit as st
	# import os
	# import openai
	# import json
	# import pandas as pd
	# from docx import Document
	# from concurrent.futures import ThreadPoolExecutor, as_completed
	# from dotenv import load_dotenv
	# import time

	# # Load the OpenAI API key from environment variables
	# load_dotenv()
	# api_key = os.getenv("OPENAI_API_KEY")
	# openai.api_key = api_key

	# # Streamlit app layout
	# st.set_page_config(layout="wide")

	# # Add custom CSS for center alignment
	# st.markdown("""
	# <style>
	# .centered-title {
	# text-align: center;
	# font-size: 2.5em;
	# margin-top: 0;
	# }
	# </style>
	# """, unsafe_allow_html=True)

	# def extract_text_from_docx(docx_path):
	# doc = Document(docx_path)
	# return "\n".join([para.text for para in doc.paragraphs])

	# def extract_terms_from_contract(contract_text):
	# prompt = (
	# "You are an AI tasked with analyzing a contract and extracting key terms and constraints. The contract contains "
	# "various sections and subsections with terms related to budget constraints, types of allowable work, timelines, "
	# "penalties, responsibilities, and other conditions for work execution. Your job is to extract these key terms and "
	# "structure them in a clear JSON format, reflecting the hierarchy of sections and subsections. "
	# "Ensure to capture all important constraints and conditions specified in the contract text. If a section or subsection "
	# "contains multiple terms, list them all.\n\n"
	# "Contract text:\n"
	# f"{contract_text}\n\n"
	# "Provide the extracted terms in JSON format."
	# )

	# retries = 2
	# wait_time = 1
	# for i in range(retries):
	# try:
	# response = openai.ChatCompletion.create(
	# model="gpt-4",
	# messages=[
	# {"role": "system", "content": "You are an AI specialized in extracting structured data from text documents."},
	# {"role": "user", "content": prompt},
	# ],
	# max_tokens=4096,
	# n=1,
	# stop=None,
	# temperature=0.1,
	# )
	# return response.choices[0].message["content"]
	# except openai.error.RateLimitError:
	# if i < retries - 1:
	# st.warning(f"Rate limit exceeded. Retrying in {wait_time} seconds...")
	# time.sleep(wait_time)
	# wait_time *= 2 # Exponential backoff
	# else:
	# st.error("Rate limit exceeded. Please try again later.")
	# return None

	# def analyze_task_compliance(task_description, cost_estimate, contract_terms):
	# prompt = (
	# "You are an AI tasked with analyzing a task description and its associated cost estimate for compliance with contract conditions. "
	# "Below are the key terms and constraints extracted from the contract, followed by a task description and its cost estimate. "
	# "Your job is to analyze the task description and specify if it violates any conditions from the contract. "
	# "If there are violations, list the reasons for each violation.\n\n"
	# f"Contract terms:\n{json.dumps(contract_terms, indent=4)}\n\n"
	# f"Task description:\n{task_description}\n"
	# f"Cost estimate:\n{cost_estimate}\n\n"
	# "Provide the compliance analysis in a clear JSON format."
	# )

	# retries = 5
	# wait_time = 1
	# for i in range(retries):
	# try:
	# response = openai.ChatCompletion.create(
	# model="gpt-4",
	# messages=[
	# {"role": "system", "content": "You are an AI specialized in analyzing text for compliance with specified conditions."},
	# {"role": "user", "content": prompt},
	# ],
	# max_tokens=4096,
	# n=1,
	# stop=None,
	# temperature=0.1,
	# stream=True,
	# )

	# compliance_analysis = ""
	# for chunk in response:
	# chunk_text = chunk['choices'][0]['delta'].get('content', '')
	# compliance_analysis += chunk_text
	# st.write(chunk_text)
	# st.json(chunk_text)

	# return json.loads(compliance_analysis)
	# except openai.error.RateLimitError:
	# if i < retries - 1:
	# st.warning(f"Rate limit exceeded. Retrying in {wait_time} seconds...")
	# time.sleep(wait_time)
	# wait_time *= 2 # Exponential backoff
	# else:
	# st.error("Rate limit exceeded. Please try again later.")
	# return None

	# def main():
	# st.markdown("<h1 class='centered-title'>Contract Compliance Analyzer</h1>", unsafe_allow_html=True)

	# # File upload buttons one after another
	# st.sidebar.file_uploader("Upload Contract Document (DOCX)", type="docx", key="docx_file")
	# st.sidebar.file_uploader("Upload Task Descriptions (XLSX or CSV)", type=["xlsx", "csv"], key="data_file")
	# submit_button = st.sidebar.button("Submit")

	# docx_file = st.session_state.get("docx_file")
	# data_file = st.session_state.get("data_file")

	# if submit_button and docx_file and data_file:
	# # Clear previous information
	# st.session_state.clear()

	# # Extract contract text and terms
	# contract_text = extract_text_from_docx(docx_file)
	# extracted_terms_json = extract_terms_from_contract(contract_text)

	# if extracted_terms_json is None:
	# return

	# try:
	# contract_terms = json.loads(extracted_terms_json)
	# except json.JSONDecodeError as e:
	# st.error(f"JSON decoding error: {e}")
	# return

	# # Read task descriptions and cost estimates from XLSX or CSV
	# if data_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
	# tasks_df = pd.read_excel(data_file)
	# else:
	# tasks_df = pd.read_csv(data_file)

	# compliance_results = []
	# futures = []

	# # Use ThreadPoolExecutor to analyze tasks concurrently
	# with ThreadPoolExecutor(max_workers=10) as executor: # Adjust max_workers as needed
	# for _, row in tasks_df.iterrows():
	# task_description = row['Task Description']
	# cost_estimate = row['Amount']
	# futures.append(executor.submit(analyze_task_compliance, task_description, cost_estimate, contract_terms))

	# for future in as_completed(futures):
	# try:
	# result = future.result()
	# if result is not None:
	# compliance_results.append(result)
	# except Exception as e:
	# st.error(f"An error occurred: {e}")

	# col1, col2 = st.columns(2)

	# with col1:
	# st.write("Extracted Contract Terms:")
	# st.json(contract_terms)

	# # Download button for contract terms
	# st.download_button(
	# label="Download Contract Terms",
	# data=json.dumps(contract_terms, indent=4),
	# file_name="contract_terms.json",
	# mime="application/json"
	# )

	# with col2:
	# st.write("Compliance Results:")
	# st.json(compliance_results)

	# # Download button for compliance results
	# compliance_results_json = json.dumps(compliance_results, indent=4)
	# st.download_button(
	# label="Download Compliance Results",
	# data=compliance_results_json,
	# file_name="compliance_results.json",
	# mime="application/json"
	# )

	# if __name__ == "__main__":
	# main()


	# import streamlit as st
	# import os
	# import openai
	# import json
	# import pandas as pd
	# from docx import Document
	# from dotenv import load_dotenv
	# import time


	# # Load the OpenAI API key from environment variables
	# load_dotenv()
	# api_key = os.getenv("OPENAI_API_KEY")
	# openai.api_key = api_key

	# # Streamlit app layout
	# st.set_page_config(layout="wide")

	# # Add custom CSS for center alignment
	# st.markdown("""
	# <style>
	# .centered-title {
	# text-align: center;
	# font-size: 2.5em;
	# margin-top: 0;
	# }
	# </style>
	# """, unsafe_allow_html=True)

	# def extract_text_from_docx(docx_path):
	# doc = Document(docx_path)
	# return "\n".join([para.text for para in doc.paragraphs])

	# def extract_terms_from_contract(contract_text):
	# prompt = (
	# "You are an AI tasked with analyzing a contract and extracting key terms and constraints. The contract contains "
	# "various sections and subsections with terms related to budget constraints, types of allowable work, timelines, "
	# "penalties, responsibilities, and other conditions for work execution. Your job is to extract these key terms and "
	# "structure them in a clear JSON format, reflecting the hierarchy of sections and subsections. "
	# "Ensure to capture all important constraints and conditions specified in the contract text. If a section or subsection "
	# "contains multiple terms, list them all.\n\n"
	# "Contract text:\n"
	# f"{contract_text}\n\n"
	# "Provide the extracted terms in JSON format."
	# )

	# try:
	# response = openai.ChatCompletion.create(
	# model="gpt-4",
	# messages=[
	# {"role": "system", "content": "You are an AI specialized in extracting structured data from text documents."},
	# {"role": "user", "content": prompt},
	# ],
	# max_tokens=4096,
	# n=1,
	# stop=None,
	# temperature=0.1,
	# )
	# return response.choices[0].message["content"]
	# except openai.error.OpenAIError as e:
	# st.error(f"Error extracting terms from contract: {e}")
	# return None

	# def analyze_task_compliance(task_description, cost_estimate, contract_text):
	# prompt = (
	# "You are an AI tasked with analyzing a task description and its associated cost estimate for compliance with contract conditions. "
	# "Below are the key terms and constraints extracted from the contract, followed by a task description and its cost estimate. "
	# "Your job is to analyze the task description and specify if it violates any conditions from the contract. "
	# "If there are violations, list the reasons for each violation.\n\n"
	# f"Contract terms:\n{contract_text}\n\n"
	# f"Task description:\n{task_description}\n"
	# f"Cost estimate:\n{cost_estimate}\n\n"
	# "Provide the compliance analysis in a clear JSON format."
	# )

	# try:
	# response = openai.ChatCompletion.create(
	# model="gpt-4",
	# messages=[
	# {"role": "system", "content": "You are an AI specialized in analyzing text for compliance with specified conditions."},
	# {"role": "user", "content": prompt},
	# ],
	# max_tokens=4096,
	# n=1,
	# stop=None,
	# temperature=0.1,
	# )

	# return json.loads(response.choices[0].message["content"])
	# except openai.error.OpenAIError as e:
	# st.error(f"Error analyzing task compliance: {e}")
	# return None

	# def main():
	# start = time.time()
	# st.markdown("<h1 class='centered-title'>Contract Compliance Analyzer</h1>", unsafe_allow_html=True)

	# # File upload buttons one after another
	# st.sidebar.file_uploader("Upload Contract Document (DOCX)", type="docx", key="docx_file")
	# st.sidebar.file_uploader("Upload Task Descriptions (XLSX or CSV)", type=["xlsx", "csv"], key="data_file")
	# submit_button = st.sidebar.button("Submit")

	# docx_file = st.session_state.get("docx_file")
	# data_file = st.session_state.get("data_file")

	# if submit_button and docx_file and data_file:
	# # Clear previous information
	# st.session_state.clear()

	# # Extract contract text and terms
	# contract_text = extract_text_from_docx(docx_file)
	# extracted_terms_json = extract_terms_from_contract(contract_text)

	# if extracted_terms_json is None:
	# return

	# try:
	# contract_terms = json.loads(extracted_terms_json)
	# except json.JSONDecodeError as e:
	# st.error(f"JSON decoding error: {e}")
	# return
	# # Introducing an 8-second delay before analyzing task compliance
	# time.sleep(8)
	# # Read task descriptions and cost estimates from XLSX or CSV
	# if data_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
	# tasks_df = pd.read_excel(data_file)
	# else:
	# tasks_df = pd.read_csv(data_file)

	# compliance_results = []

	# # Process tasks sequentially
	# for _, row in tasks_df.iterrows():
	# task_description = row['Task Description']
	# cost_estimate = row['Amount']
	# result = analyze_task_compliance(task_description, cost_estimate, contract_text)

	# if result is not None:
	# compliance_results.append(result)

	# col1, col2 = st.columns(2)

	# with col1:
	# st.write("Extracted Contract Terms:")
	# st.json(contract_terms)

	# # Download button for contract terms
	# st.download_button(
	# label="Download Contract Terms",
	# data=json.dumps(contract_terms, indent=4),
	# file_name="contract_terms.json",
	# mime="application/json"
	# )

	# with col2:
	# st.write("Compliance Results:")
	# st.json(compliance_results)

	# # Download button for compliance results
	# compliance_results_json = json.dumps(compliance_results, indent=4)
	# st.download_button(
	# label="Download Compliance Results",
	# data=compliance_results_json,
	# file_name="compliance_results.json",
	# mime="application/json"
	# )
	# end = time.time()
	# print("Total Time: ", end-start)

	# if __name__ == "__main__":
	# main()


	import streamlit as st
	import os
	import openai
	import json
	import pandas as pd
	from docx import Document
	from dotenv import load_dotenv
	import time
	import retrying

	# Load the OpenAI API key from environment variables.
	# load_dotenv() runs first so that a local .env file can supply OPENAI_API_KEY.
	# NOTE(review): api_key is None when the variable is unset; nothing here checks for that.
	load_dotenv()
	api_key = os.getenv("OPENAI_API_KEY")
	openai.api_key = api_key

	# Streamlit app layout: request the wide page layout.
	# NOTE(review): Streamlit presumably expects this call before any other st.* command,
	# so keep it ahead of the st.markdown call below — confirm against the Streamlit docs.
	st.set_page_config(layout="wide")

	# Add custom CSS for center alignment: defines the .centered-title class
	# that main() puts on the page heading.
	st.markdown("""
	<style>
	.centered-title {
	text-align: center;
	font-size: 2.5em;
	margin-top: 0;
	}
	</style>
	""", unsafe_allow_html=True)

	def extract_text_from_docx(docx_path):
	    """Return the text of every paragraph in a DOCX document, one per line.

	    Args:
	        docx_path: Passed straight to ``Document``; ``main`` hands in the
	            uploaded file object.

	    Returns:
	        The ``text`` of each paragraph, joined with newline characters.
	    """
	    paragraphs = Document(docx_path).paragraphs
	    return "\n".join(paragraph.text for paragraph in paragraphs)

	def extract_terms_from_contract(contract_text):
	    """Ask the chat model to extract the contract's key terms as JSON text.

	    Args:
	        contract_text: Plain text of the contract, embedded verbatim in the prompt.

	    Returns:
	        The content of the first completion choice (expected, but not
	        verified here, to be JSON), or ``None`` when the OpenAI call raises
	        ``openai.error.OpenAIError``; in that case the error is shown with
	        ``st.error``.
	    """
	    # Static instructions; the contract text is appended after them.
	    instructions = (
	        "You are an AI tasked with analyzing a contract and extracting key terms and constraints. The contract contains "
	        "various sections and subsections with terms related to budget constraints, types of allowable work, timelines, "
	        "penalties, responsibilities, and other conditions for work execution. Your job is to extract these key terms and "
	        "structure them in a clear JSON format, reflecting the hierarchy of sections and subsections. "
	        "Ensure to capture all important constraints and conditions specified in the contract text. If a section or subsection "
	        "contains multiple terms, list them all.\n\n"
	    )
	    prompt = "".join([
	        instructions,
	        "Contract text:\n",
	        f"{contract_text}\n\n",
	        "Provide the extracted terms in JSON format.",
	    ])

	    chat_messages = [
	        {"role": "system", "content": "You are an AI specialized in extracting structured data from text documents."},
	        {"role": "user", "content": prompt},
	    ]

	    try:
	        completion = openai.ChatCompletion.create(
	            model="gpt-4",
	            messages=chat_messages,
	            max_tokens=4096,
	            n=1,
	            stop=None,
	            temperature=0.1,
	        )
	    except openai.error.OpenAIError as e:
	        st.error(f"Error extracting terms from contract: {e}")
	        return None

	    return completion.choices[0].message["content"]

	def _is_transient_openai_error(exc):
	    """Return True for OpenAI failures that are worth retrying."""
	    return isinstance(
	        exc,
	        (
	            openai.error.RateLimitError,
	            openai.error.APIConnectionError,
	            openai.error.ServiceUnavailableError,
	        ),
	    )


	# Retry with exponential backoff, but only for transient API failures.
	# The decorator sits on the raw API call so those exceptions actually reach
	# it; any other exception is raised to the caller on the first attempt.
	@retrying.retry(
	    retry_on_exception=_is_transient_openai_error,
	    wait_exponential_multiplier=1000,
	    wait_exponential_max=10000,
	    stop_max_attempt_number=5,
	)
	def _request_compliance_analysis(prompt):
	    """Send the compliance prompt to the chat model and return the reply text."""
	    response = openai.ChatCompletion.create(
	        model="gpt-4",
	        messages=[
	            {"role": "system", "content": "You are an AI specialized in analyzing text for compliance with specified conditions."},
	            {"role": "user", "content": prompt},
	        ],
	        max_tokens=4096,
	        n=1,
	        stop=None,
	        temperature=0.1,
	    )
	    return response.choices[0].message["content"]


	def analyze_task_compliance(task_description, cost_estimate, contract_text):
	    """Check one task and its cost estimate against the contract.

	    Args:
	        task_description: Description of the task, embedded in the prompt.
	        cost_estimate: Cost estimate for the task, embedded in the prompt.
	        contract_text: Contract conditions the task is judged against,
	            embedded in the prompt under "Contract terms".

	    Returns:
	        The model's reply parsed with ``json.loads``, or ``None`` when the
	        OpenAI call fails (after retries for transient errors) or the reply
	        is not valid JSON. Both failure cases are reported with ``st.error``.
	    """
	    prompt = (
	        "You are an AI tasked with analyzing a task description and its associated cost estimate for compliance with contract conditions. "
	        "Below are the key terms and constraints extracted from the contract, followed by a task description and its cost estimate. "
	        "Your job is to analyze the task description and specify if it violates any conditions from the contract. "
	        "If there are violations, list the reasons for each violation.\n\n"
	        f"Contract terms:\n{contract_text}\n\n"
	        f"Task description:\n{task_description}\n"
	        f"Cost estimate:\n{cost_estimate}\n\n"
	        "Provide the compliance analysis in a clear JSON format."
	    )

	    try:
	        raw_analysis = _request_compliance_analysis(prompt)
	    except openai.error.OpenAIError as e:
	        st.error(f"Error analyzing task compliance: {e}")
	        return None

	    # A malformed reply is reported once instead of re-sending the same
	    # request and finally raising into the caller.
	    try:
	        return json.loads(raw_analysis)
	    except (TypeError, json.JSONDecodeError) as e:
	        st.error(f"Error analyzing task compliance: model reply was not valid JSON ({e})")
	        return None

	def main():
	    """Render the Streamlit page and run the compliance workflow on submit.

	    The sidebar collects a DOCX contract and an XLSX/CSV task sheet. When
	    "Submit" is pressed with both files present, the contract text is
	    extracted, its terms are requested from the model, and every row of the
	    task sheet is analyzed in turn. Extracted terms and compliance results
	    are shown side by side, each with a JSON download button.

	    The task sheet must contain the columns 'Task Description' and 'Amount';
	    otherwise an error is shown and nothing is analyzed. A failure on one
	    task is reported and the remaining tasks are still processed.
	    """
	    start = time.time()
	    st.markdown("<h1 class='centered-title'>Contract Compliance Analyzer</h1>", unsafe_allow_html=True)

	    # File upload buttons one after another
	    st.sidebar.file_uploader("Upload Contract Document (DOCX)", type="docx", key="docx_file")
	    st.sidebar.file_uploader("Upload Task Descriptions (XLSX or CSV)", type=["xlsx", "csv"], key="data_file")
	    submit_button = st.sidebar.button("Submit")

	    # The uploaders store their value in session state under the keys above.
	    docx_file = st.session_state.get("docx_file")
	    data_file = st.session_state.get("data_file")

	    # Nothing to do until the user submits with both files present.
	    if not (submit_button and docx_file and data_file):
	        return

	    # Clear previous information
	    st.session_state.clear()

	    # Extract contract text and terms
	    contract_text = extract_text_from_docx(docx_file)
	    extracted_terms_json = extract_terms_from_contract(contract_text)
	    if extracted_terms_json is None:
	        return

	    try:
	        contract_terms = json.loads(extracted_terms_json)
	    except json.JSONDecodeError as e:
	        st.error(f"JSON decoding error: {e}")
	        return

	    # Read task descriptions and cost estimates from XLSX or CSV
	    if data_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
	        tasks_df = pd.read_excel(data_file)
	    else:
	        tasks_df = pd.read_csv(data_file)

	    # Fail with a readable message instead of a KeyError inside the loop.
	    required_columns = ("Task Description", "Amount")
	    missing_columns = [name for name in required_columns if name not in tasks_df.columns]
	    if missing_columns:
	        st.error(f"Task file is missing required column(s): {', '.join(missing_columns)}")
	        return

	    compliance_results = []

	    # Process tasks sequentially
	    for _, row in tasks_df.iterrows():
	        task_description = row['Task Description']
	        cost_estimate = row['Amount']
	        try:
	            result = analyze_task_compliance(task_description, cost_estimate, contract_text)
	        except Exception as e:
	            # UI boundary: one failing task must not discard the results
	            # already collected for the other tasks.
	            st.error(f"An error occurred: {e}")
	            continue

	        if result is not None:
	            compliance_results.append(result)

	    col1, col2 = st.columns(2)

	    with col1:
	        st.write("Extracted Contract Terms:")
	        st.json(contract_terms)

	        # Download button for contract terms
	        st.download_button(
	            label="Download Contract Terms",
	            data=json.dumps(contract_terms, indent=4),
	            file_name="contract_terms.json",
	            mime="application/json"
	        )

	    with col2:
	        st.write("Compliance Results:")
	        st.json(compliance_results)

	        # Download button for compliance results
	        compliance_results_json = json.dumps(compliance_results, indent=4)
	        st.download_button(
	            label="Download Compliance Results",
	            data=compliance_results_json,
	            file_name="compliance_results.json",
	            mime="application/json"
	        )

	    # Wall-clock duration of a completed submission, written to stdout.
	    end = time.time()
	    print("Total Time: ", end-start)


	if __name__ == "__main__":
	    main()