Spaces:

Bikas0
/

Contract-Conditions-Extraction-and-Verification

Sleeping

App Files Files Community

Contract-Conditions-Extraction-and-Verification / app.py

Bikas0

contract verification

1705208 12 months ago

raw

history blame

7.73 kB

	import streamlit as st
	import os
	import openai
	import json
	import pandas as pd
	from docx import Document
	from concurrent.futures import ThreadPoolExecutor, as_completed
	from dotenv import load_dotenv
	import time

	# Load variables from a local .env file (if one exists) into the process
	# environment, then hand the OpenAI key to the openai client library.
	# os.getenv returns None when OPENAI_API_KEY is not set, in which case
	# openai.api_key is None as well.
	load_dotenv()
	api_key = os.getenv("OPENAI_API_KEY")
	openai.api_key = api_key

	# Streamlit app layout: use the wide page layout.
	st.set_page_config(layout="wide")

	# Inject the CSS class "centered-title", which main() applies to the page
	# heading. unsafe_allow_html=True is needed so the <style> tag is emitted
	# as HTML instead of being shown as escaped text.
	st.markdown("""
	<style>
	.centered-title {
	text-align: center;
	font-size: 2.5em;
	margin-top: 0;
	}
	</style>
	""", unsafe_allow_html=True)

	def extract_text_from_docx(docx_path):
	    """Return the paragraph text of a DOCX document as one string.

	    Args:
	        docx_path: Whatever ``docx.Document`` accepts; it is passed through
	            unchanged.

	    Returns:
	        The ``text`` of each entry in ``Document(...).paragraphs``, joined
	        with newline characters. Only ``paragraphs`` is read.
	    """
	    document = Document(docx_path)
	    paragraph_texts = (paragraph.text for paragraph in document.paragraphs)
	    return "\n".join(paragraph_texts)

	def extract_terms_from_contract(contract_text):
	    """Ask the chat model to extract the contract's key terms as JSON text.

	    Args:
	        contract_text: Full text of the contract; embedded verbatim in the
	            prompt.

	    Returns:
	        The message content of the first choice in the model response (a
	        string that is expected, but not verified here, to be JSON), or
	        ``None`` if the request was rate-limited on every attempt.
	    """
	    user_prompt = (
	        "You are an AI tasked with analyzing a contract and extracting key terms and constraints. The contract contains "
	        "various sections and subsections with terms related to budget constraints, types of allowable work, timelines, "
	        "penalties, responsibilities, and other conditions for work execution. Your job is to extract these key terms and "
	        "structure them in a clear JSON format, reflecting the hierarchy of sections and subsections. "
	        "Ensure to capture all important constraints and conditions specified in the contract text. If a section or subsection "
	        "contains multiple terms, list them all.\n\n"
	        "Contract text:\n"
	        f"{contract_text}\n\n"
	        "Provide the extracted terms in JSON format."
	    )
	    chat_messages = [
	        {"role": "system", "content": "You are an AI specialized in extracting structured data from text documents."},
	        {"role": "user", "content": user_prompt},
	    ]

	    max_attempts = 2
	    backoff_seconds = 1
	    for attempt in range(1, max_attempts + 1):
	        try:
	            completion = openai.ChatCompletion.create(
	                model="gpt-4",
	                messages=chat_messages,
	                max_tokens=4096,
	                n=1,
	                stop=None,
	                temperature=0.1,
	            )
	        except openai.error.RateLimitError:
	            # Out of attempts: report the failure and give up.
	            if attempt == max_attempts:
	                st.error("Rate limit exceeded. Please try again later.")
	                return None
	            st.warning(f"Rate limit exceeded. Retrying in {backoff_seconds} seconds...")
	            time.sleep(backoff_seconds)
	            backoff_seconds *= 2  # Exponential backoff
	        else:
	            return completion.choices[0].message["content"]

	def _strip_code_fence(text):
	    """Return ``text`` without a surrounding Markdown code fence, if any."""
	    cleaned = text.strip()
	    if not cleaned.startswith("```"):
	        return cleaned
	    # Drop the opening fence line ("```" or "```json"); if the reply is a
	    # single line, only drop the three backticks.
	    first_line, newline, remainder = cleaned.partition("\n")
	    cleaned = (remainder if newline else first_line[3:]).rstrip()
	    if cleaned.endswith("```"):
	        cleaned = cleaned[:-3]
	    return cleaned.strip()

	def analyze_task_compliance(task_description, cost_estimate, contract_terms):
	    """Check one task and its cost estimate against the contract terms.

	    The model reply is streamed, accumulated into a single string and parsed
	    as JSON. Unlike the earlier version, individual stream chunks are not
	    written to the page: each chunk is only a token fragment (never valid
	    JSON on its own), and rendering every fragment as its own element
	    produced noise rather than output.

	    Args:
	        task_description: Text describing the task; embedded in the prompt.
	        cost_estimate: Cost figure for the task; embedded in the prompt.
	        contract_terms: JSON-serialisable contract terms; dumped with
	            ``json.dumps(..., indent=4)`` into the prompt.

	    Returns:
	        The parsed JSON reply, or ``None`` if the request was rate-limited
	        on every one of the 5 attempts.

	    Raises:
	        json.JSONDecodeError: If the reply is not valid JSON even after a
	            surrounding Markdown code fence has been removed.
	    """
	    prompt = (
	        "You are an AI tasked with analyzing a task description and its associated cost estimate for compliance with contract conditions. "
	        "Below are the key terms and constraints extracted from the contract, followed by a task description and its cost estimate. "
	        "Your job is to analyze the task description and specify if it violates any conditions from the contract. "
	        "If there are violations, list the reasons for each violation.\n\n"
	        f"Contract terms:\n{json.dumps(contract_terms, indent=4)}\n\n"
	        f"Task description:\n{task_description}\n"
	        f"Cost estimate:\n{cost_estimate}\n\n"
	        "Provide the compliance analysis in a clear JSON format."
	    )

	    retries = 5
	    wait_time = 1
	    for i in range(retries):
	        try:
	            response = openai.ChatCompletion.create(
	                model="gpt-4",
	                messages=[
	                    {"role": "system", "content": "You are an AI specialized in analyzing text for compliance with specified conditions."},
	                    {"role": "user", "content": prompt},
	                ],
	                max_tokens=4096,
	                n=1,
	                stop=None,
	                temperature=0.1,
	                stream=True,
	            )
	            # Consume the stream inside the try so a rate-limit error raised
	            # mid-stream is retried too. "or ''" covers deltas whose content
	            # is missing or None (e.g. the role-only first chunk).
	            compliance_analysis = "".join(
	                chunk["choices"][0]["delta"].get("content") or ""
	                for chunk in response
	            )
	        except openai.error.RateLimitError:
	            # NOTE(review): main() runs this function in worker threads;
	            # Streamlit calls made there may lack a script run context and
	            # not be rendered -- confirm these messages reach the page.
	            if i < retries - 1:
	                st.warning(f"Rate limit exceeded. Retrying in {wait_time} seconds...")
	                time.sleep(wait_time)
	                wait_time *= 2  # Exponential backoff
	                continue
	            st.error("Rate limit exceeded. Please try again later.")
	            return None

	        # Models often wrap JSON answers in ```json fences; remove them so
	        # an otherwise valid reply is not rejected by the parser.
	        return json.loads(_strip_code_fence(compliance_analysis))
	    return None

	def _load_tasks_table(data_file):
	    """Read the uploaded task sheet: Excel reader for XLSX, CSV reader otherwise."""
	    xlsx_mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
	    # Browsers do not always report the XLSX MIME type reliably, so the file
	    # extension is accepted as evidence as well.
	    file_name = (getattr(data_file, "name", "") or "").lower()
	    if data_file.type == xlsx_mime or file_name.endswith(".xlsx"):
	        return pd.read_excel(data_file)
	    return pd.read_csv(data_file)

	def main():
	    """Render the app: upload a contract and a task sheet, then show the analysis.

	    On Submit, the contract text is extracted and sent to the model to obtain
	    the contract terms; every row of the task sheet (columns
	    ``Task Description`` and ``Amount``) is then analysed concurrently
	    against those terms. Terms and results are shown side by side, each with
	    a JSON download button.
	    """
	    st.markdown("<h1 class='centered-title'>Contract Compliance Analyzer</h1>", unsafe_allow_html=True)

	    # File upload buttons one after another
	    st.sidebar.file_uploader("Upload Contract Document (DOCX)", type="docx", key="docx_file")
	    st.sidebar.file_uploader("Upload Task Descriptions (XLSX or CSV)", type=["xlsx", "csv"], key="data_file")
	    submit_button = st.sidebar.button("Submit")

	    docx_file = st.session_state.get("docx_file")
	    data_file = st.session_state.get("data_file")

	    if not submit_button:
	        return
	    if not (docx_file and data_file):
	        # Previously a Submit with a missing upload did nothing at all.
	        st.warning("Please upload both a contract document and a task description file before submitting.")
	        return

	    # Clear previous information
	    st.session_state.clear()

	    # Extract contract text and terms
	    contract_text = extract_text_from_docx(docx_file)
	    extracted_terms_json = extract_terms_from_contract(contract_text)
	    if extracted_terms_json is None:
	        return

	    try:
	        contract_terms = json.loads(extracted_terms_json)
	    except json.JSONDecodeError as e:
	        st.error(f"JSON decoding error: {e}")
	        return

	    # Read task descriptions and cost estimates from XLSX or CSV
	    tasks_df = _load_tasks_table(data_file)
	    missing_columns = [
	        column for column in ("Task Description", "Amount")
	        if column not in tasks_df.columns
	    ]
	    if missing_columns:
	        st.error(f"Task file is missing required column(s): {', '.join(missing_columns)}")
	        return

	    # Use ThreadPoolExecutor to analyze tasks concurrently. Leaving the
	    # ``with`` block waits for every submitted task to finish.
	    with ThreadPoolExecutor(max_workers=10) as executor:  # Adjust max_workers as needed
	        futures = [
	            executor.submit(
	                analyze_task_compliance,
	                row["Task Description"],
	                row["Amount"],
	                contract_terms,
	            )
	            for _, row in tasks_df.iterrows()
	        ]

	    # Collect in submission order (not completion order) so the results
	    # follow the order of the rows in the uploaded sheet.
	    compliance_results = []
	    for future in futures:
	        try:
	            result = future.result()
	        except Exception as e:
	            # UI boundary: report the failed task and carry on with the rest.
	            st.error(f"An error occurred: {e}")
	            continue
	        if result is not None:
	            compliance_results.append(result)

	    # NOTE(review): clicking a download button reruns the script with
	    # submit_button False, so the results rendered below disappear; keeping
	    # them in st.session_state would avoid that -- confirm the desired UX.
	    col1, col2 = st.columns(2)

	    with col1:
	        st.write("Extracted Contract Terms:")
	        st.json(contract_terms)

	        # Download button for contract terms
	        st.download_button(
	            label="Download Contract Terms",
	            data=json.dumps(contract_terms, indent=4),
	            file_name="contract_terms.json",
	            mime="application/json"
	        )

	    with col2:
	        st.write("Compliance Results:")
	        st.json(compliance_results)

	        # Download button for compliance results
	        compliance_results_json = json.dumps(compliance_results, indent=4)
	        st.download_button(
	            label="Download Compliance Results",
	            data=compliance_results_json,
	            file_name="compliance_results.json",
	            mime="application/json"
	        )

	# Run the app only when this file is executed as a script.
	if __name__ == "__main__":
	    main()