Spaces:

Bikas0
/

Contract-Conditions-Extraction-and-Verification

Sleeping

App Files Files Community

Bikas0 committed on Jul 8, 2024

Commit

e9584dc

verified ·

1 Parent(s): f4f51e9

update code

Browse files

Files changed (1) hide show

app.py +103 -413

app.py CHANGED Viewed

@@ -1,390 +1,27 @@
-# import streamlit as st
-# import os
-# import openai
-# import json
-# import pandas as pd
-# from docx import Document
-# from concurrent.futures import ThreadPoolExecutor, as_completed
-# from dotenv import load_dotenv
-# import time
-# # Load the OpenAI API key from environment variables
-# load_dotenv()
-# api_key = os.getenv("OPENAI_API_KEY")
-# openai.api_key = api_key
-# # Streamlit app layout
-# st.set_page_config(layout="wide")
-# # Add custom CSS for center alignment
-# st.markdown("""
-#     <style>
-#     .centered-title {
-#         text-align: center;
-#         font-size: 2.5em;
-#         margin-top: 0;
-#     }
-#     </style>
-#     """, unsafe_allow_html=True)
-# def extract_text_from_docx(docx_path):
-#     doc = Document(docx_path)
-#     return "\n".join([para.text for para in doc.paragraphs])
-# def extract_terms_from_contract(contract_text):
-#     prompt = (
-#         "You are an AI tasked with analyzing a contract and extracting key terms and constraints. The contract contains "
-#         "various sections and subsections with terms related to budget constraints, types of allowable work, timelines, "
-#         "penalties, responsibilities, and other conditions for work execution. Your job is to extract these key terms and "
-#         "structure them in a clear JSON format, reflecting the hierarchy of sections and subsections. "
-#         "Ensure to capture all important constraints and conditions specified in the contract text. If a section or subsection "
-#         "contains multiple terms, list them all.\n\n"
-#         "Contract text:\n"
-#         f"{contract_text}\n\n"
-#         "Provide the extracted terms in JSON format."
-#     )
-#     retries = 2
-#     wait_time = 1
-#     for i in range(retries):
-#         try:
-#             response = openai.ChatCompletion.create(
-#                 model="gpt-4",
-#                 messages=[
-#                     {"role": "system", "content": "You are an AI specialized in extracting structured data from text documents."},
-#                     {"role": "user", "content": prompt},
-#                 ],
-#                 max_tokens=4096,
-#                 n=1,
-#                 stop=None,
-#                 temperature=0.1,
-#             )
-#             return response.choices[0].message["content"]
-#         except openai.error.RateLimitError:
-#             if i < retries - 1:
-#                 st.warning(f"Rate limit exceeded. Retrying in {wait_time} seconds...")
-#                 time.sleep(wait_time)
-#                 wait_time *= 2  # Exponential backoff
-#             else:
-#                 st.error("Rate limit exceeded. Please try again later.")
-#                 return None
-# def analyze_task_compliance(task_description, cost_estimate, contract_terms):
-#     prompt = (
-#         "You are an AI tasked with analyzing a task description and its associated cost estimate for compliance with contract conditions. "
-#         "Below are the key terms and constraints extracted from the contract, followed by a task description and its cost estimate. "
-#         "Your job is to analyze the task description and specify if it violates any conditions from the contract. "
-#         "If there are violations, list the reasons for each violation.\n\n"
-#         f"Contract terms:\n{json.dumps(contract_terms, indent=4)}\n\n"
-#         f"Task description:\n{task_description}\n"
-#         f"Cost estimate:\n{cost_estimate}\n\n"
-#         "Provide the compliance analysis in a clear JSON format."
-#     )
-#     retries = 5
-#     wait_time = 1
-#     for i in range(retries):
-#         try:
-#             response = openai.ChatCompletion.create(
-#                 model="gpt-4",
-#                 messages=[
-#                     {"role": "system", "content": "You are an AI specialized in analyzing text for compliance with specified conditions."},
-#                     {"role": "user", "content": prompt},
-#                 ],
-#                 max_tokens=4096,
-#                 n=1,
-#                 stop=None,
-#                 temperature=0.1,
-#                 stream=True,
-#             )
-#             compliance_analysis = ""
-#             for chunk in response:
-#                 chunk_text = chunk['choices'][0]['delta'].get('content', '')
-#                 compliance_analysis += chunk_text
-#                 st.write(chunk_text)
-#                 st.json(chunk_text)
-#             return json.loads(compliance_analysis)
-#         except openai.error.RateLimitError:
-#             if i < retries - 1:
-#                 st.warning(f"Rate limit exceeded. Retrying in {wait_time} seconds...")
-#                 time.sleep(wait_time)
-#                 wait_time *= 2  # Exponential backoff
-#             else:
-#                 st.error("Rate limit exceeded. Please try again later.")
-#                 return None
-# def main():
-#     st.markdown("<h1 class='centered-title'>Contract Compliance Analyzer</h1>", unsafe_allow_html=True)
-#     # File upload buttons one after another
-#     st.sidebar.file_uploader("Upload Contract Document (DOCX)", type="docx", key="docx_file")
-#     st.sidebar.file_uploader("Upload Task Descriptions (XLSX or CSV)", type=["xlsx", "csv"], key="data_file")
-#     submit_button = st.sidebar.button("Submit")
-#     docx_file = st.session_state.get("docx_file")
-#     data_file = st.session_state.get("data_file")
-#     if submit_button and docx_file and data_file:
-#         # Clear previous information
-#         st.session_state.clear()
-#         # Extract contract text and terms
-#         contract_text = extract_text_from_docx(docx_file)
-#         extracted_terms_json = extract_terms_from_contract(contract_text)
-#         if extracted_terms_json is None:
-#             return
-#         try:
-#             contract_terms = json.loads(extracted_terms_json)
-#         except json.JSONDecodeError as e:
-#             st.error(f"JSON decoding error: {e}")
-#             return
-#         # Read task descriptions and cost estimates from XLSX or CSV
-#         if data_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
-#             tasks_df = pd.read_excel(data_file)
-#         else:
-#             tasks_df = pd.read_csv(data_file)
-#         compliance_results = []
-#         futures = []
-#         # Use ThreadPoolExecutor to analyze tasks concurrently
-#         with ThreadPoolExecutor(max_workers=10) as executor:  # Adjust max_workers as needed
-#             for _, row in tasks_df.iterrows():
-#                 task_description = row['Task Description']
-#                 cost_estimate = row['Amount']
-#                 futures.append(executor.submit(analyze_task_compliance, task_description, cost_estimate, contract_terms))
-#             for future in as_completed(futures):
-#                 try:
-#                     result = future.result()
-#                     if result is not None:
-#                         compliance_results.append(result)
-#                 except Exception as e:
-#                     st.error(f"An error occurred: {e}")
-#         col1, col2 = st.columns(2)
-#         with col1:
-#             st.write("Extracted Contract Terms:")
-#             st.json(contract_terms)
-#             # Download button for contract terms
-#             st.download_button(
-#                 label="Download Contract Terms",
-#                 data=json.dumps(contract_terms, indent=4),
-#                 file_name="contract_terms.json",
-#                 mime="application/json"
-#             )
-#         with col2:
-#             st.write("Compliance Results:")
-#             st.json(compliance_results)
-#             # Download button for compliance results
-#             compliance_results_json = json.dumps(compliance_results, indent=4)
-#             st.download_button(
-#                 label="Download Compliance Results",
-#                 data=compliance_results_json,
-#                 file_name="compliance_results.json",
-#                 mime="application/json"
-#             )
-# if __name__ == "__main__":
-#     main()
-# import streamlit as st
-# import os
-# import openai
-# import json
-# import pandas as pd
-# from docx import Document
-# from dotenv import load_dotenv
-# import time
-# # Load the OpenAI API key from environment variables
-# load_dotenv()
-# api_key = os.getenv("OPENAI_API_KEY")
-# openai.api_key = api_key
-# # Streamlit app layout
-# st.set_page_config(layout="wide")
-# # Add custom CSS for center alignment
-# st.markdown("""
-#     <style>
-#     .centered-title {
-#         text-align: center;
-#         font-size: 2.5em;
-#         margin-top: 0;
-#     }
-#     </style>
-#     """, unsafe_allow_html=True)
-# def extract_text_from_docx(docx_path):
-#     doc = Document(docx_path)
-#     return "\n".join([para.text for para in doc.paragraphs])
-# def extract_terms_from_contract(contract_text):
-#     prompt = (
-#         "You are an AI tasked with analyzing a contract and extracting key terms and constraints. The contract contains "
-#         "various sections and subsections with terms related to budget constraints, types of allowable work, timelines, "
-#         "penalties, responsibilities, and other conditions for work execution. Your job is to extract these key terms and "
-#         "structure them in a clear JSON format, reflecting the hierarchy of sections and subsections. "
-#         "Ensure to capture all important constraints and conditions specified in the contract text. If a section or subsection "
-#         "contains multiple terms, list them all.\n\n"
-#         "Contract text:\n"
-#         f"{contract_text}\n\n"
-#         "Provide the extracted terms in JSON format."
-#     )
-#     try:
-#         response = openai.ChatCompletion.create(
-#             model="gpt-4",
-#             messages=[
-#                 {"role": "system", "content": "You are an AI specialized in extracting structured data from text documents."},
-#                 {"role": "user", "content": prompt},
-#             ],
-#             max_tokens=4096,
-#             n=1,
-#             stop=None,
-#             temperature=0.1,
-#         )
-#         return response.choices[0].message["content"]
-#     except openai.error.OpenAIError as e:
-#         st.error(f"Error extracting terms from contract: {e}")
-#         return None
-# def analyze_task_compliance(task_description, cost_estimate, contract_text):
-#     prompt = (
-#         "You are an AI tasked with analyzing a task description and its associated cost estimate for compliance with contract conditions. "
-#         "Below are the key terms and constraints extracted from the contract, followed by a task description and its cost estimate. "
-#         "Your job is to analyze the task description and specify if it violates any conditions from the contract. "
-#         "If there are violations, list the reasons for each violation.\n\n"
-#         f"Contract terms:\n{contract_text}\n\n"
-#         f"Task description:\n{task_description}\n"
-#         f"Cost estimate:\n{cost_estimate}\n\n"
-#         "Provide the compliance analysis in a clear JSON format."
-#     )
-#     try:
-#         response = openai.ChatCompletion.create(
-#             model="gpt-4",
-#             messages=[
-#                 {"role": "system", "content": "You are an AI specialized in analyzing text for compliance with specified conditions."},
-#                 {"role": "user", "content": prompt},
-#             ],
-#             max_tokens=4096,
-#             n=1,
-#             stop=None,
-#             temperature=0.1,
-#         )
-#         return json.loads(response.choices[0].message["content"])
-#     except openai.error.OpenAIError as e:
-#         st.error(f"Error analyzing task compliance: {e}")
-#         return None
-# def main():
-#     start = time.time()
-#     st.markdown("<h1 class='centered-title'>Contract Compliance Analyzer</h1>", unsafe_allow_html=True)
-#     # File upload buttons one after another
-#     st.sidebar.file_uploader("Upload Contract Document (DOCX)", type="docx", key="docx_file")
-#     st.sidebar.file_uploader("Upload Task Descriptions (XLSX or CSV)", type=["xlsx", "csv"], key="data_file")
-#     submit_button = st.sidebar.button("Submit")
-#     docx_file = st.session_state.get("docx_file")
-#     data_file = st.session_state.get("data_file")
-#     if submit_button and docx_file and data_file:
-#         # Clear previous information
-#         st.session_state.clear()
-#         # Extract contract text and terms
-#         contract_text = extract_text_from_docx(docx_file)
-#         extracted_terms_json = extract_terms_from_contract(contract_text)
-#         if extracted_terms_json is None:
-#             return
-#         try:
-#             contract_terms = json.loads(extracted_terms_json)
-#         except json.JSONDecodeError as e:
-#             st.error(f"JSON decoding error: {e}")
-#             return
-#         # Introducing an 8-second delay before analyzing task compliance
-#         time.sleep(8)
-#         # Read task descriptions and cost estimates from XLSX or CSV
-#         if data_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
-#             tasks_df = pd.read_excel(data_file)
-#         else:
-#             tasks_df = pd.read_csv(data_file)
-#         compliance_results = []
-#         # Process tasks sequentially
-#         for _, row in tasks_df.iterrows():
-#             task_description = row['Task Description']
-#             cost_estimate = row['Amount']
-#             result = analyze_task_compliance(task_description, cost_estimate, contract_text)
-#             if result is not None:
-#                 compliance_results.append(result)
-#         col1, col2 = st.columns(2)
-#         with col1:
-#             st.write("Extracted Contract Terms:")
-#             st.json(contract_terms)
-#             # Download button for contract terms
-#             st.download_button(
-#                 label="Download Contract Terms",
-#                 data=json.dumps(contract_terms, indent=4),
-#                 file_name="contract_terms.json",
-#                 mime="application/json"
-#             )
-#         with col2:
-#             st.write("Compliance Results:")
-#             st.json(compliance_results)
-#             # Download button for compliance results
-#             compliance_results_json = json.dumps(compliance_results, indent=4)
-#             st.download_button(
-#                 label="Download Compliance Results",
-#                 data=compliance_results_json,
-#                 file_name="compliance_results.json",
-#                 mime="application/json"
-#             )
-#     end = time.time()
-#     print("Total Time: ", end-start)
-# if __name__ == "__main__":
-#     main()
 import streamlit as st
 import os
-import openai
 import json
 import pandas as pd
 from docx import Document
 from dotenv import load_dotenv
-import time
-import retrying
-# Load the OpenAI API key from environment variables
 load_dotenv()
-api_key = os.getenv("OPENAI_API_KEY")
-openai.api_key = api_key
 # Streamlit app layout
 st.set_page_config(layout="wide")
@@ -417,9 +54,12 @@ def extract_terms_from_contract(contract_text):
         "Provide the extracted terms in JSON format."
     )
-    try:
-        response = openai.ChatCompletion.create(
-            model="gpt-4",
             messages=[
                 {"role": "system", "content": "You are an AI specialized in extracting structured data from text documents."},
                 {"role": "user", "content": prompt},
@@ -429,28 +69,37 @@ def extract_terms_from_contract(contract_text):
             stop=None,
             temperature=0.1,
         )
-        return response.choices[0].message["content"]
-    except openai.error.OpenAIError as e:
-        st.error(f"Error extracting terms from contract: {e}")
-        return None
-# Add a retry decorator with exponential backoff
-@retrying.retry(wait_exponential_multiplier=1000, wait_exponential_max=10000, stop_max_attempt_number=5)
-def analyze_task_compliance(task_description, cost_estimate, contract_text):
     prompt = (
         "You are an AI tasked with analyzing a task description and its associated cost estimate for compliance with contract conditions. "
         "Below are the key terms and constraints extracted from the contract, followed by a task description and its cost estimate. "
         "Your job is to analyze the task description and specify if it violates any conditions from the contract. "
         "If there are violations, list the reasons for each violation.\n\n"
-        f"Contract terms:\n{contract_text}\n\n"
         f"Task description:\n{task_description}\n"
         f"Cost estimate:\n{cost_estimate}\n\n"
         "Provide the compliance analysis in a clear JSON format."
     )
-    try:
-        response = openai.ChatCompletion.create(
-            model="gpt-4",
             messages=[
                 {"role": "system", "content": "You are an AI specialized in analyzing text for compliance with specified conditions."},
                 {"role": "user", "content": prompt},
@@ -459,15 +108,52 @@ def analyze_task_compliance(task_description, cost_estimate, contract_text):
             n=1,
             stop=None,
             temperature=0.1,
         )
-        return json.loads(response.choices[0].message["content"])
-    except openai.error.OpenAIError as e:
-        st.error(f"Error analyzing task compliance: {e}")
-        return None
 def main():
-    start = time.time()
     st.markdown("<h1 class='centered-title'>Contract Compliance Analyzer</h1>", unsafe_allow_html=True)
     # File upload buttons one after another
@@ -485,7 +171,7 @@ def main():
         # Extract contract text and terms
         contract_text = extract_text_from_docx(docx_file)
         extracted_terms_json = extract_terms_from_contract(contract_text)
         if extracted_terms_json is None:
             return
@@ -494,7 +180,7 @@ def main():
         except json.JSONDecodeError as e:
             st.error(f"JSON decoding error: {e}")
             return
         # Read task descriptions and cost estimates from XLSX or CSV
         if data_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
             tasks_df = pd.read_excel(data_file)
@@ -502,16 +188,23 @@ def main():
             tasks_df = pd.read_csv(data_file)
         compliance_results = []
-        # Process tasks sequentially
-        for _, row in tasks_df.iterrows():
-            task_description = row['Task Description']
-            cost_estimate = row['Amount']
-            result = analyze_task_compliance(task_description, cost_estimate, contract_text)
-            if result is not None:
-                compliance_results.append(result)
         col1, col2 = st.columns(2)
         with col1:
@@ -538,9 +231,6 @@ def main():
                 file_name="compliance_results.json",
                 mime="application/json"
             )
-    end = time.time()
-    print("Total Time: ", end-start)
 if __name__ == "__main__":
     main()

 import streamlit as st
 import os
 import json
 import pandas as pd
 from docx import Document
 from dotenv import load_dotenv
+from openai import AzureOpenAI
+from concurrent.futures import ThreadPoolExecutor, as_completed
+# Load environment variables
 load_dotenv()
+# Azure OpenAI credentials
+key = os.getenv("AZURE_OPENAI_API_KEY")
+endpoint_url = "https://interview-key.openai.azure.com/"
+api_version = "2024-05-01-preview"
+deployment_id = "interview"
+# Initialize Azure OpenAI client
+client = AzureOpenAI(
+    api_version=api_version,
+    azure_endpoint=endpoint_url,
+    api_key=key
+)
 # Streamlit app layout
 st.set_page_config(layout="wide")
         "Provide the extracted terms in JSON format."
     )
+    retries = 2
+    wait_time = 1
+    for i in range(retries):
+        try:
+            response = client.chat.completions.create(
+            model=deployment_id,
             messages=[
                 {"role": "system", "content": "You are an AI specialized in extracting structured data from text documents."},
                 {"role": "user", "content": prompt},
             stop=None,
             temperature=0.1,
         )
+        return response.choices[0].message.content
+        except Exception as e:
+            st.error(f"Error extracting terms from contract: {e}")
+            return None
+        # except openai.error.RateLimitError:
+        #     if i < retries - 1:
+        #         st.warning(f"Rate limit exceeded. Retrying in {wait_time} seconds...")
+        #         time.sleep(wait_time)
+        #         wait_time *= 2  # Exponential backoff
+        #     else:
+        #         st.error("Rate limit exceeded. Please try again later.")
+        #         return None
+def analyze_task_compliance(task_description, cost_estimate, contract_terms):
     prompt = (
         "You are an AI tasked with analyzing a task description and its associated cost estimate for compliance with contract conditions. "
         "Below are the key terms and constraints extracted from the contract, followed by a task description and its cost estimate. "
         "Your job is to analyze the task description and specify if it violates any conditions from the contract. "
         "If there are violations, list the reasons for each violation.\n\n"
+        f"Contract terms:\n{json.dumps(contract_terms, indent=4)}\n\n"
         f"Task description:\n{task_description}\n"
         f"Cost estimate:\n{cost_estimate}\n\n"
         "Provide the compliance analysis in a clear JSON format."
     )
+    retries = 5
+    wait_time = 1
+    for i in range(retries):
+        try:
+            response = client.chat.completions.create(
+            model=deployment_id,
             messages=[
                 {"role": "system", "content": "You are an AI specialized in analyzing text for compliance with specified conditions."},
                 {"role": "user", "content": prompt},
             n=1,
             stop=None,
             temperature=0.1,
+            stream=True,
         )
+            compliance_analysis = ""
+            for chunk in response:
+                chunk_text = chunk['choices'][0]['delta'].get('content', '')
+                compliance_analysis += chunk_text
+                st.write(chunk_text)
+                st.json(chunk_text)
+            return json.loads(compliance_analysis)
+        except Exception as e:
+            st.error(f"Error analyzing task compliance: {e}")
+            return None
+            # response = openai.ChatCompletion.create(
+            #     model="gpt-4",
+            #     messages=[
+            #         {"role": "system", "content": "You are an AI specialized in analyzing text for compliance with specified conditions."},
+            #         {"role": "user", "content": prompt},
+            #     ],
+            #     max_tokens=4096,
+            #     n=1,
+            #     stop=None,
+            #     temperature=0.1,
+            #     stream=True,
+            # )
+            # compliance_analysis = ""
+            # for chunk in response:
+            #     chunk_text = chunk['choices'][0]['delta'].get('content', '')
+            #     compliance_analysis += chunk_text
+            #     st.write(chunk_text)
+            #     st.json(chunk_text)
+            # return json.loads(compliance_analysis)
+        # except openai.error.RateLimitError:
+        #     if i < retries - 1:
+        #         st.warning(f"Rate limit exceeded. Retrying in {wait_time} seconds...")
+        #         time.sleep(wait_time)
+        #         wait_time *= 2  # Exponential backoff
+        #     else:
+        #         st.error("Rate limit exceeded. Please try again later.")
+        #         return None
 def main():
     st.markdown("<h1 class='centered-title'>Contract Compliance Analyzer</h1>", unsafe_allow_html=True)
     # File upload buttons one after another
         # Extract contract text and terms
         contract_text = extract_text_from_docx(docx_file)
         extracted_terms_json = extract_terms_from_contract(contract_text)
         if extracted_terms_json is None:
             return
         except json.JSONDecodeError as e:
             st.error(f"JSON decoding error: {e}")
             return
         # Read task descriptions and cost estimates from XLSX or CSV
         if data_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
             tasks_df = pd.read_excel(data_file)
             tasks_df = pd.read_csv(data_file)
         compliance_results = []
+        futures = []
+        # Use ThreadPoolExecutor to analyze tasks concurrently
+        with ThreadPoolExecutor(max_workers=10) as executor:  # Adjust max_workers as needed
+            for _, row in tasks_df.iterrows():
+                task_description = row['Task Description']
+                cost_estimate = row['Amount']
+                futures.append(executor.submit(analyze_task_compliance, task_description, cost_estimate, contract_terms))
+            for future in as_completed(futures):
+                try:
+                    result = future.result()
+                    if result is not None:
+                        compliance_results.append(result)
+                except Exception as e:
+                    st.error(f"An error occurred: {e}")
         col1, col2 = st.columns(2)
         with col1:
                 file_name="compliance_results.json",
                 mime="application/json"
             )
 if __name__ == "__main__":
     main()