seanpedrickcase committed on
Commit
36574ae
·
1 Parent(s): a7566b9

Added folder with CDK code and app. Updated config.py to be compatible with the temp folders needed for read-only file systems

cdk/__init__.py ADDED
File without changes
cdk/app.py ADDED
@@ -0,0 +1,81 @@
import os
from aws_cdk import (App, Environment)

# Assuming these are still relevant for you
from check_resources import check_and_set_context, CONTEXT_FILE
from cdk_config import AWS_ACCOUNT_ID, AWS_REGION, RUN_USEAST_STACK, USE_CLOUDFRONT
from cdk_stack import CdkStack, CdkStackCloudfront #, CdkStackMain
from cdk_functions import load_context_from_file, create_basic_config_env

# Initialize the CDK app
app = App()

# --- ENHANCED CONTEXT GENERATION AND LOADING ---
# 1. Always remove any stale context file before regeneration
if os.path.exists(CONTEXT_FILE):
    try:
        os.remove(CONTEXT_FILE)
        print(f"Removed stale context file: {CONTEXT_FILE}")
    except OSError as e:
        print(f"Warning: Could not remove old context file {CONTEXT_FILE}: {e}")
        # Proceed anyway; check_and_set_context might handle overwriting

# 2. Always run the pre-check script to generate fresh context
print("Running pre-check script to generate application context...")
try:
    check_and_set_context()
    if not os.path.exists(CONTEXT_FILE):
        raise RuntimeError(f"check_and_set_context() finished, but {CONTEXT_FILE} was not created.")
    print(f"Context generated successfully at {CONTEXT_FILE}.")
except Exception as e:
    raise RuntimeError(f"Failed to generate context via check_and_set_context(): {e}")

if os.path.exists(CONTEXT_FILE):
    load_context_from_file(app, CONTEXT_FILE)
else:
    raise RuntimeError(f"Could not find {CONTEXT_FILE}.")

# Create a basic config.env file that the user can use to run the app later. Input is the folder it is saved into.
create_basic_config_env("config")

# Define the environment for the regional stack (where the ALB resides)
aws_env_regional = Environment(account=AWS_ACCOUNT_ID, region=AWS_REGION)

# Create the regional stack (ALB, SGs, etc.)
# regional_stack = CdkStack(app,
#     "RedactionStackSubnets",
#     env=aws_env_regional,
#     cross_region_references=True)

# regional_stack_main = CdkStackMain(app,
#     "RedactionStackMain",
#     env=aws_env_regional,
#     private_subnets=regional_stack.params["private_subnets"],
#     private_route_tables=regional_stack.params["private_route_tables"],
#     public_subnets=regional_stack.params["public_subnets"],
#     public_route_tables=regional_stack.params["public_route_tables"],
#     cross_region_references=True)

regional_stack = CdkStack(app,
    "RedactionStack",
    env=aws_env_regional,
    cross_region_references=True)

if USE_CLOUDFRONT == 'True' and RUN_USEAST_STACK == 'True':
    # Define the environment for the CloudFront stack (always us-east-1 for CF-level resources like WAFv2 WebACLs for CF)
    aws_env_us_east_1 = Environment(account=AWS_ACCOUNT_ID, region="us-east-1")

    # Create the CloudFront stack, passing the outputs from the regional stack
    cloudfront_stack = CdkStackCloudfront(
        app,
        "RedactionStackCloudfront",
        env=aws_env_us_east_1,
        alb_arn=regional_stack.params["alb_arn_output"],
        alb_sec_group_id=regional_stack.params["alb_security_group_id"],
        alb_dns_name=regional_stack.params["alb_dns_name"],
        cross_region_references=True
    )

# Synthesize the CloudFormation template
app.synth(validate_on_synthesis=True)
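
A quick note on the contract assumed above (a sketch; the real keys are whatever check_and_set_context() writes): the pre-check script produces a flat JSON map in cdk.context.json, and load_context_from_file() replays each key into the app, where stacks can read it back:

    # cdk.context.json (illustrative contents)
    # {"exists:example-log-bucket": true, "vpc_id": "vpc-0123456789abcdef0"}
    from aws_cdk import App

    app = App()
    app.node.set_context("exists:example-log-bucket", True)               # what load_context_from_file does per key
    assert app.node.try_get_context("exists:example-log-bucket") is True  # how a stack reads it back
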
cdk/cdk_config.py ADDED
@@ -0,0 +1,225 @@
import os
import tempfile
from dotenv import load_dotenv

# Set or retrieve configuration variables for the CDK redaction deployment

def get_or_create_env_var(var_name: str, default_value: str, print_val: bool=False):
    '''
    Get an environment variable, setting it to a default value if it doesn't exist.
    '''
    # Get the environment variable if it exists
    value = os.environ.get(var_name)

    # If it doesn't exist, set the environment variable to the default value
    if value is None:
        os.environ[var_name] = default_value
        value = default_value

    if print_val:
        print(f'The value of {var_name} is {value}')

    return value

def ensure_folder_exists(output_folder: str):
    """Checks if the specified folder exists, and creates it if not."""

    if not os.path.exists(output_folder):
        # Create the folder if it doesn't exist
        os.makedirs(output_folder, exist_ok=True)
        print(f"Created the {output_folder} folder.")
    else:
        print(f"The {output_folder} folder already exists.")

def add_folder_to_path(folder_path: str):
    '''
    Check if a folder exists on your system. If so, get the absolute path and add it to the system PATH variable if it isn't already there. This function is only relevant for locally-created executable files based on this app (PyInstaller creates an _internal folder containing Tesseract and Poppler, which need to be on the system PATH for the app to run).
    '''

    if os.path.exists(folder_path) and os.path.isdir(folder_path):
        print(folder_path, "folder exists.")

        # Resolve relative path to absolute path
        absolute_path = os.path.abspath(folder_path)

        current_path = os.environ['PATH']
        if absolute_path not in current_path.split(os.pathsep):
            full_path_extension = absolute_path + os.pathsep + current_path
            os.environ['PATH'] = full_path_extension
            #print(f"Updated PATH with: ", full_path_extension)
        else:
            print(f"Directory {folder_path} already exists in PATH.")
    else:
        print(f"Folder not found at {folder_path} - not added to PATH")

###
# LOAD CONFIG FROM ENV FILE
###
CONFIG_FOLDER = get_or_create_env_var('CONFIG_FOLDER', "config/")

ensure_folder_exists(CONFIG_FOLDER)

# If you have an aws_config env file in the config folder, you can load in app variables this way, e.g. 'config/cdk_config.env'
CDK_CONFIG_PATH = get_or_create_env_var('CDK_CONFIG_PATH', 'config/cdk_config.env') # e.g. config/cdk_config.env

if CDK_CONFIG_PATH:
    if os.path.exists(CDK_CONFIG_PATH):
        print(f"Loading CDK variables from config file {CDK_CONFIG_PATH}")
        load_dotenv(CDK_CONFIG_PATH)
    else:
        print("CDK config file not found at location:", CDK_CONFIG_PATH)

###
# AWS OPTIONS
###
AWS_REGION = get_or_create_env_var('AWS_REGION', '')
AWS_ACCOUNT_ID = get_or_create_env_var('AWS_ACCOUNT_ID', '')

###
# CDK OPTIONS
###
CDK_PREFIX = get_or_create_env_var('CDK_PREFIX', '')
CONTEXT_FILE = get_or_create_env_var('CONTEXT_FILE', 'cdk.context.json') # Define the CDK output context file name
CDK_FOLDER = get_or_create_env_var('CDK_FOLDER', '') # Full path to the CDK folder (with trailing forward slash)
RUN_USEAST_STACK = get_or_create_env_var('RUN_USEAST_STACK', 'False')

### VPC
VPC_NAME = get_or_create_env_var('VPC_NAME', '')
EXISTING_IGW_ID = get_or_create_env_var('EXISTING_IGW_ID', '')
SINGLE_NAT_GATEWAY_ID = get_or_create_env_var('SINGLE_NAT_GATEWAY_ID', '')

### SUBNETS / ROUTE TABLES / NAT GATEWAY
PUBLIC_SUBNETS_TO_USE = get_or_create_env_var('PUBLIC_SUBNETS_TO_USE', '') # e.g. ['PublicSubnet1', 'PublicSubnet2']
PUBLIC_SUBNET_CIDR_BLOCKS = get_or_create_env_var('PUBLIC_SUBNET_CIDR_BLOCKS', '') # e.g. ["10.0.1.0/24", "10.0.2.0/24"]
PUBLIC_SUBNET_AVAILABILITY_ZONES = get_or_create_env_var('PUBLIC_SUBNET_AVAILABILITY_ZONES', '') # e.g. ["us-east-1a", "us-east-1b"]

PRIVATE_SUBNETS_TO_USE = get_or_create_env_var('PRIVATE_SUBNETS_TO_USE', '') # e.g. ['PrivateSubnet1', 'PrivateSubnet2']
PRIVATE_SUBNET_CIDR_BLOCKS = get_or_create_env_var('PRIVATE_SUBNET_CIDR_BLOCKS', '') # e.g. ["10.0.1.0/24", "10.0.2.0/24"]
PRIVATE_SUBNET_AVAILABILITY_ZONES = get_or_create_env_var('PRIVATE_SUBNET_AVAILABILITY_ZONES', '') # e.g. ["us-east-1a", "us-east-1b"]

ROUTE_TABLE_BASE_NAME = get_or_create_env_var('ROUTE_TABLE_BASE_NAME', f'{CDK_PREFIX}PrivateRouteTable')
NAT_GATEWAY_EIP_NAME = get_or_create_env_var('NAT_GATEWAY_EIP_NAME', f"{CDK_PREFIX}NatGatewayEip")
NAT_GATEWAY_NAME = get_or_create_env_var('NAT_GATEWAY_NAME', f"{CDK_PREFIX}NatGateway")

# IAM roles
AWS_MANAGED_TASK_ROLES_LIST = get_or_create_env_var('AWS_MANAGED_TASK_ROLES_LIST', '["AmazonCognitoReadOnly", "service-role/AmazonECSTaskExecutionRolePolicy", "AmazonS3FullAccess", "AmazonTextractFullAccess", "ComprehendReadOnly", "AmazonDynamoDBFullAccess", "service-role/AWSAppSyncPushToCloudWatchLogs"]')
POLICY_FILE_LOCATIONS = get_or_create_env_var('POLICY_FILE_LOCATIONS', '') # e.g. '["config/sts_permissions.json"]'
POLICY_FILE_ARNS = get_or_create_env_var('POLICY_FILE_ARNS', '')

# GITHUB REPO
GITHUB_REPO_USERNAME = get_or_create_env_var('GITHUB_REPO_USERNAME', 'seanpedrick-case')
GITHUB_REPO_NAME = get_or_create_env_var('GITHUB_REPO_NAME', 'doc_redaction')
GITHUB_REPO_BRANCH = get_or_create_env_var('GITHUB_REPO_BRANCH', 'main')

### CODEBUILD
CODEBUILD_ROLE_NAME = get_or_create_env_var('CODEBUILD_ROLE_NAME', f"{CDK_PREFIX}CodeBuildRole")
CODEBUILD_PROJECT_NAME = get_or_create_env_var('CODEBUILD_PROJECT_NAME', f"{CDK_PREFIX}CodeBuildProject")

### ECR
ECR_REPO_NAME = get_or_create_env_var('ECR_REPO_NAME', 'doc-redaction') # Beware - cannot have underscores and must be lower case
ECR_CDK_REPO_NAME = get_or_create_env_var('ECR_CDK_REPO_NAME', f"{CDK_PREFIX}{ECR_REPO_NAME}".lower())

### S3
S3_LOG_CONFIG_BUCKET_NAME = get_or_create_env_var('S3_LOG_CONFIG_BUCKET_NAME', f"{CDK_PREFIX}s3-logs".lower()) # S3 bucket names need to be lower case
S3_OUTPUT_BUCKET_NAME = get_or_create_env_var('S3_OUTPUT_BUCKET_NAME', f"{CDK_PREFIX}s3-output".lower())

### ECS
FARGATE_TASK_DEFINITION_NAME = get_or_create_env_var('FARGATE_TASK_DEFINITION_NAME', f"{CDK_PREFIX}FargateTaskDefinition")
TASK_DEFINITION_FILE_LOCATION = get_or_create_env_var('TASK_DEFINITION_FILE_LOCATION', CDK_FOLDER + CONFIG_FOLDER + "task_definition.json")

CLUSTER_NAME = get_or_create_env_var('CLUSTER_NAME', f"{CDK_PREFIX}Cluster")
ECS_SERVICE_NAME = get_or_create_env_var('ECS_SERVICE_NAME', f"{CDK_PREFIX}ECSService")
ECS_TASK_ROLE_NAME = get_or_create_env_var('ECS_TASK_ROLE_NAME', f"{CDK_PREFIX}TaskRole")
ECS_TASK_EXECUTION_ROLE_NAME = get_or_create_env_var('ECS_TASK_EXECUTION_ROLE_NAME', f"{CDK_PREFIX}ExecutionRole")
ECS_SECURITY_GROUP_NAME = get_or_create_env_var('ECS_SECURITY_GROUP_NAME', f"{CDK_PREFIX}SecurityGroupECS")
ECS_LOG_GROUP_NAME = get_or_create_env_var('ECS_LOG_GROUP_NAME', f"/ecs/{ECS_SERVICE_NAME}-logs".lower())

ECS_TASK_CPU_SIZE = get_or_create_env_var('ECS_TASK_CPU_SIZE', '1024')
ECS_TASK_MEMORY_SIZE = get_or_create_env_var('ECS_TASK_MEMORY_SIZE', '4096')
ECS_USE_FARGATE_SPOT = get_or_create_env_var('USE_FARGATE_SPOT', 'False')
ECS_READ_ONLY_FILE_SYSTEM = get_or_create_env_var('ECS_READ_ONLY_FILE_SYSTEM', 'True')

### Cognito
COGNITO_USER_POOL_NAME = get_or_create_env_var('COGNITO_USER_POOL_NAME', f"{CDK_PREFIX}UserPool")
COGNITO_USER_POOL_CLIENT_NAME = get_or_create_env_var('COGNITO_USER_POOL_CLIENT_NAME', f"{CDK_PREFIX}UserPoolClient")
COGNITO_USER_POOL_CLIENT_SECRET_NAME = get_or_create_env_var('COGNITO_USER_POOL_CLIENT_SECRET_NAME', f"{CDK_PREFIX}ParamCognitoSecret")
COGNITO_USER_POOL_DOMAIN_PREFIX = get_or_create_env_var('COGNITO_USER_POOL_DOMAIN_PREFIX', "redaction-app-domain") # Change this to something unique, or you'll probably hit an error

# Application load balancer
ALB_NAME = get_or_create_env_var('ALB_NAME', f"{CDK_PREFIX}Alb"[-32:]) # An application load balancer name can be at most 32 characters, so take the last 32 characters of the suggested name
ALB_NAME_SECURITY_GROUP_NAME = get_or_create_env_var('ALB_SECURITY_GROUP_NAME', f"{CDK_PREFIX}SecurityGroupALB")
ALB_TARGET_GROUP_NAME = get_or_create_env_var('ALB_TARGET_GROUP_NAME', f"{CDK_PREFIX}-tg"[-32:]) # Max 32 characters
EXISTING_LOAD_BALANCER_ARN = get_or_create_env_var('EXISTING_LOAD_BALANCER_ARN', '')
EXISTING_LOAD_BALANCER_DNS = get_or_create_env_var('EXISTING_LOAD_BALANCER_DNS', 'placeholder_load_balancer_dns.net')

## CLOUDFRONT
USE_CLOUDFRONT = get_or_create_env_var('USE_CLOUDFRONT', 'True')
CLOUDFRONT_PREFIX_LIST_ID = get_or_create_env_var('CLOUDFRONT_PREFIX_LIST_ID', 'pl-93a247fa')
CLOUDFRONT_GEO_RESTRICTION = get_or_create_env_var('CLOUDFRONT_GEO_RESTRICTION', '') # A country that CloudFront restricts access to. See: https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/georestrictions.html
CLOUDFRONT_DISTRIBUTION_NAME = get_or_create_env_var('CLOUDFRONT_DISTRIBUTION_NAME', f"{CDK_PREFIX}CfDist")
CLOUDFRONT_DOMAIN = get_or_create_env_var('CLOUDFRONT_DOMAIN', "cloudfront_placeholder.net")


# Certificate for the application load balancer (optional, for HTTPS and logins through the ALB)
ACM_CERTIFICATE_ARN = get_or_create_env_var('ACM_CERTIFICATE_ARN', '')
SSL_CERTIFICATE_DOMAIN = get_or_create_env_var('SSL_CERTIFICATE_DOMAIN', '') # e.g. example.com or www.example.com

# This should be the CloudFront domain, the domain linked to your ACM certificate, or the DNS of your application load balancer (visible in the console afterwards)
if USE_CLOUDFRONT == "True":
    COGNITO_REDIRECTION_URL = get_or_create_env_var('COGNITO_REDIRECTION_URL', "https://" + CLOUDFRONT_DOMAIN)
elif SSL_CERTIFICATE_DOMAIN:
    COGNITO_REDIRECTION_URL = get_or_create_env_var('COGNITO_REDIRECTION_URL', "https://" + SSL_CERTIFICATE_DOMAIN)
else:
    COGNITO_REDIRECTION_URL = get_or_create_env_var('COGNITO_REDIRECTION_URL', "https://" + EXISTING_LOAD_BALANCER_DNS)

# Custom headers, e.g. if routing traffic through CloudFront
CUSTOM_HEADER = get_or_create_env_var('CUSTOM_HEADER', '') # Retrieving or setting CUSTOM_HEADER
CUSTOM_HEADER_VALUE = get_or_create_env_var('CUSTOM_HEADER_VALUE', '') # Retrieving or setting CUSTOM_HEADER_VALUE

# Firewall on top of the load balancer
LOAD_BALANCER_WEB_ACL_NAME = get_or_create_env_var('LOAD_BALANCER_WEB_ACL_NAME', f"{CDK_PREFIX}alb-web-acl")

# Firewall on top of CloudFront
WEB_ACL_NAME = get_or_create_env_var('WEB_ACL_NAME', f"{CDK_PREFIX}cloudfront-web-acl")

###
# File I/O options
###

OUTPUT_FOLDER = get_or_create_env_var('GRADIO_OUTPUT_FOLDER', 'output/') # 'output/'
INPUT_FOLDER = get_or_create_env_var('GRADIO_INPUT_FOLDER', 'input/') # 'input/'

# Allow files to be saved in a temporary folder for increased security in some instances
if OUTPUT_FOLDER == "TEMP" or INPUT_FOLDER == "TEMP":
    # Create a temporary directory that persists for the lifetime of the process.
    # (Using tempfile.TemporaryDirectory as a context manager would delete the
    # folder as soon as the 'with' block exits, leaving dangling paths.)
    temp_dir = tempfile.mkdtemp()
    print(f'Temporary directory created at: {temp_dir}')

    if OUTPUT_FOLDER == "TEMP": OUTPUT_FOLDER = temp_dir + "/"
    if INPUT_FOLDER == "TEMP": INPUT_FOLDER = temp_dir + "/"

###
# LOGGING OPTIONS
###

SAVE_LOGS_TO_CSV = get_or_create_env_var('SAVE_LOGS_TO_CSV', 'True')

### DYNAMODB logs. Whether to save to DynamoDB, and the headers of the table
SAVE_LOGS_TO_DYNAMODB = get_or_create_env_var('SAVE_LOGS_TO_DYNAMODB', 'True')
ACCESS_LOG_DYNAMODB_TABLE_NAME = get_or_create_env_var('ACCESS_LOG_DYNAMODB_TABLE_NAME', f"{CDK_PREFIX}dynamodb-access-log".lower())
FEEDBACK_LOG_DYNAMODB_TABLE_NAME = get_or_create_env_var('FEEDBACK_LOG_DYNAMODB_TABLE_NAME', f"{CDK_PREFIX}dynamodb-feedback".lower())
USAGE_LOG_DYNAMODB_TABLE_NAME = get_or_create_env_var('USAGE_LOG_DYNAMODB_TABLE_NAME', f"{CDK_PREFIX}dynamodb-usage".lower())

###
# REDACTION OPTIONS
###

# Get some environment variables and launch the Gradio app
COGNITO_AUTH = get_or_create_env_var('COGNITO_AUTH', '0')

GRADIO_SERVER_PORT = int(get_or_create_env_var('GRADIO_SERVER_PORT', '7860'))

###
# WHOLE DOCUMENT API OPTIONS
###

DAYS_TO_DISPLAY_WHOLE_DOCUMENT_JOBS = get_or_create_env_var('DAYS_TO_DISPLAY_WHOLE_DOCUMENT_JOBS', '7') # How many days into the past should whole-document Textract jobs be displayed? After that, the data is not deleted from the Textract jobs CSV, just filtered out. Included to align with S3 buckets, where the file outputs are automatically deleted after X days.
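
For reference, a minimal sketch of the config/cdk_config.env file that this module loads via load_dotenv (each key mirrors one of the get_or_create_env_var names above; every value here is illustrative, not taken from the repo):

    CDK_PREFIX=redact-
    AWS_ACCOUNT_ID=123456789012
    AWS_REGION=us-east-1
    VPC_NAME=my-existing-vpc
    USE_CLOUDFRONT=True
    RUN_USEAST_STACK=True
    ECS_READ_ONLY_FILE_SYSTEM=True
    GRADIO_OUTPUT_FOLDER=TEMP
    GRADIO_INPUT_FOLDER=TEMP
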
cdk/cdk_functions.py ADDED
@@ -0,0 +1,1293 @@
import boto3
from botocore.exceptions import ClientError
import json
import os
import pandas as pd
import ipaddress
from constructs import Construct
from dotenv import set_key
from typing import List, Tuple, Optional, Dict, Any
from aws_cdk import (
    App,
    CfnTag,
    aws_ec2 as ec2,
    aws_wafv2 as wafv2,
    aws_elasticloadbalancingv2 as elb,
    aws_elasticloadbalancingv2_actions as elb_act,
    aws_certificatemanager as acm, # You might need this if you were looking up a cert, but not strictly for an ARN
    aws_cognito as cognito,
    aws_iam as iam,
    CfnOutput,
    Tags
)

from cdk_config import PUBLIC_SUBNETS_TO_USE, PRIVATE_SUBNETS_TO_USE, PUBLIC_SUBNET_CIDR_BLOCKS, PRIVATE_SUBNET_CIDR_BLOCKS, PUBLIC_SUBNET_AVAILABILITY_ZONES, PRIVATE_SUBNET_AVAILABILITY_ZONES, POLICY_FILE_LOCATIONS, NAT_GATEWAY_EIP_NAME, S3_LOG_CONFIG_BUCKET_NAME, S3_OUTPUT_BUCKET_NAME, ACCESS_LOG_DYNAMODB_TABLE_NAME, FEEDBACK_LOG_DYNAMODB_TABLE_NAME, USAGE_LOG_DYNAMODB_TABLE_NAME, AWS_REGION

# --- Function to load context from file ---
def load_context_from_file(app: App, file_path: str):
    if os.path.exists(file_path):
        with open(file_path, 'r') as f:
            context_data = json.load(f)
        for key, value in context_data.items():
            app.node.set_context(key, value)
        print(f"Loaded context from {file_path}")
    else:
        print(f"Context file not found: {file_path}")

# --- Helper to parse environment variables into lists ---
def _get_env_list(env_var_value: str) -> List[str]:
    """Parses a list-like environment variable value (e.g. '["a", "b"]') into a list of strings."""
    # Strip the surrounding brackets and any quote characters
    value = env_var_value[1:-1].strip().replace('\"', '').replace("\'", "")
    if not value:
        return []
    # Split by comma and filter out any empty strings that might result from extra commas
    return [s.strip() for s in value.split(',') if s.strip()]

# 1. Try to load subnet names / CIDRs / AZs from environment variables
# (Note: pass the variable values, not their names, to the parser)
if PUBLIC_SUBNETS_TO_USE: PUBLIC_SUBNETS_TO_USE = _get_env_list(PUBLIC_SUBNETS_TO_USE)
if PRIVATE_SUBNETS_TO_USE: PRIVATE_SUBNETS_TO_USE = _get_env_list(PRIVATE_SUBNETS_TO_USE)

if PUBLIC_SUBNET_CIDR_BLOCKS: PUBLIC_SUBNET_CIDR_BLOCKS = _get_env_list(PUBLIC_SUBNET_CIDR_BLOCKS)
if PUBLIC_SUBNET_AVAILABILITY_ZONES: PUBLIC_SUBNET_AVAILABILITY_ZONES = _get_env_list(PUBLIC_SUBNET_AVAILABILITY_ZONES)
if PRIVATE_SUBNET_CIDR_BLOCKS: PRIVATE_SUBNET_CIDR_BLOCKS = _get_env_list(PRIVATE_SUBNET_CIDR_BLOCKS)
if PRIVATE_SUBNET_AVAILABILITY_ZONES: PRIVATE_SUBNET_AVAILABILITY_ZONES = _get_env_list(PRIVATE_SUBNET_AVAILABILITY_ZONES)

if POLICY_FILE_LOCATIONS: POLICY_FILE_LOCATIONS = _get_env_list(POLICY_FILE_LOCATIONS)
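
# Example (illustrative, not executed at deploy time): _get_env_list() strips the
# surrounding brackets and quotes from a list-like env var value and splits on commas:
#
#     _get_env_list('["10.0.1.0/24", "10.0.2.0/24"]')  # -> ['10.0.1.0/24', '10.0.2.0/24']
#     _get_env_list('[]')                              # -> []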

def check_for_existing_role(role_name: str):
    try:
        iam_client = boto3.client('iam')

        response = iam_client.get_role(RoleName=role_name)
        role = response['Role']['Arn']

        print("Response Role:", role)

        return True, role, ""
    except iam_client.exceptions.NoSuchEntityException:
        return False, "", ""
    except Exception as e:
        raise Exception(f"Getting information on IAM role failed due to: {e}")

# Assume POLICY_FILE_LOCATIONS is defined globally or passed as a default
# For example:
# POLICY_FILE_LOCATIONS = ["./policies/my_read_policy.json", "./policies/my_write_policy.json"]
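
# Illustrative pattern (role name hypothetical): the check_* helpers in this module
# share the same shape, returning (exists, resource-or-id, extra) so that
# check_resources.py can record the result in the context file:
#
#     exists, role_arn, _ = check_for_existing_role("RedactionTaskRole")
#     context_data["exists:RedactionTaskRole"] = exists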

def add_statement_to_policy(role: iam.IRole, policy_document: Dict[str, Any]):
    """
    Adds individual policy statements from a parsed policy document to a CDK Role.

    Args:
        role: The CDK Role construct to attach policies to.
        policy_document: A Python dictionary representing an IAM policy document.
    """
    # Ensure the loaded JSON is a valid policy document structure
    if 'Statement' not in policy_document or not isinstance(policy_document['Statement'], list):
        print("Warning: Policy document does not contain a 'Statement' list. Skipping.")
        return # Do not return the role, just log and exit

    for statement_dict in policy_document['Statement']:
        try:
            # Create a CDK PolicyStatement from the dictionary
            cdk_policy_statement = iam.PolicyStatement.from_json(statement_dict)

            # Add the policy statement to the role
            role.add_to_policy(cdk_policy_statement)
            print(f" - Added statement: {statement_dict.get('Sid', 'No Sid')}")
        except Exception as e:
            print(f"Warning: Could not process policy statement: {statement_dict}. Error: {e}")

def add_custom_policies(
    scope: Construct, # Not strictly used here, but good practice if you expand to ManagedPolicies
    role: iam.IRole,
    policy_file_locations: Optional[List[str]] = None,
    custom_policy_text: Optional[str] = None
) -> iam.IRole:
    """
    Loads custom policies from JSON files or a string and attaches them to a CDK Role.

    Args:
        scope: The scope in which to define constructs (if needed, e.g., for iam.ManagedPolicy).
        role: The CDK Role construct to attach policies to.
        policy_file_locations: List of file paths to JSON policy documents.
        custom_policy_text: A JSON string representing a policy document.

    Returns:
        The modified CDK Role construct.
    """
    if policy_file_locations is None:
        policy_file_locations = []

    current_source = "unknown source" # For error messages

    try:
        if policy_file_locations:
            print(f"Attempting to add policies from files to role {role.node.id}...")
            for path in policy_file_locations:
                current_source = f"file: {path}"
                try:
                    with open(path, 'r') as f:
                        policy_document = json.load(f)
                    print(f"Processing policy from {current_source}...")
                    add_statement_to_policy(role, policy_document)
                except FileNotFoundError:
                    print(f"Warning: Policy file not found at {path}. Skipping.")
                except json.JSONDecodeError as e:
                    print(f"Warning: Invalid JSON in policy file {path}: {e}. Skipping.")
                except Exception as e:
                    print(f"An unexpected error occurred processing policy from {path}: {e}. Skipping.")

        if custom_policy_text:
            current_source = "custom policy text string"
            print(f"Attempting to add policy from custom text to role {role.node.id}...")
            try:
                # Parse the JSON string into a Python dictionary
                policy_document = json.loads(custom_policy_text)
                print(f"Processing policy from {current_source}...")
                add_statement_to_policy(role, policy_document)
            except json.JSONDecodeError as e:
                print(f"Warning: Invalid JSON in custom_policy_text: {e}. Skipping.")
            except Exception as e:
                print(f"An unexpected error occurred processing policy from custom_policy_text: {e}. Skipping.")

        print(f"Finished processing custom policies for role {role.node.id}.")

    except Exception as e:
        print(f"An unhandled error occurred during policy addition for {current_source}: {e}")

    return role
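
# Illustrative usage (file path and role are hypothetical): a policy file such as
# config/sts_permissions.json would contain a standard IAM policy document, e.g.
#
# {
#   "Version": "2012-10-17",
#   "Statement": [
#     {"Sid": "AllowAssumeRole", "Effect": "Allow",
#      "Action": "sts:AssumeRole", "Resource": "*"}
#   ]
# }
#
# and would be attached inside a stack with:
#     add_custom_policies(self, task_role, policy_file_locations=["config/sts_permissions.json"])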

# Import the S3 Bucket class if you intend to return a CDK object later
# from aws_cdk import aws_s3 as s3

def check_s3_bucket_exists(bucket_name: str):
    """
    Checks if an S3 bucket with the given name exists and is accessible.

    Args:
        bucket_name: The name of the S3 bucket to check.

    Returns:
        A tuple: (bool indicating existence, bucket name or None).
        Note: returning a Boto3 S3 Bucket object from here is NOT ideal for
        direct use in CDK; you'll likely only need the boolean result or the
        bucket name for CDK lookups/creations.
    """
    s3_client = boto3.client('s3')
    try:
        # Use head_bucket to check for existence and access
        s3_client.head_bucket(Bucket=bucket_name)
        print(f"Bucket '{bucket_name}' exists and is accessible.")
        return True, bucket_name

    except ClientError as e:
        # '404' means the bucket does not exist.
        # '403' means the bucket exists but you don't have permission.
        error_code = e.response['Error']['Code']
        if error_code == '404':
            print(f"Bucket '{bucket_name}' does not exist.")
            return False, None
        elif error_code == '403':
            # NOTE: in testing this returned 403 even for buckets that don't exist,
            # so treat 403 as "not confirmed to exist" and return False.
            print(f"Bucket '{bucket_name}' returned 403: it may exist but be inaccessible due to permissions, or it may not exist. Returning False for existence just in case.")
            return False, bucket_name
        else:
            # For other errors, raise to indicate something unexpected happened
            print(f"An unexpected AWS ClientError occurred checking bucket '{bucket_name}': {e}")
            raise # Re-raise the original exception
    except Exception as e:
        print(f"An unexpected non-ClientError occurred checking bucket '{bucket_name}': {e}")
        raise # Re-raise the original exception

# Example usage in your check_resources.py:
# exists, bucket_name_if_exists = check_s3_bucket_exists(log_bucket_name)
# context_data[f"exists:{log_bucket_name}"] = exists
# # You don't necessarily need to store the name in context if using from_bucket_name

# Delete an S3 bucket
def delete_s3_bucket(bucket_name: str):
    s3 = boto3.client('s3')

    try:
        # List and delete all object versions and delete markers.
        # (Note: a single list_object_versions call returns at most 1000 entries;
        # very large buckets would need pagination.)
        response = s3.list_object_versions(Bucket=bucket_name)
        versions = response.get('Versions', []) + response.get('DeleteMarkers', [])
        for version in versions:
            s3.delete_object(Bucket=bucket_name, Key=version['Key'], VersionId=version['VersionId'])

        # Delete the bucket
        s3.delete_bucket(Bucket=bucket_name)
        return {'Status': 'SUCCESS'}
    except Exception as e:
        return {'Status': 'FAILED', 'Reason': str(e)}

# Function to get a subnet ID from a subnet name
def get_subnet_id(vpc: ec2.IVpc, ec2_client, subnet_name: str):
    response = ec2_client.describe_subnets(Filters=[{'Name': 'vpc-id', 'Values': [vpc.vpc_id]}])

    for subnet in response['Subnets']:
        if subnet.get('Tags') and any(tag['Key'] == 'Name' and tag['Value'] == subnet_name for tag in subnet['Tags']):
            return subnet['SubnetId']

    return None

def check_ecr_repo_exists(repo_name: str) -> tuple[bool, dict]:
    """
    Checks if an ECR repository with the given name exists.

    Args:
        repo_name: The name of the ECR repository to check.

    Returns:
        A tuple: (True and the repository description if it exists,
        False and {} otherwise).
    """
    ecr_client = boto3.client('ecr')
    try:
        print("ECR repo name to check:", repo_name)
        response = ecr_client.describe_repositories(repositoryNames=[repo_name])
        # If describe_repositories succeeds and returns a non-empty list,
        # the repository exists.
        return len(response['repositories']) > 0, response['repositories'][0]
    except ClientError as e:
        # Check for the specific error code indicating the repository doesn't exist
        if e.response['Error']['Code'] == 'RepositoryNotFoundException':
            return False, {}
        else:
            # Re-raise other exceptions to surface unexpected errors
            raise
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return False, {}

def check_codebuild_project_exists(project_name: str):
    """
    Checks if a CodeBuild project with the given name exists.

    Args:
        project_name: The name of the CodeBuild project to check.

    Returns:
        A tuple:
        - The first element is True if the project exists, False otherwise.
        - The second element is the project ARN if found, None otherwise.
    """
    codebuild_client = boto3.client('codebuild')
    try:
        # Use batch_get_projects with a list containing the single project name
        response = codebuild_client.batch_get_projects(names=[project_name])

        # The response for batch_get_projects includes 'projects' (found)
        # and 'projectsNotFound' (not found).
        if response['projects']:
            # The project is found in the 'projects' list
            print(f"CodeBuild project '{project_name}' found.")
            return True, response['projects'][0]['arn']
        elif response['projectsNotFound'] and project_name in response['projectsNotFound']:
            # The project name is explicitly in the 'projectsNotFound' list
            print(f"CodeBuild project '{project_name}' not found.")
            return False, None
        else:
            # Less expected for a single-name lookup; it's safest to assume
            # the project wasn't found if it isn't in 'projects'.
            print(f"CodeBuild project '{project_name}' not found (not in 'projects' list).")
            return False, None

    except ClientError as e:
        # batch_get_projects typically lists missing projects in 'projectsNotFound'
        # rather than raising; other ClientErrors (e.g. permissions) are still possible.
        print(f"An AWS ClientError occurred checking CodeBuild project '{project_name}': {e}")
        raise # Re-raise the original exception
    except Exception as e:
        print(f"An unexpected non-ClientError occurred checking CodeBuild project '{project_name}': {e}")
        raise # Re-raise the original exception

def get_vpc_id_by_name(vpc_name: str) -> Tuple[Optional[str], List[Dict[str, Any]]]:
    """
    Finds a VPC ID by its 'Name' tag, and lists any NAT Gateways in that VPC.
    Returns (vpc_id, nat_gateways), or (None, []) if the VPC is not found.
    """
    ec2_client = boto3.client('ec2')
    try:
        response = ec2_client.describe_vpcs(
            Filters=[
                {'Name': 'tag:Name', 'Values': [vpc_name]}
            ]
        )
        if response and response['Vpcs']:
            vpc_id = response['Vpcs'][0]['VpcId']
            print(f"VPC '{vpc_name}' found with ID: {vpc_id}")

            # Look for NAT Gateways in this VPC
            nat_gateways = []
            try:
                response = ec2_client.describe_nat_gateways(
                    Filters=[
                        {'Name': 'vpc-id', 'Values': [vpc_id]},
                        # Optional: add a tag filter if you consistently tag your NATs
                        # {'Name': 'tag:Name', 'Values': [f"{prefix}-nat-gateway"]}
                    ]
                )
                nat_gateways = response.get('NatGateways', [])
            except Exception as e:
                print(f"Warning: Could not describe NAT Gateways in VPC '{vpc_id}': {e}")
                # Proceed with an empty list rather than failing the whole lookup

            return vpc_id, nat_gateways
        else:
            print(f"VPC '{vpc_name}' not found.")
            return None, []
    except Exception as e:
        print(f"An unexpected error occurred finding VPC '{vpc_name}': {e}")
        raise

# --- Helper to fetch all existing subnets in a VPC once ---
def _get_existing_subnets_in_vpc(vpc_id: str) -> Dict[str, Any]:
    """
    Fetches all subnets in a given VPC.
    Returns a dictionary with 'by_name' (map of name to subnet data),
    'by_id' (map of id to subnet data), and 'cidr_networks' (list of ipaddress.IPv4Network).
    """
    ec2_client = boto3.client('ec2')
    existing_subnets_data = {
        "by_name": {},      # {subnet_name: {'id': 'subnet-id', 'cidr': 'x.x.x.x/x'}}
        "by_id": {},        # {subnet_id: {'name': 'subnet-name', 'cidr': 'x.x.x.x/x'}}
        "cidr_networks": [] # List of ipaddress.IPv4Network objects
    }
    try:
        response = ec2_client.describe_subnets(Filters=[{'Name': 'vpc-id', 'Values': [vpc_id]}])
        for s in response.get('Subnets', []):
            subnet_id = s['SubnetId']
            cidr_block = s.get('CidrBlock')
            # Extract the 'Name' tag, which is crucial for lookup by name
            name_tag = next((tag['Value'] for tag in s.get('Tags', []) if tag['Key'] == 'Name'), None)

            subnet_info = {'id': subnet_id, 'cidr': cidr_block, 'name': name_tag}

            if name_tag:
                existing_subnets_data["by_name"][name_tag] = subnet_info
            existing_subnets_data["by_id"][subnet_id] = subnet_info

            if cidr_block:
                try:
                    existing_subnets_data["cidr_networks"].append(ipaddress.ip_network(cidr_block, strict=False))
                except ValueError:
                    print(f"Warning: Existing subnet {subnet_id} has an invalid CIDR: {cidr_block}. Skipping for overlap check.")

        print(f"Fetched {len(response.get('Subnets', []))} existing subnets from VPC '{vpc_id}'.")
    except Exception as e:
        print(f"Error describing existing subnets in VPC '{vpc_id}': {e}. Cannot perform full validation.")
        raise # Re-raise if this essential step fails

    return existing_subnets_data

# --- Modified validate_subnet_creation_parameters to take pre-fetched data ---
def validate_subnet_creation_parameters(
    vpc_id: str,
    proposed_subnets_data: List[Dict[str, str]], # e.g., [{'name': 'my-public-subnet', 'cidr': '10.0.0.0/24', 'az': 'us-east-1a'}]
    existing_aws_subnets_data: Dict[str, Any]    # Pre-fetched data from _get_existing_subnets_in_vpc
) -> None:
    """
    Validates proposed subnet names and CIDR blocks against existing AWS subnets
    in the specified VPC and against each other.
    This function uses pre-fetched AWS subnet data.

    Args:
        vpc_id: The ID of the VPC (for logging/error messages).
        proposed_subnets_data: A list of dictionaries, where each dict represents
                               a proposed subnet with 'name', 'cidr', and 'az'.
        existing_aws_subnets_data: Dictionary containing existing AWS subnet data
                                   (e.g., from _get_existing_subnets_in_vpc).

    Raises:
        ValueError: If any proposed subnet is incomplete, has an invalid CIDR,
                    duplicates a name within the batch, or overlaps a CIDR with
                    the batch or with existing AWS subnets. A name that already
                    exists in the VPC is only warned about, not raised.
    """
    if not proposed_subnets_data:
        print("No proposed subnet data provided for validation. Skipping.")
        return

    print(f"--- Starting pre-synth validation for VPC '{vpc_id}' with proposed subnets ---")

    print("Existing subnet data:", pd.DataFrame(existing_aws_subnets_data['by_name']))

    existing_aws_subnet_names = set(existing_aws_subnets_data["by_name"].keys())
    existing_aws_cidr_networks = existing_aws_subnets_data["cidr_networks"]

    # Sets to track names and a list to track networks for internal batch consistency
    proposed_names_seen: set[str] = set()
    proposed_cidr_networks_seen: List[ipaddress.IPv4Network] = []

    for i, proposed_subnet in enumerate(proposed_subnets_data):
        subnet_name = proposed_subnet.get('name')
        cidr_block_str = proposed_subnet.get('cidr')
        availability_zone = proposed_subnet.get('az')

        if not all([subnet_name, cidr_block_str, availability_zone]):
            raise ValueError(f"Proposed subnet at index {i} is incomplete. Requires 'name', 'cidr', and 'az'.")

        # 1. Check for duplicate names within the proposed batch
        if subnet_name in proposed_names_seen:
            raise ValueError(f"Proposed subnet name '{subnet_name}' is duplicated within the input list.")
        proposed_names_seen.add(subnet_name)

        # 2. Check for duplicate names against existing AWS subnets (warn only)
        if subnet_name in existing_aws_subnet_names:
            print(f"Proposed subnet name '{subnet_name}' already exists in VPC '{vpc_id}'.")

        # Parse the proposed CIDR
        try:
            proposed_net = ipaddress.ip_network(cidr_block_str, strict=False)
        except ValueError as e:
            raise ValueError(f"Invalid CIDR format '{cidr_block_str}' for proposed subnet '{subnet_name}': {e}")

        # 3. Check for overlapping CIDRs within the proposed batch
        for existing_proposed_net in proposed_cidr_networks_seen:
            if proposed_net.overlaps(existing_proposed_net):
                raise ValueError(
                    f"Proposed CIDR '{cidr_block_str}' for subnet '{subnet_name}' "
                    f"overlaps with another proposed CIDR '{str(existing_proposed_net)}' "
                    f"within the same batch."
                )

        # 4. Check for overlapping CIDRs against existing AWS subnets
        for existing_aws_net in existing_aws_cidr_networks:
            if proposed_net.overlaps(existing_aws_net):
                raise ValueError(
                    f"Proposed CIDR '{cidr_block_str}' for subnet '{subnet_name}' "
                    f"overlaps with an existing AWS subnet CIDR '{str(existing_aws_net)}' "
                    f"in VPC '{vpc_id}'."
                )

        # If all checks pass for this subnet, record its network for subsequent checks
        proposed_cidr_networks_seen.append(proposed_net)
        print(f"Validation successful for proposed subnet '{subnet_name}' with CIDR '{cidr_block_str}'.")

    print(f"--- All proposed subnets passed pre-synth validation checks for VPC '{vpc_id}'. ---")
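
# Illustrative call (names and CIDRs hypothetical): fetch the live data once,
# then validate the whole batch before any CDK constructs are defined.
#
#     existing = _get_existing_subnets_in_vpc("vpc-0123456789abcdef0")
#     validate_subnet_creation_parameters(
#         "vpc-0123456789abcdef0",
#         [{'name': 'PublicSubnet1', 'cidr': '10.0.1.0/24', 'az': 'us-east-1a'},
#          {'name': 'PublicSubnet2', 'cidr': '10.0.2.0/24', 'az': 'us-east-1b'}],
#         existing,
#     )  # raises ValueError on CIDR overlap or in-batch duplicate names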

# --- Modified check_subnet_exists_by_name (uses pre-fetched data) ---
def check_subnet_exists_by_name(
    subnet_name: str,
    existing_aws_subnets_data: Dict[str, Any]
) -> Tuple[bool, Optional[str]]:
    """
    Checks if a subnet with the given name exists within the pre-fetched data.

    Args:
        subnet_name: The 'Name' tag value of the subnet to check.
        existing_aws_subnets_data: Dictionary containing existing AWS subnet data
                                   (e.g., from _get_existing_subnets_in_vpc).

    Returns:
        A tuple:
        - The first element is True if the subnet exists, False otherwise.
        - The second element is the Subnet ID if found, None otherwise.
    """
    subnet_info = existing_aws_subnets_data["by_name"].get(subnet_name)
    if subnet_info:
        print(f"Subnet '{subnet_name}' found with ID: {subnet_info['id']}")
        return True, subnet_info['id']
    else:
        print(f"Subnet '{subnet_name}' not found.")
        return False, None

def create_nat_gateway(
    scope: Construct,
    public_subnet_for_nat: ec2.ISubnet, # Expects a proper ISubnet
    nat_gateway_name: str,
    nat_gateway_id_context_key: str
) -> str:
    """
    Creates a single NAT Gateway in the specified public subnet.
    It does not handle lookup from context; the calling stack should do that.
    Returns the CloudFormation Ref of the NAT Gateway ID.
    """
    print(f"Defining a new NAT Gateway '{nat_gateway_name}' in subnet '{public_subnet_for_nat.subnet_id}'.")

    # Create an Elastic IP for the NAT Gateway
    eip = ec2.CfnEIP(scope, NAT_GATEWAY_EIP_NAME,
        tags=[CfnTag(key="Name", value=NAT_GATEWAY_EIP_NAME)]
    )

    # Create the NAT Gateway
    nat_gateway_logical_id = nat_gateway_name.replace('-', '') + "NatGateway"
    nat_gateway = ec2.CfnNatGateway(scope, nat_gateway_logical_id,
        subnet_id=public_subnet_for_nat.subnet_id, # Associate with the public subnet
        allocation_id=eip.attr_allocation_id,      # Associate with the EIP
        tags=[CfnTag(key="Name", value=nat_gateway_name)]
    )
    # The NAT GW depends on the EIP. The dependency on the subnet is implicit via subnet_id.
    nat_gateway.add_dependency(eip)

    # *** CRUCIAL: Use CfnOutput to export the ID after deployment ***
    # This is how you will get the ID to put into cdk.context.json
    CfnOutput(scope, "SingleNatGatewayIdOutput",
        value=nat_gateway.ref,
        description=f"Physical ID of the single NAT Gateway. Add this to cdk.context.json under the key '{nat_gateway_id_context_key}'.",
        export_name=f"{scope.stack_name}-NatGatewayId" # Make the export name unique
    )

    print(f"CDK: Defined new NAT Gateway '{nat_gateway.ref}'. Its physical ID will be available in the stack outputs after deployment.")
    # Return the tokenised reference for use within this synthesis
    return nat_gateway.ref
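
# Illustrative round-trip (context key hypothetical): after the first deploy, copy the
# NAT Gateway ID from the stack outputs into cdk.context.json so later synths reuse it:
#
# cdk.context.json:
#     { "single_nat_gateway_id": "nat-0123456789abcdef0" }
#
# In the stack, prefer the context value and only create a new gateway when absent:
#     nat_id = self.node.try_get_context("single_nat_gateway_id") \
#         or create_nat_gateway(self, public_subnet, NAT_GATEWAY_NAME, "single_nat_gateway_id")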

def create_subnets(
    scope: Construct,
    vpc: ec2.IVpc,
    prefix: str,
    subnet_names: List[str],
    cidr_blocks: List[str],
    availability_zones: List[str],
    is_public: bool,
    internet_gateway_id: Optional[str] = None,
    single_nat_gateway_id: Optional[str] = None
) -> Tuple[List[ec2.Subnet], List[ec2.IRouteTable]]:
    """
    Creates subnets using L2 constructs and returns the subnets together with
    their route tables (the L2 Subnet construct creates a route table per subnet).
    """
    # --- Validations ---
    if not (len(subnet_names) == len(cidr_blocks) == len(availability_zones) > 0):
        raise ValueError("Subnet names, CIDR blocks, and Availability Zones lists must be non-empty and match in length.")
    if is_public and not internet_gateway_id:
        raise ValueError("internet_gateway_id must be provided for public subnets.")
    if not is_public and not single_nat_gateway_id:
        raise ValueError("single_nat_gateway_id must be provided for private subnets when using a single NAT Gateway.")

    # --- These lists are populated with the created constructs to return ---
    created_subnets: List[ec2.Subnet] = []
    created_route_tables: List[ec2.IRouteTable] = []

    subnet_type_tag = "public" if is_public else "private"

    for i, subnet_name in enumerate(subnet_names):
        logical_id = f"{prefix}{subnet_type_tag.capitalize()}Subnet{i+1}"

        # 1. Create the L2 Subnet
        subnet = ec2.Subnet(
            scope,
            logical_id,
            vpc_id=vpc.vpc_id,
            cidr_block=cidr_blocks[i],
            availability_zone=availability_zones[i],
            map_public_ip_on_launch=is_public
        )
        Tags.of(subnet).add("Name", subnet_name)
        Tags.of(subnet).add("Type", subnet_type_tag)

        if is_public:
            # The subnet's route table is automatically created by the L2 Subnet construct
            try:
                subnet.add_route(
                    "DefaultInternetRoute", # A logical ID for the CfnRoute resource
                    router_id=internet_gateway_id,
                    router_type=ec2.RouterType.GATEWAY,
                    # destination_cidr_block="0.0.0.0/0" is the default for this method
                )
            except Exception as e:
                print("Could not create IGW route for public subnet due to:", e)
            print(f"CDK: Defined public L2 subnet '{subnet_name}' and added IGW route.")
        else:
            try:
                # Using .add_route() for private subnets as well, for consistency
                subnet.add_route(
                    "DefaultNatRoute", # A logical ID for the CfnRoute resource
                    router_id=single_nat_gateway_id,
                    router_type=ec2.RouterType.NAT_GATEWAY,
                )
            except Exception as e:
                print("Could not create NAT gateway route for private subnet due to:", e)
            print(f"CDK: Defined private L2 subnet '{subnet_name}' and added NAT GW route.")

        route_table = subnet.route_table

        created_subnets.append(subnet)
        created_route_tables.append(route_table)

    return created_subnets, created_route_tables
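
# Illustrative usage inside a stack (all argument values hypothetical):
#
#     public_subnets, public_rts = create_subnets(
#         self, vpc, CDK_PREFIX,
#         subnet_names=PUBLIC_SUBNETS_TO_USE,
#         cidr_blocks=PUBLIC_SUBNET_CIDR_BLOCKS,
#         availability_zones=PUBLIC_SUBNET_AVAILABILITY_ZONES,
#         is_public=True,
#         internet_gateway_id=EXISTING_IGW_ID,
#     )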

def ingress_rule_exists(security_group: ec2.SecurityGroup, peer, port=None):
    # Best-effort check: this only inspects the security groups already connected
    # via the L2 'connections' abstraction, not the full set of ingress rules.
    for rule in security_group.connections.security_groups:
        if port:
            if rule.peer == peer and rule.connection == port:
                return True
        else:
            if rule.peer == peer:
                return True
    return False

def check_for_existing_user_pool(user_pool_name: str):
    cognito_client = boto3.client("cognito-idp")
    list_pools_response = cognito_client.list_user_pools(MaxResults=60) # MaxResults can be at most 60

    # ListUserPools may require pagination if you have more than 60 pools;
    # this simple version doesn't paginate, so it could miss your pool.

    existing_user_pool_id = ""
    found_pool = None

    for pool in list_pools_response.get('UserPools', []):
        if pool.get('Name') == user_pool_name:
            existing_user_pool_id = pool['Id']
            found_pool = pool
            print(f"Found existing user pool by name '{user_pool_name}' with ID: {existing_user_pool_id}")
            break # Found the one we're looking for

    if existing_user_pool_id:
        return True, existing_user_pool_id, found_pool
    else:
        return False, "", {}

def check_for_existing_user_pool_client(user_pool_id: str, user_pool_client_name: str):
    """
    Checks if a Cognito User Pool Client with the given name exists in the specified User Pool.

    Args:
        user_pool_id: The ID of the Cognito User Pool.
        user_pool_client_name: The name of the User Pool Client to check for.

    Returns:
        A tuple:
        - True, client_id, client_details if the client exists.
        - False, "", {} otherwise.
    """
    cognito_client = boto3.client("cognito-idp")
    next_token = None # The first request must not include a NextToken

    while True:
        kwargs = {'UserPoolId': user_pool_id, 'MaxResults': 60}
        if next_token:
            kwargs['NextToken'] = next_token
        try:
            response = cognito_client.list_user_pool_clients(**kwargs)
        except cognito_client.exceptions.ResourceNotFoundException:
            print(f"Error: User pool with ID '{user_pool_id}' not found.")
            return False, "", {}
        except cognito_client.exceptions.InvalidParameterException:
            print(f"Error: No app clients for '{user_pool_id}' found.")
            return False, "", {}
        except Exception as e:
            print("Could not check User Pool clients due to:", e)
            return False, "", {}

        for client in response.get('UserPoolClients', []):
            if client.get('ClientName') == user_pool_client_name:
                print(f"Found existing user pool client '{user_pool_client_name}' with ID: {client['ClientId']}")
                return True, client['ClientId'], client

        next_token = response.get('NextToken')
        if not next_token:
            break

    return False, "", {}

def check_for_secret(secret_name: str, secret_value: Optional[dict] = None):
    """
    Checks if a Secrets Manager secret with the given name exists.
    (Note: despite the secret_value argument, this function only checks for the
    secret; it does not create one.)

    Args:
        secret_name: The name of the Secrets Manager secret.
        secret_value: A dictionary of key-value pairs for the secret (unused here).

    Returns:
        A tuple: (True and the secret value response if the secret exists,
        False and {} otherwise).
    """
    secretsmanager_client = boto3.client("secretsmanager")

    try:
        # Try to get the secret. If it doesn't exist, a ResourceNotFoundException is raised.
        secret_value = secretsmanager_client.get_secret_value(SecretId=secret_name)
        print(f"Secret '{secret_name}' already exists.")
        return True, secret_value
    except secretsmanager_client.exceptions.ResourceNotFoundException:
        print("Secret not found")
        return False, {}
    except Exception as e:
        # Handle other potential exceptions during the get operation
        print(f"Error checking for secret '{secret_name}': {e}")
        return False, {}

def check_alb_exists(load_balancer_name: str, region_name: str = None) -> tuple[bool, dict]:
    """
    Checks if an Application Load Balancer (ALB) with the given name exists.

    Args:
        load_balancer_name: The name of the ALB to check.
        region_name: The AWS region to check in. If None, uses the default
                     session region.

    Returns:
        A tuple:
        - The first element is True if the ALB exists, False otherwise.
        - The second element is the ALB object (dictionary) if found,
          {} otherwise. Specifically, it returns the first element of
          the LoadBalancers list from the describe_load_balancers response.
    """
    if region_name:
        elbv2_client = boto3.client('elbv2', region_name=region_name)
    else:
        elbv2_client = boto3.client('elbv2')
    try:
        response = elbv2_client.describe_load_balancers(Names=[load_balancer_name])
        if response['LoadBalancers']:
            return True, response['LoadBalancers'][0] # Return True and the first ALB object
        else:
            return False, {}
    except ClientError as e:
        # If the error indicates the ALB doesn't exist, return False
        if e.response['Error']['Code'] == 'LoadBalancerNotFound':
            return False, {}
        else:
            # Re-raise other exceptions
            raise
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return False, {}

def check_fargate_task_definition_exists(task_definition_name: str, region_name: str = None) -> tuple[bool, dict]:
    """
    Checks if a Fargate task definition with the given name exists.

    Args:
        task_definition_name: The name or ARN of the task definition to check.
        region_name: The AWS region to check in. If None, uses the default
                     session region.

    Returns:
        A tuple:
        - The first element is True if the task definition exists, False otherwise.
        - The second element is the task definition object (dictionary) if found,
          {} otherwise.
    """
    if region_name:
        ecs_client = boto3.client('ecs', region_name=region_name)
    else:
        ecs_client = boto3.client('ecs')
    try:
        response = ecs_client.describe_task_definition(taskDefinition=task_definition_name)
        # If describe_task_definition succeeds, it returns the task definition directly.
        return True, response['taskDefinition']
    except ClientError as e:
        # A missing task definition surfaces as a ClientException whose message
        # indicates the task definition does not exist.
        error_message = e.response.get('Error', {}).get('Message', '')
        if e.response['Error']['Code'] == 'ClientException' and 'Task definition' in error_message and 'does not exist' in error_message:
            return False, {}
        else:
            # Re-raise other exceptions.
            raise
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return False, {}

def check_ecs_service_exists(cluster_name: str, service_name: str, region_name: str = None) -> tuple[bool, dict]:
    """
    Checks if an ECS service with the given name exists in the specified cluster.

    Args:
        cluster_name: The name or ARN of the ECS cluster.
        service_name: The name of the ECS service to check.
        region_name: The AWS region to check in. If None, uses the default
                     session region.

    Returns:
        A tuple:
        - The first element is True if the service exists, False otherwise.
        - The second element is the service object (dictionary) if found,
          {} otherwise.
    """
    if region_name:
        ecs_client = boto3.client('ecs', region_name=region_name)
    else:
        ecs_client = boto3.client('ecs')
    try:
        response = ecs_client.describe_services(cluster=cluster_name, services=[service_name])
        if response['services']:
            return True, response['services'][0] # Return True and the first service object
        else:
            return False, {}
    except ClientError as e:
        # Check for the error codes indicating the cluster or service doesn't exist.
        if e.response['Error']['Code'] in ('ClusterNotFoundException', 'ServiceNotFoundException'):
            return False, {}
        else:
            # Re-raise other exceptions.
            raise
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return False, {}
857
+ def check_cloudfront_distribution_exists(distribution_name: str, region_name: str = None) -> tuple[bool, dict | None]:
+     """
+     Checks if a CloudFront distribution with the given alias exists.
+
+     Args:
+         distribution_name: The alias (CNAME) of the CloudFront distribution to check.
+         region_name: The AWS region to check in. If None, uses the default
+                      session region. Note: CloudFront is a global service,
+                      so the region is usually 'us-east-1', but this parameter
+                      is included for completeness.
+
+     Returns:
+         A tuple:
+         - The first element is True if the distribution exists, False otherwise.
+         - The second element is the matching distribution summary (dictionary)
+           from the ListDistributions response if found, None otherwise.
+     """
+     if region_name:
+         cf_client = boto3.client('cloudfront', region_name=region_name)
+     else:
+         cf_client = boto3.client('cloudfront')
+     try:
+         # CloudFront doesn't directly filter by name, so we have to iterate.
+         # Note: list_distributions is paginated; only the first page is checked here.
+         response = cf_client.list_distributions()
+         if 'Items' in response['DistributionList']:
+             for distribution in response['DistributionList']['Items']:
+                 aliases = distribution.get('Aliases', {}).get('Items', [])
+                 if distribution_name in aliases:
+                     return True, distribution
+             return False, None
+         else:
+             return False, None
+     except ClientError as e:
+         # If the error indicates the distribution doesn't exist, return False
+         if e.response['Error']['Code'] == 'NoSuchDistribution':
+             return False, None
+         else:
+             # Re-raise other exceptions
+             raise
+     except Exception as e:
+         print(f"An unexpected error occurred: {e}")
+         return False, None
+
+ def create_web_acl_with_common_rules(scope: Construct, web_acl_name: str, waf_scope: str = "CLOUDFRONT"):
+     '''
+     Use CDK to create a web ACL based on AWS common rule sets with overrides.
+     This function expects a 'scope' argument, typically 'self' from your stack,
+     as CfnWebACL requires a construct scope.
+     '''
+
+     # Create full list of rules
+     rules = []
+     aws_ruleset_names = [
+         "AWSManagedRulesCommonRuleSet",
+         "AWSManagedRulesKnownBadInputsRuleSet",
+         "AWSManagedRulesAmazonIpReputationList"
+     ]
+
+     # Use a separate counter to assign unique priorities sequentially
+     priority_counter = 1
+
+     for aws_rule_name in aws_ruleset_names:
+         current_rule_action_overrides = None
+
+         # All managed rule groups need an override_action.
+         # 'none' means use the managed rule group's default action.
+         current_override_action = wafv2.CfnWebACL.OverrideActionProperty(none={})
+
+         current_priority = priority_counter
+         priority_counter += 1
+
+         if aws_rule_name == "AWSManagedRulesCommonRuleSet":
+             # Allow request bodies larger than the rule set's default size limit.
+             current_rule_action_overrides = [
+                 wafv2.CfnWebACL.RuleActionOverrideProperty(
+                     name="SizeRestrictions_BODY",
+                     action_to_use=wafv2.CfnWebACL.RuleActionProperty(
+                         allow={}
+                     )
+                 )
+             ]
+
+         rule_property = wafv2.CfnWebACL.RuleProperty(
+             name=aws_rule_name,
+             priority=current_priority,
+             statement=wafv2.CfnWebACL.StatementProperty(
+                 managed_rule_group_statement=wafv2.CfnWebACL.ManagedRuleGroupStatementProperty(
+                     vendor_name="AWS",
+                     name=aws_rule_name,
+                     rule_action_overrides=current_rule_action_overrides
+                 )
+             ),
+             visibility_config=wafv2.CfnWebACL.VisibilityConfigProperty(
+                 cloud_watch_metrics_enabled=True,
+                 metric_name=aws_rule_name,
+                 sampled_requests_enabled=True
+             ),
+             override_action=current_override_action  # Required for all managed rule groups
+         )
+
+         rules.append(rule_property)
+
+     # Add the rate limit rule
+     rate_limit_priority = priority_counter  # Use the next available priority
+     rules.append(wafv2.CfnWebACL.RuleProperty(
+         name="RateLimitRule",
+         priority=rate_limit_priority,
+         statement=wafv2.CfnWebACL.StatementProperty(
+             rate_based_statement=wafv2.CfnWebACL.RateBasedStatementProperty(
+                 limit=1000,
+                 aggregate_key_type="IP"
+             )
+         ),
+         visibility_config=wafv2.CfnWebACL.VisibilityConfigProperty(
+             cloud_watch_metrics_enabled=True,
+             metric_name="RateLimitRule",
+             sampled_requests_enabled=True
+         ),
+         action=wafv2.CfnWebACL.RuleActionProperty(
+             block={}
+         )
+     ))
+
+     web_acl = wafv2.CfnWebACL(
+         scope,
+         "WebACL",
+         name=web_acl_name,
+         default_action=wafv2.CfnWebACL.DefaultActionProperty(allow={}),
+         scope=waf_scope,
+         visibility_config=wafv2.CfnWebACL.VisibilityConfigProperty(
+             cloud_watch_metrics_enabled=True,
+             metric_name="webACL",
+             sampled_requests_enabled=True
+         ),
+         rules=rules
+     )
+
+     CfnOutput(scope, "WebACLArn", value=web_acl.attr_arn)
+
+     return web_acl
+
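For context, a minimal sketch of how this helper might be invoked from a stack (the stack class and ACL name below are illustrative):

class ExampleWafStack(Stack):
    def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)
        # CLOUDFRONT-scoped ACLs must be deployed in us-east-1;
        # pass waf_scope="REGIONAL" for an ALB-attached ACL instead.
        web_acl = create_web_acl_with_common_rules(self, "example-web-acl")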
+ def check_web_acl_exists(web_acl_name: str, scope: str, region_name: str = None) -> tuple[bool, dict]:
+     """
+     Checks if a Web ACL with the given name and scope exists.
+
+     Args:
+         web_acl_name: The name of the Web ACL to check.
+         scope: The scope of the Web ACL ('CLOUDFRONT' or 'REGIONAL').
+         region_name: The AWS region to check in. Required for REGIONAL scope.
+                      If None, uses the default session region. For CLOUDFRONT,
+                      the region must be 'us-east-1'.
+
+     Returns:
+         A tuple:
+         - The first element is True if the Web ACL exists, False otherwise.
+         - The second element is the Web ACL object (dictionary) if found,
+           or an empty dict otherwise.
+     """
+     if scope not in ['CLOUDFRONT', 'REGIONAL']:
+         raise ValueError("Scope must be either 'CLOUDFRONT' or 'REGIONAL'")
+
+     if scope == 'REGIONAL' and not region_name:
+         raise ValueError("Region name is required for REGIONAL scope")
+
+     if scope == 'CLOUDFRONT':
+         region_name = 'us-east-1'  # CloudFront scope requires us-east-1
+
+     if region_name:
+         waf_client = boto3.client('wafv2', region_name=region_name)
+     else:
+         waf_client = boto3.client('wafv2')
+     try:
+         response = waf_client.list_web_acls(Scope=scope)
+         if 'WebACLs' in response:
+             for web_acl in response['WebACLs']:
+                 if web_acl['Name'] == web_acl_name:
+                     # Fetch the full Web ACL object (get_web_acl also needs the Id).
+                     get_response = waf_client.get_web_acl(Name=web_acl_name, Scope=scope, Id=web_acl['Id'])
+                     return True, get_response['WebACL']
+             return False, {}
+         else:
+             return False, {}
+     except ClientError as e:
+         # Check for the error code indicating the web ACL doesn't exist.
+         if e.response['Error']['Code'] == 'WAFNonexistentItemException':
+             return False, {}
+         else:
+             # Re-raise other exceptions.
+             raise
+     except Exception as e:
+         print(f"An unexpected error occurred: {e}")
+         return False, {}
+
+ def add_alb_https_listener_with_cert(
+     scope: Construct,
+     logical_id: str,  # A unique ID for this listener construct
+     alb: elb.ApplicationLoadBalancer,
+     acm_certificate_arn: Optional[str],  # Optional: if None, no HTTPS listener will be created
+     default_target_group: elb.ITargetGroup,  # Mandatory: the target group to forward traffic to
+     listener_port_https: int = 443,
+     listener_open_to_internet: bool = False,  # Be cautious with True; ensure the ALB security group restricts access
+     # --- Cognito Authentication Parameters ---
+     enable_cognito_auth: bool = False,
+     cognito_user_pool: Optional[cognito.IUserPool] = None,
+     cognito_user_pool_client: Optional[cognito.IUserPoolClient] = None,
+     cognito_user_pool_domain: Optional[cognito.IUserPoolDomain] = None,  # The UserPoolDomain construct added to the pool
+     cognito_auth_scope: Optional[str] = "openid profile email",  # Default recommended scope
+     cognito_auth_on_unauthenticated_request: elb.UnauthenticatedAction = elb.UnauthenticatedAction.AUTHENTICATE,
+     stickiness_cookie_duration=None
+     # --- End Cognito Parameters ---
+ ) -> Optional[elb.ApplicationListener]:
+     """
+     Conditionally adds an HTTPS listener to an ALB with an ACM certificate,
+     and optionally enables Cognito User Pool authentication.
+
+     Args:
+         scope (Construct): The scope in which to define this construct (e.g., your CDK Stack).
+         logical_id (str): A unique logical ID for the listener construct within the stack.
+         alb (elb.ApplicationLoadBalancer): The Application Load Balancer to add the listener to.
+         acm_certificate_arn (Optional[str]): The ARN of the ACM certificate to attach.
+                                              If None, the HTTPS listener will NOT be created.
+         default_target_group (elb.ITargetGroup): The default target group for the listener to forward traffic to.
+                                                  This is mandatory for a functional listener.
+         listener_port_https (int): The HTTPS port to listen on (default: 443).
+         listener_open_to_internet (bool): Whether the listener should allow connections from all sources.
+                                           If False (recommended), ensure your ALB's security group allows
+                                           inbound traffic on this port from desired sources.
+         enable_cognito_auth (bool): Set to True to enable Cognito User Pool authentication.
+         cognito_user_pool (Optional[cognito.IUserPool]): The Cognito User Pool object. Required if enable_cognito_auth is True.
+         cognito_user_pool_client (Optional[cognito.IUserPoolClient]): The Cognito User Pool App Client object. Required if enable_cognito_auth is True.
+         cognito_user_pool_domain (Optional[cognito.IUserPoolDomain]): The User Pool Domain construct. Required if enable_cognito_auth is True.
+         cognito_auth_scope (Optional[str]): The scope for the Cognito authentication.
+         cognito_auth_on_unauthenticated_request (elb.UnauthenticatedAction): Action for unauthenticated requests.
+                                                                              Defaults to AUTHENTICATE (redirect to login).
+
+     Returns:
+         Optional[elb.ApplicationListener]: The created ApplicationListener if successful,
+                                            None if no ACM certificate ARN was provided.
+     """
+     https_listener = None
+     if acm_certificate_arn:
+         certificates_list = [elb.ListenerCertificate.from_arn(acm_certificate_arn)]
+         print(f"Attempting to add ALB HTTPS listener on port {listener_port_https} with ACM certificate: {acm_certificate_arn}")
+
+         # Determine the default action based on whether Cognito auth is enabled
+         default_action = None
+         if enable_cognito_auth:
+             if not all([cognito_user_pool, cognito_user_pool_client, cognito_user_pool_domain]):
+                 raise ValueError(
+                     "Cognito User Pool, Client, and Domain must be provided if enable_cognito_auth is True."
+                 )
+             print(f"Enabling Cognito authentication with User Pool: {cognito_user_pool.user_pool_id}")
+
+             default_action = elb_act.AuthenticateCognitoAction(
+                 next=elb.ListenerAction.forward([default_target_group]),  # After successful auth, forward to TG
+                 user_pool=cognito_user_pool,
+                 user_pool_client=cognito_user_pool_client,
+                 user_pool_domain=cognito_user_pool_domain,
+                 scope=cognito_auth_scope,
+                 on_unauthenticated_request=cognito_auth_on_unauthenticated_request,
+                 session_timeout=stickiness_cookie_duration
+                 # Additional options you might want to configure:
+                 # session_cookie_name="AWSELBCookies"
+             )
+         else:
+             default_action = elb.ListenerAction.forward([default_target_group])
+             print("Cognito authentication is NOT enabled for this listener.")
+
+         # Add the HTTPS listener
+         https_listener = alb.add_listener(
+             logical_id,
+             port=listener_port_https,
+             open=listener_open_to_internet,
+             certificates=certificates_list,
+             default_action=default_action  # Use the determined default action
+         )
+         print(f"ALB HTTPS listener on port {listener_port_https} defined.")
+     else:
+         print("ACM_CERTIFICATE_ARN is not provided. Skipping HTTPS listener creation.")
+
+     return https_listener
+
+
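A hedged usage sketch (the target group, certificate ARN, and Cognito constructs are assumed to be defined elsewhere in the calling stack):

https_listener = add_alb_https_listener_with_cert(
    self,
    "HttpsListener",
    alb=alb,
    acm_certificate_arn=ACM_CERTIFICATE_ARN,
    default_target_group=target_group,  # an elb target group created elsewhere
    enable_cognito_auth=True,
    cognito_user_pool=user_pool,
    cognito_user_pool_client=user_pool_client,
    cognito_user_pool_domain=user_pool_domain,
)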
+ def ensure_folder_exists(output_folder: str):
+     """Checks if the specified folder exists, and creates it if not."""
+
+     if not os.path.exists(output_folder):
+         # Create the folder if it doesn't exist
+         os.makedirs(output_folder, exist_ok=True)
+         print(f"Created the {output_folder} folder.")
+     else:
+         print(f"The {output_folder} folder already exists.")
+
+ def create_basic_config_env(out_dir: str = "config", S3_LOG_CONFIG_BUCKET_NAME=S3_LOG_CONFIG_BUCKET_NAME, S3_OUTPUT_BUCKET_NAME=S3_OUTPUT_BUCKET_NAME, ACCESS_LOG_DYNAMODB_TABLE_NAME=ACCESS_LOG_DYNAMODB_TABLE_NAME, FEEDBACK_LOG_DYNAMODB_TABLE_NAME=FEEDBACK_LOG_DYNAMODB_TABLE_NAME, USAGE_LOG_DYNAMODB_TABLE_NAME=USAGE_LOG_DYNAMODB_TABLE_NAME):
+     '''
+     Create a basic config.env file for the user to use with their newly deployed redaction app.
+     '''
+     variables = {
+         'COGNITO_AUTH': '1',
+         'RUN_AWS_FUNCTIONS': '1',
+         'DISPLAY_FILE_NAMES_IN_LOGS': 'False',
+         'SESSION_OUTPUT_FOLDER': 'True',
+         'SAVE_LOGS_TO_DYNAMODB': 'True',
+         'SHOW_COSTS': 'True',
+         'SHOW_WHOLE_DOCUMENT_TEXTRACT_CALL_OPTIONS': 'True',
+         'LOAD_PREVIOUS_TEXTRACT_JOBS_S3': 'True',
+         'DOCUMENT_REDACTION_BUCKET': S3_LOG_CONFIG_BUCKET_NAME,
+         'TEXTRACT_WHOLE_DOCUMENT_ANALYSIS_BUCKET': S3_OUTPUT_BUCKET_NAME,
+         'ACCESS_LOG_DYNAMODB_TABLE_NAME': ACCESS_LOG_DYNAMODB_TABLE_NAME,
+         'FEEDBACK_LOG_DYNAMODB_TABLE_NAME': FEEDBACK_LOG_DYNAMODB_TABLE_NAME,
+         'USAGE_LOG_DYNAMODB_TABLE_NAME': USAGE_LOG_DYNAMODB_TABLE_NAME
+     }
+
+     # Write variables to .env file
+     ensure_folder_exists(out_dir + "/")
+     env_file_path = os.path.abspath(os.path.join(out_dir, 'config.env'))
+
+     # set_key would create the file on first use, but creating it once up front
+     # is cleaner than relying on that inside the loop.
+     if not os.path.exists(env_file_path):
+         with open(env_file_path, 'w') as f:
+             pass  # Create empty file
+
+     for key, value in variables.items():
+         set_key(env_file_path, key, str(value), quote_mode="never")
+
+     return variables
+
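The generated file can be consumed with python-dotenv (which this module already uses for set_key); a minimal sketch:

import os
from dotenv import load_dotenv

create_basic_config_env("config")
load_dotenv("config/config.env")
print(os.environ["COGNITO_AUTH"])  # -> "1"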
+ def start_codebuild_build(PROJECT_NAME: str, AWS_REGION: str = AWS_REGION):
+     '''
+     Start a build for an existing CodeBuild project.
+     '''
+
+     # --- Initialize CodeBuild client ---
+     client = boto3.client('codebuild', region_name=AWS_REGION)
+
+     try:
+         print(f"Attempting to start build for project: {PROJECT_NAME}")
+
+         response = client.start_build(
+             projectName=PROJECT_NAME
+         )
+
+         build_id = response['build']['id']
+         print(f"Successfully started build with ID: {build_id}")
+         print(f"Build ARN: {response['build']['arn']}")
+         print("Build URL (approximate - constructed from region and ID):")
+         print(f"https://{AWS_REGION}.console.aws.amazon.com/codesuite/codebuild/projects/{PROJECT_NAME}/build/{build_id.split(':')[-1]}/detail")
+
+         # You can inspect the full response if needed:
+         # import json
+         # print(json.dumps(response, indent=2, default=str))
+
+     except client.exceptions.ResourceNotFoundException:
+         print(f"Error: Project '{PROJECT_NAME}' not found in region '{AWS_REGION}'.")
+     except Exception as e:
+         print(f"An unexpected error occurred: {e}")
+
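If the caller needs to wait for the build to finish, boto3's batch_get_builds can be polled; a minimal sketch (the helper name and poll interval are illustrative, not part of this module):

import time

def wait_for_build(build_id: str, region: str = AWS_REGION, poll_seconds: int = 15) -> str:
    client = boto3.client('codebuild', region_name=region)
    while True:
        build = client.batch_get_builds(ids=[build_id])['builds'][0]
        status = build['buildStatus']  # e.g. SUCCEEDED, FAILED, IN_PROGRESS
        if status != 'IN_PROGRESS':
            return status
        time.sleep(poll_seconds)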
+ def upload_file_to_s3(local_file_paths: List[str], s3_key: str, s3_bucket: str, RUN_AWS_FUNCTIONS: str = "1"):
+     """
+     Uploads one or more files from the local machine to Amazon S3.
+
+     Args:
+     - local_file_paths: Local file path(s) of the file(s) to upload.
+     - s3_key: Key (path) prefix for the file(s) in the S3 bucket.
+     - s3_bucket: Name of the S3 bucket.
+
+     Returns:
+     - Message as variable/printed to console
+     """
+     final_out_message = []
+     final_out_message_str = ""
+
+     if RUN_AWS_FUNCTIONS == "1":
+         try:
+             if s3_bucket and local_file_paths:
+
+                 s3_client = boto3.client('s3', region_name=AWS_REGION)
+
+                 if isinstance(local_file_paths, str):
+                     local_file_paths = [local_file_paths]
+
+                 for file in local_file_paths:
+                     if s3_client:
+                         try:
+                             # Get file name off file path
+                             file_name = os.path.basename(file)
+
+                             # Note: s3_key is treated as a prefix and should end with '/'
+                             s3_key_full = s3_key + file_name
+                             print("S3 key: ", s3_key_full)
+
+                             s3_client.upload_file(file, s3_bucket, s3_key_full)
+                             out_message = "File " + file_name + " uploaded successfully!"
+                             print(out_message)
+
+                         except Exception as e:
+                             out_message = f"Error uploading file(s): {e}"
+                             print(out_message)
+
+                         final_out_message.append(out_message)
+                         final_out_message_str = '\n'.join(final_out_message)
+
+                     else: final_out_message_str = "Could not connect to AWS."
+             else: final_out_message_str = "At least one essential variable is empty, could not upload to S3"
+         except Exception as e:
+             final_out_message_str = "Could not upload files to S3 due to: " + str(e)
+             print(final_out_message_str)
+     else:
+         final_out_message_str = "App not set to run AWS functions"
+
+     return final_out_message_str
+
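Example call (bucket and file paths are placeholders; note the trailing slash on the key prefix, since the file name is appended directly):

message = upload_file_to_s3(
    ["outputs/redacted.pdf"],
    s3_key="redaction-outputs/",
    s3_bucket="example-output-bucket",
)
print(message)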
+ def start_ecs_task(cluster_name, service_name):
+     '''Scale the given ECS service up to one running task.'''
+     ecs_client = boto3.client('ecs')
+
+     try:
+         # Update the service to set the desired count to 1
+         response = ecs_client.update_service(
+             cluster=cluster_name,
+             service=service_name,
+             desiredCount=1
+         )
+         return {
+             "statusCode": 200,
+             "body": f"Service {service_name} in cluster {cluster_name} has been updated to 1 task."
+         }
+     except Exception as e:
+         return {
+             "statusCode": 500,
+             "body": f"Error updating service: {str(e)}"
+         }
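The inverse operation, scaling the service back to zero, is symmetric; a minimal sketch (this helper is not defined in the module, it simply mirrors start_ecs_task):

def stop_ecs_task(cluster_name, service_name):
    '''Scale the given ECS service down to zero running tasks.'''
    ecs_client = boto3.client('ecs')
    try:
        ecs_client.update_service(
            cluster=cluster_name,
            service=service_name,
            desiredCount=0
        )
        return {"statusCode": 200,
                "body": f"Service {service_name} in cluster {cluster_name} scaled to 0 tasks."}
    except Exception as e:
        return {"statusCode": 500, "body": f"Error updating service: {str(e)}"}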
cdk/cdk_stack.py ADDED
@@ -0,0 +1,1317 @@
+ import os
+ import json  # Used for loading the task definition file
+ from typing import List, Dict, Any
+ from aws_cdk import (
+     Stack,
+     CfnTag,  # <-- Import CfnTag directly
+     CfnOutput,  # <-- Import CfnOutput directly
+     Duration,
+     RemovalPolicy,
+     SecretValue,
+     aws_ec2 as ec2,
+     aws_ecr as ecr,
+     aws_s3 as s3,
+     aws_ecs as ecs,
+     aws_iam as iam,
+     aws_codebuild as codebuild,
+     aws_cognito as cognito,
+     aws_secretsmanager as secretsmanager,
+     aws_cloudfront as cloudfront,
+     aws_cloudfront_origins as origins,
+     aws_elasticloadbalancingv2 as elbv2,
+     aws_logs as logs,
+     aws_wafv2 as wafv2,
+     aws_dynamodb as dynamodb  # Import the DynamoDB module
+ )
+
+ from constructs import Construct
+ from cdk_config import CDK_PREFIX, VPC_NAME, AWS_MANAGED_TASK_ROLES_LIST, GITHUB_REPO_USERNAME, GITHUB_REPO_NAME, GITHUB_REPO_BRANCH, ECS_TASK_MEMORY_SIZE, ECS_TASK_CPU_SIZE, CUSTOM_HEADER, CUSTOM_HEADER_VALUE, AWS_REGION, CLOUDFRONT_GEO_RESTRICTION, DAYS_TO_DISPLAY_WHOLE_DOCUMENT_JOBS, GRADIO_SERVER_PORT, PUBLIC_SUBNETS_TO_USE, PUBLIC_SUBNET_CIDR_BLOCKS, PUBLIC_SUBNET_AVAILABILITY_ZONES, PRIVATE_SUBNETS_TO_USE, PRIVATE_SUBNET_CIDR_BLOCKS, PRIVATE_SUBNET_AVAILABILITY_ZONES, CODEBUILD_PROJECT_NAME, ECS_SECURITY_GROUP_NAME, ALB_NAME_SECURITY_GROUP_NAME, ALB_NAME, COGNITO_USER_POOL_NAME, COGNITO_USER_POOL_CLIENT_NAME, COGNITO_USER_POOL_CLIENT_SECRET_NAME, FARGATE_TASK_DEFINITION_NAME, ECS_SERVICE_NAME, WEB_ACL_NAME, CLOUDFRONT_DISTRIBUTION_NAME, ECS_TASK_ROLE_NAME, ALB_TARGET_GROUP_NAME, S3_LOG_CONFIG_BUCKET_NAME, S3_OUTPUT_BUCKET_NAME, ACM_CERTIFICATE_ARN, CLUSTER_NAME, CODEBUILD_ROLE_NAME, ECS_TASK_EXECUTION_ROLE_NAME, ECR_CDK_REPO_NAME, ECS_LOG_GROUP_NAME, SAVE_LOGS_TO_DYNAMODB, ACCESS_LOG_DYNAMODB_TABLE_NAME, FEEDBACK_LOG_DYNAMODB_TABLE_NAME, USAGE_LOG_DYNAMODB_TABLE_NAME, TASK_DEFINITION_FILE_LOCATION, EXISTING_IGW_ID, SINGLE_NAT_GATEWAY_ID, NAT_GATEWAY_NAME, COGNITO_USER_POOL_DOMAIN_PREFIX, COGNITO_REDIRECTION_URL, AWS_ACCOUNT_ID, ECS_USE_FARGATE_SPOT, ECS_READ_ONLY_FILE_SYSTEM, USE_CLOUDFRONT, LOAD_BALANCER_WEB_ACL_NAME
+ from cdk_functions import create_subnets, create_web_acl_with_common_rules, add_custom_policies, add_alb_https_listener_with_cert, create_nat_gateway  # Only keep CDK-native functions
+
+ def _get_env_list(env_var_value: str) -> List[str]:
+     """Parses the comma-separated *value* of an environment variable into a list of strings."""
+     # Strip the surrounding brackets/quotes and any stray quote characters
+     value = env_var_value[1:-1].strip().replace('\"', '').replace("\'", "")
+     if not value:
+         return []
+     # Split by comma and filter out any empty strings that might result from extra commas
+     return [s.strip() for s in value.split(',') if s.strip()]
+
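For example, given the quoting convention the parser expects:

>>> _get_env_list("['subnet-a', 'subnet-b', 'subnet-c']")
['subnet-a', 'subnet-b', 'subnet-c']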
+ # 1. Try to load CIDR/AZs from environment variables
+ if PUBLIC_SUBNETS_TO_USE: PUBLIC_SUBNETS_TO_USE = _get_env_list(PUBLIC_SUBNETS_TO_USE)
+ if PRIVATE_SUBNETS_TO_USE: PRIVATE_SUBNETS_TO_USE = _get_env_list(PRIVATE_SUBNETS_TO_USE)
+
+ if PUBLIC_SUBNET_CIDR_BLOCKS: PUBLIC_SUBNET_CIDR_BLOCKS = _get_env_list(PUBLIC_SUBNET_CIDR_BLOCKS)
+ if PUBLIC_SUBNET_AVAILABILITY_ZONES: PUBLIC_SUBNET_AVAILABILITY_ZONES = _get_env_list(PUBLIC_SUBNET_AVAILABILITY_ZONES)
+ if PRIVATE_SUBNET_CIDR_BLOCKS: PRIVATE_SUBNET_CIDR_BLOCKS = _get_env_list(PRIVATE_SUBNET_CIDR_BLOCKS)
+ if PRIVATE_SUBNET_AVAILABILITY_ZONES: PRIVATE_SUBNET_AVAILABILITY_ZONES = _get_env_list(PRIVATE_SUBNET_AVAILABILITY_ZONES)
+
+ if AWS_MANAGED_TASK_ROLES_LIST: AWS_MANAGED_TASK_ROLES_LIST = _get_env_list(AWS_MANAGED_TASK_ROLES_LIST)
+
+ class CdkStack(Stack):
+
+     def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
+         super().__init__(scope, construct_id, **kwargs)
+
+         # --- Helpers to get context values ---
+         def get_context_bool(key: str, default: bool = False) -> bool:
+             return self.node.try_get_context(key) or default
+
+         def get_context_str(key: str, default: str = None) -> str:
+             return self.node.try_get_context(key) or default
+
+         def get_context_dict(key: str, default: dict = None) -> dict:
+             return self.node.try_get_context(key) or default
+
+         def get_context_list_of_dicts(key: str) -> List[Dict[str, Any]]:
+             ctx_value = self.node.try_get_context(key)
+             if not isinstance(ctx_value, list):
+                 print(f"Warning: Context key '{key}' not found or not a list. Returning empty list.")
+                 return []
+             # Optional: Add validation that all items in the list are dicts
+             return ctx_value
+
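These helpers read the context that app.py generates before synthesis; context along these lines would drive the exists/arn branches below (the keys shown are illustrative of the naming convention, not an exhaustive schema):

# Passed via cdk.context.json or --context; an illustrative shape only:
# {
#   "exists:my-codebuild-role": true,
#   "arn:my-codebuild-role": "arn:aws:iam::123456789012:role/my-codebuild-role",
#   "public_subnets_to_create": [
#     {"name": "public-1", "cidr": "10.0.0.0/24", "az": "us-east-1a"}
#   ]
# }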
+         # --- VPC and Subnets (VPC is always looked up; subnets are created/returned by create_subnets) ---
+         try:
+             vpc = ec2.Vpc.from_lookup(
+                 self,
+                 "VPC",
+                 vpc_name=VPC_NAME
+             )
+             print("Successfully looked up VPC:", vpc.vpc_id)
+         except Exception as e:
+             raise Exception(f"Could not look up VPC with name '{VPC_NAME}' due to: {e}")
+
+         # --- Subnet Handling (Check Context and Create/Import) ---
+         # Initialize lists to hold ISubnet objects (L2) and CfnSubnet/CfnRouteTable (L1).
+         # We store ISubnet for consistency, as CfnSubnet has a .subnet_id property.
+         self.public_subnets: List[ec2.ISubnet] = []
+         self.private_subnets: List[ec2.ISubnet] = []
+         # Store L1 CfnRouteTables explicitly if you need to reference them later
+         self.private_route_tables_cfn: List[ec2.CfnRouteTable] = []
+         self.public_route_tables_cfn: List[ec2.CfnRouteTable] = []  # New: to store public RTs
+
+         names_to_create_private = []
+         names_to_create_public = []
+
+         if not PUBLIC_SUBNETS_TO_USE and not PRIVATE_SUBNETS_TO_USE:
+             print("Warning: No public or private subnets specified in *_SUBNETS_TO_USE. Attempting to select from existing VPC subnets.")
+
+             print("vpc.public_subnets:", vpc.public_subnets)
+             print("vpc.private_subnets:", vpc.private_subnets)
+
+             selected_public_subnets = vpc.select_subnets(subnet_type=ec2.SubnetType.PUBLIC, one_per_az=True)
+             private_subnets_egress = vpc.select_subnets(subnet_type=ec2.SubnetType.PRIVATE_WITH_EGRESS, one_per_az=True)
+             private_subnets_isolated = vpc.select_subnets(subnet_type=ec2.SubnetType.PRIVATE_ISOLATED, one_per_az=True)
+
+             combined_subnet_objects = []
+
+             if private_subnets_egress.subnets:
+                 # Add the first PRIVATE_WITH_EGRESS subnet
+                 combined_subnet_objects.append(private_subnets_egress.subnets[0])
+             else:
+                 self.node.add_warning("No PRIVATE_WITH_EGRESS subnets found to select the first one.")
+
+             # Add all PRIVATE_ISOLATED subnets *except* the first one (if they exist)
+             if len(private_subnets_isolated.subnets) > 1:
+                 combined_subnet_objects.extend(private_subnets_isolated.subnets[1:])
+             elif private_subnets_isolated.subnets:  # Only one isolated subnet found
+                 self.node.add_warning("Only one PRIVATE_ISOLATED subnet found, private_subnets_isolated.subnets[1:] will be empty.")
+             else:
+                 self.node.add_warning("No PRIVATE_ISOLATED subnets found.")
+
+             # Create an ec2.SelectedSubnets object from the combined private subnet list.
+             selected_private_subnets = vpc.select_subnets(
+                 subnets=combined_subnet_objects
+             )
+
+             print("selected_public_subnets:", selected_public_subnets)
+             print("selected_private_subnets:", selected_private_subnets)
+
+             if len(selected_public_subnets.subnet_ids) < 2 or len(selected_private_subnets.subnet_ids) < 2:
+                 raise Exception("Need at least two public and two private subnets in different availability zones")
+
+             if not selected_public_subnets.subnets and not selected_private_subnets.subnets:
+                 # If no subnets could be found even with automatic selection, raise an error.
+                 # This ensures the stack doesn't proceed if it absolutely needs subnets.
+                 print("Error: No existing public or private subnets could be found in the VPC for automatic selection. "
+                       "You must either specify subnets in *_SUBNETS_TO_USE or ensure the VPC has discoverable subnets.")
+                 raise RuntimeError("No suitable subnets found for automatic selection.")
+             else:
+                 self.public_subnets = selected_public_subnets.subnets
+                 self.private_subnets = selected_private_subnets.subnets
+                 print(f"Automatically selected {len(self.public_subnets)} public and {len(self.private_subnets)} private subnets based on VPC discovery.")
+
+             print("self.public_subnets:", self.public_subnets)
+             print("self.private_subnets:", self.private_subnets)
+             # Since subnets are now assigned, the context-driven import/creation below
+             # will find nothing to do for *_SUBNETS_TO_USE.
+
+         checked_public_subnets_ctx = get_context_dict("checked_public_subnets")
+         checked_private_subnets_ctx = get_context_dict("checked_private_subnets")
+
+         public_subnets_data_for_creation_ctx = get_context_list_of_dicts("public_subnets_to_create")
+         private_subnets_data_for_creation_ctx = get_context_list_of_dicts("private_subnets_to_create")
+
+         # --- 3. Process Public Subnets ---
+         print("\n--- Processing Public Subnets ---")
+         # Import existing public subnets
+         if checked_public_subnets_ctx:
+             for i, subnet_name in enumerate(PUBLIC_SUBNETS_TO_USE):
+                 subnet_info = checked_public_subnets_ctx.get(subnet_name)
+                 if subnet_info and subnet_info.get("exists"):
+                     subnet_id = subnet_info.get("id")
+                     if not subnet_id:
+                         raise RuntimeError(f"Context for existing public subnet '{subnet_name}' is missing 'id'.")
+                     try:
+                         imported_subnet = ec2.Subnet.from_subnet_id(
+                             self, f"ImportedPublicSubnet{subnet_name.replace('-', '')}{i}", subnet_id
+                         )
+                         # self.public_subnets.append(imported_subnet)
+                         print(f"Imported existing public subnet: {subnet_name} (ID: {subnet_id})")
+                     except Exception as e:
+                         raise RuntimeError(f"Failed to import public subnet '{subnet_name}' with ID '{subnet_id}'. Error: {e}")
+
+         # Create new public subnets based on public_subnets_data_for_creation_ctx
+         if public_subnets_data_for_creation_ctx:
+             names_to_create_public = [s['name'] for s in public_subnets_data_for_creation_ctx]
+             cidrs_to_create_public = [s['cidr'] for s in public_subnets_data_for_creation_ctx]
+             azs_to_create_public = [s['az'] for s in public_subnets_data_for_creation_ctx]
+
+             if names_to_create_public:
+                 print(f"Attempting to create {len(names_to_create_public)} new public subnets: {names_to_create_public}")
+                 newly_created_public_subnets, newly_created_public_rts_cfn = create_subnets(
+                     self, vpc, CDK_PREFIX, names_to_create_public, cidrs_to_create_public, azs_to_create_public,
+                     is_public=True,
+                     internet_gateway_id=EXISTING_IGW_ID
+                 )
+                 self.public_subnets.extend(newly_created_public_subnets)
+                 self.public_route_tables_cfn.extend(newly_created_public_rts_cfn)
+
+         if not self.public_subnets:
+             raise Exception("No public subnets found or created, exiting.")
+
+
+         # --- NAT Gateway Creation/Lookup ---
+         self.single_nat_gateway_id = None
+
+         nat_gw_id_from_context = SINGLE_NAT_GATEWAY_ID
+
+         if nat_gw_id_from_context:
+             print(f"Using existing NAT Gateway ID from context: {nat_gw_id_from_context}")
+             self.single_nat_gateway_id = nat_gw_id_from_context
+         else:
+             # If not in context, create a new one, but only if we have a public subnet.
+             if self.public_subnets:
+                 print("NAT Gateway ID not found in context. Creating a new one.")
+                 # Place the NAT GW in the first available public subnet
+                 first_public_subnet = self.public_subnets[0]
+
+                 self.single_nat_gateway_id = create_nat_gateway(
+                     self,
+                     first_public_subnet,
+                     nat_gateway_name=NAT_GATEWAY_NAME,
+                     nat_gateway_id_context_key=SINGLE_NAT_GATEWAY_ID
+                 )
+             else:
+                 print("WARNING: No public subnets available. Cannot create a NAT Gateway.")
+
+
+         # --- 4. Process Private Subnets ---
+         print("\n--- Processing Private Subnets ---")
+         # (Importing existing private subnets from checked_private_subnets_ctx mirrors
+         # the public subnet handling above.)
+
+         # Create new private subnets
+         if private_subnets_data_for_creation_ctx:
+             names_to_create_private = [s['name'] for s in private_subnets_data_for_creation_ctx]
+             cidrs_to_create_private = [s['cidr'] for s in private_subnets_data_for_creation_ctx]
+             azs_to_create_private = [s['az'] for s in private_subnets_data_for_creation_ctx]
+
+             if names_to_create_private:
+                 print(f"Attempting to create {len(names_to_create_private)} new private subnets: {names_to_create_private}")
+                 # Ensure self.single_nat_gateway_id is available before this call
+                 if not self.single_nat_gateway_id:
+                     raise ValueError("A single NAT Gateway ID is required for private subnets but was not resolved.")
+
+                 newly_created_private_subnets_cfn, newly_created_private_rts_cfn = create_subnets(
+                     self, vpc, CDK_PREFIX, names_to_create_private, cidrs_to_create_private, azs_to_create_private,
+                     is_public=False,
+                     single_nat_gateway_id=self.single_nat_gateway_id  # Pass the single NAT Gateway ID
+                 )
+                 self.private_subnets.extend(newly_created_private_subnets_cfn)
+                 self.private_route_tables_cfn.extend(newly_created_private_rts_cfn)
+                 print(f"Successfully defined {len(newly_created_private_subnets_cfn)} new private subnets and their route tables for creation.")
+             else:
+                 print("No private subnets specified for creation in context ('private_subnets_to_create').")
+
+         if not self.private_subnets:
+             raise Exception("No private subnets found or created, exiting.")
+
+         # --- 5. Sanity Check and Output ---
+
+         # Output the single NAT Gateway ID for verification
+         if self.single_nat_gateway_id:
+             CfnOutput(self, "SingleNatGatewayId", value=self.single_nat_gateway_id,
+                       description="ID of the single NAT Gateway used for private subnets.")
+         else:
+             raise Exception("No single NAT Gateway was created or resolved.")
+
+         # --- Outputs for other stacks/regions ---
+         # These are crucial for cross-stack, cross-region referencing
+
+         self.params = dict()
+         self.params["vpc_id"] = vpc.vpc_id
+         self.params["private_subnets"] = self.private_subnets
+         self.params["private_route_tables"] = self.private_route_tables_cfn
+         self.params["public_subnets"] = self.public_subnets
+         self.params["public_route_tables"] = self.public_route_tables_cfn
+
+
+         # class CdkStackMain(Stack):
+         #     def __init__(self, scope: Construct, construct_id: str, private_subnets: List[ec2.ISubnet] = [], private_route_tables: List[ec2.CfnRouteTable] = [], public_subnets: List[ec2.ISubnet] = [], public_route_tables: List[ec2.CfnRouteTable] = [], **kwargs) -> None:
+         #         super().__init__(scope, construct_id, **kwargs)
+         #         ... (same get_context_* helpers as defined above) ...
+         #         self.private_subnets: List[ec2.ISubnet] = private_subnets
+         #         self.private_route_tables_cfn: List[ec2.CfnRouteTable] = private_route_tables
+         #         self.public_subnets: List[ec2.ISubnet] = public_subnets
+         #         self.public_route_tables_cfn: List[ec2.CfnRouteTable] = public_route_tables
+
+         private_subnet_selection = ec2.SubnetSelection(subnets=self.private_subnets)
+         public_subnet_selection = ec2.SubnetSelection(subnets=self.public_subnets)
+
+         for sub in private_subnet_selection.subnets:
+             print("private subnet:", sub.subnet_id, "is in availability zone:", sub.availability_zone)
+
+         for sub in public_subnet_selection.subnets:
+             print("public subnet:", sub.subnet_id, "is in availability zone:", sub.availability_zone)
+
+         print("Private subnet route tables:", self.private_route_tables_cfn)
+
+         # Add the S3 Gateway Endpoint to the VPC
+         if names_to_create_private:
+             try:
+                 s3_gateway_endpoint = vpc.add_gateway_endpoint(
+                     "S3GatewayEndpoint",
+                     service=ec2.GatewayVpcEndpointAwsService.S3, subnets=[private_subnet_selection])
+
+                 # Output some useful information (kept inside the try so the endpoint
+                 # ID is only referenced when the endpoint was actually created)
+                 CfnOutput(self, "VpcIdOutput", value=vpc.vpc_id,
+                           description="The ID of the VPC where the S3 Gateway Endpoint is deployed.")
+                 CfnOutput(self, "S3GatewayEndpointService", value=s3_gateway_endpoint.vpc_endpoint_id,
+                           description="The ID of the S3 Gateway Endpoint.")
+             except Exception as e:
+                 print("Could not add S3 gateway endpoint to subnets due to:", e)
+
+         # --- IAM Roles ---
+         try:
+             codebuild_role_name = CODEBUILD_ROLE_NAME
+             custom_sts_kms_policy = """{
+                 "Version": "2012-10-17",
+                 "Statement": [
+                     {
+                         "Sid": "STSCallerIdentity",
+                         "Effect": "Allow",
+                         "Action": [
+                             "sts:GetCallerIdentity"
+                         ],
+                         "Resource": "*"
+                     },
+                     {
+                         "Sid": "KMSAccess",
+                         "Effect": "Allow",
+                         "Action": [
+                             "kms:Encrypt",
+                             "kms:Decrypt",
+                             "kms:GenerateDataKey"
+                         ],
+                         "Resource": "*"
+                     }
+                 ]
+             }"""
+
+             if get_context_bool(f"exists:{codebuild_role_name}"):
+                 # If it exists, import the role using the ARN from context
+                 role_arn = get_context_str(f"arn:{codebuild_role_name}")
+                 if not role_arn:
+                     raise ValueError(f"Context value 'arn:{codebuild_role_name}' is required if role exists.")
+                 codebuild_role = iam.Role.from_role_arn(self, "CodeBuildRole", role_arn=role_arn)
+                 print("Using existing CodeBuild role")
+             else:
+                 # If not, create the role
+                 codebuild_role = iam.Role(
+                     self, "CodeBuildRole",  # Logical ID
+                     role_name=codebuild_role_name,  # Explicit resource name
+                     assumed_by=iam.ServicePrincipal("codebuild.amazonaws.com")
+                 )
+                 codebuild_role.add_managed_policy(iam.ManagedPolicy.from_aws_managed_policy_name("EC2InstanceProfileForImageBuilderECRContainerBuilds"))
+                 print("Successfully created new CodeBuild role")
+
+             task_role_name = ECS_TASK_ROLE_NAME
+             if get_context_bool(f"exists:{task_role_name}"):
+                 role_arn = get_context_str(f"arn:{task_role_name}")
+                 if not role_arn:
+                     raise ValueError(f"Context value 'arn:{task_role_name}' is required if role exists.")
+                 task_role = iam.Role.from_role_arn(self, "TaskRole", role_arn=role_arn)
+                 print("Using existing ECS task role")
+             else:
+                 task_role = iam.Role(
+                     self, "TaskRole",  # Logical ID
+                     role_name=task_role_name,  # Explicit resource name
+                     assumed_by=iam.ServicePrincipal("ecs-tasks.amazonaws.com")
+                 )
+                 for role in AWS_MANAGED_TASK_ROLES_LIST:
+                     print(f"Adding {role} to policy")
+                     task_role.add_managed_policy(iam.ManagedPolicy.from_aws_managed_policy_name(role))
+                 task_role = add_custom_policies(self, task_role, custom_policy_text=custom_sts_kms_policy)
+                 print("Successfully created new ECS task role")
+
+             execution_role_name = ECS_TASK_EXECUTION_ROLE_NAME
+             if get_context_bool(f"exists:{execution_role_name}"):
+                 role_arn = get_context_str(f"arn:{execution_role_name}")
+                 if not role_arn:
+                     raise ValueError(f"Context value 'arn:{execution_role_name}' is required if role exists.")
+                 execution_role = iam.Role.from_role_arn(self, "ExecutionRole", role_arn=role_arn)
+                 print("Using existing ECS execution role")
+             else:
+                 execution_role = iam.Role(
+                     self, "ExecutionRole",  # Logical ID
+                     role_name=execution_role_name,  # Explicit resource name
+                     assumed_by=iam.ServicePrincipal("ecs-tasks.amazonaws.com")
+                 )
+                 for role in AWS_MANAGED_TASK_ROLES_LIST:
+                     execution_role.add_managed_policy(iam.ManagedPolicy.from_aws_managed_policy_name(role))
+                 execution_role = add_custom_policies(self, execution_role, custom_policy_text=custom_sts_kms_policy)
+                 print("Successfully created new ECS execution role")
+
+         except Exception as e:
+             raise Exception(f"Failed at IAM role step due to: {e}")
+
+         # --- S3 Buckets ---
+         try:
+             log_bucket_name = S3_LOG_CONFIG_BUCKET_NAME
+             if get_context_bool(f"exists:{log_bucket_name}"):
+                 bucket = s3.Bucket.from_bucket_name(self, "LogConfigBucket", bucket_name=log_bucket_name)
+                 print("Using existing S3 bucket", log_bucket_name)
+             else:
+                 bucket = s3.Bucket(self, "LogConfigBucket", bucket_name=log_bucket_name,
+                                    versioned=False,  # Set to True if you need versioning
+                                    # IMPORTANT: DESTROY removes the bucket with the stack
+                                    removal_policy=RemovalPolicy.DESTROY,
+                                    # IMPORTANT: empty the bucket before deletion
+                                    auto_delete_objects=True
+                                    )
+                 print("Created S3 bucket", log_bucket_name)
+
+             # Add policies - this applies to both created and imported buckets;
+             # CDK handles idempotent policy additions
+             bucket.add_to_resource_policy(
+                 iam.PolicyStatement(
+                     effect=iam.Effect.ALLOW,
+                     principals=[task_role],  # Pass the role object directly
+                     actions=["s3:GetObject", "s3:PutObject"],
+                     resources=[f"{bucket.bucket_arn}/*"]
+                 )
+             )
+             bucket.add_to_resource_policy(
+                 iam.PolicyStatement(
+                     effect=iam.Effect.ALLOW,
+                     principals=[task_role],
+                     actions=["s3:ListBucket"],
+                     resources=[bucket.bucket_arn]
+                 )
+             )
+
+             output_bucket_name = S3_OUTPUT_BUCKET_NAME
+             if get_context_bool(f"exists:{output_bucket_name}"):
+                 output_bucket = s3.Bucket.from_bucket_name(self, "OutputBucket", bucket_name=output_bucket_name)
+                 print("Using existing Output bucket", output_bucket_name)
+             else:
+                 output_bucket = s3.Bucket(self, "OutputBucket", bucket_name=output_bucket_name,
+                                           lifecycle_rules=[
+                                               s3.LifecycleRule(
+                                                   expiration=Duration.days(int(DAYS_TO_DISPLAY_WHOLE_DOCUMENT_JOBS))
+                                               )
+                                           ],
+                                           versioned=False,  # Set to True if you need versioning
+                                           removal_policy=RemovalPolicy.DESTROY,
+                                           auto_delete_objects=True
+                                           )
+                 print("Created Output bucket:", output_bucket_name)
+
+             # Add policies to output bucket
+             output_bucket.add_to_resource_policy(
+                 iam.PolicyStatement(
+                     effect=iam.Effect.ALLOW,
+                     principals=[task_role],
+                     actions=["s3:GetObject", "s3:PutObject"],
+                     resources=[f"{output_bucket.bucket_arn}/*"]
+                 )
+             )
+             output_bucket.add_to_resource_policy(
+                 iam.PolicyStatement(
+                     effect=iam.Effect.ALLOW,
+                     principals=[task_role],
+                     actions=["s3:ListBucket"],
+                     resources=[output_bucket.bucket_arn]
+                 )
+             )
+
+         except Exception as e:
+             raise Exception(f"Could not handle S3 buckets due to: {e}")
+
+         # --- Elastic Container Registry ---
+         try:
+             full_ecr_repo_name = ECR_CDK_REPO_NAME
+             if get_context_bool(f"exists:{full_ecr_repo_name}"):
+                 ecr_repo = ecr.Repository.from_repository_name(self, "ECRRepo", repository_name=full_ecr_repo_name)
+                 print("Using existing ECR repository")
+             else:
+                 ecr_repo = ecr.Repository(self, "ECRRepo", repository_name=full_ecr_repo_name)  # Explicitly set repository_name
+                 print("Created ECR repository", full_ecr_repo_name)
+
+             ecr_image_loc = ecr_repo.repository_uri
+         except Exception as e:
+             raise Exception(f"Could not handle ECR repo due to: {e}")
+
+         # --- CODEBUILD ---
+         try:
+             codebuild_project_name = CODEBUILD_PROJECT_NAME
+             if get_context_bool(f"exists:{codebuild_project_name}"):
+                 # Lookup CodeBuild project by ARN from context
+                 project_arn = get_context_str(f"arn:{codebuild_project_name}")
+                 if not project_arn:
+                     raise ValueError(f"Context value 'arn:{codebuild_project_name}' is required if project exists.")
+                 codebuild_project = codebuild.Project.from_project_arn(self, "CodeBuildProject", project_arn=project_arn)
+                 print("Using existing CodeBuild project")
+             else:
+                 codebuild_project = codebuild.Project(
+                     self,
+                     "CodeBuildProject",  # Logical ID
+                     project_name=codebuild_project_name,  # Explicit resource name
+                     source=codebuild.Source.git_hub(
+                         owner=GITHUB_REPO_USERNAME,
+                         repo=GITHUB_REPO_NAME,
+                         branch_or_ref=GITHUB_REPO_BRANCH
+                     ),
+                     environment=codebuild.BuildEnvironment(
+                         build_image=codebuild.LinuxBuildImage.STANDARD_7_0,
+                         privileged=True,  # Required for Docker builds
+                         environment_variables={
+                             "ECR_REPO_NAME": codebuild.BuildEnvironmentVariable(value=full_ecr_repo_name),
+                             "AWS_DEFAULT_REGION": codebuild.BuildEnvironmentVariable(value=AWS_REGION),
+                             "AWS_ACCOUNT_ID": codebuild.BuildEnvironmentVariable(value=AWS_ACCOUNT_ID)
+                         }
+                     ),
+                     build_spec=codebuild.BuildSpec.from_object({
+                         "version": "0.2",
+                         "phases": {
+                             "pre_build": {
+                                 "commands": [
+                                     "echo Logging in to Amazon ECR",
+                                     "aws ecr get-login-password --region $AWS_DEFAULT_REGION | docker login --username AWS --password-stdin $AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
+                                 ]
+                             },
+                             "build": {
+                                 "commands": [
+                                     "echo Building the Docker image",
+                                     "docker build -t $ECR_REPO_NAME:latest .",
+                                     "docker tag $ECR_REPO_NAME:latest $AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com/$ECR_REPO_NAME:latest"
+                                 ]
+                             },
+                             "post_build": {
+                                 "commands": [
+                                     "echo Pushing the Docker image",
+                                     "docker push $AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com/$ECR_REPO_NAME:latest"
+                                 ]
+                             }
+                         }
+                     })
+                 )
+                 print("Successfully created CodeBuild project", codebuild_project_name)
+
+             # Grant permissions - applies to both created and imported project roles
+             ecr_repo.grant_pull_push(codebuild_project.role)
+
+         except Exception as e:
+             raise Exception(f"Could not handle CodeBuild project due to: {e}")
+
+         # --- Security Groups ---
+         try:
+             ecs_security_group_name = ECS_SECURITY_GROUP_NAME
+             # Note: lookups by security group name are unreliable here, so the
+             # groups are always defined and CDK reconciles them on deploy.
+             try:
+                 ecs_security_group = ec2.SecurityGroup(
+                     self,
+                     "ECSSecurityGroup",  # Logical ID
+                     security_group_name=ecs_security_group_name,  # Explicit resource name
+                     vpc=vpc,
+                 )
+                 print(f"Created Security Group: {ecs_security_group_name}")
+             except Exception as e:
+                 print("Failed to create ECS security group due to:", e)
+
+             alb_security_group_name = ALB_NAME_SECURITY_GROUP_NAME
+             try:
+                 alb_security_group = ec2.SecurityGroup(
+                     self,
+                     "ALBSecurityGroup",  # Logical ID
+                     security_group_name=alb_security_group_name,  # Explicit resource name
+                     vpc=vpc
+                 )
+                 print(f"Created Security Group: {alb_security_group_name}")
+             except Exception as e:
+                 print("Failed to create ALB security group due to:", e)
+
+             # Define ingress rules - CDK will manage adding/removing these as needed
+             ec2_port_gradio_server_port = ec2.Port.tcp(int(GRADIO_SERVER_PORT))  # Ensure port is int
+             ecs_security_group.add_ingress_rule(
+                 peer=alb_security_group,
+                 connection=ec2_port_gradio_server_port,
+                 description="ALB traffic",
+             )
+
+             alb_security_group.add_ingress_rule(
+                 # NB: managed prefix list IDs are region-specific; this should be the
+                 # CloudFront origin-facing prefix list for the deployment region.
+                 peer=ec2.Peer.prefix_list("pl-93a247fa"),
+                 connection=ec2.Port.all_traffic(),
+                 description="CloudFront traffic",
+             )
+
+         except Exception as e:
+             raise Exception(f"Could not handle security groups due to: {e}")
+
+         # --- DynamoDB tables for logs (optional) ---
+
+         if SAVE_LOGS_TO_DYNAMODB == 'True':
+             try:
+                 print("Creating DynamoDB tables for logs")
+
+                 dynamodb_table_access = dynamodb.Table(self, "RedactionAccessDataTable",
+                                                        table_name=ACCESS_LOG_DYNAMODB_TABLE_NAME,
+                                                        partition_key=dynamodb.Attribute(
+                                                            name="id",
+                                                            type=dynamodb.AttributeType.STRING),
+                                                        billing_mode=dynamodb.BillingMode.PAY_PER_REQUEST,
+                                                        removal_policy=RemovalPolicy.DESTROY)
+
+                 dynamodb_table_feedback = dynamodb.Table(self, "RedactionFeedbackDataTable",
+                                                          table_name=FEEDBACK_LOG_DYNAMODB_TABLE_NAME,
+                                                          partition_key=dynamodb.Attribute(
+                                                              name="id",
+                                                              type=dynamodb.AttributeType.STRING),
+                                                          billing_mode=dynamodb.BillingMode.PAY_PER_REQUEST,
+                                                          removal_policy=RemovalPolicy.DESTROY)
+
+                 dynamodb_table_usage = dynamodb.Table(self, "RedactionUsageDataTable",
+                                                       table_name=USAGE_LOG_DYNAMODB_TABLE_NAME,
+                                                       partition_key=dynamodb.Attribute(
+                                                           name="id",
+                                                           type=dynamodb.AttributeType.STRING),
+                                                       billing_mode=dynamodb.BillingMode.PAY_PER_REQUEST,
+                                                       removal_policy=RemovalPolicy.DESTROY)
+
+             except Exception as e:
+                 raise Exception(f"Could not create DynamoDB tables due to: {e}")
+
+         # --- ALB ---
+         try:
+             load_balancer_name = ALB_NAME
+             # ALB names are limited to 32 characters; keep the last 32 if longer
+             if len(load_balancer_name) > 32: load_balancer_name = load_balancer_name[-32:]
+             if get_context_bool(f"exists:{load_balancer_name}"):
+                 # Lookup ALB by ARN from context
+                 alb_arn = get_context_str(f"arn:{load_balancer_name}")
+                 if not alb_arn:
+                     raise ValueError(f"Context value 'arn:{load_balancer_name}' is required if ALB exists.")
+                 alb = elbv2.ApplicationLoadBalancer.from_lookup(
+                     self, "ALB",  # Logical ID
+                     load_balancer_arn=alb_arn
+                 )
+                 print(f"Using existing Application Load Balancer {load_balancer_name}.")
+             else:
+                 alb = elbv2.ApplicationLoadBalancer(
+                     self,
+                     "ALB",  # Logical ID
+                     load_balancer_name=load_balancer_name,  # Explicit resource name
+                     vpc=vpc,
+                     internet_facing=True,
+                     security_group=alb_security_group,  # Link to SG
+                     vpc_subnets=public_subnet_selection  # Link to subnets
+                 )
+                 print("Successfully created new Application Load Balancer")
+         except Exception as e:
+             raise Exception(f"Could not handle application load balancer due to: {e}")
+
+ # --- Cognito User Pool ---
+ try:
+ if get_context_bool(f"exists:{COGNITO_USER_POOL_NAME}"):
+ # Look up by ID from context
+ user_pool_id = get_context_str(f"id:{COGNITO_USER_POOL_NAME}")
+ if not user_pool_id:
+ raise ValueError(f"Context value 'id:{COGNITO_USER_POOL_NAME}' is required if User Pool exists.")
+ user_pool = cognito.UserPool.from_user_pool_id(self, "UserPool", user_pool_id=user_pool_id)
+ print(f"Using existing user pool {user_pool_id}.")
+ else:
+ user_pool = cognito.UserPool(self, "UserPool",
+ user_pool_name=COGNITO_USER_POOL_NAME,
+ mfa=cognito.Mfa.OFF, # Adjust as needed
+ sign_in_aliases=cognito.SignInAliases(email=True),
+ removal_policy=RemovalPolicy.DESTROY) # Adjust as needed
+ print(f"Created new user pool {user_pool.user_pool_id}.")
+
+ # If a certificate is supplied, assume the ALB's built-in Cognito login is used.
+ # The client then needs an extra redirect URL to receive the token returned by Cognito authentication.
+ if ACM_CERTIFICATE_ARN:
+ redirect_uris = [COGNITO_REDIRECTION_URL, COGNITO_REDIRECTION_URL + "/oauth2/idpresponse"]
+ else:
+ redirect_uris = [COGNITO_REDIRECTION_URL]
+
+ user_pool_client_name = COGNITO_USER_POOL_CLIENT_NAME
+ if get_context_bool(f"exists:{user_pool_client_name}"):
+ # Look up by ID from context (requires the User Pool object)
+ user_pool_client_id = get_context_str(f"id:{user_pool_client_name}")
+ if not user_pool_client_id:
+ raise ValueError(f"Context value 'id:{user_pool_client_name}' is required if User Pool Client exists.")
+ user_pool_client = cognito.UserPoolClient.from_user_pool_client_id(self, "UserPoolClient", user_pool_client_id=user_pool_client_id)
+ print(f"Using existing user pool client {user_pool_client_id}.")
+ else:
+ user_pool_client = cognito.UserPoolClient(self, "UserPoolClient",
+ auth_flows=cognito.AuthFlow(user_srp=True, user_password=True), # Example: enable SRP for secure sign-in
+ user_pool=user_pool,
+ generate_secret=True,
+ user_pool_client_name=user_pool_client_name,
+ supported_identity_providers=[cognito.UserPoolClientIdentityProvider.COGNITO],
+ o_auth=cognito.OAuthSettings(
+ flows=cognito.OAuthFlows(authorization_code_grant=True),
+ scopes=[cognito.OAuthScope.OPENID, cognito.OAuthScope.EMAIL, cognito.OAuthScope.PROFILE],
+ callback_urls=redirect_uris
+ )
+ )
+
+ CfnOutput(self, "CognitoAppClientId", value=user_pool_client.user_pool_client_id)
+
+ print(f"Created new user pool client {user_pool_client.user_pool_client_id}.")
+
+ # Add a domain to the User Pool (crucial for ALB integration)
+ user_pool_domain = user_pool.add_domain(
+ "UserPoolDomain",
+ cognito_domain=cognito.CognitoDomainOptions(
+ domain_prefix=COGNITO_USER_POOL_DOMAIN_PREFIX)
+ )
+
+ # Apply removal_policy to the created UserPoolDomain construct
+ user_pool_domain.apply_removal_policy(policy=RemovalPolicy.DESTROY)
+
+ CfnOutput(self, "CognitoUserPoolLoginUrl", value=user_pool_domain.base_url())
+
+ except Exception as e:
+ raise Exception(f"Could not handle Cognito resources due to: {e}") from e
+
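+ # For reference, the hosted UI domain created above takes the form
+ # https://<COGNITO_USER_POOL_DOMAIN_PREFIX>.auth.<region>.amazoncognito.com,
+ # which is what the CognitoUserPoolLoginUrl output reports via base_url().
+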
+ # --- Secrets Manager Secret ---
+ try:
+ secret_name = COGNITO_USER_POOL_CLIENT_SECRET_NAME
+ if get_context_bool(f"exists:{secret_name}"):
+ # Look up by name
+ secret = secretsmanager.Secret.from_secret_name_v2(self, "CognitoSecret", secret_name=secret_name)
+ print(f"Using existing Secret {secret_name}.")
+ else:
+ secret = secretsmanager.Secret(self, "CognitoSecret", # Logical ID
+ secret_name=secret_name, # Explicit resource name
+ secret_object_value={
+ "REDACTION_USER_POOL_ID": SecretValue.unsafe_plain_text(user_pool.user_pool_id), # Use the CDK attribute
+ "REDACTION_CLIENT_ID": SecretValue.unsafe_plain_text(user_pool_client.user_pool_client_id), # Use the CDK attribute
+ "REDACTION_CLIENT_SECRET": user_pool_client.user_pool_client_secret # Use the CDK attribute
+ }
+ )
+ print(f"Created new secret {secret_name}.")
+
+ except Exception as e:
+ raise Exception(f"Could not handle Secrets Manager secret due to: {e}") from e
+
+ # --- Fargate Task Definition ---
+ try:
+ # Re-creating a task definition with the same logical ID creates a new revision.
+ # To use a *specific existing revision*, it would need to be looked up by ARN;
+ # defining it here is the standard way to keep revision management in CDK.
+ fargate_task_definition_name = FARGATE_TASK_DEFINITION_NAME
+
+ read_only_file_system = ECS_READ_ONLY_FILE_SYSTEM == 'True'
+
+ # Define the volume name up front so both branches below can reference it
+ ephemeral_storage_volume_name = "appEphemeralVolume"
+
+ if os.path.exists(TASK_DEFINITION_FILE_LOCATION):
+ with open(TASK_DEFINITION_FILE_LOCATION) as f: # Use correct path
+ task_def_params = json.load(f)
+ # Need to ensure taskRoleArn and executionRoleArn in the JSON are correct ARN strings
+ else:
+ task_def_params = {}
+ task_def_params['taskRoleArn'] = task_role.role_arn # Use CDK role object ARN
+ task_def_params['executionRoleArn'] = execution_role.role_arn # Use CDK role object ARN
+ task_def_params['memory'] = ECS_TASK_MEMORY_SIZE
+ task_def_params['cpu'] = ECS_TASK_CPU_SIZE
+ # With a read-only root filesystem, every path the app writes to must be an
+ # explicit mount onto the task's ephemeral volume.
+ writable_paths = [
+ "/home/user/app/logs",
+ "/home/user/app/feedback",
+ "/home/user/app/usage",
+ "/home/user/app/input",
+ "/home/user/app/output",
+ "/home/user/app/tmp",
+ "/home/user/app/config",
+ "/tmp/matplotlib_cache",
+ "/tmp",
+ "/var/tmp",
+ "/tmp/tld",
+ "/tmp/gradio_tmp"
+ ]
+ container_def = {
+ "name": full_ecr_repo_name,
+ "image": ecr_image_loc + ":latest",
+ "essential": True,
+ "portMappings": [{"containerPort": int(GRADIO_SERVER_PORT), "hostPort": int(GRADIO_SERVER_PORT), "protocol": "tcp", "appProtocol": "http"}],
+ "logConfiguration": {"logDriver": "awslogs", "options": {"awslogs-group": ECS_LOG_GROUP_NAME, "awslogs-region": AWS_REGION, "awslogs-stream-prefix": "ecs"}},
+ "environmentFiles": [{"value": bucket.bucket_arn + "/config.env", "type": "s3"}],
+ "memoryReservation": int(task_def_params['memory']) - 512, # Soft memory limit, leaving 512 MiB of headroom
+ "mountPoints": [
+ {"sourceVolume": ephemeral_storage_volume_name, "containerPath": path, "readOnly": False}
+ for path in writable_paths
+ ],
+ "readonlyRootFilesystem": read_only_file_system,
+ }
+ task_def_params['containerDefinitions'] = [container_def]
+
+ log_group_name_from_config = task_def_params['containerDefinitions'][0]['logConfiguration']['options']['awslogs-group']
+
+ cdk_managed_log_group = logs.LogGroup(self, "MyTaskLogGroup", # CDK Logical ID
+ log_group_name=log_group_name_from_config,
+ retention=logs.RetentionDays.ONE_MONTH, # Example: set retention
+ removal_policy=RemovalPolicy.DESTROY # If you want it deleted when stack is deleted
+ )
+
+ ephemeral_storage_volume_cdk_obj = ecs.Volume(
+ name=ephemeral_storage_volume_name
+ )
+
+ fargate_task_definition = ecs.FargateTaskDefinition(
+ self,
+ "FargateTaskDefinition", # Logical ID
+ family=fargate_task_definition_name,
+ cpu=int(task_def_params['cpu']),
+ memory_limit_mib=int(task_def_params['memory']),
+ task_role=task_role,
+ execution_role=execution_role,
+ runtime_platform=ecs.RuntimePlatform(
+ cpu_architecture=ecs.CpuArchitecture.X86_64,
+ operating_system_family=ecs.OperatingSystemFamily.LINUX
+ ),
+ # 1. Specify the total ephemeral storage for the task
+ ephemeral_storage_gib=21, # Minimum is 21 GiB
+ # 2. Define the volume at the task level.
+ # This volume will use the ephemeral storage configured above.
+ volumes=[ephemeral_storage_volume_cdk_obj]
+ )
+ print("Fargate task definition defined.")
+
+ # Add container definitions to the task definition object
+ if task_def_params['containerDefinitions']:
+ container_def_params = task_def_params['containerDefinitions'][0]
+
+ env_files = []
+ if container_def_params.get('environmentFiles'):
+ for env_file_param in container_def_params['environmentFiles']:
+ # Parse the ARN to get the bucket name and object key
+ env_file_arn_parts = env_file_param['value'].split(":::")
+ bucket_name_and_key = env_file_arn_parts[-1]
+ env_bucket_name, env_key = bucket_name_and_key.split("/", 1)
+
+ env_file = ecs.EnvironmentFile.from_bucket(bucket, env_key)
+
+ env_files.append(env_file)
+
+ container = fargate_task_definition.add_container(
+ container_def_params['name'],
+ image=ecs.ContainerImage.from_registry(container_def_params['image']),
+ logging=ecs.LogDriver.aws_logs(
+ stream_prefix=container_def_params['logConfiguration']['options']['awslogs-stream-prefix'],
+ log_group=cdk_managed_log_group
+ ),
+ secrets={
+ "AWS_USER_POOL_ID": ecs.Secret.from_secrets_manager(secret, "REDACTION_USER_POOL_ID"),
+ "AWS_CLIENT_ID": ecs.Secret.from_secrets_manager(secret, "REDACTION_CLIENT_ID"),
+ "AWS_CLIENT_SECRET": ecs.Secret.from_secrets_manager(secret, "REDACTION_CLIENT_SECRET")
+ },
+ environment_files=env_files,
+ readonly_root_filesystem=read_only_file_system
+ )
+
+ for port_mapping in container_def_params['portMappings']:
+ container.add_port_mappings(
+ ecs.PortMapping(
+ container_port=int(port_mapping['containerPort']),
+ host_port=int(port_mapping['hostPort']),
+ name="port-" + str(port_mapping['containerPort']),
+ app_protocol=ecs.AppProtocol.http,
+ protocol=ecs.Protocol.TCP
+ )
+ )
+
+ container.add_port_mappings(ecs.PortMapping(
+ container_port=80,
+ host_port=80,
+ name="port-80",
+ app_protocol=ecs.AppProtocol.http,
+ protocol=ecs.Protocol.TCP
+ ))
+
+ if container_def_params.get('mountPoints'):
+ mount_points = []
+ for mount_point in container_def_params['mountPoints']:
+ mount_points.append(ecs.MountPoint(container_path=mount_point['containerPath'], read_only=mount_point['readOnly'], source_volume=ephemeral_storage_volume_name))
+ container.add_mount_points(*mount_points)
+
+ except Exception as e:
+ raise Exception(f"Could not handle Fargate task definition due to: {e}") from e
+
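+ # A hedged sketch (not used by this stack) of pinning a *specific* existing
+ # task definition revision by ARN instead of defining a new revision here:
+ # imported_td = ecs.TaskDefinition.from_task_definition_arn(
+ #     self, "ImportedTaskDef",
+ #     "arn:aws:ecs:<region>:<account>:task-definition/<family>:<revision>")
+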
+ # --- ECS Cluster ---
+ try:
+ cluster = ecs.Cluster(
+ self,
+ "ECSCluster", # Logical ID
+ cluster_name=CLUSTER_NAME, # Explicit resource name
+ enable_fargate_capacity_providers=True,
+ vpc=vpc
+ )
+ print("Successfully created new ECS cluster")
+ except Exception as e:
+ raise Exception(f"Could not handle ECS cluster due to: {e}") from e
+
+ # --- ECS Service ---
+ try:
+ ecs_service_name = ECS_SERVICE_NAME
+
+ if ECS_USE_FARGATE_SPOT == 'True':
+ use_fargate_spot = "FARGATE_SPOT"
+ else:
+ use_fargate_spot = "FARGATE"
+
+ # Check if the service exists - from_service_arn or from_service_attributes (needs the cluster)
+ try:
+ # from_service_attributes is useful if you have the cluster object
+ ecs_service = ecs.FargateService.from_service_attributes(
+ self, "ECSService", # Logical ID
+ cluster=cluster, # Requires the cluster object
+ service_name=ecs_service_name
+ )
+ print(f"Using existing ECS service {ecs_service_name}.")
+ except Exception:
+ # The service is created with a desired count of 0, because the initial
+ # Docker container has not yet been built with CodeBuild
+ ecs_service = ecs.FargateService(
+ self,
+ "ECSService", # Logical ID
+ service_name=ecs_service_name, # Explicit resource name
+ platform_version=ecs.FargatePlatformVersion.LATEST,
+ capacity_provider_strategies=[ecs.CapacityProviderStrategy(capacity_provider=use_fargate_spot, base=0, weight=1)],
+ cluster=cluster,
+ task_definition=fargate_task_definition, # Link to TD
+ security_groups=[ecs_security_group], # Link to SG
+ vpc_subnets=ec2.SubnetSelection(subnets=self.private_subnets), # Link to subnets
+ min_healthy_percent=0,
+ max_healthy_percent=100,
+ desired_count=0
+ )
+ print("Successfully created new ECS service")
+
+ # Note: Auto-scaling setup would typically go here if needed for the service
+
+ except Exception as e:
+ raise Exception(f"Could not handle ECS service due to: {e}") from e
+
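+ # The service starts with desired_count=0 on purpose: the image does not exist
+ # until CodeBuild has run. cdk/post_cdk_build_quickstart.py later calls
+ # start_ecs_task to bring the service up once the container is built.
+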
+ # --- Grant Secret Read Access (Applies to both created and imported roles) ---
+ try:
+ secret.grant_read(task_role)
+ secret.grant_read(execution_role)
+ except Exception as e:
+ raise Exception(f"Could not grant access to Secrets Manager due to: {e}") from e
+
+ # --- ALB TARGET GROUPS AND LISTENERS ---
+ # This section should primarily define the resources if they are managed by this stack.
+ # CDK handles adding/removing targets and actions on updates.
+ # If they might pre-exist outside the stack, you need lookups.
+ cookie_duration = Duration.hours(12)
+ target_group_name = ALB_TARGET_GROUP_NAME # Explicit resource name
+ cloudfront_distribution_url = "cloudfront_placeholder.net" # Replace afterwards with the actual cloudfront_distribution.domain_name
+
+ try:
+ # --- CREATING TARGET GROUPS AND ADDING THE CLOUDFRONT LISTENER RULE ---
+
+ target_group = elbv2.ApplicationTargetGroup(
+ self,
+ "AppTargetGroup", # Logical ID
+ target_group_name=target_group_name, # Explicit resource name
+ port=int(GRADIO_SERVER_PORT), # Ensure port is int
+ protocol=elbv2.ApplicationProtocol.HTTP,
+ targets=[ecs_service], # Link to ECS Service
+ stickiness_cookie_duration=cookie_duration,
+ vpc=vpc, # Target Groups need VPC
+ )
+ print(f"ALB target group {target_group_name} defined.")
+
+ # First HTTP
+ listener_port = 80
+ # Check if the listener exists - from_listener_arn or lookup by port/ALB
+
+ http_listener = alb.add_listener(
+ "HttpListener", # Logical ID
+ port=listener_port,
+ open=False, # Be cautious with open=True; usually restrict the source SG
+ )
+ print(f"ALB listener on port {listener_port} defined.")
+
+ if ACM_CERTIFICATE_ARN:
+ http_listener.add_action(
+ "DefaultAction", # Logical ID for the default action
+ action=elbv2.ListenerAction.redirect(protocol='HTTPS',
+ host='#{host}',
+ port='443',
+ path='/#{path}',
+ query='#{query}')
+ )
+ else:
+ if USE_CLOUDFRONT == 'True':
+ # The following default action can be added for the listener after a host header rule is added to the listener manually in the Console, as noted below.
+ http_listener.add_action(
+ "DefaultAction", # Logical ID for the default action
+ action=elbv2.ListenerAction.fixed_response(
+ status_code=403,
+ content_type="text/plain",
+ message_body="Access denied",
+ ),
+ )
+
+ # Add the Listener Rule for the specific CloudFront Host Header
+ http_listener.add_action(
+ "CloudFrontHostHeaderRule",
+ action=elbv2.ListenerAction.forward(target_groups=[target_group], stickiness_duration=cookie_duration),
+ priority=1, # Example priority. Adjust as needed. Lower is evaluated first.
+ conditions=[
+ elbv2.ListenerCondition.host_headers([cloudfront_distribution_url]) # May have to redefine the URL in the console afterwards if not specified in the config file
+ ]
+ )
+
+ else:
+ # Without CloudFront in front of the ALB, simply forward all traffic to the target group
+ http_listener.add_action(
+ "CloudFrontHostHeaderRule",
+ action=elbv2.ListenerAction.forward(target_groups=[target_group], stickiness_duration=cookie_duration)
+ )
+
+ print("Added targets and actions to ALB HTTP listener.")
+
+ # Now the same for HTTPS if you have an ACM certificate
+ if ACM_CERTIFICATE_ARN:
+ listener_port_https = 443
+ # Check if the listener exists - from_listener_arn or lookup by port/ALB
+
+ https_listener = add_alb_https_listener_with_cert(
+ self,
+ "MyHttpsListener", # Logical ID for the HTTPS listener
+ alb,
+ acm_certificate_arn=ACM_CERTIFICATE_ARN,
+ default_target_group=target_group,
+ enable_cognito_auth=True,
+ cognito_user_pool=user_pool,
+ cognito_user_pool_client=user_pool_client,
+ cognito_user_pool_domain=user_pool_domain,
+ listener_open_to_internet=True,
+ stickiness_cookie_duration=cookie_duration
+ )
+
+ if https_listener:
+ CfnOutput(self, "HttpsListenerArn", value=https_listener.listener_arn)
+
+ print(f"ALB listener on port {listener_port_https} defined.")
+
+ # if USE_CLOUDFRONT == 'True':
+ # # Add default action to the listener
+ # https_listener.add_action(
+ # "DefaultAction", # Logical ID for the default action
+ # action=elbv2.ListenerAction.fixed_response(
+ # status_code=403,
+ # content_type="text/plain",
+ # message_body="Access denied",
+ # ),
+ # )
+
+ # # Add the Listener Rule for the specific CloudFront Host Header
+ # https_listener.add_action(
+ # "CloudFrontHostHeaderRuleHTTPS",
+ # action=elbv2.ListenerAction.forward(target_groups=[target_group], stickiness_duration=cookie_duration),
+ # priority=1, # Example priority. Adjust as needed. Lower is evaluated first.
+ # conditions=[
+ # elbv2.ListenerCondition.host_headers([cloudfront_distribution_url])
+ # ]
+ # )
+ # else:
+ # https_listener.add_action(
+ # "CloudFrontHostHeaderRuleHTTPS",
+ # action=elbv2.ListenerAction.forward(target_groups=[target_group], stickiness_duration=cookie_duration))
+
+ print("Added targets and actions to ALB HTTPS listener.")
+
+ except Exception as e:
+ raise Exception(f"Could not handle ALB target groups and listeners due to: {e}") from e
+
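+ # A hedged follow-up (the exact CLI invocation is an assumption about your
+ # tooling): once the CloudFront stack reports its real domain name, the
+ # placeholder host-header rule above can be updated in place, e.g.:
+ # aws elbv2 modify-rule --rule-arn <cloudfront-host-header-rule-arn> \
+ #     --conditions Field=host-header,Values=<distribution-id>.cloudfront.net
+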
+ # Create WAF to attach to the load balancer
+ try:
+ web_acl_name = LOAD_BALANCER_WEB_ACL_NAME
+ if get_context_bool(f"exists:{web_acl_name}"):
+ # Look up the WAF web ACL ARN from context
+ web_acl_arn = get_context_str(f"arn:{web_acl_name}")
+ if not web_acl_arn:
+ raise ValueError(f"Context value 'arn:{web_acl_name}' is required if Web ACL exists.")
+
+ web_acl = create_web_acl_with_common_rules(self, web_acl_name, waf_scope="REGIONAL") # Assuming it takes scope and name
+ print(f"Handled ALB WAF web ACL {web_acl_name}.")
+ else:
+ web_acl = create_web_acl_with_common_rules(self, web_acl_name, waf_scope="REGIONAL") # Assuming it takes scope and name
+ print(f"Created ALB WAF web ACL {web_acl_name}.")
+
+ alb_waf_association = wafv2.CfnWebACLAssociation(self, id="alb_waf_association", resource_arn=alb.load_balancer_arn, web_acl_arn=web_acl.attr_arn)
+
+ except Exception as e:
+ raise Exception(f"Could not create ALB WAF web ACL due to: {e}") from e
+
+ # --- Outputs for other stacks/regions ---
+
+ self.params = dict()
+ self.params["alb_arn_output"] = alb.load_balancer_arn
+ self.params["alb_security_group_id"] = alb_security_group.security_group_id
+ self.params["alb_dns_name"] = alb.load_balancer_dns_name
+
+ CfnOutput(self, "AlbArnOutput",
+ value=alb.load_balancer_arn,
+ description="ARN of the Application Load Balancer",
+ export_name=f"{self.stack_name}-AlbArn") # Export name must be unique within the account/region
+
+ CfnOutput(self, "AlbSecurityGroupIdOutput",
+ value=alb_security_group.security_group_id,
+ description="ID of the ALB's Security Group",
+ export_name=f"{self.stack_name}-AlbSgId")
+ CfnOutput(self, "ALBName", value=alb.load_balancer_name)
+
+ CfnOutput(self, "RegionalAlbDnsName", value=alb.load_balancer_dns_name)
+
+ CfnOutput(self, "CognitoPoolId", value=user_pool.user_pool_id)
+ # Add other outputs if needed
+
+ CfnOutput(self, "ECRRepoUri", value=ecr_repo.repository_uri)
+
+ # --- CLOUDFRONT DISTRIBUTION in separate stack (us-east-1 required) ---
+ class CdkStackCloudfront(Stack):
+
+ def __init__(self, scope: Construct, construct_id: str, alb_arn: str, alb_sec_group_id: str, alb_dns_name: str, **kwargs) -> None:
+ super().__init__(scope, construct_id, **kwargs)
+
+ # --- Helpers to get context values ---
+ def get_context_bool(key: str, default: bool = False) -> bool:
+ return self.node.try_get_context(key) or default
+
+ def get_context_str(key: str, default: str = None) -> str:
+ return self.node.try_get_context(key) or default
+
+ def get_context_dict(scope: Construct, key: str, default: dict = None) -> dict:
+ return scope.node.try_get_context(key) or default
+
+ print(f"CloudFront Stack: Received ALB ARN: {alb_arn}")
+ print(f"CloudFront Stack: Received ALB Security Group ID: {alb_sec_group_id}")
+
+ if not alb_arn:
+ raise ValueError("ALB ARN must be provided to CloudFront stack")
+ if not alb_sec_group_id:
+ raise ValueError("ALB Security Group ID must be provided to CloudFront stack")
+
+ # Import the ALB using its ARN.
+ # This imports an existing ALB as a construct in the CloudFront stack's context;
+ # CloudFormation will understand this reference at deploy time.
+ alb = elbv2.ApplicationLoadBalancer.from_application_load_balancer_attributes(
+ self, "ImportedAlb", load_balancer_arn=alb_arn, security_group_id=alb_sec_group_id, load_balancer_dns_name=alb_dns_name
+ )
+
+ try:
+ web_acl_name = WEB_ACL_NAME
+ if get_context_bool(f"exists:{web_acl_name}"):
+ # Look up the WAF web ACL ARN from context
+ web_acl_arn = get_context_str(f"arn:{web_acl_name}")
+ if not web_acl_arn:
+ raise ValueError(f"Context value 'arn:{web_acl_name}' is required if Web ACL exists.")
+
+ web_acl = create_web_acl_with_common_rules(self, web_acl_name) # Assuming it takes scope and name
+ print(f"Handled CloudFront WAF web ACL {web_acl_name}.")
+ else:
+ web_acl = create_web_acl_with_common_rules(self, web_acl_name) # Assuming it takes scope and name
+ print(f"Created CloudFront WAF web ACL {web_acl_name}.")
+
+ # Add ALB as CloudFront Origin
+ origin = origins.LoadBalancerV2Origin(
+ alb, # Use the created or looked-up ALB object
+ custom_headers={CUSTOM_HEADER: CUSTOM_HEADER_VALUE},
+ origin_shield_enabled=False,
+ protocol_policy=cloudfront.OriginProtocolPolicy.HTTP_ONLY,
+ )
+
+ if CLOUDFRONT_GEO_RESTRICTION:
+ geo_restrict = cloudfront.GeoRestriction.allowlist(CLOUDFRONT_GEO_RESTRICTION)
+ else:
+ geo_restrict = None
+
+ cloudfront_distribution = cloudfront.Distribution(
+ self,
+ "CloudFrontDistribution", # Logical ID
+ comment=CLOUDFRONT_DISTRIBUTION_NAME, # Use name as comment for easier identification
+ geo_restriction=geo_restrict,
+ default_behavior=cloudfront.BehaviorOptions(
+ origin=origin,
+ viewer_protocol_policy=cloudfront.ViewerProtocolPolicy.REDIRECT_TO_HTTPS,
+ allowed_methods=cloudfront.AllowedMethods.ALLOW_ALL,
+ cache_policy=cloudfront.CachePolicy.CACHING_DISABLED,
+ origin_request_policy=cloudfront.OriginRequestPolicy.ALL_VIEWER,
+ ),
+ web_acl_id=web_acl.attr_arn
+ )
+ print(f"CloudFront distribution {CLOUDFRONT_DISTRIBUTION_NAME} defined.")
+
+ except Exception as e:
+ raise Exception(f"Could not handle CloudFront distribution due to: {e}") from e
+
+ # --- Outputs ---
+ CfnOutput(self, "CloudFrontDistributionURL",
+ value=cloudfront_distribution.domain_name)
+
cdk/check_resources.py ADDED
@@ -0,0 +1,297 @@
+ import json
+ import os
+ from cdk_config import CDK_PREFIX, VPC_NAME, AWS_REGION, PUBLIC_SUBNETS_TO_USE, PRIVATE_SUBNETS_TO_USE, CODEBUILD_ROLE_NAME, ECS_TASK_ROLE_NAME, ECS_TASK_EXECUTION_ROLE_NAME, S3_LOG_CONFIG_BUCKET_NAME, S3_OUTPUT_BUCKET_NAME, ECR_CDK_REPO_NAME, CODEBUILD_PROJECT_NAME, ALB_NAME, COGNITO_USER_POOL_NAME, COGNITO_USER_POOL_CLIENT_NAME, COGNITO_USER_POOL_CLIENT_SECRET_NAME, WEB_ACL_NAME, CONTEXT_FILE, PUBLIC_SUBNET_CIDR_BLOCKS, PRIVATE_SUBNET_CIDR_BLOCKS, PUBLIC_SUBNET_AVAILABILITY_ZONES, PRIVATE_SUBNET_AVAILABILITY_ZONES, CDK_FOLDER, CDK_CONFIG_PATH # Import necessary config
+ from cdk_functions import ( # Import your check functions (assuming they use Boto3)
+ get_vpc_id_by_name,
+ check_subnet_exists_by_name,
+ check_for_existing_role,
+ check_s3_bucket_exists,
+ check_ecr_repo_exists,
+ check_codebuild_project_exists,
+ check_alb_exists,
+ check_for_existing_user_pool,
+ check_for_existing_user_pool_client,
+ check_for_secret,
+ check_cloudfront_distribution_exists,
+ check_web_acl_exists,
+ _get_existing_subnets_in_vpc,
+ validate_subnet_creation_parameters
+ # Add other check functions as needed
+ )
+
+ from typing import List, Dict, Any
+
+ cdk_folder = CDK_FOLDER # <FULL_PATH_TO_CDK_FOLDER_HERE>
+
+ # Full path needed to find config file
+ os.environ["CDK_CONFIG_PATH"] = cdk_folder + CDK_CONFIG_PATH
+
+ # --- Helper to parse environment variables into lists ---
+ def _get_env_list(env_var_value: str) -> List[str]:
+ """Parses a bracketed, comma-separated environment variable value into a list of strings."""
+ # Strip the surrounding brackets, then any quote characters
+ value = env_var_value[1:-1].strip().replace('\"', '').replace("\'", "")
+ if not value:
+ return []
+ # Split by comma and filter out any empty strings that might result from extra commas
+ return [s.strip() for s in value.split(',') if s.strip()]
+
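+ # For example, given the raw value read from the .env file:
+ # _get_env_list('["subnet-a", "subnet-b"]') -> ['subnet-a', 'subnet-b']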
+
+ if PUBLIC_SUBNETS_TO_USE and not isinstance(PUBLIC_SUBNETS_TO_USE, list): PUBLIC_SUBNETS_TO_USE = _get_env_list(PUBLIC_SUBNETS_TO_USE)
+ if PRIVATE_SUBNETS_TO_USE and not isinstance(PRIVATE_SUBNETS_TO_USE, list): PRIVATE_SUBNETS_TO_USE = _get_env_list(PRIVATE_SUBNETS_TO_USE)
+ if PUBLIC_SUBNET_CIDR_BLOCKS and not isinstance(PUBLIC_SUBNET_CIDR_BLOCKS, list): PUBLIC_SUBNET_CIDR_BLOCKS = _get_env_list(PUBLIC_SUBNET_CIDR_BLOCKS)
+ if PUBLIC_SUBNET_AVAILABILITY_ZONES and not isinstance(PUBLIC_SUBNET_AVAILABILITY_ZONES, list): PUBLIC_SUBNET_AVAILABILITY_ZONES = _get_env_list(PUBLIC_SUBNET_AVAILABILITY_ZONES)
+ if PRIVATE_SUBNET_CIDR_BLOCKS and not isinstance(PRIVATE_SUBNET_CIDR_BLOCKS, list): PRIVATE_SUBNET_CIDR_BLOCKS = _get_env_list(PRIVATE_SUBNET_CIDR_BLOCKS)
+ if PRIVATE_SUBNET_AVAILABILITY_ZONES and not isinstance(PRIVATE_SUBNET_AVAILABILITY_ZONES, list): PRIVATE_SUBNET_AVAILABILITY_ZONES = _get_env_list(PRIVATE_SUBNET_AVAILABILITY_ZONES)
+
+ # Check for the existence of elements in your AWS environment to see whether it is necessary to create new versions of the same resources
+
+ def check_and_set_context():
+ context_data = {}
+
+ # --- Find the VPC ID first ---
+ print("VPC_NAME:", VPC_NAME)
+ vpc_id, nat_gateways = get_vpc_id_by_name(VPC_NAME)
+
+ # If you expect only one, or one per AZ and you're creating one per AZ in CDK:
+ if nat_gateways:
+ # For simplicity, just check whether *any* NAT gateway exists in the VPC.
+ # A more robust check would match by subnet, AZ, or a specific tag.
+ context_data["exists:NatGateway"] = True
+ context_data["id:NatGateway"] = nat_gateways[0]['NatGatewayId'] # Store the ID of the first one found
+ else:
+ context_data["exists:NatGateway"] = False
+ context_data["id:NatGateway"] = None
+
+ if not vpc_id:
+ # If the VPC doesn't exist, subnets can be neither checked nor created.
+ raise RuntimeError(f"Required VPC '{VPC_NAME}' not found. Cannot proceed with subnet checks.")
+
+ context_data["vpc_id"] = vpc_id # Store VPC ID in context
+
+ # SUBNET CHECKS
+ # (Do not re-initialise context_data here - it already holds the VPC and NAT results.)
+ all_proposed_subnets_data: List[Dict[str, str]] = []
+
+ # Flag to indicate if full validation mode (with CIDR/AZs) is active
+ full_validation_mode = False
+
+ # Determine if full validation mode is possible/desired.
+ # It's 'desired' if CIDRs/AZs are provided and their lengths match the name lists.
+ public_ready_for_full_validation = (
+ len(PUBLIC_SUBNETS_TO_USE) > 0 and
+ len(PUBLIC_SUBNET_CIDR_BLOCKS) == len(PUBLIC_SUBNETS_TO_USE) and
+ len(PUBLIC_SUBNET_AVAILABILITY_ZONES) == len(PUBLIC_SUBNETS_TO_USE)
+ )
+ private_ready_for_full_validation = (
+ len(PRIVATE_SUBNETS_TO_USE) > 0 and
+ len(PRIVATE_SUBNET_CIDR_BLOCKS) == len(PRIVATE_SUBNETS_TO_USE) and
+ len(PRIVATE_SUBNET_AVAILABILITY_ZONES) == len(PRIVATE_SUBNETS_TO_USE)
+ )
+
+ # Activate full validation if *any* type of subnet (public or private) has its full details provided.
+ # You might adjust this logic if you require ALL subnet types to have CIDRs, or NONE.
+ if public_ready_for_full_validation or private_ready_for_full_validation:
+ full_validation_mode = True
+
+ # If some are ready but others aren't, print a warning or raise an error based on your strictness
+ if public_ready_for_full_validation and not private_ready_for_full_validation and PRIVATE_SUBNETS_TO_USE:
+ print("Warning: Public subnets have CIDRs/AZs, but private subnets do not. Only public will be fully validated/created with CIDRs.")
+ if private_ready_for_full_validation and not public_ready_for_full_validation and PUBLIC_SUBNETS_TO_USE:
+ print("Warning: Private subnets have CIDRs/AZs, but public subnets do not. Only private will be fully validated/created with CIDRs.")
+
+ # Prepare data for validate_subnet_creation_parameters for all subnets that have full details
+ if public_ready_for_full_validation:
+ for i, name in enumerate(PUBLIC_SUBNETS_TO_USE):
+ all_proposed_subnets_data.append({
+ 'name': name,
+ 'cidr': PUBLIC_SUBNET_CIDR_BLOCKS[i],
+ 'az': PUBLIC_SUBNET_AVAILABILITY_ZONES[i]
+ })
+ if private_ready_for_full_validation:
+ for i, name in enumerate(PRIVATE_SUBNETS_TO_USE):
+ all_proposed_subnets_data.append({
+ 'name': name,
+ 'cidr': PRIVATE_SUBNET_CIDR_BLOCKS[i],
+ 'az': PRIVATE_SUBNET_AVAILABILITY_ZONES[i]
+ })
+
+ print(f"Target VPC ID for Boto3 lookup: {vpc_id}")
+
+ # Fetch all existing subnets in the target VPC once to avoid repeated API calls
+ try:
+ existing_aws_subnets = _get_existing_subnets_in_vpc(vpc_id)
+ except Exception as e:
+ print(f"Failed to fetch existing VPC subnets. Aborting. Error: {e}")
+ raise SystemExit(1) # Exit immediately if we can't get baseline data
+
+ print("\n--- Running Name-Only Subnet Existence Check Mode ---")
+ # First pass: check only by name using the existing data
+ checked_public_subnets = {}
+ if PUBLIC_SUBNETS_TO_USE:
+ for subnet_name in PUBLIC_SUBNETS_TO_USE:
+ print("subnet_name:", subnet_name)
+ exists, subnet_id = check_subnet_exists_by_name(subnet_name, existing_aws_subnets)
+ checked_public_subnets[subnet_name] = {"exists": exists, "id": subnet_id}
+
+ # If the subnet exists, remove it from the proposed subnets list
+ if exists:
+ all_proposed_subnets_data = [
+ subnet for subnet in all_proposed_subnets_data
+ if subnet['name'] != subnet_name
+ ]
+
+ context_data["checked_public_subnets"] = checked_public_subnets
+
+ checked_private_subnets = {}
+ if PRIVATE_SUBNETS_TO_USE:
+ for subnet_name in PRIVATE_SUBNETS_TO_USE:
+ print("subnet_name:", subnet_name)
+ exists, subnet_id = check_subnet_exists_by_name(subnet_name, existing_aws_subnets)
+ checked_private_subnets[subnet_name] = {"exists": exists, "id": subnet_id}
+
+ # If the subnet exists, remove it from the proposed subnets list
+ if exists:
+ all_proposed_subnets_data = [
+ subnet for subnet in all_proposed_subnets_data
+ if subnet['name'] != subnet_name
+ ]
+
+ context_data["checked_private_subnets"] = checked_private_subnets
+
+ print("\nName-only subnet existence check complete.\n")
+
+ if full_validation_mode:
+ print("\n--- Running in Full Subnet Validation Mode (CIDR/AZs provided) ---")
+ try:
+ validate_subnet_creation_parameters(vpc_id, all_proposed_subnets_data, existing_aws_subnets)
+ print("\nPre-synth validation successful. Proceeding with CDK synth.\n")
+
+ # Populate context_data for downstream CDK construct creation
+ context_data["public_subnets_to_create"] = []
+ if public_ready_for_full_validation:
+ for i, name in enumerate(PUBLIC_SUBNETS_TO_USE):
+ context_data["public_subnets_to_create"].append({
+ 'name': name,
+ 'cidr': PUBLIC_SUBNET_CIDR_BLOCKS[i],
+ 'az': PUBLIC_SUBNET_AVAILABILITY_ZONES[i],
+ 'is_public': True
+ })
+ context_data["private_subnets_to_create"] = []
+ if private_ready_for_full_validation:
+ for i, name in enumerate(PRIVATE_SUBNETS_TO_USE):
+ context_data["private_subnets_to_create"].append({
+ 'name': name,
+ 'cidr': PRIVATE_SUBNET_CIDR_BLOCKS[i],
+ 'az': PRIVATE_SUBNET_AVAILABILITY_ZONES[i],
+ 'is_public': False
+ })
+
+ except Exception as e:
+ print(f"\nFATAL ERROR: Subnet parameter validation failed: {e}\n")
+ raise SystemExit(1) # Exit if validation fails
+
+ # Example checks and setting context values
+ # IAM Roles
+ role_name = CODEBUILD_ROLE_NAME
+ exists, role_arn, _ = check_for_existing_role(role_name)
+ context_data[f"exists:{role_name}"] = exists # Use boolean
+ if exists:
+ context_data[f"arn:{role_name}"] = role_arn
+
+ role_name = ECS_TASK_ROLE_NAME
+ exists, role_arn, _ = check_for_existing_role(role_name)
+ context_data[f"exists:{role_name}"] = exists
+ if exists:
+ context_data[f"arn:{role_name}"] = role_arn
+
+ role_name = ECS_TASK_EXECUTION_ROLE_NAME
+ exists, role_arn, _ = check_for_existing_role(role_name)
+ context_data[f"exists:{role_name}"] = exists
+ if exists:
+ context_data[f"arn:{role_name}"] = role_arn
+
+ # S3 Buckets - an existence flag is enough, since from_bucket_name needs no ARN
+ bucket_name = S3_LOG_CONFIG_BUCKET_NAME
+ exists, _ = check_s3_bucket_exists(bucket_name)
+ context_data[f"exists:{bucket_name}"] = exists
+
+ output_bucket_name = S3_OUTPUT_BUCKET_NAME
+ exists, _ = check_s3_bucket_exists(output_bucket_name)
+ context_data[f"exists:{output_bucket_name}"] = exists
+
+ # ECR Repository - from_repository_name is sufficient
+ repo_name = ECR_CDK_REPO_NAME
+ exists, _ = check_ecr_repo_exists(repo_name)
+ context_data[f"exists:{repo_name}"] = exists
+
+ # CodeBuild Project
+ project_name = CODEBUILD_PROJECT_NAME
+ exists, project_arn = check_codebuild_project_exists(project_name) # Assuming it returns the ARN
+ context_data[f"exists:{project_name}"] = exists
+ if exists:
+ context_data[f"arn:{project_name}"] = project_arn
+
+ # ALB (by name lookup)
+ alb_name = ALB_NAME
+ exists, alb_object = check_alb_exists(alb_name, region_name=AWS_REGION) # Assuming check returns the ALB description
+ context_data[f"exists:{alb_name}"] = exists
+ if exists:
+ print("alb_object:", alb_object)
+ context_data[f"arn:{alb_name}"] = alb_object['LoadBalancerArn']
+
+ # Cognito User Pool (by name)
+ user_pool_name = COGNITO_USER_POOL_NAME
+ exists, user_pool_id, _ = check_for_existing_user_pool(user_pool_name)
+ context_data[f"exists:{user_pool_name}"] = exists
+ if exists:
+ context_data[f"id:{user_pool_name}"] = user_pool_id
+
+ # Cognito User Pool Client (by name and pool ID) - requires the User Pool ID from the check above
+ if user_pool_id:
+ user_pool_client_name = COGNITO_USER_POOL_CLIENT_NAME
+ exists, client_id, _ = check_for_existing_user_pool_client(user_pool_client_name, user_pool_id)
+ context_data[f"exists:{user_pool_client_name}"] = exists
+ if exists:
+ context_data[f"id:{user_pool_client_name}"] = client_id
+
+ # Secrets Manager Secret (by name); the ARN is not needed when using from_secret_name_v2
+ secret_name = COGNITO_USER_POOL_CLIENT_SECRET_NAME
+ exists, _ = check_for_secret(secret_name)
+ context_data[f"exists:{secret_name}"] = exists
+
+ # WAF Web ACL (by name and scope)
+ web_acl_name = WEB_ACL_NAME
+ exists, existing_web_acl = check_web_acl_exists(web_acl_name, scope="CLOUDFRONT") # Assuming check returns the ACL object
+ context_data[f"exists:{web_acl_name}"] = exists
+ if exists:
+ context_data[f"arn:{web_acl_name}"] = existing_web_acl.attr_arn
+
+ # Write the context data to the file
+ with open(CONTEXT_FILE, "w") as f:
+ json.dump(context_data, f, indent=2)
+
+ print(f"Context data written to {CONTEXT_FILE}")
+
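+ # A hedged illustration of the JSON this produces (IDs are invented):
+ # {
+ #   "exists:NatGateway": true,
+ #   "id:NatGateway": "nat-0123456789abcdef0",
+ #   "vpc_id": "vpc-0123456789abcdef0",
+ #   "checked_public_subnets": {"my-public-subnet": {"exists": false, "id": null}},
+ #   "exists:<resource-name>": true,
+ #   "arn:<resource-name>": "arn:aws:..."
+ # }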
cdk/post_cdk_build_quickstart.py ADDED
@@ -0,0 +1,27 @@
+ import time
+ from cdk_config import CODEBUILD_PROJECT_NAME, S3_LOG_CONFIG_BUCKET_NAME, CLUSTER_NAME, ECS_SERVICE_NAME
+ from cdk_functions import start_codebuild_build, upload_file_to_s3, start_ecs_task, create_basic_config_env
+ from tqdm import tqdm
+
+ # Create a basic config.env file that the user can use to run the app later. The input is the folder it is saved into.
+ create_basic_config_env("config")
+
+ # Start the CodeBuild build
+ print("Starting CodeBuild project.")
+ start_codebuild_build(PROJECT_NAME=CODEBUILD_PROJECT_NAME)
+
+ # Upload the config.env file to the S3 bucket
+ upload_file_to_s3(local_file_paths="config/config.env", s3_key="", s3_bucket=S3_LOG_CONFIG_BUCKET_NAME)
+
+ total_seconds = 480 # 8 minutes * 60 seconds/minute
+ update_interval = 1 # Update every second
+
+ print("Waiting eight minutes for the CodeBuild container to build.")
+
+ # tqdm renders a progress bar over the wait window; each iteration sleeps briefly
+ for i in tqdm(range(total_seconds), desc="Building container"):
+ time.sleep(update_interval)
+
+ # Start the task on ECS
+ print("Starting ECS task")
+ start_ecs_task(cluster_name=CLUSTER_NAME, service_name=ECS_SERVICE_NAME)
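+
+ # A hedged alternative to the fixed eight-minute wait (assumes direct boto3
+ # access and a build started via start_build rather than the wrapper above):
+ # import boto3
+ # cb = boto3.client("codebuild")
+ # build_id = cb.start_build(projectName=CODEBUILD_PROJECT_NAME)["build"]["id"]
+ # while cb.batch_get_builds(ids=[build_id])["builds"][0]["buildStatus"] == "IN_PROGRESS":
+ #     time.sleep(15)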
cdk/requirements.txt ADDED
@@ -0,0 +1,5 @@
+ aws-cdk-lib==2.200.2
+ boto3==1.38.35
+ pandas==2.2.3
+ nodejs==0.1.1
+ python-dotenv==1.0.1
tools/config.py CHANGED
@@ -64,10 +64,12 @@ def add_folder_to_path(folder_path: str):
 # LOAD CONFIG FROM ENV FILE
 ###
 
- ensure_folder_exists("config/")
+ CONFIG_FOLDER = get_or_create_env_var('CONFIG_FOLDER', 'config/')
+
+ ensure_folder_exists(CONFIG_FOLDER)
 
 # If you have an aws_config env file in the config folder, you can load in app variables this way, e.g. 'config/app_config.env'
- APP_CONFIG_PATH = get_or_create_env_var('APP_CONFIG_PATH', 'config/app_config.env') # e.g. config/app_config.env
+ APP_CONFIG_PATH = get_or_create_env_var('APP_CONFIG_PATH', CONFIG_FOLDER + 'app_config.env') # e.g. config/app_config.env
 
 if APP_CONFIG_PATH:
 if os.path.exists(APP_CONFIG_PATH):
@@ -75,10 +77,6 @@ if APP_CONFIG_PATH:
 load_dotenv(APP_CONFIG_PATH)
 else: print("App config file not found at location:", APP_CONFIG_PATH)
 
-
-
-
-
 ###
 # AWS OPTIONS
 ###
@@ -149,6 +147,12 @@ if OUTPUT_FOLDER == "TEMP" or INPUT_FOLDER == "TEMP":
 if INPUT_FOLDER == "TEMP": INPUT_FOLDER = temp_dir + "/"
 
 
+ GRADIO_TEMP_DIR = get_or_create_env_var('GRADIO_TEMP_DIR', 'tmp/gradio_tmp/') # Default Gradio temp folder
+ MPLCONFIGDIR = get_or_create_env_var('MPLCONFIGDIR', 'tmp/matplotlib_cache/') # Matplotlib cache folder
+
+ ensure_folder_exists(GRADIO_TEMP_DIR)
+ ensure_folder_exists(MPLCONFIGDIR)
+
 ###
 # LOGGING OPTIONS
 ###
@@ -182,7 +186,7 @@ DISPLAY_FILE_NAMES_IN_LOGS = get_or_create_env_var('DISPLAY_FILE_NAMES_IN_LOGS',
 
 CSV_ACCESS_LOG_HEADERS = get_or_create_env_var('CSV_ACCESS_LOG_HEADERS', '') # If blank, uses component labels
 CSV_FEEDBACK_LOG_HEADERS = get_or_create_env_var('CSV_FEEDBACK_LOG_HEADERS', '') # If blank, uses component labels
- CSV_USAGE_LOG_HEADERS = get_or_create_env_var('CSV_USAGE_LOG_HEADERS', '["session_hash_textbox", "doc_full_file_name_textbox", "data_full_file_name_textbox", "actual_time_taken_number", "total_page_count", "textract_query_number", "pii_detection_method", "comprehend_query_number", "cost_code", "textract_handwriting_signature", "host_name_textbox", "text_extraction_method", "is_this_a_textract_api_call"]') # If blank, uses component labels
+ CSV_USAGE_LOG_HEADERS = get_or_create_env_var('CSV_USAGE_LOG_HEADERS', '["session_hash_textbox", "doc_full_file_name_textbox", "data_full_file_name_textbox", "actual_time_taken_number", "total_page_count", "textract_query_number", "pii_detection_method", "comprehend_query_number", "cost_code", "textract_handwriting_signature", "host_name_textbox", "text_extraction_method", "is_this_a_textract_api_call"]') # If blank, uses component labels
 
 ### DYNAMODB logs. Whether to save to DynamoDB, and the headers of the table
 
@@ -310,7 +314,7 @@ COMPRESS_REDACTED_PDF = get_or_create_env_var("COMPRESS_REDACTED_PDF","False") #
 # APP RUN OPTIONS
 ###
 
- TLDEXTRACT_CACHE = get_or_create_env_var('TLDEXTRACT_CACHE', 'tld/.tld_set_snapshot')
+ TLDEXTRACT_CACHE = get_or_create_env_var('TLDEXTRACT_CACHE', 'tmp/tld/')
 try:
 extract = TLDExtract(cache_dir=TLDEXTRACT_CACHE)
 except:
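
Note: the new config defaults above are relative paths. On ECS with a read-only root filesystem, the Fargate task definition earlier in this commit mounts the ephemeral volume at /tmp/gradio_tmp, /tmp/matplotlib_cache and /tmp/tld, so a deployed config.env would typically point at those absolute paths instead (illustrative values, not part of this commit):

GRADIO_TEMP_DIR=/tmp/gradio_tmp/
MPLCONFIGDIR=/tmp/matplotlib_cache/
TLDEXTRACT_CACHE=/tmp/tld/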