seanpedrickcase committed on
Commit
36574ae
·
1 Parent(s): a7566b9

Added folder with CDK code and app. Updated config.py to be compatible with the temp folders needed for read-only file systems

cdk/__init__.py ADDED
File without changes
cdk/app.py ADDED
@@ -0,0 +1,81 @@
import os
from aws_cdk import (App, Environment)

# Assuming these are still relevant for you
from check_resources import check_and_set_context, CONTEXT_FILE
from cdk_config import AWS_ACCOUNT_ID, AWS_REGION, RUN_USEAST_STACK, USE_CLOUDFRONT
from cdk_stack import CdkStack, CdkStackCloudfront #, CdkStackMain
from cdk_functions import load_context_from_file, create_basic_config_env

# Initialize the CDK app
app = App()

# --- ENHANCED CONTEXT GENERATION AND LOADING ---
# 1. Always remove any stale context file before regeneration
if os.path.exists(CONTEXT_FILE):
    try:
        os.remove(CONTEXT_FILE)
        print(f"Removed stale context file: {CONTEXT_FILE}")
    except OSError as e:
        print(f"Warning: Could not remove old context file {CONTEXT_FILE}: {e}")
        # Proceed anyway; check_and_set_context might handle overwriting

# 2. Always run the pre-check script to generate fresh context
print("Running pre-check script to generate application context...")
try:
    check_and_set_context()
    if not os.path.exists(CONTEXT_FILE):
        raise RuntimeError(f"check_and_set_context() finished, but {CONTEXT_FILE} was not created.")
    print(f"Context generated successfully at {CONTEXT_FILE}.")
except Exception as e:
    raise RuntimeError(f"Failed to generate context via check_and_set_context(): {e}")

if os.path.exists(CONTEXT_FILE):
    load_context_from_file(app, CONTEXT_FILE)
else:
    raise RuntimeError(f"Could not find {CONTEXT_FILE}.")

# Create a basic config.env file that the user can use to run the app later. Input is the folder it is saved into.
create_basic_config_env("config")

# Define the environment for the regional stack (where the ALB resides)
aws_env_regional = Environment(account=AWS_ACCOUNT_ID, region=AWS_REGION)

# Create the regional stack (ALB, SGs, etc.)
# regional_stack = CdkStack(app,
#     "RedactionStackSubnets",
#     env=aws_env_regional,
#     cross_region_references=True)

# regional_stack_main = CdkStackMain(app,
#     "RedactionStackMain",
#     env=aws_env_regional,
#     private_subnets=regional_stack.params["private_subnets"],
#     private_route_tables=regional_stack.params["private_route_tables"],
#     public_subnets=regional_stack.params["public_subnets"],
#     public_route_tables=regional_stack.params["public_route_tables"],
#     cross_region_references=True)

regional_stack = CdkStack(app,
    "RedactionStack",
    env=aws_env_regional,
    cross_region_references=True)

if USE_CLOUDFRONT == 'True' and RUN_USEAST_STACK == 'True':
    # Define the environment for the CloudFront stack (always us-east-1 for CF-level resources like WAFv2 WebACLs for CF)
    aws_env_us_east_1 = Environment(account=AWS_ACCOUNT_ID, region="us-east-1")

    # Create the CloudFront stack, passing the outputs from the regional stack
    cloudfront_stack = CdkStackCloudfront(
        app,
        "RedactionStackCloudfront",
        env=aws_env_us_east_1,
        alb_arn=regional_stack.params["alb_arn_output"],
        alb_sec_group_id=regional_stack.params["alb_security_group_id"],
        alb_dns_name=regional_stack.params["alb_dns_name"],
        cross_region_references=True
    )

# Synthesize the CloudFormation template
app.synth(validate_on_synthesis=True)
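
A quick note on the contract assumed above (a sketch; the real keys are whatever check_and_set_context() writes): the pre-check script produces a flat JSON map in cdk.context.json, and load_context_from_file() replays each key into the app, where stacks can read it back:

    # cdk.context.json (illustrative contents)
    # {"exists:example-log-bucket": true, "vpc_id": "vpc-0123456789abcdef0"}
    from aws_cdk import App

    app = App()
    app.node.set_context("exists:example-log-bucket", True)               # what load_context_from_file does per key
    assert app.node.try_get_context("exists:example-log-bucket") is True  # how a stack reads it back
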
cdk/cdk_config.py ADDED
@@ -0,0 +1,225 @@
import os
import tempfile
from dotenv import load_dotenv

# Set or retrieve configuration variables for the CDK redaction deployment

def get_or_create_env_var(var_name: str, default_value: str, print_val: bool=False):
    '''
    Get an environment variable, setting it to a default value if it doesn't exist.
    '''
    # Get the environment variable if it exists
    value = os.environ.get(var_name)

    # If it doesn't exist, set the environment variable to the default value
    if value is None:
        os.environ[var_name] = default_value
        value = default_value

    if print_val:
        print(f'The value of {var_name} is {value}')

    return value

def ensure_folder_exists(output_folder: str):
    """Checks if the specified folder exists, and creates it if not."""

    if not os.path.exists(output_folder):
        # Create the folder if it doesn't exist
        os.makedirs(output_folder, exist_ok=True)
        print(f"Created the {output_folder} folder.")
    else:
        print(f"The {output_folder} folder already exists.")

def add_folder_to_path(folder_path: str):
    '''
    Check if a folder exists on your system. If so, get the absolute path and add it to the system PATH variable if it isn't already there. This function is only relevant for locally-created executable files based on this app (PyInstaller creates an _internal folder containing Tesseract and Poppler, which need to be on the system PATH for the app to run).
    '''

    if os.path.exists(folder_path) and os.path.isdir(folder_path):
        print(folder_path, "folder exists.")

        # Resolve relative path to absolute path
        absolute_path = os.path.abspath(folder_path)

        current_path = os.environ['PATH']
        if absolute_path not in current_path.split(os.pathsep):
            full_path_extension = absolute_path + os.pathsep + current_path
            os.environ['PATH'] = full_path_extension
            #print(f"Updated PATH with: ", full_path_extension)
        else:
            print(f"Directory {folder_path} already exists in PATH.")
    else:
        print(f"Folder not found at {folder_path} - not added to PATH")

###
# LOAD CONFIG FROM ENV FILE
###
CONFIG_FOLDER = get_or_create_env_var('CONFIG_FOLDER', "config/")

ensure_folder_exists(CONFIG_FOLDER)

# If you have an aws_config env file in the config folder, you can load in app variables this way, e.g. 'config/cdk_config.env'
CDK_CONFIG_PATH = get_or_create_env_var('CDK_CONFIG_PATH', 'config/cdk_config.env') # e.g. config/cdk_config.env

if CDK_CONFIG_PATH:
    if os.path.exists(CDK_CONFIG_PATH):
        print(f"Loading CDK variables from config file {CDK_CONFIG_PATH}")
        load_dotenv(CDK_CONFIG_PATH)
    else:
        print("CDK config file not found at location:", CDK_CONFIG_PATH)

###
# AWS OPTIONS
###
AWS_REGION = get_or_create_env_var('AWS_REGION', '')
AWS_ACCOUNT_ID = get_or_create_env_var('AWS_ACCOUNT_ID', '')

###
# CDK OPTIONS
###
CDK_PREFIX = get_or_create_env_var('CDK_PREFIX', '')
CONTEXT_FILE = get_or_create_env_var('CONTEXT_FILE', 'cdk.context.json') # Define the CDK output context file name
CDK_FOLDER = get_or_create_env_var('CDK_FOLDER', '') # Full path to the CDK folder (with trailing forward slash)
RUN_USEAST_STACK = get_or_create_env_var('RUN_USEAST_STACK', 'False')

### VPC
VPC_NAME = get_or_create_env_var('VPC_NAME', '')
EXISTING_IGW_ID = get_or_create_env_var('EXISTING_IGW_ID', '')
SINGLE_NAT_GATEWAY_ID = get_or_create_env_var('SINGLE_NAT_GATEWAY_ID', '')

### SUBNETS / ROUTE TABLES / NAT GATEWAY
PUBLIC_SUBNETS_TO_USE = get_or_create_env_var('PUBLIC_SUBNETS_TO_USE', '') # e.g. ['PublicSubnet1', 'PublicSubnet2']
PUBLIC_SUBNET_CIDR_BLOCKS = get_or_create_env_var('PUBLIC_SUBNET_CIDR_BLOCKS', '') # e.g. ["10.0.1.0/24", "10.0.2.0/24"]
PUBLIC_SUBNET_AVAILABILITY_ZONES = get_or_create_env_var('PUBLIC_SUBNET_AVAILABILITY_ZONES', '') # e.g. ["us-east-1a", "us-east-1b"]

PRIVATE_SUBNETS_TO_USE = get_or_create_env_var('PRIVATE_SUBNETS_TO_USE', '') # e.g. ['PrivateSubnet1', 'PrivateSubnet2']
PRIVATE_SUBNET_CIDR_BLOCKS = get_or_create_env_var('PRIVATE_SUBNET_CIDR_BLOCKS', '') # e.g. ["10.0.1.0/24", "10.0.2.0/24"]
PRIVATE_SUBNET_AVAILABILITY_ZONES = get_or_create_env_var('PRIVATE_SUBNET_AVAILABILITY_ZONES', '') # e.g. ["us-east-1a", "us-east-1b"]

ROUTE_TABLE_BASE_NAME = get_or_create_env_var('ROUTE_TABLE_BASE_NAME', f'{CDK_PREFIX}PrivateRouteTable')
NAT_GATEWAY_EIP_NAME = get_or_create_env_var('NAT_GATEWAY_EIP_NAME', f"{CDK_PREFIX}NatGatewayEip")
NAT_GATEWAY_NAME = get_or_create_env_var('NAT_GATEWAY_NAME', f"{CDK_PREFIX}NatGateway")

# IAM roles
AWS_MANAGED_TASK_ROLES_LIST = get_or_create_env_var('AWS_MANAGED_TASK_ROLES_LIST', '["AmazonCognitoReadOnly", "service-role/AmazonECSTaskExecutionRolePolicy", "AmazonS3FullAccess", "AmazonTextractFullAccess", "ComprehendReadOnly", "AmazonDynamoDBFullAccess", "service-role/AWSAppSyncPushToCloudWatchLogs"]')
POLICY_FILE_LOCATIONS = get_or_create_env_var('POLICY_FILE_LOCATIONS', '') # e.g. '["config/sts_permissions.json"]'
POLICY_FILE_ARNS = get_or_create_env_var('POLICY_FILE_ARNS', '')

# GITHUB REPO
GITHUB_REPO_USERNAME = get_or_create_env_var('GITHUB_REPO_USERNAME', 'seanpedrick-case')
GITHUB_REPO_NAME = get_or_create_env_var('GITHUB_REPO_NAME', 'doc_redaction')
GITHUB_REPO_BRANCH = get_or_create_env_var('GITHUB_REPO_BRANCH', 'main')

### CODEBUILD
CODEBUILD_ROLE_NAME = get_or_create_env_var('CODEBUILD_ROLE_NAME', f"{CDK_PREFIX}CodeBuildRole")
CODEBUILD_PROJECT_NAME = get_or_create_env_var('CODEBUILD_PROJECT_NAME', f"{CDK_PREFIX}CodeBuildProject")

### ECR
ECR_REPO_NAME = get_or_create_env_var('ECR_REPO_NAME', 'doc-redaction') # Beware - cannot have underscores and must be lower case
ECR_CDK_REPO_NAME = get_or_create_env_var('ECR_CDK_REPO_NAME', f"{CDK_PREFIX}{ECR_REPO_NAME}".lower())

### S3
S3_LOG_CONFIG_BUCKET_NAME = get_or_create_env_var('S3_LOG_CONFIG_BUCKET_NAME', f"{CDK_PREFIX}s3-logs".lower()) # S3 bucket names need to be lower case
S3_OUTPUT_BUCKET_NAME = get_or_create_env_var('S3_OUTPUT_BUCKET_NAME', f"{CDK_PREFIX}s3-output".lower())

### ECS
FARGATE_TASK_DEFINITION_NAME = get_or_create_env_var('FARGATE_TASK_DEFINITION_NAME', f"{CDK_PREFIX}FargateTaskDefinition")
TASK_DEFINITION_FILE_LOCATION = get_or_create_env_var('TASK_DEFINITION_FILE_LOCATION', CDK_FOLDER + CONFIG_FOLDER + "task_definition.json")

CLUSTER_NAME = get_or_create_env_var('CLUSTER_NAME', f"{CDK_PREFIX}Cluster")
ECS_SERVICE_NAME = get_or_create_env_var('ECS_SERVICE_NAME', f"{CDK_PREFIX}ECSService")
ECS_TASK_ROLE_NAME = get_or_create_env_var('ECS_TASK_ROLE_NAME', f"{CDK_PREFIX}TaskRole")
ECS_TASK_EXECUTION_ROLE_NAME = get_or_create_env_var('ECS_TASK_EXECUTION_ROLE_NAME', f"{CDK_PREFIX}ExecutionRole")
ECS_SECURITY_GROUP_NAME = get_or_create_env_var('ECS_SECURITY_GROUP_NAME', f"{CDK_PREFIX}SecurityGroupECS")
ECS_LOG_GROUP_NAME = get_or_create_env_var('ECS_LOG_GROUP_NAME', f"/ecs/{ECS_SERVICE_NAME}-logs".lower())

ECS_TASK_CPU_SIZE = get_or_create_env_var('ECS_TASK_CPU_SIZE', '1024')
ECS_TASK_MEMORY_SIZE = get_or_create_env_var('ECS_TASK_MEMORY_SIZE', '4096')
ECS_USE_FARGATE_SPOT = get_or_create_env_var('USE_FARGATE_SPOT', 'False')
ECS_READ_ONLY_FILE_SYSTEM = get_or_create_env_var('ECS_READ_ONLY_FILE_SYSTEM', 'True')

### Cognito
COGNITO_USER_POOL_NAME = get_or_create_env_var('COGNITO_USER_POOL_NAME', f"{CDK_PREFIX}UserPool")
COGNITO_USER_POOL_CLIENT_NAME = get_or_create_env_var('COGNITO_USER_POOL_CLIENT_NAME', f"{CDK_PREFIX}UserPoolClient")
COGNITO_USER_POOL_CLIENT_SECRET_NAME = get_or_create_env_var('COGNITO_USER_POOL_CLIENT_SECRET_NAME', f"{CDK_PREFIX}ParamCognitoSecret")
COGNITO_USER_POOL_DOMAIN_PREFIX = get_or_create_env_var('COGNITO_USER_POOL_DOMAIN_PREFIX', "redaction-app-domain") # Change this to something unique, or you'll probably hit an error

# Application load balancer
ALB_NAME = get_or_create_env_var('ALB_NAME', f"{CDK_PREFIX}Alb"[-32:]) # An application load balancer name can be at most 32 characters, so take the last 32 characters of the suggested name
ALB_NAME_SECURITY_GROUP_NAME = get_or_create_env_var('ALB_SECURITY_GROUP_NAME', f"{CDK_PREFIX}SecurityGroupALB")
ALB_TARGET_GROUP_NAME = get_or_create_env_var('ALB_TARGET_GROUP_NAME', f"{CDK_PREFIX}-tg"[-32:]) # Max 32 characters
EXISTING_LOAD_BALANCER_ARN = get_or_create_env_var('EXISTING_LOAD_BALANCER_ARN', '')
EXISTING_LOAD_BALANCER_DNS = get_or_create_env_var('EXISTING_LOAD_BALANCER_DNS', 'placeholder_load_balancer_dns.net')

## CLOUDFRONT
USE_CLOUDFRONT = get_or_create_env_var('USE_CLOUDFRONT', 'True')
CLOUDFRONT_PREFIX_LIST_ID = get_or_create_env_var('CLOUDFRONT_PREFIX_LIST_ID', 'pl-93a247fa')
CLOUDFRONT_GEO_RESTRICTION = get_or_create_env_var('CLOUDFRONT_GEO_RESTRICTION', '') # A country that CloudFront restricts access to. See: https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/georestrictions.html
CLOUDFRONT_DISTRIBUTION_NAME = get_or_create_env_var('CLOUDFRONT_DISTRIBUTION_NAME', f"{CDK_PREFIX}CfDist")
CLOUDFRONT_DOMAIN = get_or_create_env_var('CLOUDFRONT_DOMAIN', "cloudfront_placeholder.net")


# Certificate for the application load balancer (optional, for HTTPS and logins through the ALB)
ACM_CERTIFICATE_ARN = get_or_create_env_var('ACM_CERTIFICATE_ARN', '')
SSL_CERTIFICATE_DOMAIN = get_or_create_env_var('SSL_CERTIFICATE_DOMAIN', '') # e.g. example.com or www.example.com

# This should be the CloudFront domain, the domain linked to your ACM certificate, or the DNS of your application load balancer (visible in the console afterwards)
if USE_CLOUDFRONT == "True":
    COGNITO_REDIRECTION_URL = get_or_create_env_var('COGNITO_REDIRECTION_URL', "https://" + CLOUDFRONT_DOMAIN)
elif SSL_CERTIFICATE_DOMAIN:
    COGNITO_REDIRECTION_URL = get_or_create_env_var('COGNITO_REDIRECTION_URL', "https://" + SSL_CERTIFICATE_DOMAIN)
else:
    COGNITO_REDIRECTION_URL = get_or_create_env_var('COGNITO_REDIRECTION_URL', "https://" + EXISTING_LOAD_BALANCER_DNS)

# Custom headers, e.g. if routing traffic through CloudFront
CUSTOM_HEADER = get_or_create_env_var('CUSTOM_HEADER', '') # Retrieving or setting CUSTOM_HEADER
CUSTOM_HEADER_VALUE = get_or_create_env_var('CUSTOM_HEADER_VALUE', '') # Retrieving or setting CUSTOM_HEADER_VALUE

# Firewall on top of the load balancer
LOAD_BALANCER_WEB_ACL_NAME = get_or_create_env_var('LOAD_BALANCER_WEB_ACL_NAME', f"{CDK_PREFIX}alb-web-acl")

# Firewall on top of CloudFront
WEB_ACL_NAME = get_or_create_env_var('WEB_ACL_NAME', f"{CDK_PREFIX}cloudfront-web-acl")

###
# File I/O options
###

OUTPUT_FOLDER = get_or_create_env_var('GRADIO_OUTPUT_FOLDER', 'output/') # 'output/'
INPUT_FOLDER = get_or_create_env_var('GRADIO_INPUT_FOLDER', 'input/') # 'input/'

# Allow files to be saved in a temporary folder for increased security in some instances
if OUTPUT_FOLDER == "TEMP" or INPUT_FOLDER == "TEMP":
    # Create a temporary directory that persists for the lifetime of the process.
    # (Using tempfile.TemporaryDirectory as a context manager would delete the
    # folder as soon as the 'with' block exits, leaving dangling paths.)
    temp_dir = tempfile.mkdtemp()
    print(f'Temporary directory created at: {temp_dir}')

    if OUTPUT_FOLDER == "TEMP": OUTPUT_FOLDER = temp_dir + "/"
    if INPUT_FOLDER == "TEMP": INPUT_FOLDER = temp_dir + "/"

###
# LOGGING OPTIONS
###

SAVE_LOGS_TO_CSV = get_or_create_env_var('SAVE_LOGS_TO_CSV', 'True')

### DYNAMODB logs. Whether to save to DynamoDB, and the headers of the table
SAVE_LOGS_TO_DYNAMODB = get_or_create_env_var('SAVE_LOGS_TO_DYNAMODB', 'True')
ACCESS_LOG_DYNAMODB_TABLE_NAME = get_or_create_env_var('ACCESS_LOG_DYNAMODB_TABLE_NAME', f"{CDK_PREFIX}dynamodb-access-log".lower())
FEEDBACK_LOG_DYNAMODB_TABLE_NAME = get_or_create_env_var('FEEDBACK_LOG_DYNAMODB_TABLE_NAME', f"{CDK_PREFIX}dynamodb-feedback".lower())
USAGE_LOG_DYNAMODB_TABLE_NAME = get_or_create_env_var('USAGE_LOG_DYNAMODB_TABLE_NAME', f"{CDK_PREFIX}dynamodb-usage".lower())

###
# REDACTION OPTIONS
###

# Get some environment variables and launch the Gradio app
COGNITO_AUTH = get_or_create_env_var('COGNITO_AUTH', '0')

GRADIO_SERVER_PORT = int(get_or_create_env_var('GRADIO_SERVER_PORT', '7860'))

###
# WHOLE DOCUMENT API OPTIONS
###

DAYS_TO_DISPLAY_WHOLE_DOCUMENT_JOBS = get_or_create_env_var('DAYS_TO_DISPLAY_WHOLE_DOCUMENT_JOBS', '7') # How many days into the past should whole-document Textract jobs be displayed? After that, the data is not deleted from the Textract jobs CSV, just filtered out. Included to align with S3 buckets, where the file outputs are automatically deleted after X days.
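
For reference, a minimal sketch of the config/cdk_config.env file that this module loads via load_dotenv (each key mirrors one of the get_or_create_env_var names above; every value here is illustrative, not taken from the repo):

    CDK_PREFIX=redact-
    AWS_ACCOUNT_ID=123456789012
    AWS_REGION=us-east-1
    VPC_NAME=my-existing-vpc
    USE_CLOUDFRONT=True
    RUN_USEAST_STACK=True
    ECS_READ_ONLY_FILE_SYSTEM=True
    GRADIO_OUTPUT_FOLDER=TEMP
    GRADIO_INPUT_FOLDER=TEMP
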
cdk/cdk_functions.py ADDED
@@ -0,0 +1,1293 @@
import boto3
from botocore.exceptions import ClientError
import json
import os
import pandas as pd
import ipaddress
from constructs import Construct
from dotenv import set_key
from typing import List, Tuple, Optional, Dict, Any
from aws_cdk import (
    App,
    CfnTag,
    aws_ec2 as ec2,
    aws_wafv2 as wafv2,
    aws_elasticloadbalancingv2 as elb,
    aws_elasticloadbalancingv2_actions as elb_act,
    aws_certificatemanager as acm, # You might need this if you were looking up a cert, but not strictly for an ARN
    aws_cognito as cognito,
    aws_iam as iam,
    CfnOutput,
    Tags
)

from cdk_config import PUBLIC_SUBNETS_TO_USE, PRIVATE_SUBNETS_TO_USE, PUBLIC_SUBNET_CIDR_BLOCKS, PRIVATE_SUBNET_CIDR_BLOCKS, PUBLIC_SUBNET_AVAILABILITY_ZONES, PRIVATE_SUBNET_AVAILABILITY_ZONES, POLICY_FILE_LOCATIONS, NAT_GATEWAY_EIP_NAME, S3_LOG_CONFIG_BUCKET_NAME, S3_OUTPUT_BUCKET_NAME, ACCESS_LOG_DYNAMODB_TABLE_NAME, FEEDBACK_LOG_DYNAMODB_TABLE_NAME, USAGE_LOG_DYNAMODB_TABLE_NAME, AWS_REGION

# --- Function to load context from file ---
def load_context_from_file(app: App, file_path: str):
    if os.path.exists(file_path):
        with open(file_path, 'r') as f:
            context_data = json.load(f)
        for key, value in context_data.items():
            app.node.set_context(key, value)
        print(f"Loaded context from {file_path}")
    else:
        print(f"Context file not found: {file_path}")

# --- Helper to parse environment variables into lists ---
def _get_env_list(env_var_value: str) -> List[str]:
    """Parses a list-like environment variable value (e.g. '["a", "b"]') into a list of strings."""
    # Strip the surrounding brackets and any quote characters
    value = env_var_value[1:-1].strip().replace('\"', '').replace("\'", "")
    if not value:
        return []
    # Split by comma and filter out any empty strings that might result from extra commas
    return [s.strip() for s in value.split(',') if s.strip()]

# 1. Try to load subnet names / CIDRs / AZs from environment variables
# (Note: pass the variable values, not their names, to the parser)
if PUBLIC_SUBNETS_TO_USE: PUBLIC_SUBNETS_TO_USE = _get_env_list(PUBLIC_SUBNETS_TO_USE)
if PRIVATE_SUBNETS_TO_USE: PRIVATE_SUBNETS_TO_USE = _get_env_list(PRIVATE_SUBNETS_TO_USE)

if PUBLIC_SUBNET_CIDR_BLOCKS: PUBLIC_SUBNET_CIDR_BLOCKS = _get_env_list(PUBLIC_SUBNET_CIDR_BLOCKS)
if PUBLIC_SUBNET_AVAILABILITY_ZONES: PUBLIC_SUBNET_AVAILABILITY_ZONES = _get_env_list(PUBLIC_SUBNET_AVAILABILITY_ZONES)
if PRIVATE_SUBNET_CIDR_BLOCKS: PRIVATE_SUBNET_CIDR_BLOCKS = _get_env_list(PRIVATE_SUBNET_CIDR_BLOCKS)
if PRIVATE_SUBNET_AVAILABILITY_ZONES: PRIVATE_SUBNET_AVAILABILITY_ZONES = _get_env_list(PRIVATE_SUBNET_AVAILABILITY_ZONES)

if POLICY_FILE_LOCATIONS: POLICY_FILE_LOCATIONS = _get_env_list(POLICY_FILE_LOCATIONS)
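
# Example (illustrative, not executed at deploy time): _get_env_list() strips the
# surrounding brackets and quotes from a list-like env var value and splits on commas:
#
#     _get_env_list('["10.0.1.0/24", "10.0.2.0/24"]')  # -> ['10.0.1.0/24', '10.0.2.0/24']
#     _get_env_list('[]')                              # -> []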

def check_for_existing_role(role_name: str):
    try:
        iam_client = boto3.client('iam')

        response = iam_client.get_role(RoleName=role_name)
        role = response['Role']['Arn']

        print("Response Role:", role)

        return True, role, ""
    except iam_client.exceptions.NoSuchEntityException:
        return False, "", ""
    except Exception as e:
        raise Exception(f"Getting information on IAM role failed due to: {e}")

# Assume POLICY_FILE_LOCATIONS is defined globally or passed as a default
# For example:
# POLICY_FILE_LOCATIONS = ["./policies/my_read_policy.json", "./policies/my_write_policy.json"]
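
# Illustrative pattern (role name hypothetical): the check_* helpers in this module
# share the same shape, returning (exists, resource-or-id, extra) so that
# check_resources.py can record the result in the context file:
#
#     exists, role_arn, _ = check_for_existing_role("RedactionTaskRole")
#     context_data["exists:RedactionTaskRole"] = exists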

def add_statement_to_policy(role: iam.IRole, policy_document: Dict[str, Any]):
    """
    Adds individual policy statements from a parsed policy document to a CDK Role.

    Args:
        role: The CDK Role construct to attach policies to.
        policy_document: A Python dictionary representing an IAM policy document.
    """
    # Ensure the loaded JSON is a valid policy document structure
    if 'Statement' not in policy_document or not isinstance(policy_document['Statement'], list):
        print("Warning: Policy document does not contain a 'Statement' list. Skipping.")
        return # Do not return the role, just log and exit

    for statement_dict in policy_document['Statement']:
        try:
            # Create a CDK PolicyStatement from the dictionary
            cdk_policy_statement = iam.PolicyStatement.from_json(statement_dict)

            # Add the policy statement to the role
            role.add_to_policy(cdk_policy_statement)
            print(f" - Added statement: {statement_dict.get('Sid', 'No Sid')}")
        except Exception as e:
            print(f"Warning: Could not process policy statement: {statement_dict}. Error: {e}")

def add_custom_policies(
    scope: Construct, # Not strictly used here, but good practice if you expand to ManagedPolicies
    role: iam.IRole,
    policy_file_locations: Optional[List[str]] = None,
    custom_policy_text: Optional[str] = None
) -> iam.IRole:
    """
    Loads custom policies from JSON files or a string and attaches them to a CDK Role.

    Args:
        scope: The scope in which to define constructs (if needed, e.g., for iam.ManagedPolicy).
        role: The CDK Role construct to attach policies to.
        policy_file_locations: List of file paths to JSON policy documents.
        custom_policy_text: A JSON string representing a policy document.

    Returns:
        The modified CDK Role construct.
    """
    if policy_file_locations is None:
        policy_file_locations = []

    current_source = "unknown source" # For error messages

    try:
        if policy_file_locations:
            print(f"Attempting to add policies from files to role {role.node.id}...")
            for path in policy_file_locations:
                current_source = f"file: {path}"
                try:
                    with open(path, 'r') as f:
                        policy_document = json.load(f)
                    print(f"Processing policy from {current_source}...")
                    add_statement_to_policy(role, policy_document)
                except FileNotFoundError:
                    print(f"Warning: Policy file not found at {path}. Skipping.")
                except json.JSONDecodeError as e:
                    print(f"Warning: Invalid JSON in policy file {path}: {e}. Skipping.")
                except Exception as e:
                    print(f"An unexpected error occurred processing policy from {path}: {e}. Skipping.")

        if custom_policy_text:
            current_source = "custom policy text string"
            print(f"Attempting to add policy from custom text to role {role.node.id}...")
            try:
                # Parse the JSON string into a Python dictionary
                policy_document = json.loads(custom_policy_text)
                print(f"Processing policy from {current_source}...")
                add_statement_to_policy(role, policy_document)
            except json.JSONDecodeError as e:
                print(f"Warning: Invalid JSON in custom_policy_text: {e}. Skipping.")
            except Exception as e:
                print(f"An unexpected error occurred processing policy from custom_policy_text: {e}. Skipping.")

        print(f"Finished processing custom policies for role {role.node.id}.")

    except Exception as e:
        print(f"An unhandled error occurred during policy addition for {current_source}: {e}")

    return role
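
# Illustrative usage (file path and role are hypothetical): a policy file such as
# config/sts_permissions.json would contain a standard IAM policy document, e.g.
#
# {
#   "Version": "2012-10-17",
#   "Statement": [
#     {"Sid": "AllowAssumeRole", "Effect": "Allow",
#      "Action": "sts:AssumeRole", "Resource": "*"}
#   ]
# }
#
# and would be attached inside a stack with:
#     add_custom_policies(self, task_role, policy_file_locations=["config/sts_permissions.json"])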

# Import the S3 Bucket class if you intend to return a CDK object later
# from aws_cdk import aws_s3 as s3

def check_s3_bucket_exists(bucket_name: str):
    """
    Checks if an S3 bucket with the given name exists and is accessible.

    Args:
        bucket_name: The name of the S3 bucket to check.

    Returns:
        A tuple: (bool indicating existence, bucket name or None).
        Note: returning a Boto3 S3 Bucket object from here is NOT ideal for
        direct use in CDK; you'll likely only need the boolean result or the
        bucket name for CDK lookups/creations.
    """
    s3_client = boto3.client('s3')
    try:
        # Use head_bucket to check for existence and access
        s3_client.head_bucket(Bucket=bucket_name)
        print(f"Bucket '{bucket_name}' exists and is accessible.")
        return True, bucket_name

    except ClientError as e:
        # '404' means the bucket does not exist.
        # '403' means the bucket exists but you don't have permission.
        error_code = e.response['Error']['Code']
        if error_code == '404':
            print(f"Bucket '{bucket_name}' does not exist.")
            return False, None
        elif error_code == '403':
            # NOTE: in testing this returned 403 even for buckets that don't exist,
            # so treat 403 as "not confirmed to exist" and return False.
            print(f"Bucket '{bucket_name}' returned 403: it may exist but be inaccessible due to permissions, or it may not exist. Returning False for existence just in case.")
            return False, bucket_name
        else:
            # For other errors, raise to indicate something unexpected happened
            print(f"An unexpected AWS ClientError occurred checking bucket '{bucket_name}': {e}")
            raise # Re-raise the original exception
    except Exception as e:
        print(f"An unexpected non-ClientError occurred checking bucket '{bucket_name}': {e}")
        raise # Re-raise the original exception

# Example usage in your check_resources.py:
# exists, bucket_name_if_exists = check_s3_bucket_exists(log_bucket_name)
# context_data[f"exists:{log_bucket_name}"] = exists
# # You don't necessarily need to store the name in context if using from_bucket_name

# Delete an S3 bucket
def delete_s3_bucket(bucket_name: str):
    s3 = boto3.client('s3')

    try:
        # List and delete all object versions and delete markers.
        # (Note: a single list_object_versions call returns at most 1000 entries;
        # very large buckets would need pagination.)
        response = s3.list_object_versions(Bucket=bucket_name)
        versions = response.get('Versions', []) + response.get('DeleteMarkers', [])
        for version in versions:
            s3.delete_object(Bucket=bucket_name, Key=version['Key'], VersionId=version['VersionId'])

        # Delete the bucket
        s3.delete_bucket(Bucket=bucket_name)
        return {'Status': 'SUCCESS'}
    except Exception as e:
        return {'Status': 'FAILED', 'Reason': str(e)}

# Function to get a subnet ID from a subnet name
def get_subnet_id(vpc: ec2.IVpc, ec2_client, subnet_name: str):
    response = ec2_client.describe_subnets(Filters=[{'Name': 'vpc-id', 'Values': [vpc.vpc_id]}])

    for subnet in response['Subnets']:
        if subnet.get('Tags') and any(tag['Key'] == 'Name' and tag['Value'] == subnet_name for tag in subnet['Tags']):
            return subnet['SubnetId']

    return None

def check_ecr_repo_exists(repo_name: str) -> tuple[bool, dict]:
    """
    Checks if an ECR repository with the given name exists.

    Args:
        repo_name: The name of the ECR repository to check.

    Returns:
        A tuple: (True and the repository description if it exists,
        False and {} otherwise).
    """
    ecr_client = boto3.client('ecr')
    try:
        print("ECR repo name to check:", repo_name)
        response = ecr_client.describe_repositories(repositoryNames=[repo_name])
        # If describe_repositories succeeds and returns a non-empty list,
        # the repository exists.
        return len(response['repositories']) > 0, response['repositories'][0]
    except ClientError as e:
        # Check for the specific error code indicating the repository doesn't exist
        if e.response['Error']['Code'] == 'RepositoryNotFoundException':
            return False, {}
        else:
            # Re-raise other exceptions to surface unexpected errors
            raise
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return False, {}

def check_codebuild_project_exists(project_name: str):
    """
    Checks if a CodeBuild project with the given name exists.

    Args:
        project_name: The name of the CodeBuild project to check.

    Returns:
        A tuple:
        - The first element is True if the project exists, False otherwise.
        - The second element is the project ARN if found, None otherwise.
    """
    codebuild_client = boto3.client('codebuild')
    try:
        # Use batch_get_projects with a list containing the single project name
        response = codebuild_client.batch_get_projects(names=[project_name])

        # The response for batch_get_projects includes 'projects' (found)
        # and 'projectsNotFound' (not found).
        if response['projects']:
            # The project is found in the 'projects' list
            print(f"CodeBuild project '{project_name}' found.")
            return True, response['projects'][0]['arn']
        elif response['projectsNotFound'] and project_name in response['projectsNotFound']:
            # The project name is explicitly in the 'projectsNotFound' list
            print(f"CodeBuild project '{project_name}' not found.")
            return False, None
        else:
            # Less expected for a single-name lookup; it's safest to assume
            # the project wasn't found if it isn't in 'projects'.
            print(f"CodeBuild project '{project_name}' not found (not in 'projects' list).")
            return False, None

    except ClientError as e:
        # batch_get_projects typically lists missing projects in 'projectsNotFound'
        # rather than raising; other ClientErrors (e.g. permissions) are still possible.
        print(f"An AWS ClientError occurred checking CodeBuild project '{project_name}': {e}")
        raise # Re-raise the original exception
    except Exception as e:
        print(f"An unexpected non-ClientError occurred checking CodeBuild project '{project_name}': {e}")
        raise # Re-raise the original exception

def get_vpc_id_by_name(vpc_name: str) -> Tuple[Optional[str], List[Dict[str, Any]]]:
    """
    Finds a VPC ID by its 'Name' tag, and lists any NAT Gateways in that VPC.
    Returns (vpc_id, nat_gateways), or (None, []) if the VPC is not found.
    """
    ec2_client = boto3.client('ec2')
    try:
        response = ec2_client.describe_vpcs(
            Filters=[
                {'Name': 'tag:Name', 'Values': [vpc_name]}
            ]
        )
        if response and response['Vpcs']:
            vpc_id = response['Vpcs'][0]['VpcId']
            print(f"VPC '{vpc_name}' found with ID: {vpc_id}")

            # Look for NAT Gateways in this VPC
            nat_gateways = []
            try:
                response = ec2_client.describe_nat_gateways(
                    Filters=[
                        {'Name': 'vpc-id', 'Values': [vpc_id]},
                        # Optional: add a tag filter if you consistently tag your NATs
                        # {'Name': 'tag:Name', 'Values': [f"{prefix}-nat-gateway"]}
                    ]
                )
                nat_gateways = response.get('NatGateways', [])
            except Exception as e:
                print(f"Warning: Could not describe NAT Gateways in VPC '{vpc_id}': {e}")
                # Proceed with an empty list rather than failing the whole lookup

            return vpc_id, nat_gateways
        else:
            print(f"VPC '{vpc_name}' not found.")
            return None, []
    except Exception as e:
        print(f"An unexpected error occurred finding VPC '{vpc_name}': {e}")
        raise

# --- Helper to fetch all existing subnets in a VPC once ---
def _get_existing_subnets_in_vpc(vpc_id: str) -> Dict[str, Any]:
    """
    Fetches all subnets in a given VPC.
    Returns a dictionary with 'by_name' (map of name to subnet data),
    'by_id' (map of id to subnet data), and 'cidr_networks' (list of ipaddress.IPv4Network).
    """
    ec2_client = boto3.client('ec2')
    existing_subnets_data = {
        "by_name": {},      # {subnet_name: {'id': 'subnet-id', 'cidr': 'x.x.x.x/x'}}
        "by_id": {},        # {subnet_id: {'name': 'subnet-name', 'cidr': 'x.x.x.x/x'}}
        "cidr_networks": [] # List of ipaddress.IPv4Network objects
    }
    try:
        response = ec2_client.describe_subnets(Filters=[{'Name': 'vpc-id', 'Values': [vpc_id]}])
        for s in response.get('Subnets', []):
            subnet_id = s['SubnetId']
            cidr_block = s.get('CidrBlock')
            # Extract the 'Name' tag, which is crucial for lookup by name
            name_tag = next((tag['Value'] for tag in s.get('Tags', []) if tag['Key'] == 'Name'), None)

            subnet_info = {'id': subnet_id, 'cidr': cidr_block, 'name': name_tag}

            if name_tag:
                existing_subnets_data["by_name"][name_tag] = subnet_info
            existing_subnets_data["by_id"][subnet_id] = subnet_info

            if cidr_block:
                try:
                    existing_subnets_data["cidr_networks"].append(ipaddress.ip_network(cidr_block, strict=False))
                except ValueError:
                    print(f"Warning: Existing subnet {subnet_id} has an invalid CIDR: {cidr_block}. Skipping for overlap check.")

        print(f"Fetched {len(response.get('Subnets', []))} existing subnets from VPC '{vpc_id}'.")
    except Exception as e:
        print(f"Error describing existing subnets in VPC '{vpc_id}': {e}. Cannot perform full validation.")
        raise # Re-raise if this essential step fails

    return existing_subnets_data

# --- Modified validate_subnet_creation_parameters to take pre-fetched data ---
def validate_subnet_creation_parameters(
    vpc_id: str,
    proposed_subnets_data: List[Dict[str, str]], # e.g., [{'name': 'my-public-subnet', 'cidr': '10.0.0.0/24', 'az': 'us-east-1a'}]
    existing_aws_subnets_data: Dict[str, Any]    # Pre-fetched data from _get_existing_subnets_in_vpc
) -> None:
    """
    Validates proposed subnet names and CIDR blocks against existing AWS subnets
    in the specified VPC and against each other.
    This function uses pre-fetched AWS subnet data.

    Args:
        vpc_id: The ID of the VPC (for logging/error messages).
        proposed_subnets_data: A list of dictionaries, where each dict represents
                               a proposed subnet with 'name', 'cidr', and 'az'.
        existing_aws_subnets_data: Dictionary containing existing AWS subnet data
                                   (e.g., from _get_existing_subnets_in_vpc).

    Raises:
        ValueError: If any proposed subnet is incomplete, has an invalid CIDR,
                    duplicates a name within the batch, or overlaps a CIDR with
                    the batch or with existing AWS subnets. A name that already
                    exists in the VPC is only warned about, not raised.
    """
    if not proposed_subnets_data:
        print("No proposed subnet data provided for validation. Skipping.")
        return

    print(f"--- Starting pre-synth validation for VPC '{vpc_id}' with proposed subnets ---")

    print("Existing subnet data:", pd.DataFrame(existing_aws_subnets_data['by_name']))

    existing_aws_subnet_names = set(existing_aws_subnets_data["by_name"].keys())
    existing_aws_cidr_networks = existing_aws_subnets_data["cidr_networks"]

    # Sets to track names and a list to track networks for internal batch consistency
    proposed_names_seen: set[str] = set()
    proposed_cidr_networks_seen: List[ipaddress.IPv4Network] = []

    for i, proposed_subnet in enumerate(proposed_subnets_data):
        subnet_name = proposed_subnet.get('name')
        cidr_block_str = proposed_subnet.get('cidr')
        availability_zone = proposed_subnet.get('az')

        if not all([subnet_name, cidr_block_str, availability_zone]):
            raise ValueError(f"Proposed subnet at index {i} is incomplete. Requires 'name', 'cidr', and 'az'.")

        # 1. Check for duplicate names within the proposed batch
        if subnet_name in proposed_names_seen:
            raise ValueError(f"Proposed subnet name '{subnet_name}' is duplicated within the input list.")
        proposed_names_seen.add(subnet_name)

        # 2. Check for duplicate names against existing AWS subnets (warn only)
        if subnet_name in existing_aws_subnet_names:
            print(f"Proposed subnet name '{subnet_name}' already exists in VPC '{vpc_id}'.")

        # Parse the proposed CIDR
        try:
            proposed_net = ipaddress.ip_network(cidr_block_str, strict=False)
        except ValueError as e:
            raise ValueError(f"Invalid CIDR format '{cidr_block_str}' for proposed subnet '{subnet_name}': {e}")

        # 3. Check for overlapping CIDRs within the proposed batch
        for existing_proposed_net in proposed_cidr_networks_seen:
            if proposed_net.overlaps(existing_proposed_net):
                raise ValueError(
                    f"Proposed CIDR '{cidr_block_str}' for subnet '{subnet_name}' "
                    f"overlaps with another proposed CIDR '{str(existing_proposed_net)}' "
                    f"within the same batch."
                )

        # 4. Check for overlapping CIDRs against existing AWS subnets
        for existing_aws_net in existing_aws_cidr_networks:
            if proposed_net.overlaps(existing_aws_net):
                raise ValueError(
                    f"Proposed CIDR '{cidr_block_str}' for subnet '{subnet_name}' "
                    f"overlaps with an existing AWS subnet CIDR '{str(existing_aws_net)}' "
                    f"in VPC '{vpc_id}'."
                )

        # If all checks pass for this subnet, record its network for subsequent checks
        proposed_cidr_networks_seen.append(proposed_net)
        print(f"Validation successful for proposed subnet '{subnet_name}' with CIDR '{cidr_block_str}'.")

    print(f"--- All proposed subnets passed pre-synth validation checks for VPC '{vpc_id}'. ---")
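
# Illustrative call (names and CIDRs hypothetical): fetch the live data once,
# then validate the whole batch before any CDK constructs are defined.
#
#     existing = _get_existing_subnets_in_vpc("vpc-0123456789abcdef0")
#     validate_subnet_creation_parameters(
#         "vpc-0123456789abcdef0",
#         [{'name': 'PublicSubnet1', 'cidr': '10.0.1.0/24', 'az': 'us-east-1a'},
#          {'name': 'PublicSubnet2', 'cidr': '10.0.2.0/24', 'az': 'us-east-1b'}],
#         existing,
#     )  # raises ValueError on CIDR overlap or in-batch duplicate names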

# --- Modified check_subnet_exists_by_name (uses pre-fetched data) ---
def check_subnet_exists_by_name(
    subnet_name: str,
    existing_aws_subnets_data: Dict[str, Any]
) -> Tuple[bool, Optional[str]]:
    """
    Checks if a subnet with the given name exists within the pre-fetched data.

    Args:
        subnet_name: The 'Name' tag value of the subnet to check.
        existing_aws_subnets_data: Dictionary containing existing AWS subnet data
                                   (e.g., from _get_existing_subnets_in_vpc).

    Returns:
        A tuple:
        - The first element is True if the subnet exists, False otherwise.
        - The second element is the Subnet ID if found, None otherwise.
    """
    subnet_info = existing_aws_subnets_data["by_name"].get(subnet_name)
    if subnet_info:
        print(f"Subnet '{subnet_name}' found with ID: {subnet_info['id']}")
        return True, subnet_info['id']
    else:
        print(f"Subnet '{subnet_name}' not found.")
        return False, None

def create_nat_gateway(
    scope: Construct,
    public_subnet_for_nat: ec2.ISubnet, # Expects a proper ISubnet
    nat_gateway_name: str,
    nat_gateway_id_context_key: str
) -> str:
    """
    Creates a single NAT Gateway in the specified public subnet.
    It does not handle lookup from context; the calling stack should do that.
    Returns the CloudFormation Ref of the NAT Gateway ID.
    """
    print(f"Defining a new NAT Gateway '{nat_gateway_name}' in subnet '{public_subnet_for_nat.subnet_id}'.")

    # Create an Elastic IP for the NAT Gateway
    eip = ec2.CfnEIP(scope, NAT_GATEWAY_EIP_NAME,
        tags=[CfnTag(key="Name", value=NAT_GATEWAY_EIP_NAME)]
    )

    # Create the NAT Gateway
    nat_gateway_logical_id = nat_gateway_name.replace('-', '') + "NatGateway"
    nat_gateway = ec2.CfnNatGateway(scope, nat_gateway_logical_id,
        subnet_id=public_subnet_for_nat.subnet_id, # Associate with the public subnet
        allocation_id=eip.attr_allocation_id,      # Associate with the EIP
        tags=[CfnTag(key="Name", value=nat_gateway_name)]
    )
    # The NAT GW depends on the EIP. The dependency on the subnet is implicit via subnet_id.
    nat_gateway.add_dependency(eip)

    # *** CRUCIAL: Use CfnOutput to export the ID after deployment ***
    # This is how you will get the ID to put into cdk.context.json
    CfnOutput(scope, "SingleNatGatewayIdOutput",
        value=nat_gateway.ref,
        description=f"Physical ID of the single NAT Gateway. Add this to cdk.context.json under the key '{nat_gateway_id_context_key}'.",
        export_name=f"{scope.stack_name}-NatGatewayId" # Make the export name unique
    )

    print(f"CDK: Defined new NAT Gateway '{nat_gateway.ref}'. Its physical ID will be available in the stack outputs after deployment.")
    # Return the tokenised reference for use within this synthesis
    return nat_gateway.ref
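
# Illustrative round-trip (context key hypothetical): after the first deploy, copy the
# NAT Gateway ID from the stack outputs into cdk.context.json so later synths reuse it:
#
# cdk.context.json:
#     { "single_nat_gateway_id": "nat-0123456789abcdef0" }
#
# In the stack, prefer the context value and only create a new gateway when absent:
#     nat_id = self.node.try_get_context("single_nat_gateway_id") \
#         or create_nat_gateway(self, public_subnet, NAT_GATEWAY_NAME, "single_nat_gateway_id")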

def create_subnets(
    scope: Construct,
    vpc: ec2.IVpc,
    prefix: str,
    subnet_names: List[str],
    cidr_blocks: List[str],
    availability_zones: List[str],
    is_public: bool,
    internet_gateway_id: Optional[str] = None,
    single_nat_gateway_id: Optional[str] = None
) -> Tuple[List[ec2.Subnet], List[ec2.IRouteTable]]:
    """
    Creates subnets using L2 constructs and returns the subnets together with
    their route tables (the L2 Subnet construct creates a route table per subnet).
    """
    # --- Validations ---
    if not (len(subnet_names) == len(cidr_blocks) == len(availability_zones) > 0):
        raise ValueError("Subnet names, CIDR blocks, and Availability Zones lists must be non-empty and match in length.")
    if is_public and not internet_gateway_id:
        raise ValueError("internet_gateway_id must be provided for public subnets.")
    if not is_public and not single_nat_gateway_id:
        raise ValueError("single_nat_gateway_id must be provided for private subnets when using a single NAT Gateway.")

    # --- These lists are populated with the created constructs to return ---
    created_subnets: List[ec2.Subnet] = []
    created_route_tables: List[ec2.IRouteTable] = []

    subnet_type_tag = "public" if is_public else "private"

    for i, subnet_name in enumerate(subnet_names):
        logical_id = f"{prefix}{subnet_type_tag.capitalize()}Subnet{i+1}"

        # 1. Create the L2 Subnet
        subnet = ec2.Subnet(
            scope,
            logical_id,
            vpc_id=vpc.vpc_id,
            cidr_block=cidr_blocks[i],
            availability_zone=availability_zones[i],
            map_public_ip_on_launch=is_public
        )
        Tags.of(subnet).add("Name", subnet_name)
        Tags.of(subnet).add("Type", subnet_type_tag)

        if is_public:
            # The subnet's route table is automatically created by the L2 Subnet construct
            try:
                subnet.add_route(
                    "DefaultInternetRoute", # A logical ID for the CfnRoute resource
                    router_id=internet_gateway_id,
                    router_type=ec2.RouterType.GATEWAY,
                    # destination_cidr_block="0.0.0.0/0" is the default for this method
                )
            except Exception as e:
                print("Could not create IGW route for public subnet due to:", e)
            print(f"CDK: Defined public L2 subnet '{subnet_name}' and added IGW route.")
        else:
            try:
                # Using .add_route() for private subnets as well, for consistency
                subnet.add_route(
                    "DefaultNatRoute", # A logical ID for the CfnRoute resource
                    router_id=single_nat_gateway_id,
                    router_type=ec2.RouterType.NAT_GATEWAY,
                )
            except Exception as e:
                print("Could not create NAT gateway route for private subnet due to:", e)
            print(f"CDK: Defined private L2 subnet '{subnet_name}' and added NAT GW route.")

        route_table = subnet.route_table

        created_subnets.append(subnet)
        created_route_tables.append(route_table)

    return created_subnets, created_route_tables
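
# Illustrative usage inside a stack (all argument values hypothetical):
#
#     public_subnets, public_rts = create_subnets(
#         self, vpc, CDK_PREFIX,
#         subnet_names=PUBLIC_SUBNETS_TO_USE,
#         cidr_blocks=PUBLIC_SUBNET_CIDR_BLOCKS,
#         availability_zones=PUBLIC_SUBNET_AVAILABILITY_ZONES,
#         is_public=True,
#         internet_gateway_id=EXISTING_IGW_ID,
#     )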

def ingress_rule_exists(security_group: ec2.SecurityGroup, peer, port=None):
    # Best-effort check: this only inspects the security groups already connected
    # via the L2 'connections' abstraction, not the full set of ingress rules.
    for rule in security_group.connections.security_groups:
        if port:
            if rule.peer == peer and rule.connection == port:
                return True
        else:
            if rule.peer == peer:
                return True
    return False

def check_for_existing_user_pool(user_pool_name: str):
    cognito_client = boto3.client("cognito-idp")
    list_pools_response = cognito_client.list_user_pools(MaxResults=60) # MaxResults can be at most 60

    # ListUserPools may require pagination if you have more than 60 pools;
    # this simple version doesn't paginate, so it could miss your pool.

    existing_user_pool_id = ""
    found_pool = None

    for pool in list_pools_response.get('UserPools', []):
        if pool.get('Name') == user_pool_name:
            existing_user_pool_id = pool['Id']
            found_pool = pool
            print(f"Found existing user pool by name '{user_pool_name}' with ID: {existing_user_pool_id}")
            break # Found the one we're looking for

    if existing_user_pool_id:
        return True, existing_user_pool_id, found_pool
    else:
        return False, "", {}

def check_for_existing_user_pool_client(user_pool_id: str, user_pool_client_name: str):
    """
    Checks if a Cognito User Pool Client with the given name exists in the specified User Pool.

    Args:
        user_pool_id: The ID of the Cognito User Pool.
        user_pool_client_name: The name of the User Pool Client to check for.

    Returns:
        A tuple:
        - True, client_id, client_details if the client exists.
        - False, "", {} otherwise.
    """
    cognito_client = boto3.client("cognito-idp")
    next_token = None # The first request must not include a NextToken

    while True:
        kwargs = {'UserPoolId': user_pool_id, 'MaxResults': 60}
        if next_token:
            kwargs['NextToken'] = next_token
        try:
            response = cognito_client.list_user_pool_clients(**kwargs)
        except cognito_client.exceptions.ResourceNotFoundException:
            print(f"Error: User pool with ID '{user_pool_id}' not found.")
            return False, "", {}
        except cognito_client.exceptions.InvalidParameterException:
            print(f"Error: No app clients for '{user_pool_id}' found.")
            return False, "", {}
        except Exception as e:
            print("Could not check User Pool clients due to:", e)
            return False, "", {}

        for client in response.get('UserPoolClients', []):
            if client.get('ClientName') == user_pool_client_name:
                print(f"Found existing user pool client '{user_pool_client_name}' with ID: {client['ClientId']}")
                return True, client['ClientId'], client

        next_token = response.get('NextToken')
        if not next_token:
            break

    return False, "", {}

def check_for_secret(secret_name: str, secret_value: Optional[dict] = None):
    """
    Checks if a Secrets Manager secret with the given name exists.
    (Note: despite the secret_value argument, this function only checks for the
    secret; it does not create one.)

    Args:
        secret_name: The name of the Secrets Manager secret.
        secret_value: A dictionary of key-value pairs for the secret (unused here).

    Returns:
        A tuple: (True and the secret value response if the secret exists,
        False and {} otherwise).
    """
    secretsmanager_client = boto3.client("secretsmanager")

    try:
        # Try to get the secret. If it doesn't exist, a ResourceNotFoundException is raised.
        secret_value = secretsmanager_client.get_secret_value(SecretId=secret_name)
        print(f"Secret '{secret_name}' already exists.")
        return True, secret_value
    except secretsmanager_client.exceptions.ResourceNotFoundException:
        print("Secret not found")
        return False, {}
    except Exception as e:
        # Handle other potential exceptions during the get operation
        print(f"Error checking for secret '{secret_name}': {e}")
        return False, {}

def check_alb_exists(load_balancer_name: str, region_name: str = None) -> tuple[bool, dict]:
    """
    Checks if an Application Load Balancer (ALB) with the given name exists.

    Args:
        load_balancer_name: The name of the ALB to check.
        region_name: The AWS region to check in. If None, uses the default
                     session region.

    Returns:
        A tuple:
        - The first element is True if the ALB exists, False otherwise.
        - The second element is the ALB object (dictionary) if found,
          {} otherwise. Specifically, it returns the first element of
          the LoadBalancers list from the describe_load_balancers response.
    """
    if region_name:
        elbv2_client = boto3.client('elbv2', region_name=region_name)
    else:
        elbv2_client = boto3.client('elbv2')
    try:
        response = elbv2_client.describe_load_balancers(Names=[load_balancer_name])
        if response['LoadBalancers']:
            return True, response['LoadBalancers'][0] # Return True and the first ALB object
        else:
            return False, {}
    except ClientError as e:
        # If the error indicates the ALB doesn't exist, return False
        if e.response['Error']['Code'] == 'LoadBalancerNotFound':
            return False, {}
        else:
            # Re-raise other exceptions
            raise
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return False, {}

def check_fargate_task_definition_exists(task_definition_name: str, region_name: str = None) -> tuple[bool, dict]:
    """
    Checks if a Fargate task definition with the given name exists.

    Args:
        task_definition_name: The name or ARN of the task definition to check.
        region_name: The AWS region to check in. If None, uses the default
                     session region.

    Returns:
        A tuple:
        - The first element is True if the task definition exists, False otherwise.
        - The second element is the task definition object (dictionary) if found,
          {} otherwise.
    """
    if region_name:
        ecs_client = boto3.client('ecs', region_name=region_name)
    else:
        ecs_client = boto3.client('ecs')
    try:
        response = ecs_client.describe_task_definition(taskDefinition=task_definition_name)
        # If describe_task_definition succeeds, it returns the task definition directly.
        return True, response['taskDefinition']
    except ClientError as e:
        # A missing task definition surfaces as a ClientException whose message
        # indicates the task definition does not exist.
        error_message = e.response.get('Error', {}).get('Message', '')
        if e.response['Error']['Code'] == 'ClientException' and 'Task definition' in error_message and 'does not exist' in error_message:
            return False, {}
        else:
            # Re-raise other exceptions.
            raise
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return False, {}

def check_ecs_service_exists(cluster_name: str, service_name: str, region_name: str = None) -> tuple[bool, dict]:
    """
    Checks if an ECS service with the given name exists in the specified cluster.

    Args:
        cluster_name: The name or ARN of the ECS cluster.
        service_name: The name of the ECS service to check.
        region_name: The AWS region to check in. If None, uses the default
                     session region.

    Returns:
        A tuple:
        - The first element is True if the service exists, False otherwise.
        - The second element is the service object (dictionary) if found,
          {} otherwise.
    """
    if region_name:
        ecs_client = boto3.client('ecs', region_name=region_name)
    else:
        ecs_client = boto3.client('ecs')
    try:
        response = ecs_client.describe_services(cluster=cluster_name, services=[service_name])
        if response['services']:
            return True, response['services'][0] # Return True and the first service object
        else:
            return False, {}
    except ClientError as e:
        # Check for the error codes indicating the cluster or service doesn't exist.
        if e.response['Error']['Code'] in ('ClusterNotFoundException', 'ServiceNotFoundException'):
            return False, {}
        else:
            # Re-raise other exceptions.
            raise
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return False, {}
857
+ def check_cloudfront_distribution_exists(distribution_name: str, region_name: str = None) -> tuple[bool, dict | None]:
+     """
+     Checks if a CloudFront distribution with the given alias exists.
+
+     Args:
+         distribution_name: The alias (CNAME) of the CloudFront distribution to check.
+         region_name: The AWS region to check in. If None, uses the default
+                      session region. Note: CloudFront is a global service,
+                      so the region is usually 'us-east-1', but this parameter
+                      is included for completeness.
+
+     Returns:
+         A tuple:
+         - The first element is True if the distribution exists, False otherwise.
+         - The second element is the matching distribution summary (dictionary)
+           from the ListDistributions response if found, None otherwise.
+     """
+     if region_name:
+         cf_client = boto3.client('cloudfront', region_name=region_name)
+     else:
+         cf_client = boto3.client('cloudfront')
+     try:
+         # CloudFront doesn't directly filter by name, so we have to iterate.
+         # Note: list_distributions is paginated; only the first page is checked here.
+         response = cf_client.list_distributions()
+         if 'Items' in response['DistributionList']:
+             for distribution in response['DistributionList']['Items']:
+                 aliases = distribution.get('Aliases', {}).get('Items', [])
+                 if distribution_name in aliases:
+                     return True, distribution
+             return False, None
+         else:
+             return False, None
+     except ClientError as e:
+         # If the error indicates the distribution doesn't exist, return False
+         if e.response['Error']['Code'] == 'NoSuchDistribution':
+             return False, None
+         else:
+             # Re-raise other exceptions
+             raise
+     except Exception as e:
+         print(f"An unexpected error occurred: {e}")
+         return False, None
+
+ def create_web_acl_with_common_rules(scope: Construct, web_acl_name: str, waf_scope: str = "CLOUDFRONT"):
+     '''
+     Use CDK to create a web ACL based on AWS common rule sets with overrides.
+     This function expects a 'scope' argument, typically 'self' from your stack,
+     as CfnWebACL requires a construct scope.
+     '''
+
+     # Create full list of rules
+     rules = []
+     aws_ruleset_names = [
+         "AWSManagedRulesCommonRuleSet",
+         "AWSManagedRulesKnownBadInputsRuleSet",
+         "AWSManagedRulesAmazonIpReputationList"
+     ]
+
+     # Use a separate counter to assign unique priorities sequentially
+     priority_counter = 1
+
+     for aws_rule_name in aws_ruleset_names:
+         current_rule_action_overrides = None
+
+         # All managed rule groups need an override_action.
+         # 'none' means use the managed rule group's default action.
+         current_override_action = wafv2.CfnWebACL.OverrideActionProperty(none={})
+
+         current_priority = priority_counter
+         priority_counter += 1
+
+         if aws_rule_name == "AWSManagedRulesCommonRuleSet":
+             # Allow request bodies larger than the rule set's default size limit.
+             current_rule_action_overrides = [
+                 wafv2.CfnWebACL.RuleActionOverrideProperty(
+                     name="SizeRestrictions_BODY",
+                     action_to_use=wafv2.CfnWebACL.RuleActionProperty(
+                         allow={}
+                     )
+                 )
+             ]
+
+         rule_property = wafv2.CfnWebACL.RuleProperty(
+             name=aws_rule_name,
+             priority=current_priority,
+             statement=wafv2.CfnWebACL.StatementProperty(
+                 managed_rule_group_statement=wafv2.CfnWebACL.ManagedRuleGroupStatementProperty(
+                     vendor_name="AWS",
+                     name=aws_rule_name,
+                     rule_action_overrides=current_rule_action_overrides
+                 )
+             ),
+             visibility_config=wafv2.CfnWebACL.VisibilityConfigProperty(
+                 cloud_watch_metrics_enabled=True,
+                 metric_name=aws_rule_name,
+                 sampled_requests_enabled=True
+             ),
+             override_action=current_override_action  # Required for all managed rule groups
+         )
+
+         rules.append(rule_property)
+
+     # Add the rate limit rule
+     rate_limit_priority = priority_counter  # Use the next available priority
+     rules.append(wafv2.CfnWebACL.RuleProperty(
+         name="RateLimitRule",
+         priority=rate_limit_priority,
+         statement=wafv2.CfnWebACL.StatementProperty(
+             rate_based_statement=wafv2.CfnWebACL.RateBasedStatementProperty(
+                 limit=1000,
+                 aggregate_key_type="IP"
+             )
+         ),
+         visibility_config=wafv2.CfnWebACL.VisibilityConfigProperty(
+             cloud_watch_metrics_enabled=True,
+             metric_name="RateLimitRule",
+             sampled_requests_enabled=True
+         ),
+         action=wafv2.CfnWebACL.RuleActionProperty(
+             block={}
+         )
+     ))
+
+     web_acl = wafv2.CfnWebACL(
+         scope,
+         "WebACL",
+         name=web_acl_name,
+         default_action=wafv2.CfnWebACL.DefaultActionProperty(allow={}),
+         scope=waf_scope,
+         visibility_config=wafv2.CfnWebACL.VisibilityConfigProperty(
+             cloud_watch_metrics_enabled=True,
+             metric_name="webACL",
+             sampled_requests_enabled=True
+         ),
+         rules=rules
+     )
+
+     CfnOutput(scope, "WebACLArn", value=web_acl.attr_arn)
+
+     return web_acl
+
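For context, a minimal sketch of how this helper might be invoked from a stack (the stack class and ACL name below are illustrative):

class ExampleWafStack(Stack):
    def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)
        # CLOUDFRONT-scoped ACLs must be deployed in us-east-1;
        # pass waf_scope="REGIONAL" for an ALB-attached ACL instead.
        web_acl = create_web_acl_with_common_rules(self, "example-web-acl")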
+ def check_web_acl_exists(web_acl_name: str, scope: str, region_name: str = None) -> tuple[bool, dict]:
+     """
+     Checks if a Web ACL with the given name and scope exists.
+
+     Args:
+         web_acl_name: The name of the Web ACL to check.
+         scope: The scope of the Web ACL ('CLOUDFRONT' or 'REGIONAL').
+         region_name: The AWS region to check in. Required for REGIONAL scope.
+                      If None, uses the default session region. For CLOUDFRONT,
+                      the region must be 'us-east-1'.
+
+     Returns:
+         A tuple:
+         - The first element is True if the Web ACL exists, False otherwise.
+         - The second element is the Web ACL object (dictionary) if found,
+           or an empty dict otherwise.
+     """
+     if scope not in ['CLOUDFRONT', 'REGIONAL']:
+         raise ValueError("Scope must be either 'CLOUDFRONT' or 'REGIONAL'")
+
+     if scope == 'REGIONAL' and not region_name:
+         raise ValueError("Region name is required for REGIONAL scope")
+
+     if scope == 'CLOUDFRONT':
+         region_name = 'us-east-1'  # CloudFront scope requires us-east-1
+
+     if region_name:
+         waf_client = boto3.client('wafv2', region_name=region_name)
+     else:
+         waf_client = boto3.client('wafv2')
+     try:
+         response = waf_client.list_web_acls(Scope=scope)
+         if 'WebACLs' in response:
+             for web_acl in response['WebACLs']:
+                 if web_acl['Name'] == web_acl_name:
+                     # Fetch the full Web ACL object (get_web_acl also needs the Id).
+                     get_response = waf_client.get_web_acl(Name=web_acl_name, Scope=scope, Id=web_acl['Id'])
+                     return True, get_response['WebACL']
+             return False, {}
+         else:
+             return False, {}
+     except ClientError as e:
+         # Check for the error code indicating the web ACL doesn't exist.
+         if e.response['Error']['Code'] == 'WAFNonexistentItemException':
+             return False, {}
+         else:
+             # Re-raise other exceptions.
+             raise
+     except Exception as e:
+         print(f"An unexpected error occurred: {e}")
+         return False, {}
+
+ def add_alb_https_listener_with_cert(
+     scope: Construct,
+     logical_id: str,  # A unique ID for this listener construct
+     alb: elb.ApplicationLoadBalancer,
+     acm_certificate_arn: Optional[str],  # Optional: if None, no HTTPS listener will be created
+     default_target_group: elb.ITargetGroup,  # Mandatory: the target group to forward traffic to
+     listener_port_https: int = 443,
+     listener_open_to_internet: bool = False,  # Be cautious with True; ensure the ALB security group restricts access
+     # --- Cognito Authentication Parameters ---
+     enable_cognito_auth: bool = False,
+     cognito_user_pool: Optional[cognito.IUserPool] = None,
+     cognito_user_pool_client: Optional[cognito.IUserPoolClient] = None,
+     cognito_user_pool_domain: Optional[cognito.IUserPoolDomain] = None,  # The UserPoolDomain construct added to the pool
+     cognito_auth_scope: Optional[str] = "openid profile email",  # Default recommended scope
+     cognito_auth_on_unauthenticated_request: elb.UnauthenticatedAction = elb.UnauthenticatedAction.AUTHENTICATE,
+     stickiness_cookie_duration=None
+     # --- End Cognito Parameters ---
+ ) -> Optional[elb.ApplicationListener]:
+     """
+     Conditionally adds an HTTPS listener to an ALB with an ACM certificate,
+     and optionally enables Cognito User Pool authentication.
+
+     Args:
+         scope (Construct): The scope in which to define this construct (e.g., your CDK Stack).
+         logical_id (str): A unique logical ID for the listener construct within the stack.
+         alb (elb.ApplicationLoadBalancer): The Application Load Balancer to add the listener to.
+         acm_certificate_arn (Optional[str]): The ARN of the ACM certificate to attach.
+                                              If None, the HTTPS listener will NOT be created.
+         default_target_group (elb.ITargetGroup): The default target group for the listener to forward traffic to.
+                                                  This is mandatory for a functional listener.
+         listener_port_https (int): The HTTPS port to listen on (default: 443).
+         listener_open_to_internet (bool): Whether the listener should allow connections from all sources.
+                                           If False (recommended), ensure your ALB's security group allows
+                                           inbound traffic on this port from desired sources.
+         enable_cognito_auth (bool): Set to True to enable Cognito User Pool authentication.
+         cognito_user_pool (Optional[cognito.IUserPool]): The Cognito User Pool object. Required if enable_cognito_auth is True.
+         cognito_user_pool_client (Optional[cognito.IUserPoolClient]): The Cognito User Pool App Client object. Required if enable_cognito_auth is True.
+         cognito_user_pool_domain (Optional[cognito.IUserPoolDomain]): The User Pool Domain construct. Required if enable_cognito_auth is True.
+         cognito_auth_scope (Optional[str]): The scope for the Cognito authentication.
+         cognito_auth_on_unauthenticated_request (elb.UnauthenticatedAction): Action for unauthenticated requests.
+                                                                              Defaults to AUTHENTICATE (redirect to login).
+
+     Returns:
+         Optional[elb.ApplicationListener]: The created ApplicationListener if successful,
+                                            None if no ACM certificate ARN was provided.
+     """
+     https_listener = None
+     if acm_certificate_arn:
+         certificates_list = [elb.ListenerCertificate.from_arn(acm_certificate_arn)]
+         print(f"Attempting to add ALB HTTPS listener on port {listener_port_https} with ACM certificate: {acm_certificate_arn}")
+
+         # Determine the default action based on whether Cognito auth is enabled
+         default_action = None
+         if enable_cognito_auth:
+             if not all([cognito_user_pool, cognito_user_pool_client, cognito_user_pool_domain]):
+                 raise ValueError(
+                     "Cognito User Pool, Client, and Domain must be provided if enable_cognito_auth is True."
+                 )
+             print(f"Enabling Cognito authentication with User Pool: {cognito_user_pool.user_pool_id}")
+
+             default_action = elb_act.AuthenticateCognitoAction(
+                 next=elb.ListenerAction.forward([default_target_group]),  # After successful auth, forward to TG
+                 user_pool=cognito_user_pool,
+                 user_pool_client=cognito_user_pool_client,
+                 user_pool_domain=cognito_user_pool_domain,
+                 scope=cognito_auth_scope,
+                 on_unauthenticated_request=cognito_auth_on_unauthenticated_request,
+                 session_timeout=stickiness_cookie_duration
+                 # Additional options you might want to configure:
+                 # session_cookie_name="AWSELBCookies"
+             )
+         else:
+             default_action = elb.ListenerAction.forward([default_target_group])
+             print("Cognito authentication is NOT enabled for this listener.")
+
+         # Add the HTTPS listener
+         https_listener = alb.add_listener(
+             logical_id,
+             port=listener_port_https,
+             open=listener_open_to_internet,
+             certificates=certificates_list,
+             default_action=default_action  # Use the determined default action
+         )
+         print(f"ALB HTTPS listener on port {listener_port_https} defined.")
+     else:
+         print("ACM_CERTIFICATE_ARN is not provided. Skipping HTTPS listener creation.")
+
+     return https_listener
+
+
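A hedged usage sketch (the target group, certificate ARN, and Cognito constructs are assumed to be defined elsewhere in the calling stack):

https_listener = add_alb_https_listener_with_cert(
    self,
    "HttpsListener",
    alb=alb,
    acm_certificate_arn=ACM_CERTIFICATE_ARN,
    default_target_group=target_group,  # an elb target group created elsewhere
    enable_cognito_auth=True,
    cognito_user_pool=user_pool,
    cognito_user_pool_client=user_pool_client,
    cognito_user_pool_domain=user_pool_domain,
)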
+ def ensure_folder_exists(output_folder: str):
+     """Checks if the specified folder exists, and creates it if not."""
+
+     if not os.path.exists(output_folder):
+         # Create the folder if it doesn't exist
+         os.makedirs(output_folder, exist_ok=True)
+         print(f"Created the {output_folder} folder.")
+     else:
+         print(f"The {output_folder} folder already exists.")
+
+ def create_basic_config_env(out_dir: str = "config", S3_LOG_CONFIG_BUCKET_NAME=S3_LOG_CONFIG_BUCKET_NAME, S3_OUTPUT_BUCKET_NAME=S3_OUTPUT_BUCKET_NAME, ACCESS_LOG_DYNAMODB_TABLE_NAME=ACCESS_LOG_DYNAMODB_TABLE_NAME, FEEDBACK_LOG_DYNAMODB_TABLE_NAME=FEEDBACK_LOG_DYNAMODB_TABLE_NAME, USAGE_LOG_DYNAMODB_TABLE_NAME=USAGE_LOG_DYNAMODB_TABLE_NAME):
+     '''
+     Create a basic config.env file for the user to use with their newly deployed redaction app.
+     '''
+     variables = {
+         'COGNITO_AUTH': '1',
+         'RUN_AWS_FUNCTIONS': '1',
+         'DISPLAY_FILE_NAMES_IN_LOGS': 'False',
+         'SESSION_OUTPUT_FOLDER': 'True',
+         'SAVE_LOGS_TO_DYNAMODB': 'True',
+         'SHOW_COSTS': 'True',
+         'SHOW_WHOLE_DOCUMENT_TEXTRACT_CALL_OPTIONS': 'True',
+         'LOAD_PREVIOUS_TEXTRACT_JOBS_S3': 'True',
+         'DOCUMENT_REDACTION_BUCKET': S3_LOG_CONFIG_BUCKET_NAME,
+         'TEXTRACT_WHOLE_DOCUMENT_ANALYSIS_BUCKET': S3_OUTPUT_BUCKET_NAME,
+         'ACCESS_LOG_DYNAMODB_TABLE_NAME': ACCESS_LOG_DYNAMODB_TABLE_NAME,
+         'FEEDBACK_LOG_DYNAMODB_TABLE_NAME': FEEDBACK_LOG_DYNAMODB_TABLE_NAME,
+         'USAGE_LOG_DYNAMODB_TABLE_NAME': USAGE_LOG_DYNAMODB_TABLE_NAME
+     }
+
+     # Write variables to .env file
+     ensure_folder_exists(out_dir + "/")
+     env_file_path = os.path.abspath(os.path.join(out_dir, 'config.env'))
+
+     # set_key would create the file on first use, but creating it once up front
+     # is cleaner than relying on that inside the loop.
+     if not os.path.exists(env_file_path):
+         with open(env_file_path, 'w') as f:
+             pass  # Create empty file
+
+     for key, value in variables.items():
+         set_key(env_file_path, key, str(value), quote_mode="never")
+
+     return variables
+
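The generated file can be consumed with python-dotenv (which this module already uses for set_key); a minimal sketch:

import os
from dotenv import load_dotenv

create_basic_config_env("config")
load_dotenv("config/config.env")
print(os.environ["COGNITO_AUTH"])  # -> "1"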
+ def start_codebuild_build(PROJECT_NAME: str, AWS_REGION: str = AWS_REGION):
+     '''
+     Start a build for an existing CodeBuild project.
+     '''
+
+     # --- Initialize CodeBuild client ---
+     client = boto3.client('codebuild', region_name=AWS_REGION)
+
+     try:
+         print(f"Attempting to start build for project: {PROJECT_NAME}")
+
+         response = client.start_build(
+             projectName=PROJECT_NAME
+         )
+
+         build_id = response['build']['id']
+         print(f"Successfully started build with ID: {build_id}")
+         print(f"Build ARN: {response['build']['arn']}")
+         print("Build URL (approximate - constructed from region and ID):")
+         print(f"https://{AWS_REGION}.console.aws.amazon.com/codesuite/codebuild/projects/{PROJECT_NAME}/build/{build_id.split(':')[-1]}/detail")
+
+         # You can inspect the full response if needed:
+         # import json
+         # print(json.dumps(response, indent=2, default=str))
+
+     except client.exceptions.ResourceNotFoundException:
+         print(f"Error: Project '{PROJECT_NAME}' not found in region '{AWS_REGION}'.")
+     except Exception as e:
+         print(f"An unexpected error occurred: {e}")
+
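If the caller needs to wait for the build to finish, boto3's batch_get_builds can be polled; a minimal sketch (the helper name and poll interval are illustrative, not part of this module):

import time

def wait_for_build(build_id: str, region: str = AWS_REGION, poll_seconds: int = 15) -> str:
    client = boto3.client('codebuild', region_name=region)
    while True:
        build = client.batch_get_builds(ids=[build_id])['builds'][0]
        status = build['buildStatus']  # e.g. SUCCEEDED, FAILED, IN_PROGRESS
        if status != 'IN_PROGRESS':
            return status
        time.sleep(poll_seconds)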
+ def upload_file_to_s3(local_file_paths: List[str], s3_key: str, s3_bucket: str, RUN_AWS_FUNCTIONS: str = "1"):
+     """
+     Uploads one or more files from the local machine to Amazon S3.
+
+     Args:
+     - local_file_paths: Local file path(s) of the file(s) to upload.
+     - s3_key: Key (path) prefix for the file(s) in the S3 bucket.
+     - s3_bucket: Name of the S3 bucket.
+
+     Returns:
+     - Message as variable/printed to console
+     """
+     final_out_message = []
+     final_out_message_str = ""
+
+     if RUN_AWS_FUNCTIONS == "1":
+         try:
+             if s3_bucket and local_file_paths:
+
+                 s3_client = boto3.client('s3', region_name=AWS_REGION)
+
+                 if isinstance(local_file_paths, str):
+                     local_file_paths = [local_file_paths]
+
+                 for file in local_file_paths:
+                     if s3_client:
+                         try:
+                             # Get file name off file path
+                             file_name = os.path.basename(file)
+
+                             # Note: s3_key is treated as a prefix and should end with '/'
+                             s3_key_full = s3_key + file_name
+                             print("S3 key: ", s3_key_full)
+
+                             s3_client.upload_file(file, s3_bucket, s3_key_full)
+                             out_message = "File " + file_name + " uploaded successfully!"
+                             print(out_message)
+
+                         except Exception as e:
+                             out_message = f"Error uploading file(s): {e}"
+                             print(out_message)
+
+                         final_out_message.append(out_message)
+                         final_out_message_str = '\n'.join(final_out_message)
+
+                     else: final_out_message_str = "Could not connect to AWS."
+             else: final_out_message_str = "At least one essential variable is empty, could not upload to S3"
+         except Exception as e:
+             final_out_message_str = "Could not upload files to S3 due to: " + str(e)
+             print(final_out_message_str)
+     else:
+         final_out_message_str = "App not set to run AWS functions"
+
+     return final_out_message_str
+
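Example call (bucket and file paths are placeholders; note the trailing slash on the key prefix, since the file name is appended directly):

message = upload_file_to_s3(
    ["outputs/redacted.pdf"],
    s3_key="redaction-outputs/",
    s3_bucket="example-output-bucket",
)
print(message)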
+ def start_ecs_task(cluster_name, service_name):
+     '''Scale the given ECS service up to one running task.'''
+     ecs_client = boto3.client('ecs')
+
+     try:
+         # Update the service to set the desired count to 1
+         response = ecs_client.update_service(
+             cluster=cluster_name,
+             service=service_name,
+             desiredCount=1
+         )
+         return {
+             "statusCode": 200,
+             "body": f"Service {service_name} in cluster {cluster_name} has been updated to 1 task."
+         }
+     except Exception as e:
+         return {
+             "statusCode": 500,
+             "body": f"Error updating service: {str(e)}"
+         }
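The inverse operation, scaling the service back to zero, is symmetric; a minimal sketch (this helper is not defined in the module, it simply mirrors start_ecs_task):

def stop_ecs_task(cluster_name, service_name):
    '''Scale the given ECS service down to zero running tasks.'''
    ecs_client = boto3.client('ecs')
    try:
        ecs_client.update_service(
            cluster=cluster_name,
            service=service_name,
            desiredCount=0
        )
        return {"statusCode": 200,
                "body": f"Service {service_name} in cluster {cluster_name} scaled to 0 tasks."}
    except Exception as e:
        return {"statusCode": 500, "body": f"Error updating service: {str(e)}"}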
cdk/cdk_stack.py ADDED
@@ -0,0 +1,1317 @@
+ import os
+ import json  # Used for loading the task definition file
+ from typing import List, Dict, Any
+ from aws_cdk import (
+     Stack,
+     CfnTag,  # <-- Import CfnTag directly
+     CfnOutput,  # <-- Import CfnOutput directly
+     Duration,
+     RemovalPolicy,
+     SecretValue,
+     aws_ec2 as ec2,
+     aws_ecr as ecr,
+     aws_s3 as s3,
+     aws_ecs as ecs,
+     aws_iam as iam,
+     aws_codebuild as codebuild,
+     aws_cognito as cognito,
+     aws_secretsmanager as secretsmanager,
+     aws_cloudfront as cloudfront,
+     aws_cloudfront_origins as origins,
+     aws_elasticloadbalancingv2 as elbv2,
+     aws_logs as logs,
+     aws_wafv2 as wafv2,
+     aws_dynamodb as dynamodb  # Import the DynamoDB module
+ )
+
+ from constructs import Construct
+ from cdk_config import CDK_PREFIX, VPC_NAME, AWS_MANAGED_TASK_ROLES_LIST, GITHUB_REPO_USERNAME, GITHUB_REPO_NAME, GITHUB_REPO_BRANCH, ECS_TASK_MEMORY_SIZE, ECS_TASK_CPU_SIZE, CUSTOM_HEADER, CUSTOM_HEADER_VALUE, AWS_REGION, CLOUDFRONT_GEO_RESTRICTION, DAYS_TO_DISPLAY_WHOLE_DOCUMENT_JOBS, GRADIO_SERVER_PORT, PUBLIC_SUBNETS_TO_USE, PUBLIC_SUBNET_CIDR_BLOCKS, PUBLIC_SUBNET_AVAILABILITY_ZONES, PRIVATE_SUBNETS_TO_USE, PRIVATE_SUBNET_CIDR_BLOCKS, PRIVATE_SUBNET_AVAILABILITY_ZONES, CODEBUILD_PROJECT_NAME, ECS_SECURITY_GROUP_NAME, ALB_NAME_SECURITY_GROUP_NAME, ALB_NAME, COGNITO_USER_POOL_NAME, COGNITO_USER_POOL_CLIENT_NAME, COGNITO_USER_POOL_CLIENT_SECRET_NAME, FARGATE_TASK_DEFINITION_NAME, ECS_SERVICE_NAME, WEB_ACL_NAME, CLOUDFRONT_DISTRIBUTION_NAME, ECS_TASK_ROLE_NAME, ALB_TARGET_GROUP_NAME, S3_LOG_CONFIG_BUCKET_NAME, S3_OUTPUT_BUCKET_NAME, ACM_CERTIFICATE_ARN, CLUSTER_NAME, CODEBUILD_ROLE_NAME, ECS_TASK_EXECUTION_ROLE_NAME, ECR_CDK_REPO_NAME, ECS_LOG_GROUP_NAME, SAVE_LOGS_TO_DYNAMODB, ACCESS_LOG_DYNAMODB_TABLE_NAME, FEEDBACK_LOG_DYNAMODB_TABLE_NAME, USAGE_LOG_DYNAMODB_TABLE_NAME, TASK_DEFINITION_FILE_LOCATION, EXISTING_IGW_ID, SINGLE_NAT_GATEWAY_ID, NAT_GATEWAY_NAME, COGNITO_USER_POOL_DOMAIN_PREFIX, COGNITO_REDIRECTION_URL, AWS_ACCOUNT_ID, ECS_USE_FARGATE_SPOT, ECS_READ_ONLY_FILE_SYSTEM, USE_CLOUDFRONT, LOAD_BALANCER_WEB_ACL_NAME
+ from cdk_functions import create_subnets, create_web_acl_with_common_rules, add_custom_policies, add_alb_https_listener_with_cert, create_nat_gateway  # Only keep CDK-native functions
+
+ def _get_env_list(env_var_value: str) -> List[str]:
+     """Parses the comma-separated *value* of an environment variable into a list of strings."""
+     # Strip the surrounding brackets/quotes and any stray quote characters
+     value = env_var_value[1:-1].strip().replace('\"', '').replace("\'", "")
+     if not value:
+         return []
+     # Split by comma and filter out any empty strings that might result from extra commas
+     return [s.strip() for s in value.split(',') if s.strip()]
+
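For example, given the quoting convention the parser expects:

>>> _get_env_list("['subnet-a', 'subnet-b', 'subnet-c']")
['subnet-a', 'subnet-b', 'subnet-c']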
+ # 1. Try to load CIDR/AZs from environment variables
+ if PUBLIC_SUBNETS_TO_USE: PUBLIC_SUBNETS_TO_USE = _get_env_list(PUBLIC_SUBNETS_TO_USE)
+ if PRIVATE_SUBNETS_TO_USE: PRIVATE_SUBNETS_TO_USE = _get_env_list(PRIVATE_SUBNETS_TO_USE)
+
+ if PUBLIC_SUBNET_CIDR_BLOCKS: PUBLIC_SUBNET_CIDR_BLOCKS = _get_env_list(PUBLIC_SUBNET_CIDR_BLOCKS)
+ if PUBLIC_SUBNET_AVAILABILITY_ZONES: PUBLIC_SUBNET_AVAILABILITY_ZONES = _get_env_list(PUBLIC_SUBNET_AVAILABILITY_ZONES)
+ if PRIVATE_SUBNET_CIDR_BLOCKS: PRIVATE_SUBNET_CIDR_BLOCKS = _get_env_list(PRIVATE_SUBNET_CIDR_BLOCKS)
+ if PRIVATE_SUBNET_AVAILABILITY_ZONES: PRIVATE_SUBNET_AVAILABILITY_ZONES = _get_env_list(PRIVATE_SUBNET_AVAILABILITY_ZONES)
+
+ if AWS_MANAGED_TASK_ROLES_LIST: AWS_MANAGED_TASK_ROLES_LIST = _get_env_list(AWS_MANAGED_TASK_ROLES_LIST)
+
+ class CdkStack(Stack):
+
+     def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
+         super().__init__(scope, construct_id, **kwargs)
+
+         # --- Helpers to get context values ---
+         def get_context_bool(key: str, default: bool = False) -> bool:
+             return self.node.try_get_context(key) or default
+
+         def get_context_str(key: str, default: str = None) -> str:
+             return self.node.try_get_context(key) or default
+
+         def get_context_dict(key: str, default: dict = None) -> dict:
+             return self.node.try_get_context(key) or default
+
+         def get_context_list_of_dicts(key: str) -> List[Dict[str, Any]]:
+             ctx_value = self.node.try_get_context(key)
+             if not isinstance(ctx_value, list):
+                 print(f"Warning: Context key '{key}' not found or not a list. Returning empty list.")
+                 return []
+             # Optional: Add validation that all items in the list are dicts
+             return ctx_value
+
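These helpers read the context that app.py generates before synthesis; context along these lines would drive the exists/arn branches below (the keys shown are illustrative of the naming convention, not an exhaustive schema):

# Passed via cdk.context.json or --context; an illustrative shape only:
# {
#   "exists:my-codebuild-role": true,
#   "arn:my-codebuild-role": "arn:aws:iam::123456789012:role/my-codebuild-role",
#   "public_subnets_to_create": [
#     {"name": "public-1", "cidr": "10.0.0.0/24", "az": "us-east-1a"}
#   ]
# }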
+         # --- VPC and Subnets (VPC is always looked up; subnets are created/returned by create_subnets) ---
+         try:
+             vpc = ec2.Vpc.from_lookup(
+                 self,
+                 "VPC",
+                 vpc_name=VPC_NAME
+             )
+             print("Successfully looked up VPC:", vpc.vpc_id)
+         except Exception as e:
+             raise Exception(f"Could not look up VPC with name '{VPC_NAME}' due to: {e}")
+
+         # --- Subnet Handling (Check Context and Create/Import) ---
+         # Initialize lists to hold ISubnet objects (L2) and CfnSubnet/CfnRouteTable (L1).
+         # We store ISubnet for consistency, as CfnSubnet has a .subnet_id property.
+         self.public_subnets: List[ec2.ISubnet] = []
+         self.private_subnets: List[ec2.ISubnet] = []
+         # Store L1 CfnRouteTables explicitly if you need to reference them later
+         self.private_route_tables_cfn: List[ec2.CfnRouteTable] = []
+         self.public_route_tables_cfn: List[ec2.CfnRouteTable] = []  # New: to store public RTs
+
+         names_to_create_private = []
+         names_to_create_public = []
+
+         if not PUBLIC_SUBNETS_TO_USE and not PRIVATE_SUBNETS_TO_USE:
+             print("Warning: No public or private subnets specified in *_SUBNETS_TO_USE. Attempting to select from existing VPC subnets.")
+
+             print("vpc.public_subnets:", vpc.public_subnets)
+             print("vpc.private_subnets:", vpc.private_subnets)
+
+             selected_public_subnets = vpc.select_subnets(subnet_type=ec2.SubnetType.PUBLIC, one_per_az=True)
+             private_subnets_egress = vpc.select_subnets(subnet_type=ec2.SubnetType.PRIVATE_WITH_EGRESS, one_per_az=True)
+             private_subnets_isolated = vpc.select_subnets(subnet_type=ec2.SubnetType.PRIVATE_ISOLATED, one_per_az=True)
+
+             combined_subnet_objects = []
+
+             if private_subnets_egress.subnets:
+                 # Add the first PRIVATE_WITH_EGRESS subnet
+                 combined_subnet_objects.append(private_subnets_egress.subnets[0])
+             else:
+                 self.node.add_warning("No PRIVATE_WITH_EGRESS subnets found to select the first one.")
+
+             # Add all PRIVATE_ISOLATED subnets *except* the first one (if they exist)
+             if len(private_subnets_isolated.subnets) > 1:
+                 combined_subnet_objects.extend(private_subnets_isolated.subnets[1:])
+             elif private_subnets_isolated.subnets:  # Only one isolated subnet found
+                 self.node.add_warning("Only one PRIVATE_ISOLATED subnet found, private_subnets_isolated.subnets[1:] will be empty.")
+             else:
+                 self.node.add_warning("No PRIVATE_ISOLATED subnets found.")
+
+             # Create an ec2.SelectedSubnets object from the combined private subnet list.
+             selected_private_subnets = vpc.select_subnets(
+                 subnets=combined_subnet_objects
+             )
+
+             print("selected_public_subnets:", selected_public_subnets)
+             print("selected_private_subnets:", selected_private_subnets)
+
+             if len(selected_public_subnets.subnet_ids) < 2 or len(selected_private_subnets.subnet_ids) < 2:
+                 raise Exception("Need at least two public and two private subnets in different availability zones")
+
+             if not selected_public_subnets.subnets and not selected_private_subnets.subnets:
+                 # If no subnets could be found even with automatic selection, raise an error.
+                 # This ensures the stack doesn't proceed if it absolutely needs subnets.
+                 print("Error: No existing public or private subnets could be found in the VPC for automatic selection. "
+                       "You must either specify subnets in *_SUBNETS_TO_USE or ensure the VPC has discoverable subnets.")
+                 raise RuntimeError("No suitable subnets found for automatic selection.")
+             else:
+                 self.public_subnets = selected_public_subnets.subnets
+                 self.private_subnets = selected_private_subnets.subnets
+                 print(f"Automatically selected {len(self.public_subnets)} public and {len(self.private_subnets)} private subnets based on VPC discovery.")
+
+             print("self.public_subnets:", self.public_subnets)
+             print("self.private_subnets:", self.private_subnets)
+             # Since subnets are now assigned, the context-driven import/creation below
+             # will find nothing to do for *_SUBNETS_TO_USE.
+
+         checked_public_subnets_ctx = get_context_dict("checked_public_subnets")
+         checked_private_subnets_ctx = get_context_dict("checked_private_subnets")
+
+         public_subnets_data_for_creation_ctx = get_context_list_of_dicts("public_subnets_to_create")
+         private_subnets_data_for_creation_ctx = get_context_list_of_dicts("private_subnets_to_create")
+
+         # --- 3. Process Public Subnets ---
+         print("\n--- Processing Public Subnets ---")
+         # Import existing public subnets
+         if checked_public_subnets_ctx:
+             for i, subnet_name in enumerate(PUBLIC_SUBNETS_TO_USE):
+                 subnet_info = checked_public_subnets_ctx.get(subnet_name)
+                 if subnet_info and subnet_info.get("exists"):
+                     subnet_id = subnet_info.get("id")
+                     if not subnet_id:
+                         raise RuntimeError(f"Context for existing public subnet '{subnet_name}' is missing 'id'.")
+                     try:
+                         imported_subnet = ec2.Subnet.from_subnet_id(
+                             self, f"ImportedPublicSubnet{subnet_name.replace('-', '')}{i}", subnet_id
+                         )
+                         # self.public_subnets.append(imported_subnet)
+                         print(f"Imported existing public subnet: {subnet_name} (ID: {subnet_id})")
+                     except Exception as e:
+                         raise RuntimeError(f"Failed to import public subnet '{subnet_name}' with ID '{subnet_id}'. Error: {e}")
+
+         # Create new public subnets based on public_subnets_data_for_creation_ctx
+         if public_subnets_data_for_creation_ctx:
+             names_to_create_public = [s['name'] for s in public_subnets_data_for_creation_ctx]
+             cidrs_to_create_public = [s['cidr'] for s in public_subnets_data_for_creation_ctx]
+             azs_to_create_public = [s['az'] for s in public_subnets_data_for_creation_ctx]
+
+             if names_to_create_public:
+                 print(f"Attempting to create {len(names_to_create_public)} new public subnets: {names_to_create_public}")
+                 newly_created_public_subnets, newly_created_public_rts_cfn = create_subnets(
+                     self, vpc, CDK_PREFIX, names_to_create_public, cidrs_to_create_public, azs_to_create_public,
+                     is_public=True,
+                     internet_gateway_id=EXISTING_IGW_ID
+                 )
+                 self.public_subnets.extend(newly_created_public_subnets)
+                 self.public_route_tables_cfn.extend(newly_created_public_rts_cfn)
+
+         if not self.public_subnets:
+             raise Exception("No public subnets found or created, exiting.")
+
+
+         # --- NAT Gateway Creation/Lookup ---
+         self.single_nat_gateway_id = None
+
+         nat_gw_id_from_context = SINGLE_NAT_GATEWAY_ID
+
+         if nat_gw_id_from_context:
+             print(f"Using existing NAT Gateway ID from context: {nat_gw_id_from_context}")
+             self.single_nat_gateway_id = nat_gw_id_from_context
+         else:
+             # If not in context, create a new one, but only if we have a public subnet.
+             if self.public_subnets:
+                 print("NAT Gateway ID not found in context. Creating a new one.")
+                 # Place the NAT GW in the first available public subnet
+                 first_public_subnet = self.public_subnets[0]
+
+                 self.single_nat_gateway_id = create_nat_gateway(
+                     self,
+                     first_public_subnet,
+                     nat_gateway_name=NAT_GATEWAY_NAME,
+                     nat_gateway_id_context_key=SINGLE_NAT_GATEWAY_ID
+                 )
+             else:
+                 print("WARNING: No public subnets available. Cannot create a NAT Gateway.")
+
+
+         # --- 4. Process Private Subnets ---
+         print("\n--- Processing Private Subnets ---")
+         # (Importing existing private subnets from checked_private_subnets_ctx mirrors
+         # the public subnet handling above.)
+
+         # Create new private subnets
+         if private_subnets_data_for_creation_ctx:
+             names_to_create_private = [s['name'] for s in private_subnets_data_for_creation_ctx]
+             cidrs_to_create_private = [s['cidr'] for s in private_subnets_data_for_creation_ctx]
+             azs_to_create_private = [s['az'] for s in private_subnets_data_for_creation_ctx]
+
+             if names_to_create_private:
+                 print(f"Attempting to create {len(names_to_create_private)} new private subnets: {names_to_create_private}")
+                 # Ensure self.single_nat_gateway_id is available before this call
+                 if not self.single_nat_gateway_id:
+                     raise ValueError("A single NAT Gateway ID is required for private subnets but was not resolved.")
+
+                 newly_created_private_subnets_cfn, newly_created_private_rts_cfn = create_subnets(
+                     self, vpc, CDK_PREFIX, names_to_create_private, cidrs_to_create_private, azs_to_create_private,
+                     is_public=False,
+                     single_nat_gateway_id=self.single_nat_gateway_id  # Pass the single NAT Gateway ID
+                 )
+                 self.private_subnets.extend(newly_created_private_subnets_cfn)
+                 self.private_route_tables_cfn.extend(newly_created_private_rts_cfn)
+                 print(f"Successfully defined {len(newly_created_private_subnets_cfn)} new private subnets and their route tables for creation.")
+             else:
+                 print("No private subnets specified for creation in context ('private_subnets_to_create').")
+
+         if not self.private_subnets:
+             raise Exception("No private subnets found or created, exiting.")
+
+         # --- 5. Sanity Check and Output ---
+
+         # Output the single NAT Gateway ID for verification
+         if self.single_nat_gateway_id:
+             CfnOutput(self, "SingleNatGatewayId", value=self.single_nat_gateway_id,
+                       description="ID of the single NAT Gateway used for private subnets.")
+         else:
+             raise Exception("No single NAT Gateway was created or resolved.")
+
+         # --- Outputs for other stacks/regions ---
+         # These are crucial for cross-stack, cross-region referencing
+
+         self.params = dict()
+         self.params["vpc_id"] = vpc.vpc_id
+         self.params["private_subnets"] = self.private_subnets
+         self.params["private_route_tables"] = self.private_route_tables_cfn
+         self.params["public_subnets"] = self.public_subnets
+         self.params["public_route_tables"] = self.public_route_tables_cfn
+
+
+         # class CdkStackMain(Stack):
+         #     def __init__(self, scope: Construct, construct_id: str, private_subnets: List[ec2.ISubnet] = [], private_route_tables: List[ec2.CfnRouteTable] = [], public_subnets: List[ec2.ISubnet] = [], public_route_tables: List[ec2.CfnRouteTable] = [], **kwargs) -> None:
+         #         super().__init__(scope, construct_id, **kwargs)
+         #         ... (same get_context_* helpers as defined above) ...
+         #         self.private_subnets: List[ec2.ISubnet] = private_subnets
+         #         self.private_route_tables_cfn: List[ec2.CfnRouteTable] = private_route_tables
+         #         self.public_subnets: List[ec2.ISubnet] = public_subnets
+         #         self.public_route_tables_cfn: List[ec2.CfnRouteTable] = public_route_tables
+
+         private_subnet_selection = ec2.SubnetSelection(subnets=self.private_subnets)
+         public_subnet_selection = ec2.SubnetSelection(subnets=self.public_subnets)
+
+         for sub in private_subnet_selection.subnets:
+             print("private subnet:", sub.subnet_id, "is in availability zone:", sub.availability_zone)
+
+         for sub in public_subnet_selection.subnets:
+             print("public subnet:", sub.subnet_id, "is in availability zone:", sub.availability_zone)
+
+         print("Private subnet route tables:", self.private_route_tables_cfn)
+
+         # Add the S3 Gateway Endpoint to the VPC
+         if names_to_create_private:
+             try:
+                 s3_gateway_endpoint = vpc.add_gateway_endpoint(
+                     "S3GatewayEndpoint",
+                     service=ec2.GatewayVpcEndpointAwsService.S3, subnets=[private_subnet_selection])
+
+                 # Output some useful information (kept inside the try so the endpoint
+                 # ID is only referenced when the endpoint was actually created)
+                 CfnOutput(self, "VpcIdOutput", value=vpc.vpc_id,
+                           description="The ID of the VPC where the S3 Gateway Endpoint is deployed.")
+                 CfnOutput(self, "S3GatewayEndpointService", value=s3_gateway_endpoint.vpc_endpoint_id,
+                           description="The ID of the S3 Gateway Endpoint.")
+             except Exception as e:
+                 print("Could not add S3 gateway endpoint to subnets due to:", e)
+
+         # --- IAM Roles ---
+         try:
+             codebuild_role_name = CODEBUILD_ROLE_NAME
+             custom_sts_kms_policy = """{
+                 "Version": "2012-10-17",
+                 "Statement": [
+                     {
+                         "Sid": "STSCallerIdentity",
+                         "Effect": "Allow",
+                         "Action": [
+                             "sts:GetCallerIdentity"
+                         ],
+                         "Resource": "*"
+                     },
+                     {
+                         "Sid": "KMSAccess",
+                         "Effect": "Allow",
+                         "Action": [
+                             "kms:Encrypt",
+                             "kms:Decrypt",
+                             "kms:GenerateDataKey"
+                         ],
+                         "Resource": "*"
+                     }
+                 ]
+             }"""
+
+             if get_context_bool(f"exists:{codebuild_role_name}"):
+                 # If it exists, import the role using the ARN from context
+                 role_arn = get_context_str(f"arn:{codebuild_role_name}")
+                 if not role_arn:
+                     raise ValueError(f"Context value 'arn:{codebuild_role_name}' is required if role exists.")
+                 codebuild_role = iam.Role.from_role_arn(self, "CodeBuildRole", role_arn=role_arn)
+                 print("Using existing CodeBuild role")
+             else:
+                 # If not, create the role
+                 codebuild_role = iam.Role(
+                     self, "CodeBuildRole",  # Logical ID
+                     role_name=codebuild_role_name,  # Explicit resource name
+                     assumed_by=iam.ServicePrincipal("codebuild.amazonaws.com")
+                 )
+                 codebuild_role.add_managed_policy(iam.ManagedPolicy.from_aws_managed_policy_name("EC2InstanceProfileForImageBuilderECRContainerBuilds"))
+                 print("Successfully created new CodeBuild role")
+
+             task_role_name = ECS_TASK_ROLE_NAME
+             if get_context_bool(f"exists:{task_role_name}"):
+                 role_arn = get_context_str(f"arn:{task_role_name}")
+                 if not role_arn:
+                     raise ValueError(f"Context value 'arn:{task_role_name}' is required if role exists.")
+                 task_role = iam.Role.from_role_arn(self, "TaskRole", role_arn=role_arn)
+                 print("Using existing ECS task role")
+             else:
+                 task_role = iam.Role(
+                     self, "TaskRole",  # Logical ID
+                     role_name=task_role_name,  # Explicit resource name
+                     assumed_by=iam.ServicePrincipal("ecs-tasks.amazonaws.com")
+                 )
+                 for role in AWS_MANAGED_TASK_ROLES_LIST:
+                     print(f"Adding {role} to policy")
+                     task_role.add_managed_policy(iam.ManagedPolicy.from_aws_managed_policy_name(role))
+                 task_role = add_custom_policies(self, task_role, custom_policy_text=custom_sts_kms_policy)
+                 print("Successfully created new ECS task role")
+
+             execution_role_name = ECS_TASK_EXECUTION_ROLE_NAME
+             if get_context_bool(f"exists:{execution_role_name}"):
+                 role_arn = get_context_str(f"arn:{execution_role_name}")
+                 if not role_arn:
+                     raise ValueError(f"Context value 'arn:{execution_role_name}' is required if role exists.")
+                 execution_role = iam.Role.from_role_arn(self, "ExecutionRole", role_arn=role_arn)
+                 print("Using existing ECS execution role")
+             else:
+                 execution_role = iam.Role(
+                     self, "ExecutionRole",  # Logical ID
+                     role_name=execution_role_name,  # Explicit resource name
+                     assumed_by=iam.ServicePrincipal("ecs-tasks.amazonaws.com")
+                 )
+                 for role in AWS_MANAGED_TASK_ROLES_LIST:
+                     execution_role.add_managed_policy(iam.ManagedPolicy.from_aws_managed_policy_name(role))
+                 execution_role = add_custom_policies(self, execution_role, custom_policy_text=custom_sts_kms_policy)
+                 print("Successfully created new ECS execution role")
+
+         except Exception as e:
+             raise Exception(f"Failed at IAM role step due to: {e}")
+
+         # --- S3 Buckets ---
+         try:
+             log_bucket_name = S3_LOG_CONFIG_BUCKET_NAME
+             if get_context_bool(f"exists:{log_bucket_name}"):
+                 bucket = s3.Bucket.from_bucket_name(self, "LogConfigBucket", bucket_name=log_bucket_name)
+                 print("Using existing S3 bucket", log_bucket_name)
+             else:
+                 bucket = s3.Bucket(self, "LogConfigBucket", bucket_name=log_bucket_name,
+                                    versioned=False,  # Set to True if you need versioning
+                                    # IMPORTANT: DESTROY removes the bucket with the stack
+                                    removal_policy=RemovalPolicy.DESTROY,
+                                    # IMPORTANT: empty the bucket before deletion
+                                    auto_delete_objects=True
+                                    )
+                 print("Created S3 bucket", log_bucket_name)
+
+             # Add policies - this applies to both created and imported buckets;
+             # CDK handles idempotent policy additions
+             bucket.add_to_resource_policy(
+                 iam.PolicyStatement(
+                     effect=iam.Effect.ALLOW,
+                     principals=[task_role],  # Pass the role object directly
+                     actions=["s3:GetObject", "s3:PutObject"],
+                     resources=[f"{bucket.bucket_arn}/*"]
+                 )
+             )
+             bucket.add_to_resource_policy(
+                 iam.PolicyStatement(
+                     effect=iam.Effect.ALLOW,
+                     principals=[task_role],
+                     actions=["s3:ListBucket"],
+                     resources=[bucket.bucket_arn]
+                 )
+             )
+
+             output_bucket_name = S3_OUTPUT_BUCKET_NAME
+             if get_context_bool(f"exists:{output_bucket_name}"):
+                 output_bucket = s3.Bucket.from_bucket_name(self, "OutputBucket", bucket_name=output_bucket_name)
+                 print("Using existing Output bucket", output_bucket_name)
+             else:
+                 output_bucket = s3.Bucket(self, "OutputBucket", bucket_name=output_bucket_name,
+                                           lifecycle_rules=[
+                                               s3.LifecycleRule(
+                                                   expiration=Duration.days(int(DAYS_TO_DISPLAY_WHOLE_DOCUMENT_JOBS))
+                                               )
+                                           ],
+                                           versioned=False,  # Set to True if you need versioning
+                                           removal_policy=RemovalPolicy.DESTROY,
+                                           auto_delete_objects=True
+                                           )
+                 print("Created Output bucket:", output_bucket_name)
+
+             # Add policies to output bucket
+             output_bucket.add_to_resource_policy(
+                 iam.PolicyStatement(
+                     effect=iam.Effect.ALLOW,
+                     principals=[task_role],
+                     actions=["s3:GetObject", "s3:PutObject"],
+                     resources=[f"{output_bucket.bucket_arn}/*"]
+                 )
+             )
+             output_bucket.add_to_resource_policy(
+                 iam.PolicyStatement(
+                     effect=iam.Effect.ALLOW,
+                     principals=[task_role],
+                     actions=["s3:ListBucket"],
+                     resources=[output_bucket.bucket_arn]
+                 )
+             )
+
+         except Exception as e:
+             raise Exception(f"Could not handle S3 buckets due to: {e}")
+
+         # --- Elastic Container Registry ---
+         try:
+             full_ecr_repo_name = ECR_CDK_REPO_NAME
+             if get_context_bool(f"exists:{full_ecr_repo_name}"):
+                 ecr_repo = ecr.Repository.from_repository_name(self, "ECRRepo", repository_name=full_ecr_repo_name)
+                 print("Using existing ECR repository")
+             else:
+                 ecr_repo = ecr.Repository(self, "ECRRepo", repository_name=full_ecr_repo_name)  # Explicitly set repository_name
+                 print("Created ECR repository", full_ecr_repo_name)
+
+             ecr_image_loc = ecr_repo.repository_uri
+         except Exception as e:
+             raise Exception(f"Could not handle ECR repo due to: {e}")
+
+         # --- CODEBUILD ---
+         try:
+             codebuild_project_name = CODEBUILD_PROJECT_NAME
+             if get_context_bool(f"exists:{codebuild_project_name}"):
+                 # Lookup CodeBuild project by ARN from context
+                 project_arn = get_context_str(f"arn:{codebuild_project_name}")
+                 if not project_arn:
+                     raise ValueError(f"Context value 'arn:{codebuild_project_name}' is required if project exists.")
+                 codebuild_project = codebuild.Project.from_project_arn(self, "CodeBuildProject", project_arn=project_arn)
+                 print("Using existing CodeBuild project")
+             else:
+                 codebuild_project = codebuild.Project(
+                     self,
+                     "CodeBuildProject",  # Logical ID
+                     project_name=codebuild_project_name,  # Explicit resource name
+                     source=codebuild.Source.git_hub(
+                         owner=GITHUB_REPO_USERNAME,
+                         repo=GITHUB_REPO_NAME,
+                         branch_or_ref=GITHUB_REPO_BRANCH
+                     ),
+                     environment=codebuild.BuildEnvironment(
+                         build_image=codebuild.LinuxBuildImage.STANDARD_7_0,
+                         privileged=True,  # Required for Docker builds
+                         environment_variables={
+                             "ECR_REPO_NAME": codebuild.BuildEnvironmentVariable(value=full_ecr_repo_name),
+                             "AWS_DEFAULT_REGION": codebuild.BuildEnvironmentVariable(value=AWS_REGION),
+                             "AWS_ACCOUNT_ID": codebuild.BuildEnvironmentVariable(value=AWS_ACCOUNT_ID)
+                         }
+                     ),
+                     build_spec=codebuild.BuildSpec.from_object({
+                         "version": "0.2",
+                         "phases": {
+                             "pre_build": {
+                                 "commands": [
+                                     "echo Logging in to Amazon ECR",
+                                     "aws ecr get-login-password --region $AWS_DEFAULT_REGION | docker login --username AWS --password-stdin $AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
+                                 ]
+                             },
+                             "build": {
+                                 "commands": [
+                                     "echo Building the Docker image",
+                                     "docker build -t $ECR_REPO_NAME:latest .",
+                                     "docker tag $ECR_REPO_NAME:latest $AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com/$ECR_REPO_NAME:latest"
+                                 ]
+                             },
+                             "post_build": {
+                                 "commands": [
+                                     "echo Pushing the Docker image",
+                                     "docker push $AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com/$ECR_REPO_NAME:latest"
+                                 ]
+                             }
+                         }
+                     })
+                 )
+                 print("Successfully created CodeBuild project", codebuild_project_name)
+
+             # Grant permissions - applies to both created and imported project roles
+             ecr_repo.grant_pull_push(codebuild_project.role)
+
+         except Exception as e:
+             raise Exception(f"Could not handle CodeBuild project due to: {e}")
+
+         # --- Security Groups ---
+         try:
+             ecs_security_group_name = ECS_SECURITY_GROUP_NAME
+             # Note: lookups by security group name are unreliable here, so the
+             # groups are always defined and CDK reconciles them on deploy.
+             try:
+                 ecs_security_group = ec2.SecurityGroup(
+                     self,
+                     "ECSSecurityGroup",  # Logical ID
+                     security_group_name=ecs_security_group_name,  # Explicit resource name
+                     vpc=vpc,
+                 )
+                 print(f"Created Security Group: {ecs_security_group_name}")
+             except Exception as e:
+                 print("Failed to create ECS security group due to:", e)
+
+             alb_security_group_name = ALB_NAME_SECURITY_GROUP_NAME
+             try:
+                 alb_security_group = ec2.SecurityGroup(
+                     self,
+                     "ALBSecurityGroup",  # Logical ID
+                     security_group_name=alb_security_group_name,  # Explicit resource name
+                     vpc=vpc
+                 )
+                 print(f"Created Security Group: {alb_security_group_name}")
+             except Exception as e:
+                 print("Failed to create ALB security group due to:", e)
+
+             # Define ingress rules - CDK will manage adding/removing these as needed
+             ec2_port_gradio_server_port = ec2.Port.tcp(int(GRADIO_SERVER_PORT))  # Ensure port is int
+             ecs_security_group.add_ingress_rule(
+                 peer=alb_security_group,
+                 connection=ec2_port_gradio_server_port,
+                 description="ALB traffic",
+             )
+
+             alb_security_group.add_ingress_rule(
+                 # NB: managed prefix list IDs are region-specific; this should be the
+                 # CloudFront origin-facing prefix list for the deployment region.
+                 peer=ec2.Peer.prefix_list("pl-93a247fa"),
+                 connection=ec2.Port.all_traffic(),
+                 description="CloudFront traffic",
+             )
+
+         except Exception as e:
+             raise Exception(f"Could not handle security groups due to: {e}")
+
+         # --- DynamoDB tables for logs (optional) ---
+
+         if SAVE_LOGS_TO_DYNAMODB == 'True':
+             try:
+                 print("Creating DynamoDB tables for logs")
+
+                 dynamodb_table_access = dynamodb.Table(self, "RedactionAccessDataTable",
+                                                        table_name=ACCESS_LOG_DYNAMODB_TABLE_NAME,
+                                                        partition_key=dynamodb.Attribute(
+                                                            name="id",
+                                                            type=dynamodb.AttributeType.STRING),
+                                                        billing_mode=dynamodb.BillingMode.PAY_PER_REQUEST,
+                                                        removal_policy=RemovalPolicy.DESTROY)
+
+                 dynamodb_table_feedback = dynamodb.Table(self, "RedactionFeedbackDataTable",
+                                                          table_name=FEEDBACK_LOG_DYNAMODB_TABLE_NAME,
+                                                          partition_key=dynamodb.Attribute(
+                                                              name="id",
+                                                              type=dynamodb.AttributeType.STRING),
+                                                          billing_mode=dynamodb.BillingMode.PAY_PER_REQUEST,
+                                                          removal_policy=RemovalPolicy.DESTROY)
+
+                 dynamodb_table_usage = dynamodb.Table(self, "RedactionUsageDataTable",
+                                                       table_name=USAGE_LOG_DYNAMODB_TABLE_NAME,
+                                                       partition_key=dynamodb.Attribute(
+                                                           name="id",
+                                                           type=dynamodb.AttributeType.STRING),
+                                                       billing_mode=dynamodb.BillingMode.PAY_PER_REQUEST,
+                                                       removal_policy=RemovalPolicy.DESTROY)
+
+             except Exception as e:
+                 raise Exception(f"Could not create DynamoDB tables due to: {e}")
+
+         # --- ALB ---
+         try:
+             load_balancer_name = ALB_NAME
+             # ALB names are limited to 32 characters; keep the last 32 if longer
+             if len(load_balancer_name) > 32: load_balancer_name = load_balancer_name[-32:]
+             if get_context_bool(f"exists:{load_balancer_name}"):
+                 # Lookup ALB by ARN from context
+                 alb_arn = get_context_str(f"arn:{load_balancer_name}")
+                 if not alb_arn:
+                     raise ValueError(f"Context value 'arn:{load_balancer_name}' is required if ALB exists.")
+                 alb = elbv2.ApplicationLoadBalancer.from_lookup(
+                     self, "ALB",  # Logical ID
+                     load_balancer_arn=alb_arn
+                 )
+                 print(f"Using existing Application Load Balancer {load_balancer_name}.")
+             else:
+                 alb = elbv2.ApplicationLoadBalancer(
+                     self,
+                     "ALB",  # Logical ID
+                     load_balancer_name=load_balancer_name,  # Explicit resource name
+                     vpc=vpc,
+                     internet_facing=True,
+                     security_group=alb_security_group,  # Link to SG
+                     vpc_subnets=public_subnet_selection  # Link to subnets
+                 )
+                 print("Successfully created new Application Load Balancer")
+         except Exception as e:
+             raise Exception(f"Could not handle application load balancer due to: {e}")
+
+ # --- Cognito User Pool ---
+ try:
+ if get_context_bool(f"exists:{COGNITO_USER_POOL_NAME}"):
+ # Look up by ID from context
+ user_pool_id = get_context_str(f"id:{COGNITO_USER_POOL_NAME}")
+ if not user_pool_id:
+ raise ValueError(f"Context value 'id:{COGNITO_USER_POOL_NAME}' is required if User Pool exists.")
+ user_pool = cognito.UserPool.from_user_pool_id(self, "UserPool", user_pool_id=user_pool_id)
+ print(f"Using existing user pool {user_pool_id}.")
+ else:
+ user_pool = cognito.UserPool(self, "UserPool",
+ user_pool_name=COGNITO_USER_POOL_NAME,
+ mfa=cognito.Mfa.OFF, # Adjust as needed
+ sign_in_aliases=cognito.SignInAliases(email=True),
+ removal_policy=RemovalPolicy.DESTROY) # Adjust as needed
+ print(f"Created new user pool {user_pool.user_pool_id}.")
+
+ # If a certificate is supplied, assume the ALB's built-in Cognito login is used.
+ # The client then needs an extra redirect URL to receive the token returned by Cognito authentication.
+ if ACM_CERTIFICATE_ARN:
+ redirect_uris = [COGNITO_REDIRECTION_URL, COGNITO_REDIRECTION_URL + "/oauth2/idpresponse"]
+ else:
+ redirect_uris = [COGNITO_REDIRECTION_URL]
+
+ user_pool_client_name = COGNITO_USER_POOL_CLIENT_NAME
+ if get_context_bool(f"exists:{user_pool_client_name}"):
+ # Look up by ID from context (requires the User Pool object)
+ user_pool_client_id = get_context_str(f"id:{user_pool_client_name}")
+ if not user_pool_client_id:
+ raise ValueError(f"Context value 'id:{user_pool_client_name}' is required if User Pool Client exists.")
+ user_pool_client = cognito.UserPoolClient.from_user_pool_client_id(self, "UserPoolClient", user_pool_client_id=user_pool_client_id)
+ print(f"Using existing user pool client {user_pool_client_id}.")
+ else:
+ user_pool_client = cognito.UserPoolClient(self, "UserPoolClient",
+ auth_flows=cognito.AuthFlow(user_srp=True, user_password=True), # Example: enable SRP for secure sign-in
+ user_pool=user_pool,
+ generate_secret=True,
+ user_pool_client_name=user_pool_client_name,
+ supported_identity_providers=[cognito.UserPoolClientIdentityProvider.COGNITO],
+ o_auth=cognito.OAuthSettings(
+ flows=cognito.OAuthFlows(authorization_code_grant=True),
+ scopes=[cognito.OAuthScope.OPENID, cognito.OAuthScope.EMAIL, cognito.OAuthScope.PROFILE],
+ callback_urls=redirect_uris
+ )
+ )
+
+ CfnOutput(self, "CognitoAppClientId", value=user_pool_client.user_pool_client_id)
+
+ print(f"Created new user pool client {user_pool_client.user_pool_client_id}.")
+
+ # Add a domain to the User Pool (crucial for ALB integration)
+ user_pool_domain = user_pool.add_domain(
+ "UserPoolDomain",
+ cognito_domain=cognito.CognitoDomainOptions(
+ domain_prefix=COGNITO_USER_POOL_DOMAIN_PREFIX)
+ )
+
+ # Apply removal_policy to the created UserPoolDomain construct
+ user_pool_domain.apply_removal_policy(policy=RemovalPolicy.DESTROY)
+
+ CfnOutput(self, "CognitoUserPoolLoginUrl", value=user_pool_domain.base_url())
+
+ except Exception as e:
+ raise Exception(f"Could not handle Cognito resources due to: {e}") from e
+
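+ # For reference, the hosted UI domain created above takes the form
+ # https://<COGNITO_USER_POOL_DOMAIN_PREFIX>.auth.<region>.amazoncognito.com,
+ # which is what the CognitoUserPoolLoginUrl output reports via base_url().
+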
+ # --- Secrets Manager Secret ---
+ try:
+ secret_name = COGNITO_USER_POOL_CLIENT_SECRET_NAME
+ if get_context_bool(f"exists:{secret_name}"):
+ # Look up by name
+ secret = secretsmanager.Secret.from_secret_name_v2(self, "CognitoSecret", secret_name=secret_name)
+ print(f"Using existing Secret {secret_name}.")
+ else:
+ secret = secretsmanager.Secret(self, "CognitoSecret", # Logical ID
+ secret_name=secret_name, # Explicit resource name
+ secret_object_value={
+ "REDACTION_USER_POOL_ID": SecretValue.unsafe_plain_text(user_pool.user_pool_id), # Use the CDK attribute
+ "REDACTION_CLIENT_ID": SecretValue.unsafe_plain_text(user_pool_client.user_pool_client_id), # Use the CDK attribute
+ "REDACTION_CLIENT_SECRET": user_pool_client.user_pool_client_secret # Use the CDK attribute
+ }
+ )
+ print(f"Created new secret {secret_name}.")
+
+ except Exception as e:
+ raise Exception(f"Could not handle Secrets Manager secret due to: {e}") from e
+
+ # --- Fargate Task Definition ---
+ try:
+ # Re-creating a task definition with the same logical ID creates a new revision.
+ # To use a *specific existing revision*, it would need to be looked up by ARN;
+ # defining it here is the standard way to keep revision management in CDK.
+ fargate_task_definition_name = FARGATE_TASK_DEFINITION_NAME
+
+ read_only_file_system = ECS_READ_ONLY_FILE_SYSTEM == 'True'
+
+ # Define the volume name up front so both branches below can reference it
+ ephemeral_storage_volume_name = "appEphemeralVolume"
+
+ if os.path.exists(TASK_DEFINITION_FILE_LOCATION):
+ with open(TASK_DEFINITION_FILE_LOCATION) as f: # Use correct path
+ task_def_params = json.load(f)
+ # Need to ensure taskRoleArn and executionRoleArn in the JSON are correct ARN strings
+ else:
+ task_def_params = {}
+ task_def_params['taskRoleArn'] = task_role.role_arn # Use CDK role object ARN
+ task_def_params['executionRoleArn'] = execution_role.role_arn # Use CDK role object ARN
+ task_def_params['memory'] = ECS_TASK_MEMORY_SIZE
+ task_def_params['cpu'] = ECS_TASK_CPU_SIZE
+ # With a read-only root filesystem, every path the app writes to must be an
+ # explicit mount onto the task's ephemeral volume.
+ writable_paths = [
+ "/home/user/app/logs",
+ "/home/user/app/feedback",
+ "/home/user/app/usage",
+ "/home/user/app/input",
+ "/home/user/app/output",
+ "/home/user/app/tmp",
+ "/home/user/app/config",
+ "/tmp/matplotlib_cache",
+ "/tmp",
+ "/var/tmp",
+ "/tmp/tld",
+ "/tmp/gradio_tmp"
+ ]
+ container_def = {
+ "name": full_ecr_repo_name,
+ "image": ecr_image_loc + ":latest",
+ "essential": True,
+ "portMappings": [{"containerPort": int(GRADIO_SERVER_PORT), "hostPort": int(GRADIO_SERVER_PORT), "protocol": "tcp", "appProtocol": "http"}],
+ "logConfiguration": {"logDriver": "awslogs", "options": {"awslogs-group": ECS_LOG_GROUP_NAME, "awslogs-region": AWS_REGION, "awslogs-stream-prefix": "ecs"}},
+ "environmentFiles": [{"value": bucket.bucket_arn + "/config.env", "type": "s3"}],
+ "memoryReservation": int(task_def_params['memory']) - 512, # Soft memory limit, leaving 512 MiB of headroom
+ "mountPoints": [
+ {"sourceVolume": ephemeral_storage_volume_name, "containerPath": path, "readOnly": False}
+ for path in writable_paths
+ ],
+ "readonlyRootFilesystem": read_only_file_system,
+ }
+ task_def_params['containerDefinitions'] = [container_def]
+
+ log_group_name_from_config = task_def_params['containerDefinitions'][0]['logConfiguration']['options']['awslogs-group']
+
+ cdk_managed_log_group = logs.LogGroup(self, "MyTaskLogGroup", # CDK Logical ID
+ log_group_name=log_group_name_from_config,
+ retention=logs.RetentionDays.ONE_MONTH, # Example: set retention
+ removal_policy=RemovalPolicy.DESTROY # If you want it deleted when stack is deleted
+ )
+
+ ephemeral_storage_volume_cdk_obj = ecs.Volume(
+ name=ephemeral_storage_volume_name
+ )
+
+ fargate_task_definition = ecs.FargateTaskDefinition(
+ self,
+ "FargateTaskDefinition", # Logical ID
+ family=fargate_task_definition_name,
+ cpu=int(task_def_params['cpu']),
+ memory_limit_mib=int(task_def_params['memory']),
+ task_role=task_role,
+ execution_role=execution_role,
+ runtime_platform=ecs.RuntimePlatform(
+ cpu_architecture=ecs.CpuArchitecture.X86_64,
+ operating_system_family=ecs.OperatingSystemFamily.LINUX
+ ),
+ # 1. Specify the total ephemeral storage for the task
+ ephemeral_storage_gib=21, # Minimum is 21 GiB
+ # 2. Define the volume at the task level.
+ # This volume will use the ephemeral storage configured above.
+ volumes=[ephemeral_storage_volume_cdk_obj]
+ )
+ print("Fargate task definition defined.")
+
+ # Add container definitions to the task definition object
+ if task_def_params['containerDefinitions']:
+ container_def_params = task_def_params['containerDefinitions'][0]
+
+ env_files = []
+ if container_def_params.get('environmentFiles'):
+ for env_file_param in container_def_params['environmentFiles']:
+ # Parse the ARN to get the bucket name and object key
+ env_file_arn_parts = env_file_param['value'].split(":::")
+ bucket_name_and_key = env_file_arn_parts[-1]
+ env_bucket_name, env_key = bucket_name_and_key.split("/", 1)
+
+ env_file = ecs.EnvironmentFile.from_bucket(bucket, env_key)
+
+ env_files.append(env_file)
+
+ container = fargate_task_definition.add_container(
+ container_def_params['name'],
+ image=ecs.ContainerImage.from_registry(container_def_params['image']),
+ logging=ecs.LogDriver.aws_logs(
+ stream_prefix=container_def_params['logConfiguration']['options']['awslogs-stream-prefix'],
+ log_group=cdk_managed_log_group
+ ),
+ secrets={
+ "AWS_USER_POOL_ID": ecs.Secret.from_secrets_manager(secret, "REDACTION_USER_POOL_ID"),
+ "AWS_CLIENT_ID": ecs.Secret.from_secrets_manager(secret, "REDACTION_CLIENT_ID"),
+ "AWS_CLIENT_SECRET": ecs.Secret.from_secrets_manager(secret, "REDACTION_CLIENT_SECRET")
+ },
+ environment_files=env_files,
+ readonly_root_filesystem=read_only_file_system
+ )
+
+ for port_mapping in container_def_params['portMappings']:
+ container.add_port_mappings(
+ ecs.PortMapping(
+ container_port=int(port_mapping['containerPort']),
+ host_port=int(port_mapping['hostPort']),
+ name="port-" + str(port_mapping['containerPort']),
+ app_protocol=ecs.AppProtocol.http,
+ protocol=ecs.Protocol.TCP
+ )
+ )
+
+ container.add_port_mappings(ecs.PortMapping(
+ container_port=80,
+ host_port=80,
+ name="port-80",
+ app_protocol=ecs.AppProtocol.http,
+ protocol=ecs.Protocol.TCP
+ ))
+
+ if container_def_params.get('mountPoints'):
+ mount_points = []
+ for mount_point in container_def_params['mountPoints']:
+ mount_points.append(ecs.MountPoint(container_path=mount_point['containerPath'], read_only=mount_point['readOnly'], source_volume=ephemeral_storage_volume_name))
+ container.add_mount_points(*mount_points)
+
+ except Exception as e:
+ raise Exception(f"Could not handle Fargate task definition due to: {e}") from e
+
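+ # A hedged sketch (not used by this stack) of pinning a *specific* existing
+ # task definition revision by ARN instead of defining a new revision here:
+ # imported_td = ecs.TaskDefinition.from_task_definition_arn(
+ #     self, "ImportedTaskDef",
+ #     "arn:aws:ecs:<region>:<account>:task-definition/<family>:<revision>")
+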
+ # --- ECS Cluster ---
+ try:
+ cluster = ecs.Cluster(
+ self,
+ "ECSCluster", # Logical ID
+ cluster_name=CLUSTER_NAME, # Explicit resource name
+ enable_fargate_capacity_providers=True,
+ vpc=vpc
+ )
+ print("Successfully created new ECS cluster")
+ except Exception as e:
+ raise Exception(f"Could not handle ECS cluster due to: {e}") from e
+
+ # --- ECS Service ---
+ try:
+ ecs_service_name = ECS_SERVICE_NAME
+
+ if ECS_USE_FARGATE_SPOT == 'True':
+ use_fargate_spot = "FARGATE_SPOT"
+ else:
+ use_fargate_spot = "FARGATE"
+
+ # Check if the service exists - from_service_arn or from_service_attributes (needs the cluster)
+ try:
+ # from_service_attributes is useful if you have the cluster object
+ ecs_service = ecs.FargateService.from_service_attributes(
+ self, "ECSService", # Logical ID
+ cluster=cluster, # Requires the cluster object
+ service_name=ecs_service_name
+ )
+ print(f"Using existing ECS service {ecs_service_name}.")
+ except Exception:
+ # The service is created with a desired count of 0, because the initial
+ # Docker container has not yet been built with CodeBuild
+ ecs_service = ecs.FargateService(
+ self,
+ "ECSService", # Logical ID
+ service_name=ecs_service_name, # Explicit resource name
+ platform_version=ecs.FargatePlatformVersion.LATEST,
+ capacity_provider_strategies=[ecs.CapacityProviderStrategy(capacity_provider=use_fargate_spot, base=0, weight=1)],
+ cluster=cluster,
+ task_definition=fargate_task_definition, # Link to TD
+ security_groups=[ecs_security_group], # Link to SG
+ vpc_subnets=ec2.SubnetSelection(subnets=self.private_subnets), # Link to subnets
+ min_healthy_percent=0,
+ max_healthy_percent=100,
+ desired_count=0
+ )
+ print("Successfully created new ECS service")
+
+ # Note: Auto-scaling setup would typically go here if needed for the service
+
+ except Exception as e:
+ raise Exception(f"Could not handle ECS service due to: {e}") from e
+
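+ # The service starts with desired_count=0 on purpose: the image does not exist
+ # until CodeBuild has run. cdk/post_cdk_build_quickstart.py later calls
+ # start_ecs_task to bring the service up once the container is built.
+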
+ # --- Grant Secret Read Access (Applies to both created and imported roles) ---
+ try:
+ secret.grant_read(task_role)
+ secret.grant_read(execution_role)
+ except Exception as e:
+ raise Exception(f"Could not grant access to Secrets Manager due to: {e}") from e
+
+ # --- ALB TARGET GROUPS AND LISTENERS ---
+ # This section should primarily define the resources if they are managed by this stack.
+ # CDK handles adding/removing targets and actions on updates.
+ # If they might pre-exist outside the stack, you need lookups.
+ cookie_duration = Duration.hours(12)
+ target_group_name = ALB_TARGET_GROUP_NAME # Explicit resource name
+ cloudfront_distribution_url = "cloudfront_placeholder.net" # Replace afterwards with the actual cloudfront_distribution.domain_name
+
+ try:
+ # --- CREATING TARGET GROUPS AND ADDING THE CLOUDFRONT LISTENER RULE ---
+
+ target_group = elbv2.ApplicationTargetGroup(
+ self,
+ "AppTargetGroup", # Logical ID
+ target_group_name=target_group_name, # Explicit resource name
+ port=int(GRADIO_SERVER_PORT), # Ensure port is int
+ protocol=elbv2.ApplicationProtocol.HTTP,
+ targets=[ecs_service], # Link to ECS Service
+ stickiness_cookie_duration=cookie_duration,
+ vpc=vpc, # Target Groups need VPC
+ )
+ print(f"ALB target group {target_group_name} defined.")
+
+ # First HTTP
+ listener_port = 80
+ # Check if the listener exists - from_listener_arn or lookup by port/ALB
+
+ http_listener = alb.add_listener(
+ "HttpListener", # Logical ID
+ port=listener_port,
+ open=False, # Be cautious with open=True; usually restrict the source SG
+ )
+ print(f"ALB listener on port {listener_port} defined.")
+
+ if ACM_CERTIFICATE_ARN:
+ http_listener.add_action(
+ "DefaultAction", # Logical ID for the default action
+ action=elbv2.ListenerAction.redirect(protocol='HTTPS',
+ host='#{host}',
+ port='443',
+ path='/#{path}',
+ query='#{query}')
+ )
+ else:
+ if USE_CLOUDFRONT == 'True':
+ # The following default action can be added for the listener after a host header rule is added to the listener manually in the Console, as noted below.
+ http_listener.add_action(
+ "DefaultAction", # Logical ID for the default action
+ action=elbv2.ListenerAction.fixed_response(
+ status_code=403,
+ content_type="text/plain",
+ message_body="Access denied",
+ ),
+ )
+
+ # Add the Listener Rule for the specific CloudFront Host Header
+ http_listener.add_action(
+ "CloudFrontHostHeaderRule",
+ action=elbv2.ListenerAction.forward(target_groups=[target_group], stickiness_duration=cookie_duration),
+ priority=1, # Example priority. Adjust as needed. Lower is evaluated first.
+ conditions=[
+ elbv2.ListenerCondition.host_headers([cloudfront_distribution_url]) # May have to redefine the URL in the console afterwards if not specified in the config file
+ ]
+ )
+
+ else:
+ # Without CloudFront in front of the ALB, simply forward all traffic to the target group
+ http_listener.add_action(
+ "CloudFrontHostHeaderRule",
+ action=elbv2.ListenerAction.forward(target_groups=[target_group], stickiness_duration=cookie_duration)
+ )
+
+ print("Added targets and actions to ALB HTTP listener.")
+
+ # Now the same for HTTPS if you have an ACM certificate
+ if ACM_CERTIFICATE_ARN:
+ listener_port_https = 443
+ # Check if the listener exists - from_listener_arn or lookup by port/ALB
+
+ https_listener = add_alb_https_listener_with_cert(
+ self,
+ "MyHttpsListener", # Logical ID for the HTTPS listener
+ alb,
+ acm_certificate_arn=ACM_CERTIFICATE_ARN,
+ default_target_group=target_group,
+ enable_cognito_auth=True,
+ cognito_user_pool=user_pool,
+ cognito_user_pool_client=user_pool_client,
+ cognito_user_pool_domain=user_pool_domain,
+ listener_open_to_internet=True,
+ stickiness_cookie_duration=cookie_duration
+ )
+
+ if https_listener:
+ CfnOutput(self, "HttpsListenerArn", value=https_listener.listener_arn)
+
+ print(f"ALB listener on port {listener_port_https} defined.")
+
+ # if USE_CLOUDFRONT == 'True':
+ # # Add default action to the listener
+ # https_listener.add_action(
+ # "DefaultAction", # Logical ID for the default action
+ # action=elbv2.ListenerAction.fixed_response(
+ # status_code=403,
+ # content_type="text/plain",
+ # message_body="Access denied",
+ # ),
+ # )
+
+ # # Add the Listener Rule for the specific CloudFront Host Header
+ # https_listener.add_action(
+ # "CloudFrontHostHeaderRuleHTTPS",
+ # action=elbv2.ListenerAction.forward(target_groups=[target_group], stickiness_duration=cookie_duration),
+ # priority=1, # Example priority. Adjust as needed. Lower is evaluated first.
+ # conditions=[
+ # elbv2.ListenerCondition.host_headers([cloudfront_distribution_url])
+ # ]
+ # )
+ # else:
+ # https_listener.add_action(
+ # "CloudFrontHostHeaderRuleHTTPS",
+ # action=elbv2.ListenerAction.forward(target_groups=[target_group], stickiness_duration=cookie_duration))
+
+ print("Added targets and actions to ALB HTTPS listener.")
+
+ except Exception as e:
+ raise Exception(f"Could not handle ALB target groups and listeners due to: {e}") from e
+
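+ # A hedged follow-up (the exact CLI invocation is an assumption about your
+ # tooling): once the CloudFront stack reports its real domain name, the
+ # placeholder host-header rule above can be updated in place, e.g.:
+ # aws elbv2 modify-rule --rule-arn <cloudfront-host-header-rule-arn> \
+ #     --conditions Field=host-header,Values=<distribution-id>.cloudfront.net
+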
+ # Create WAF to attach to the load balancer
+ try:
+ web_acl_name = LOAD_BALANCER_WEB_ACL_NAME
+ if get_context_bool(f"exists:{web_acl_name}"):
+ # Look up the WAF web ACL ARN from context
+ web_acl_arn = get_context_str(f"arn:{web_acl_name}")
+ if not web_acl_arn:
+ raise ValueError(f"Context value 'arn:{web_acl_name}' is required if Web ACL exists.")
+
+ web_acl = create_web_acl_with_common_rules(self, web_acl_name, waf_scope="REGIONAL") # Assuming it takes scope and name
+ print(f"Handled ALB WAF web ACL {web_acl_name}.")
+ else:
+ web_acl = create_web_acl_with_common_rules(self, web_acl_name, waf_scope="REGIONAL") # Assuming it takes scope and name
+ print(f"Created ALB WAF web ACL {web_acl_name}.")
+
+ alb_waf_association = wafv2.CfnWebACLAssociation(self, id="alb_waf_association", resource_arn=alb.load_balancer_arn, web_acl_arn=web_acl.attr_arn)
+
+ except Exception as e:
+ raise Exception(f"Could not create ALB WAF web ACL due to: {e}") from e
+
+ # --- Outputs for other stacks/regions ---
+
+ self.params = dict()
+ self.params["alb_arn_output"] = alb.load_balancer_arn
+ self.params["alb_security_group_id"] = alb_security_group.security_group_id
+ self.params["alb_dns_name"] = alb.load_balancer_dns_name
+
+ CfnOutput(self, "AlbArnOutput",
+ value=alb.load_balancer_arn,
+ description="ARN of the Application Load Balancer",
+ export_name=f"{self.stack_name}-AlbArn") # Export name must be unique within the account/region
+
+ CfnOutput(self, "AlbSecurityGroupIdOutput",
+ value=alb_security_group.security_group_id,
+ description="ID of the ALB's Security Group",
+ export_name=f"{self.stack_name}-AlbSgId")
+ CfnOutput(self, "ALBName", value=alb.load_balancer_name)
+
+ CfnOutput(self, "RegionalAlbDnsName", value=alb.load_balancer_dns_name)
+
+ CfnOutput(self, "CognitoPoolId", value=user_pool.user_pool_id)
+ # Add other outputs if needed
+
+ CfnOutput(self, "ECRRepoUri", value=ecr_repo.repository_uri)
+
+ # --- CLOUDFRONT DISTRIBUTION in separate stack (us-east-1 required) ---
+ class CdkStackCloudfront(Stack):
+
+ def __init__(self, scope: Construct, construct_id: str, alb_arn: str, alb_sec_group_id: str, alb_dns_name: str, **kwargs) -> None:
+ super().__init__(scope, construct_id, **kwargs)
+
+ # --- Helpers to get context values ---
+ def get_context_bool(key: str, default: bool = False) -> bool:
+ return self.node.try_get_context(key) or default
+
+ def get_context_str(key: str, default: str = None) -> str:
+ return self.node.try_get_context(key) or default
+
+ def get_context_dict(scope: Construct, key: str, default: dict = None) -> dict:
+ return scope.node.try_get_context(key) or default
+
+ print(f"CloudFront Stack: Received ALB ARN: {alb_arn}")
+ print(f"CloudFront Stack: Received ALB Security Group ID: {alb_sec_group_id}")
+
+ if not alb_arn:
+ raise ValueError("ALB ARN must be provided to CloudFront stack")
+ if not alb_sec_group_id:
+ raise ValueError("ALB Security Group ID must be provided to CloudFront stack")
+
+ # Import the ALB using its ARN.
+ # This imports an existing ALB as a construct in the CloudFront stack's context;
+ # CloudFormation will understand this reference at deploy time.
+ alb = elbv2.ApplicationLoadBalancer.from_application_load_balancer_attributes(
+ self, "ImportedAlb", load_balancer_arn=alb_arn, security_group_id=alb_sec_group_id, load_balancer_dns_name=alb_dns_name
+ )
+
+ try:
+ web_acl_name = WEB_ACL_NAME
+ if get_context_bool(f"exists:{web_acl_name}"):
+ # Look up the WAF web ACL ARN from context
+ web_acl_arn = get_context_str(f"arn:{web_acl_name}")
+ if not web_acl_arn:
+ raise ValueError(f"Context value 'arn:{web_acl_name}' is required if Web ACL exists.")
+
+ web_acl = create_web_acl_with_common_rules(self, web_acl_name) # Assuming it takes scope and name
+ print(f"Handled CloudFront WAF web ACL {web_acl_name}.")
+ else:
+ web_acl = create_web_acl_with_common_rules(self, web_acl_name) # Assuming it takes scope and name
+ print(f"Created CloudFront WAF web ACL {web_acl_name}.")
+
+ # Add ALB as CloudFront Origin
+ origin = origins.LoadBalancerV2Origin(
+ alb, # Use the created or looked-up ALB object
+ custom_headers={CUSTOM_HEADER: CUSTOM_HEADER_VALUE},
+ origin_shield_enabled=False,
+ protocol_policy=cloudfront.OriginProtocolPolicy.HTTP_ONLY,
+ )
+
+ if CLOUDFRONT_GEO_RESTRICTION:
+ geo_restrict = cloudfront.GeoRestriction.allowlist(CLOUDFRONT_GEO_RESTRICTION)
+ else:
+ geo_restrict = None
+
+ cloudfront_distribution = cloudfront.Distribution(
+ self,
+ "CloudFrontDistribution", # Logical ID
+ comment=CLOUDFRONT_DISTRIBUTION_NAME, # Use name as comment for easier identification
+ geo_restriction=geo_restrict,
+ default_behavior=cloudfront.BehaviorOptions(
+ origin=origin,
+ viewer_protocol_policy=cloudfront.ViewerProtocolPolicy.REDIRECT_TO_HTTPS,
+ allowed_methods=cloudfront.AllowedMethods.ALLOW_ALL,
+ cache_policy=cloudfront.CachePolicy.CACHING_DISABLED,
+ origin_request_policy=cloudfront.OriginRequestPolicy.ALL_VIEWER,
+ ),
+ web_acl_id=web_acl.attr_arn
+ )
+ print(f"CloudFront distribution {CLOUDFRONT_DISTRIBUTION_NAME} defined.")
+
+ except Exception as e:
+ raise Exception(f"Could not handle CloudFront distribution due to: {e}") from e
+
+ # --- Outputs ---
+ CfnOutput(self, "CloudFrontDistributionURL",
+ value=cloudfront_distribution.domain_name)
+
cdk/check_resources.py ADDED
@@ -0,0 +1,297 @@
+ import json
+ import os
+ from cdk_config import CDK_PREFIX, VPC_NAME, AWS_REGION, PUBLIC_SUBNETS_TO_USE, PRIVATE_SUBNETS_TO_USE, CODEBUILD_ROLE_NAME, ECS_TASK_ROLE_NAME, ECS_TASK_EXECUTION_ROLE_NAME, S3_LOG_CONFIG_BUCKET_NAME, S3_OUTPUT_BUCKET_NAME, ECR_CDK_REPO_NAME, CODEBUILD_PROJECT_NAME, ALB_NAME, COGNITO_USER_POOL_NAME, COGNITO_USER_POOL_CLIENT_NAME, COGNITO_USER_POOL_CLIENT_SECRET_NAME, WEB_ACL_NAME, CONTEXT_FILE, PUBLIC_SUBNET_CIDR_BLOCKS, PRIVATE_SUBNET_CIDR_BLOCKS, PUBLIC_SUBNET_AVAILABILITY_ZONES, PRIVATE_SUBNET_AVAILABILITY_ZONES, CDK_FOLDER, CDK_CONFIG_PATH # Import necessary config
+ from cdk_functions import ( # Import your check functions (assuming they use Boto3)
+ get_vpc_id_by_name,
+ check_subnet_exists_by_name,
+ check_for_existing_role,
+ check_s3_bucket_exists,
+ check_ecr_repo_exists,
+ check_codebuild_project_exists,
+ check_alb_exists,
+ check_for_existing_user_pool,
+ check_for_existing_user_pool_client,
+ check_for_secret,
+ check_cloudfront_distribution_exists,
+ check_web_acl_exists,
+ _get_existing_subnets_in_vpc,
+ validate_subnet_creation_parameters
+ # Add other check functions as needed
+ )
+
+ from typing import List, Dict, Any
+
+ cdk_folder = CDK_FOLDER # <FULL_PATH_TO_CDK_FOLDER_HERE>
+
+ # Full path needed to find config file
+ os.environ["CDK_CONFIG_PATH"] = cdk_folder + CDK_CONFIG_PATH
+
+ # --- Helper to parse environment variables into lists ---
+ def _get_env_list(env_var_value: str) -> List[str]:
+ """Parses a bracketed, comma-separated environment variable value into a list of strings."""
+ # Strip the surrounding brackets, then any quote characters
+ value = env_var_value[1:-1].strip().replace('\"', '').replace("\'", "")
+ if not value:
+ return []
+ # Split by comma and filter out any empty strings that might result from extra commas
+ return [s.strip() for s in value.split(',') if s.strip()]
+
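+ # For example, given the raw value read from the .env file:
+ # _get_env_list('["subnet-a", "subnet-b"]') -> ['subnet-a', 'subnet-b']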
+
+ if PUBLIC_SUBNETS_TO_USE and not isinstance(PUBLIC_SUBNETS_TO_USE, list): PUBLIC_SUBNETS_TO_USE = _get_env_list(PUBLIC_SUBNETS_TO_USE)
+ if PRIVATE_SUBNETS_TO_USE and not isinstance(PRIVATE_SUBNETS_TO_USE, list): PRIVATE_SUBNETS_TO_USE = _get_env_list(PRIVATE_SUBNETS_TO_USE)
+ if PUBLIC_SUBNET_CIDR_BLOCKS and not isinstance(PUBLIC_SUBNET_CIDR_BLOCKS, list): PUBLIC_SUBNET_CIDR_BLOCKS = _get_env_list(PUBLIC_SUBNET_CIDR_BLOCKS)
+ if PUBLIC_SUBNET_AVAILABILITY_ZONES and not isinstance(PUBLIC_SUBNET_AVAILABILITY_ZONES, list): PUBLIC_SUBNET_AVAILABILITY_ZONES = _get_env_list(PUBLIC_SUBNET_AVAILABILITY_ZONES)
+ if PRIVATE_SUBNET_CIDR_BLOCKS and not isinstance(PRIVATE_SUBNET_CIDR_BLOCKS, list): PRIVATE_SUBNET_CIDR_BLOCKS = _get_env_list(PRIVATE_SUBNET_CIDR_BLOCKS)
+ if PRIVATE_SUBNET_AVAILABILITY_ZONES and not isinstance(PRIVATE_SUBNET_AVAILABILITY_ZONES, list): PRIVATE_SUBNET_AVAILABILITY_ZONES = _get_env_list(PRIVATE_SUBNET_AVAILABILITY_ZONES)
+
+ # Check for the existence of elements in your AWS environment to see whether it is necessary to create new versions of the same resources
+
+ def check_and_set_context():
+ context_data = {}
+
+ # --- Find the VPC ID first ---
+ print("VPC_NAME:", VPC_NAME)
+ vpc_id, nat_gateways = get_vpc_id_by_name(VPC_NAME)
+
+ # If you expect only one, or one per AZ and you're creating one per AZ in CDK:
+ if nat_gateways:
+ # For simplicity, just check whether *any* NAT gateway exists in the VPC.
+ # A more robust check would match by subnet, AZ, or a specific tag.
+ context_data["exists:NatGateway"] = True
+ context_data["id:NatGateway"] = nat_gateways[0]['NatGatewayId'] # Store the ID of the first one found
+ else:
+ context_data["exists:NatGateway"] = False
+ context_data["id:NatGateway"] = None
+
+ if not vpc_id:
+ # If the VPC doesn't exist, subnets can be neither checked nor created.
+ raise RuntimeError(f"Required VPC '{VPC_NAME}' not found. Cannot proceed with subnet checks.")
+
+ context_data["vpc_id"] = vpc_id # Store VPC ID in context
+
+ # SUBNET CHECKS
+ # (Do not re-initialise context_data here - it already holds the VPC and NAT results.)
+ all_proposed_subnets_data: List[Dict[str, str]] = []
+
+ # Flag to indicate if full validation mode (with CIDR/AZs) is active
+ full_validation_mode = False
+
+ # Determine if full validation mode is possible/desired.
+ # It's 'desired' if CIDRs/AZs are provided and their lengths match the name lists.
+ public_ready_for_full_validation = (
+ len(PUBLIC_SUBNETS_TO_USE) > 0 and
+ len(PUBLIC_SUBNET_CIDR_BLOCKS) == len(PUBLIC_SUBNETS_TO_USE) and
+ len(PUBLIC_SUBNET_AVAILABILITY_ZONES) == len(PUBLIC_SUBNETS_TO_USE)
+ )
+ private_ready_for_full_validation = (
+ len(PRIVATE_SUBNETS_TO_USE) > 0 and
+ len(PRIVATE_SUBNET_CIDR_BLOCKS) == len(PRIVATE_SUBNETS_TO_USE) and
+ len(PRIVATE_SUBNET_AVAILABILITY_ZONES) == len(PRIVATE_SUBNETS_TO_USE)
+ )
+
+ # Activate full validation if *any* type of subnet (public or private) has its full details provided.
+ # You might adjust this logic if you require ALL subnet types to have CIDRs, or NONE.
+ if public_ready_for_full_validation or private_ready_for_full_validation:
+ full_validation_mode = True
+
+ # If some are ready but others aren't, print a warning or raise an error based on your strictness
+ if public_ready_for_full_validation and not private_ready_for_full_validation and PRIVATE_SUBNETS_TO_USE:
+ print("Warning: Public subnets have CIDRs/AZs, but private subnets do not. Only public will be fully validated/created with CIDRs.")
+ if private_ready_for_full_validation and not public_ready_for_full_validation and PUBLIC_SUBNETS_TO_USE:
+ print("Warning: Private subnets have CIDRs/AZs, but public subnets do not. Only private will be fully validated/created with CIDRs.")
+
+ # Prepare data for validate_subnet_creation_parameters for all subnets that have full details
+ if public_ready_for_full_validation:
+ for i, name in enumerate(PUBLIC_SUBNETS_TO_USE):
+ all_proposed_subnets_data.append({
+ 'name': name,
+ 'cidr': PUBLIC_SUBNET_CIDR_BLOCKS[i],
+ 'az': PUBLIC_SUBNET_AVAILABILITY_ZONES[i]
+ })
+ if private_ready_for_full_validation:
+ for i, name in enumerate(PRIVATE_SUBNETS_TO_USE):
+ all_proposed_subnets_data.append({
+ 'name': name,
+ 'cidr': PRIVATE_SUBNET_CIDR_BLOCKS[i],
+ 'az': PRIVATE_SUBNET_AVAILABILITY_ZONES[i]
+ })
+
+ print(f"Target VPC ID for Boto3 lookup: {vpc_id}")
+
+ # Fetch all existing subnets in the target VPC once to avoid repeated API calls
+ try:
+ existing_aws_subnets = _get_existing_subnets_in_vpc(vpc_id)
+ except Exception as e:
+ print(f"Failed to fetch existing VPC subnets. Aborting. Error: {e}")
+ raise SystemExit(1) # Exit immediately if we can't get baseline data
+
+ print("\n--- Running Name-Only Subnet Existence Check Mode ---")
+ # First pass: check only by name using the existing data
+ checked_public_subnets = {}
+ if PUBLIC_SUBNETS_TO_USE:
+ for subnet_name in PUBLIC_SUBNETS_TO_USE:
+ print("subnet_name:", subnet_name)
+ exists, subnet_id = check_subnet_exists_by_name(subnet_name, existing_aws_subnets)
+ checked_public_subnets[subnet_name] = {"exists": exists, "id": subnet_id}
+
+ # If the subnet exists, remove it from the proposed subnets list
+ if exists:
+ all_proposed_subnets_data = [
+ subnet for subnet in all_proposed_subnets_data
+ if subnet['name'] != subnet_name
+ ]
+
+ context_data["checked_public_subnets"] = checked_public_subnets
+
+ checked_private_subnets = {}
+ if PRIVATE_SUBNETS_TO_USE:
+ for subnet_name in PRIVATE_SUBNETS_TO_USE:
+ print("subnet_name:", subnet_name)
+ exists, subnet_id = check_subnet_exists_by_name(subnet_name, existing_aws_subnets)
+ checked_private_subnets[subnet_name] = {"exists": exists, "id": subnet_id}
+
+ # If the subnet exists, remove it from the proposed subnets list
+ if exists:
+ all_proposed_subnets_data = [
+ subnet for subnet in all_proposed_subnets_data
+ if subnet['name'] != subnet_name
+ ]
+
+ context_data["checked_private_subnets"] = checked_private_subnets
+
+ print("\nName-only subnet existence check complete.\n")
+
+ if full_validation_mode:
+ print("\n--- Running in Full Subnet Validation Mode (CIDR/AZs provided) ---")
+ try:
+ validate_subnet_creation_parameters(vpc_id, all_proposed_subnets_data, existing_aws_subnets)
+ print("\nPre-synth validation successful. Proceeding with CDK synth.\n")
+
+ # Populate context_data for downstream CDK construct creation
+ context_data["public_subnets_to_create"] = []
+ if public_ready_for_full_validation:
+ for i, name in enumerate(PUBLIC_SUBNETS_TO_USE):
+ context_data["public_subnets_to_create"].append({
+ 'name': name,
+ 'cidr': PUBLIC_SUBNET_CIDR_BLOCKS[i],
+ 'az': PUBLIC_SUBNET_AVAILABILITY_ZONES[i],
+ 'is_public': True
+ })
+ context_data["private_subnets_to_create"] = []
+ if private_ready_for_full_validation:
+ for i, name in enumerate(PRIVATE_SUBNETS_TO_USE):
+ context_data["private_subnets_to_create"].append({
+ 'name': name,
+ 'cidr': PRIVATE_SUBNET_CIDR_BLOCKS[i],
+ 'az': PRIVATE_SUBNET_AVAILABILITY_ZONES[i],
+ 'is_public': False
+ })
+
+ except Exception as e:
+ print(f"\nFATAL ERROR: Subnet parameter validation failed: {e}\n")
+ raise SystemExit(1) # Exit if validation fails
+
+ # Example checks and setting context values
+ # IAM Roles
+ role_name = CODEBUILD_ROLE_NAME
+ exists, role_arn, _ = check_for_existing_role(role_name)
+ context_data[f"exists:{role_name}"] = exists # Use boolean
+ if exists:
+ context_data[f"arn:{role_name}"] = role_arn
+
+ role_name = ECS_TASK_ROLE_NAME
+ exists, role_arn, _ = check_for_existing_role(role_name)
+ context_data[f"exists:{role_name}"] = exists
+ if exists:
+ context_data[f"arn:{role_name}"] = role_arn
+
+ role_name = ECS_TASK_EXECUTION_ROLE_NAME
+ exists, role_arn, _ = check_for_existing_role(role_name)
+ context_data[f"exists:{role_name}"] = exists
+ if exists:
+ context_data[f"arn:{role_name}"] = role_arn
+
+ # S3 Buckets - an existence flag is enough, since from_bucket_name needs no ARN
+ bucket_name = S3_LOG_CONFIG_BUCKET_NAME
+ exists, _ = check_s3_bucket_exists(bucket_name)
+ context_data[f"exists:{bucket_name}"] = exists
+
+ output_bucket_name = S3_OUTPUT_BUCKET_NAME
+ exists, _ = check_s3_bucket_exists(output_bucket_name)
+ context_data[f"exists:{output_bucket_name}"] = exists
+
+ # ECR Repository - from_repository_name is sufficient
+ repo_name = ECR_CDK_REPO_NAME
+ exists, _ = check_ecr_repo_exists(repo_name)
+ context_data[f"exists:{repo_name}"] = exists
+
+ # CodeBuild Project
+ project_name = CODEBUILD_PROJECT_NAME
+ exists, project_arn = check_codebuild_project_exists(project_name) # Assuming it returns the ARN
+ context_data[f"exists:{project_name}"] = exists
+ if exists:
+ context_data[f"arn:{project_name}"] = project_arn
+
+ # ALB (by name lookup)
+ alb_name = ALB_NAME
+ exists, alb_object = check_alb_exists(alb_name, region_name=AWS_REGION) # Assuming check returns the ALB description
+ context_data[f"exists:{alb_name}"] = exists
+ if exists:
+ print("alb_object:", alb_object)
+ context_data[f"arn:{alb_name}"] = alb_object['LoadBalancerArn']
+
+ # Cognito User Pool (by name)
+ user_pool_name = COGNITO_USER_POOL_NAME
+ exists, user_pool_id, _ = check_for_existing_user_pool(user_pool_name)
+ context_data[f"exists:{user_pool_name}"] = exists
+ if exists:
+ context_data[f"id:{user_pool_name}"] = user_pool_id
+
+ # Cognito User Pool Client (by name and pool ID) - requires the User Pool ID from the check above
+ if user_pool_id:
+ user_pool_client_name = COGNITO_USER_POOL_CLIENT_NAME
+ exists, client_id, _ = check_for_existing_user_pool_client(user_pool_client_name, user_pool_id)
+ context_data[f"exists:{user_pool_client_name}"] = exists
+ if exists:
+ context_data[f"id:{user_pool_client_name}"] = client_id
+
+ # Secrets Manager Secret (by name); the ARN is not needed when using from_secret_name_v2
+ secret_name = COGNITO_USER_POOL_CLIENT_SECRET_NAME
+ exists, _ = check_for_secret(secret_name)
+ context_data[f"exists:{secret_name}"] = exists
+
+ # WAF Web ACL (by name and scope)
+ web_acl_name = WEB_ACL_NAME
+ exists, existing_web_acl = check_web_acl_exists(web_acl_name, scope="CLOUDFRONT") # Assuming check returns the ACL object
+ context_data[f"exists:{web_acl_name}"] = exists
+ if exists:
+ context_data[f"arn:{web_acl_name}"] = existing_web_acl.attr_arn
+
+ # Write the context data to the file
+ with open(CONTEXT_FILE, "w") as f:
+ json.dump(context_data, f, indent=2)
+
+ print(f"Context data written to {CONTEXT_FILE}")
+
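+ # A hedged illustration of the JSON this produces (IDs are invented):
+ # {
+ #   "exists:NatGateway": true,
+ #   "id:NatGateway": "nat-0123456789abcdef0",
+ #   "vpc_id": "vpc-0123456789abcdef0",
+ #   "checked_public_subnets": {"my-public-subnet": {"exists": false, "id": null}},
+ #   "exists:<resource-name>": true,
+ #   "arn:<resource-name>": "arn:aws:..."
+ # }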
cdk/post_cdk_build_quickstart.py ADDED
@@ -0,0 +1,27 @@
+ import time
+ from cdk_config import CODEBUILD_PROJECT_NAME, S3_LOG_CONFIG_BUCKET_NAME, CLUSTER_NAME, ECS_SERVICE_NAME
+ from cdk_functions import start_codebuild_build, upload_file_to_s3, start_ecs_task, create_basic_config_env
+ from tqdm import tqdm
+
+ # Create a basic config.env file that the user can use to run the app later. The input is the folder it is saved into.
+ create_basic_config_env("config")
+
+ # Start the CodeBuild build
+ print("Starting CodeBuild project.")
+ start_codebuild_build(PROJECT_NAME=CODEBUILD_PROJECT_NAME)
+
+ # Upload the config.env file to the S3 bucket
+ upload_file_to_s3(local_file_paths="config/config.env", s3_key="", s3_bucket=S3_LOG_CONFIG_BUCKET_NAME)
+
+ total_seconds = 480 # 8 minutes * 60 seconds/minute
+ update_interval = 1 # Update every second
+
+ print("Waiting eight minutes for the CodeBuild container to build.")
+
+ # tqdm renders a progress bar over the wait window; each iteration sleeps briefly
+ for i in tqdm(range(total_seconds), desc="Building container"):
+ time.sleep(update_interval)
+
+ # Start the task on ECS
+ print("Starting ECS task")
+ start_ecs_task(cluster_name=CLUSTER_NAME, service_name=ECS_SERVICE_NAME)
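+
+ # A hedged alternative to the fixed eight-minute wait (assumes direct boto3
+ # access and a build started via start_build rather than the wrapper above):
+ # import boto3
+ # cb = boto3.client("codebuild")
+ # build_id = cb.start_build(projectName=CODEBUILD_PROJECT_NAME)["build"]["id"]
+ # while cb.batch_get_builds(ids=[build_id])["builds"][0]["buildStatus"] == "IN_PROGRESS":
+ #     time.sleep(15)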
cdk/requirements.txt ADDED
@@ -0,0 +1,5 @@
+ aws-cdk-lib==2.200.2
+ boto3==1.38.35
+ pandas==2.2.3
+ nodejs==0.1.1
+ python-dotenv==1.0.1
tools/config.py CHANGED
@@ -64,10 +64,12 @@ def add_folder_to_path(folder_path: str):
 # LOAD CONFIG FROM ENV FILE
 ###
 
- ensure_folder_exists("config/")
+ CONFIG_FOLDER = get_or_create_env_var('CONFIG_FOLDER', 'config/')
+
+ ensure_folder_exists(CONFIG_FOLDER)
 
 # If you have an aws_config env file in the config folder, you can load in app variables this way, e.g. 'config/app_config.env'
- APP_CONFIG_PATH = get_or_create_env_var('APP_CONFIG_PATH', 'config/app_config.env') # e.g. config/app_config.env
+ APP_CONFIG_PATH = get_or_create_env_var('APP_CONFIG_PATH', CONFIG_FOLDER + 'app_config.env') # e.g. config/app_config.env
 
 if APP_CONFIG_PATH:
 if os.path.exists(APP_CONFIG_PATH):
@@ -75,10 +77,6 @@ if APP_CONFIG_PATH:
 load_dotenv(APP_CONFIG_PATH)
 else: print("App config file not found at location:", APP_CONFIG_PATH)
 
-
-
-
-
 ###
 # AWS OPTIONS
 ###
@@ -149,6 +147,12 @@ if OUTPUT_FOLDER == "TEMP" or INPUT_FOLDER == "TEMP":
 if INPUT_FOLDER == "TEMP": INPUT_FOLDER = temp_dir + "/"
 
 
+ GRADIO_TEMP_DIR = get_or_create_env_var('GRADIO_TEMP_DIR', 'tmp/gradio_tmp/') # Default Gradio temp folder
+ MPLCONFIGDIR = get_or_create_env_var('MPLCONFIGDIR', 'tmp/matplotlib_cache/') # Matplotlib cache folder
+
+ ensure_folder_exists(GRADIO_TEMP_DIR)
+ ensure_folder_exists(MPLCONFIGDIR)
+
 ###
 # LOGGING OPTIONS
 ###
@@ -182,7 +186,7 @@ DISPLAY_FILE_NAMES_IN_LOGS = get_or_create_env_var('DISPLAY_FILE_NAMES_IN_LOGS',
 
 CSV_ACCESS_LOG_HEADERS = get_or_create_env_var('CSV_ACCESS_LOG_HEADERS', '') # If blank, uses component labels
 CSV_FEEDBACK_LOG_HEADERS = get_or_create_env_var('CSV_FEEDBACK_LOG_HEADERS', '') # If blank, uses component labels
- CSV_USAGE_LOG_HEADERS = get_or_create_env_var('CSV_USAGE_LOG_HEADERS', '["session_hash_textbox", "doc_full_file_name_textbox", "data_full_file_name_textbox", "actual_time_taken_number", "total_page_count", "textract_query_number", "pii_detection_method", "comprehend_query_number", "cost_code", "textract_handwriting_signature", "host_name_textbox", "text_extraction_method", "is_this_a_textract_api_call"]') # If blank, uses component labels
+ CSV_USAGE_LOG_HEADERS = get_or_create_env_var('CSV_USAGE_LOG_HEADERS', '["session_hash_textbox", "doc_full_file_name_textbox", "data_full_file_name_textbox", "actual_time_taken_number", "total_page_count", "textract_query_number", "pii_detection_method", "comprehend_query_number", "cost_code", "textract_handwriting_signature", "host_name_textbox", "text_extraction_method", "is_this_a_textract_api_call"]') # If blank, uses component labels
 
 ### DYNAMODB logs. Whether to save to DynamoDB, and the headers of the table
 
@@ -310,7 +314,7 @@ COMPRESS_REDACTED_PDF = get_or_create_env_var("COMPRESS_REDACTED_PDF","False") #
 # APP RUN OPTIONS
 ###
 
- TLDEXTRACT_CACHE = get_or_create_env_var('TLDEXTRACT_CACHE', 'tld/.tld_set_snapshot')
+ TLDEXTRACT_CACHE = get_or_create_env_var('TLDEXTRACT_CACHE', 'tmp/tld/')
 try:
 extract = TLDExtract(cache_dir=TLDEXTRACT_CACHE)
 except:
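
Note: the new config defaults above are relative paths. On ECS with a read-only root filesystem, the Fargate task definition earlier in this commit mounts the ephemeral volume at /tmp/gradio_tmp, /tmp/matplotlib_cache and /tmp/tld, so a deployed config.env would typically point at those absolute paths instead (illustrative values, not part of this commit):

GRADIO_TEMP_DIR=/tmp/gradio_tmp/
MPLCONFIGDIR=/tmp/matplotlib_cache/
TLDEXTRACT_CACHE=/tmp/tld/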