seanpedrickcase committed on
Commit
bf7bb79
·
1 Parent(s): 68a91f4

Modified the Dockerfile so that it hopefully no longer needs Lambda overrides. Looking into custom headers from CloudFront to try to get them to work.

Browse files
Files changed (4) hide show
  1. Dockerfile +12 -6
  2. app.py +3 -4
  3. tools/auth.py +6 -2
  4. tools/helper_functions.py +23 -19
Dockerfile CHANGED
@@ -51,12 +51,15 @@ RUN mkdir -p /home/user/app/output \
51
  # Copy installed packages from builder stage
52
  COPY --from=builder /install /usr/local/lib/python3.11/site-packages/
53
 
54
- # Use a conditional entrypoint based on the APP_MODE argument
55
- RUN if [ "$APP_MODE" = "lambda" ]; then \
56
- echo '#!/bin/sh\nexec python -m awslambdaric' > /entrypoint.sh; \
57
- else \
58
- echo '#!/bin/sh\nexec python app.py' > /entrypoint.sh; \
59
- fi && chmod +x /entrypoint.sh
 
 
 
60
 
61
  # Switch to the "user" user
62
  USER user
@@ -71,6 +74,7 @@ ENV HOME=/home/user \
71
  GRADIO_NUM_PORTS=1 \
72
  GRADIO_SERVER_NAME=0.0.0.0 \
73
  GRADIO_SERVER_PORT=7860 \
 
74
  GRADIO_THEME=huggingface \
75
  TLDEXTRACT_CACHE=$HOME/app/tld/.tld_set_snapshot \
76
  SYSTEM=spaces
@@ -81,6 +85,8 @@ WORKDIR $HOME/app
81
  # Copy the app code to the container
82
  COPY --chown=user . $HOME/app
83
 
 
 
84
  ENTRYPOINT [ "/entrypoint.sh" ]
85
 
86
  # Default command for Lambda mode
 
51
  # Copy installed packages from builder stage
52
  COPY --from=builder /install /usr/local/lib/python3.11/site-packages/
53
 
54
+ # Use a conditional entrypoint based on the APP_MODE argument (deprecated, now created beforehand in folder)
55
+ # RUN if [ "$APP_MODE" = "lambda" ]; then \
56
+ # echo '#!/bin/sh\nexec python -m awslambdaric' > /entrypoint.sh; \
57
+ # else \
58
+ # echo '#!/bin/sh\nexec python app.py' > /entrypoint.sh; \
59
+ # fi && chmod +x /entrypoint.sh
60
+
61
+ # Entrypoint helps to switch between Gradio and Lambda mode
62
+ COPY entrypoint.sh /entrypoint.sh
63
 
64
  # Switch to the "user" user
65
  USER user
 
74
  GRADIO_NUM_PORTS=1 \
75
  GRADIO_SERVER_NAME=0.0.0.0 \
76
  GRADIO_SERVER_PORT=7860 \
77
+ GRADIO_ANALYTICS_ENABLED=False \
78
  GRADIO_THEME=huggingface \
79
  TLDEXTRACT_CACHE=$HOME/app/tld/.tld_set_snapshot \
80
  SYSTEM=spaces
 
85
  # Copy the app code to the container
86
  COPY --chown=user . $HOME/app
87
 
88
+ RUN chmod +x /entrypoint.sh
89
+
90
  ENTRYPOINT [ "/entrypoint.sh" ]
91
 
92
  # Default command for Lambda mode
app.py CHANGED
@@ -19,7 +19,6 @@ from tools.auth import authenticate_user
19
  from tools.load_spacy_model_custom_recognisers import custom_entities
20
  from tools.custom_csvlogger import CSVLogger_custom
21
 
22
-
23
  today_rev = datetime.now().strftime("%Y%m%d")
24
 
25
  add_folder_to_path("tesseract/")
@@ -286,7 +285,7 @@ with app:
286
  then(fn = prepare_image_or_pdf, inputs=[in_doc_files, in_redaction_method, in_allow_list, latest_file_completed_text, output_summary, first_loop_state, annotate_max_pages, current_loop_page_number, all_image_annotations_state], outputs=[output_summary, prepared_pdf_state, images_pdf_state, annotate_max_pages, annotate_max_pages_bottom, pdf_doc_state, all_image_annotations_state], api_name="prepare_doc").\
287
  then(fn = choose_and_run_redactor, inputs=[in_doc_files, prepared_pdf_state, images_pdf_state, in_redact_language, in_redact_entities, in_redact_comprehend_entities, in_redaction_method, in_allow_list_state, latest_file_completed_text, output_summary, output_file_list_state, log_files_output_list_state, first_loop_state, page_min, page_max, estimated_time_taken_number, handwrite_signature_checkbox, textract_metadata_textbox, all_image_annotations_state, all_line_level_ocr_results_df_state, all_decision_process_table_state, pdf_doc_state, current_loop_page_number, page_break_return, pii_identification_method_drop, comprehend_query_number],
288
  outputs=[output_summary, output_file, output_file_list_state, latest_file_completed_text, log_files_output, log_files_output_list_state, estimated_time_taken_number, textract_metadata_textbox, pdf_doc_state, all_image_annotations_state, current_loop_page_number, page_break_return, all_line_level_ocr_results_df_state, all_decision_process_table_state, comprehend_query_number], api_name="redact_doc").\
289
- then(fn=update_annotator, inputs=[all_image_annotations_state, page_min, annotator_zoom_number], outputs=[annotator, annotate_current_page, annotate_current_page_bottom])
290
 
291
  # If the app has completed a batch of pages, it will run this until the end of all pages in the document
292
  current_loop_page_number.change(fn = choose_and_run_redactor, inputs=[in_doc_files, prepared_pdf_state, images_pdf_state, in_redact_language, in_redact_entities, in_redact_comprehend_entities, in_redaction_method, in_allow_list_state, latest_file_completed_text, output_summary, output_file_list_state, log_files_output_list_state, second_loop_state, page_min, page_max, estimated_time_taken_number, handwrite_signature_checkbox, textract_metadata_textbox, all_image_annotations_state, all_line_level_ocr_results_df_state, all_decision_process_table_state, pdf_doc_state, current_loop_page_number, page_break_return, pii_identification_method_drop, comprehend_query_number],
@@ -397,7 +396,7 @@ with app:
397
  then(fn = upload_file_to_s3, inputs=[usage_logs_state, usage_s3_logs_loc_state], outputs=[s3_logs_output_textbox])
398
 
399
  # Launch the Gradio app
400
- COGNITO_AUTH = get_or_create_env_var('COGNITO_AUTH', '0')
401
  print(f'The value of COGNITO_AUTH is {COGNITO_AUTH}')
402
 
403
  RUN_DIRECT_MODE = get_or_create_env_var('RUN_DIRECT_MODE', '0')
@@ -411,7 +410,7 @@ if __name__ == "__main__":
411
  if RUN_DIRECT_MODE == "0":
412
 
413
  if os.environ['COGNITO_AUTH'] == "1":
414
- app.queue(max_size=max_queue_size).launch(show_error=True, auth=authenticate_user, max_file_size=max_file_size)
415
  else:
416
  app.queue(max_size=max_queue_size).launch(show_error=True, inbrowser=True, max_file_size=max_file_size)
417
 
 
19
  from tools.load_spacy_model_custom_recognisers import custom_entities
20
  from tools.custom_csvlogger import CSVLogger_custom
21
 
 
22
  today_rev = datetime.now().strftime("%Y%m%d")
23
 
24
  add_folder_to_path("tesseract/")
 
285
  then(fn = prepare_image_or_pdf, inputs=[in_doc_files, in_redaction_method, in_allow_list, latest_file_completed_text, output_summary, first_loop_state, annotate_max_pages, current_loop_page_number, all_image_annotations_state], outputs=[output_summary, prepared_pdf_state, images_pdf_state, annotate_max_pages, annotate_max_pages_bottom, pdf_doc_state, all_image_annotations_state], api_name="prepare_doc").\
286
  then(fn = choose_and_run_redactor, inputs=[in_doc_files, prepared_pdf_state, images_pdf_state, in_redact_language, in_redact_entities, in_redact_comprehend_entities, in_redaction_method, in_allow_list_state, latest_file_completed_text, output_summary, output_file_list_state, log_files_output_list_state, first_loop_state, page_min, page_max, estimated_time_taken_number, handwrite_signature_checkbox, textract_metadata_textbox, all_image_annotations_state, all_line_level_ocr_results_df_state, all_decision_process_table_state, pdf_doc_state, current_loop_page_number, page_break_return, pii_identification_method_drop, comprehend_query_number],
287
  outputs=[output_summary, output_file, output_file_list_state, latest_file_completed_text, log_files_output, log_files_output_list_state, estimated_time_taken_number, textract_metadata_textbox, pdf_doc_state, all_image_annotations_state, current_loop_page_number, page_break_return, all_line_level_ocr_results_df_state, all_decision_process_table_state, comprehend_query_number], api_name="redact_doc").\
288
+ then(fn=update_annotator, inputs=[all_image_annotations_state, page_min, annotator_zoom_number], outputs=[annotator, annotate_current_page, annotate_current_page_bottom, annotate_previous_page])
289
 
290
  # If the app has completed a batch of pages, it will run this until the end of all pages in the document
291
  current_loop_page_number.change(fn = choose_and_run_redactor, inputs=[in_doc_files, prepared_pdf_state, images_pdf_state, in_redact_language, in_redact_entities, in_redact_comprehend_entities, in_redaction_method, in_allow_list_state, latest_file_completed_text, output_summary, output_file_list_state, log_files_output_list_state, second_loop_state, page_min, page_max, estimated_time_taken_number, handwrite_signature_checkbox, textract_metadata_textbox, all_image_annotations_state, all_line_level_ocr_results_df_state, all_decision_process_table_state, pdf_doc_state, current_loop_page_number, page_break_return, pii_identification_method_drop, comprehend_query_number],
 
396
  then(fn = upload_file_to_s3, inputs=[usage_logs_state, usage_s3_logs_loc_state], outputs=[s3_logs_output_textbox])
397
 
398
  # Launch the Gradio app
399
+ COGNITO_AUTH = get_or_create_env_var('COGNITO_AUTH', '1')
400
  print(f'The value of COGNITO_AUTH is {COGNITO_AUTH}')
401
 
402
  RUN_DIRECT_MODE = get_or_create_env_var('RUN_DIRECT_MODE', '0')
 
410
  if RUN_DIRECT_MODE == "0":
411
 
412
  if os.environ['COGNITO_AUTH'] == "1":
413
+ app.queue(max_size=max_queue_size).launch(show_error=True, auth=authenticate_user, max_file_size=max_file_size, show_api=False)
414
  else:
415
  app.queue(max_size=max_queue_size).launch(show_error=True, inbrowser=True, max_file_size=max_file_size)
416
 
tools/auth.py CHANGED
@@ -1,5 +1,6 @@
1
 
2
  import boto3
 
3
  from tools.helper_functions import get_or_create_env_var
4
 
5
  client_id = get_or_create_env_var('AWS_CLIENT_ID', '') # This client id is borrowed from async gradio app client
@@ -8,7 +9,9 @@ print(f'The value of AWS_CLIENT_ID is {client_id}')
8
  user_pool_id = get_or_create_env_var('AWS_USER_POOL_ID', '')
9
  print(f'The value of AWS_USER_POOL_ID is {user_pool_id}')
10
 
11
- def authenticate_user(username, password, user_pool_id=user_pool_id, client_id=client_id):
 
 
12
  """Authenticates a user against an AWS Cognito user pool.
13
 
14
  Args:
@@ -24,6 +27,7 @@ def authenticate_user(username, password, user_pool_id=user_pool_id, client_id=c
24
  client = boto3.client('cognito-idp') # Cognito Identity Provider client
25
 
26
  try:
 
27
  response = client.initiate_auth(
28
  AuthFlow='USER_PASSWORD_AUTH',
29
  AuthParameters={
@@ -45,4 +49,4 @@ def authenticate_user(username, password, user_pool_id=user_pool_id, client_id=c
45
  return False
46
  except Exception as e:
47
  print(f"An error occurred: {e}")
48
- return False
 
1
 
2
  import boto3
3
+ import gradio as gr
4
  from tools.helper_functions import get_or_create_env_var
5
 
6
  client_id = get_or_create_env_var('AWS_CLIENT_ID', '') # This client id is borrowed from async gradio app client
 
9
  user_pool_id = get_or_create_env_var('AWS_USER_POOL_ID', '')
10
  print(f'The value of AWS_USER_POOL_ID is {user_pool_id}')
11
 
12
+
13
+
14
+ def authenticate_user(username:str, password:str, user_pool_id:str=user_pool_id, client_id:str=client_id):
15
  """Authenticates a user against an AWS Cognito user pool.
16
 
17
  Args:
 
27
  client = boto3.client('cognito-idp') # Cognito Identity Provider client
28
 
29
  try:
30
+
31
  response = client.initiate_auth(
32
  AuthFlow='USER_PASSWORD_AUTH',
33
  AuthParameters={
 
49
  return False
50
  except Exception as e:
51
  print(f"An error occurred: {e}")
52
+ return False
tools/helper_functions.py CHANGED
@@ -211,7 +211,15 @@ def wipe_logs(feedback_logs_loc, usage_logs_loc):
211
  os.remove(usage_logs_loc)
212
  except Exception as e:
213
  print("Could not remove usage logs file", e)
214
-
 
 
 
 
 
 
 
 
215
  async def get_connection_params(request: gr.Request):
216
  base_folder = ""
217
 
@@ -223,29 +231,25 @@ async def get_connection_params(request: gr.Request):
223
  #if 'context' in request_data:
224
  # print("Request context dictionary:", request_data['context'])
225
 
226
- # print("Request headers dictionary:", request.headers)
227
- # print("All host elements", request.client)
228
- # print("IP address:", request.client.host)
229
- # print("Query parameters:", dict(request.query_params))
230
  # To get the underlying FastAPI items you would need to use await and some fancy @ stuff for a live query: https://fastapi.tiangolo.com/vi/reference/request/
231
  #print("Request dictionary to object:", request.request.body())
232
  print("Session hash:", request.session_hash)
233
 
234
- # Retrieving or setting CUSTOM_CLOUDFRONT_HEADER
235
- CUSTOM_CLOUDFRONT_HEADER_var = get_or_create_env_var('CUSTOM_CLOUDFRONT_HEADER', '')
236
- #print(f'The value of CUSTOM_CLOUDFRONT_HEADER is {CUSTOM_CLOUDFRONT_HEADER_var}')
237
-
238
- # Retrieving or setting CUSTOM_CLOUDFRONT_HEADER_VALUE
239
- CUSTOM_CLOUDFRONT_HEADER_VALUE_var = get_or_create_env_var('CUSTOM_CLOUDFRONT_HEADER_VALUE', '')
240
- #print(f'The value of CUSTOM_CLOUDFRONT_HEADER_VALUE_var is {CUSTOM_CLOUDFRONT_HEADER_VALUE_var}')
241
-
242
- if CUSTOM_CLOUDFRONT_HEADER_var and CUSTOM_CLOUDFRONT_HEADER_VALUE_var:
243
- if CUSTOM_CLOUDFRONT_HEADER_var in request.headers:
244
- supplied_cloudfront_custom_value = request.headers[CUSTOM_CLOUDFRONT_HEADER_var]
245
- if supplied_cloudfront_custom_value == CUSTOM_CLOUDFRONT_HEADER_VALUE_var:
246
- print("Custom Cloudfront header found:", supplied_cloudfront_custom_value)
247
  else:
248
- raise(ValueError, "Custom Cloudfront header value does not match expected value.")
 
249
 
250
  # Get output save folder from 1 - username passed in from direct Cognito login, 2 - Cognito ID header passed through a Lambda authenticator, 3 - the session hash.
251
 
 
211
  os.remove(usage_logs_loc)
212
  except Exception as e:
213
  print("Could not remove usage logs file", e)
214
+
215
+ # Retrieving or setting CUSTOM_HEADER
216
+ CUSTOM_HEADER = get_or_create_env_var('CUSTOM_HEADER', 'custom_header')
217
+ print(f'CUSTOM_HEADER found')
218
+
219
+ # Retrieving or setting CUSTOM_HEADER_VALUE
220
+ CUSTOM_HEADER_VALUE = get_or_create_env_var('CUSTOM_HEADER_VALUE', 'custom_header_value')
221
+ print(f'CUSTOM_HEADER_VALUE found')
222
+
223
  async def get_connection_params(request: gr.Request):
224
  base_folder = ""
225
 
 
231
  #if 'context' in request_data:
232
  # print("Request context dictionary:", request_data['context'])
233
 
234
+ print("Request headers dictionary:", request.headers)
235
+ print("All host elements", request.client)
236
+ print("IP address:", request.client.host)
237
+ print("Query parameters:", dict(request.query_params))
238
  # To get the underlying FastAPI items you would need to use await and some fancy @ stuff for a live query: https://fastapi.tiangolo.com/vi/reference/request/
239
  #print("Request dictionary to object:", request.request.body())
240
  print("Session hash:", request.session_hash)
241
 
242
+ if CUSTOM_HEADER and CUSTOM_HEADER_VALUE:
243
+ if CUSTOM_HEADER in request.headers:
244
+ supplied_custom_header_value = request.headers[CUSTOM_HEADER]
245
+ if supplied_custom_header_value == CUSTOM_HEADER_VALUE:
246
+ print("Custom header supplied and matches CUSTOM_HEADER_VALUE")
247
+ else:
248
+ print("Custom header value does not match expected value.")
249
+ raise ValueError("Custom header value does not match expected value.")
 
 
 
 
 
250
  else:
251
+ print("Custom header value not found.")
252
+ raise ValueError("Custom header value not found.")
253
 
254
  # Get output save folder from 1 - username passed in from direct Cognito login, 2 - Cognito ID header passed through a Lambda authenticator, 3 - the session hash.
255