Spaces:
Sleeping
Sleeping
Commit
·
bf7bb79
1
Parent(s):
68a91f4
Modified the Dockerfile so that, hopefully, Lambda overrides are no longer needed. Also looking into custom headers from CloudFront to try to get them to work.
Browse files- Dockerfile +12 -6
- app.py +3 -4
- tools/auth.py +6 -2
- tools/helper_functions.py +23 -19
Dockerfile
CHANGED
@@ -51,12 +51,15 @@ RUN mkdir -p /home/user/app/output \
|
|
51 |
# Copy installed packages from builder stage
|
52 |
COPY --from=builder /install /usr/local/lib/python3.11/site-packages/
|
53 |
|
54 |
-
# Use a conditional entrypoint based on the APP_MODE argument
|
55 |
-
RUN if [ "$APP_MODE" = "lambda" ]; then \
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
|
|
|
|
|
|
60 |
|
61 |
# Switch to the "user" user
|
62 |
USER user
|
@@ -71,6 +74,7 @@ ENV HOME=/home/user \
|
|
71 |
GRADIO_NUM_PORTS=1 \
|
72 |
GRADIO_SERVER_NAME=0.0.0.0 \
|
73 |
GRADIO_SERVER_PORT=7860 \
|
|
|
74 |
GRADIO_THEME=huggingface \
|
75 |
TLDEXTRACT_CACHE=$HOME/app/tld/.tld_set_snapshot \
|
76 |
SYSTEM=spaces
|
@@ -81,6 +85,8 @@ WORKDIR $HOME/app
|
|
81 |
# Copy the app code to the container
|
82 |
COPY --chown=user . $HOME/app
|
83 |
|
|
|
|
|
84 |
ENTRYPOINT [ "/entrypoint.sh" ]
|
85 |
|
86 |
# Default command for Lambda mode
|
|
|
51 |
# Copy installed packages from builder stage
|
52 |
COPY --from=builder /install /usr/local/lib/python3.11/site-packages/
|
53 |
|
54 |
+
# Conditional entrypoint based on the APP_MODE argument (deprecated: entrypoint.sh is now created beforehand and copied in from the build folder)
|
55 |
+
# RUN if [ "$APP_MODE" = "lambda" ]; then \
|
56 |
+
# echo '#!/bin/sh\nexec python -m awslambdaric' > /entrypoint.sh; \
|
57 |
+
# else \
|
58 |
+
# echo '#!/bin/sh\nexec python app.py' > /entrypoint.sh; \
|
59 |
+
# fi && chmod +x /entrypoint.sh
|
60 |
+
|
61 |
+
# Entrypoint helps to switch between Gradio and Lambda mode
|
62 |
+
COPY entrypoint.sh /entrypoint.sh
|
63 |
|
64 |
# Switch to the "user" user
|
65 |
USER user
|
|
|
74 |
GRADIO_NUM_PORTS=1 \
|
75 |
GRADIO_SERVER_NAME=0.0.0.0 \
|
76 |
GRADIO_SERVER_PORT=7860 \
|
77 |
+
GRADIO_ANALYTICS_ENABLED=False \
|
78 |
GRADIO_THEME=huggingface \
|
79 |
TLDEXTRACT_CACHE=$HOME/app/tld/.tld_set_snapshot \
|
80 |
SYSTEM=spaces
|
|
|
85 |
# Copy the app code to the container
|
86 |
COPY --chown=user . $HOME/app
|
87 |
|
88 |
+
RUN chmod +x /entrypoint.sh
|
89 |
+
|
90 |
ENTRYPOINT [ "/entrypoint.sh" ]
|
91 |
|
92 |
# Default command for Lambda mode
|
app.py
CHANGED
@@ -19,7 +19,6 @@ from tools.auth import authenticate_user
|
|
19 |
from tools.load_spacy_model_custom_recognisers import custom_entities
|
20 |
from tools.custom_csvlogger import CSVLogger_custom
|
21 |
|
22 |
-
|
23 |
today_rev = datetime.now().strftime("%Y%m%d")
|
24 |
|
25 |
add_folder_to_path("tesseract/")
|
@@ -286,7 +285,7 @@ with app:
|
|
286 |
then(fn = prepare_image_or_pdf, inputs=[in_doc_files, in_redaction_method, in_allow_list, latest_file_completed_text, output_summary, first_loop_state, annotate_max_pages, current_loop_page_number, all_image_annotations_state], outputs=[output_summary, prepared_pdf_state, images_pdf_state, annotate_max_pages, annotate_max_pages_bottom, pdf_doc_state, all_image_annotations_state], api_name="prepare_doc").\
|
287 |
then(fn = choose_and_run_redactor, inputs=[in_doc_files, prepared_pdf_state, images_pdf_state, in_redact_language, in_redact_entities, in_redact_comprehend_entities, in_redaction_method, in_allow_list_state, latest_file_completed_text, output_summary, output_file_list_state, log_files_output_list_state, first_loop_state, page_min, page_max, estimated_time_taken_number, handwrite_signature_checkbox, textract_metadata_textbox, all_image_annotations_state, all_line_level_ocr_results_df_state, all_decision_process_table_state, pdf_doc_state, current_loop_page_number, page_break_return, pii_identification_method_drop, comprehend_query_number],
|
288 |
outputs=[output_summary, output_file, output_file_list_state, latest_file_completed_text, log_files_output, log_files_output_list_state, estimated_time_taken_number, textract_metadata_textbox, pdf_doc_state, all_image_annotations_state, current_loop_page_number, page_break_return, all_line_level_ocr_results_df_state, all_decision_process_table_state, comprehend_query_number], api_name="redact_doc").\
|
289 |
-
then(fn=update_annotator, inputs=[all_image_annotations_state, page_min, annotator_zoom_number], outputs=[annotator, annotate_current_page, annotate_current_page_bottom])
|
290 |
|
291 |
# If the app has completed a batch of pages, it will run this until the end of all pages in the document
|
292 |
current_loop_page_number.change(fn = choose_and_run_redactor, inputs=[in_doc_files, prepared_pdf_state, images_pdf_state, in_redact_language, in_redact_entities, in_redact_comprehend_entities, in_redaction_method, in_allow_list_state, latest_file_completed_text, output_summary, output_file_list_state, log_files_output_list_state, second_loop_state, page_min, page_max, estimated_time_taken_number, handwrite_signature_checkbox, textract_metadata_textbox, all_image_annotations_state, all_line_level_ocr_results_df_state, all_decision_process_table_state, pdf_doc_state, current_loop_page_number, page_break_return, pii_identification_method_drop, comprehend_query_number],
|
@@ -397,7 +396,7 @@ with app:
|
|
397 |
then(fn = upload_file_to_s3, inputs=[usage_logs_state, usage_s3_logs_loc_state], outputs=[s3_logs_output_textbox])
|
398 |
|
399 |
# Launch the Gradio app
|
400 |
-
COGNITO_AUTH = get_or_create_env_var('COGNITO_AUTH', '
|
401 |
print(f'The value of COGNITO_AUTH is {COGNITO_AUTH}')
|
402 |
|
403 |
RUN_DIRECT_MODE = get_or_create_env_var('RUN_DIRECT_MODE', '0')
|
@@ -411,7 +410,7 @@ if __name__ == "__main__":
|
|
411 |
if RUN_DIRECT_MODE == "0":
|
412 |
|
413 |
if os.environ['COGNITO_AUTH'] == "1":
|
414 |
-
app.queue(max_size=max_queue_size).launch(show_error=True, auth=authenticate_user, max_file_size=max_file_size)
|
415 |
else:
|
416 |
app.queue(max_size=max_queue_size).launch(show_error=True, inbrowser=True, max_file_size=max_file_size)
|
417 |
|
|
|
19 |
from tools.load_spacy_model_custom_recognisers import custom_entities
|
20 |
from tools.custom_csvlogger import CSVLogger_custom
|
21 |
|
|
|
22 |
today_rev = datetime.now().strftime("%Y%m%d")
|
23 |
|
24 |
add_folder_to_path("tesseract/")
|
|
|
285 |
then(fn = prepare_image_or_pdf, inputs=[in_doc_files, in_redaction_method, in_allow_list, latest_file_completed_text, output_summary, first_loop_state, annotate_max_pages, current_loop_page_number, all_image_annotations_state], outputs=[output_summary, prepared_pdf_state, images_pdf_state, annotate_max_pages, annotate_max_pages_bottom, pdf_doc_state, all_image_annotations_state], api_name="prepare_doc").\
|
286 |
then(fn = choose_and_run_redactor, inputs=[in_doc_files, prepared_pdf_state, images_pdf_state, in_redact_language, in_redact_entities, in_redact_comprehend_entities, in_redaction_method, in_allow_list_state, latest_file_completed_text, output_summary, output_file_list_state, log_files_output_list_state, first_loop_state, page_min, page_max, estimated_time_taken_number, handwrite_signature_checkbox, textract_metadata_textbox, all_image_annotations_state, all_line_level_ocr_results_df_state, all_decision_process_table_state, pdf_doc_state, current_loop_page_number, page_break_return, pii_identification_method_drop, comprehend_query_number],
|
287 |
outputs=[output_summary, output_file, output_file_list_state, latest_file_completed_text, log_files_output, log_files_output_list_state, estimated_time_taken_number, textract_metadata_textbox, pdf_doc_state, all_image_annotations_state, current_loop_page_number, page_break_return, all_line_level_ocr_results_df_state, all_decision_process_table_state, comprehend_query_number], api_name="redact_doc").\
|
288 |
+
then(fn=update_annotator, inputs=[all_image_annotations_state, page_min, annotator_zoom_number], outputs=[annotator, annotate_current_page, annotate_current_page_bottom, annotate_previous_page])
|
289 |
|
290 |
# If the app has completed a batch of pages, it will run this until the end of all pages in the document
|
291 |
current_loop_page_number.change(fn = choose_and_run_redactor, inputs=[in_doc_files, prepared_pdf_state, images_pdf_state, in_redact_language, in_redact_entities, in_redact_comprehend_entities, in_redaction_method, in_allow_list_state, latest_file_completed_text, output_summary, output_file_list_state, log_files_output_list_state, second_loop_state, page_min, page_max, estimated_time_taken_number, handwrite_signature_checkbox, textract_metadata_textbox, all_image_annotations_state, all_line_level_ocr_results_df_state, all_decision_process_table_state, pdf_doc_state, current_loop_page_number, page_break_return, pii_identification_method_drop, comprehend_query_number],
|
|
|
396 |
then(fn = upload_file_to_s3, inputs=[usage_logs_state, usage_s3_logs_loc_state], outputs=[s3_logs_output_textbox])
|
397 |
|
398 |
# Launch the Gradio app
|
399 |
+
COGNITO_AUTH = get_or_create_env_var('COGNITO_AUTH', '1')
|
400 |
print(f'The value of COGNITO_AUTH is {COGNITO_AUTH}')
|
401 |
|
402 |
RUN_DIRECT_MODE = get_or_create_env_var('RUN_DIRECT_MODE', '0')
|
|
|
410 |
if RUN_DIRECT_MODE == "0":
|
411 |
|
412 |
if os.environ['COGNITO_AUTH'] == "1":
|
413 |
+
app.queue(max_size=max_queue_size).launch(show_error=True, auth=authenticate_user, max_file_size=max_file_size, show_api=False)
|
414 |
else:
|
415 |
app.queue(max_size=max_queue_size).launch(show_error=True, inbrowser=True, max_file_size=max_file_size)
|
416 |
|
tools/auth.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
|
2 |
import boto3
|
|
|
3 |
from tools.helper_functions import get_or_create_env_var
|
4 |
|
5 |
client_id = get_or_create_env_var('AWS_CLIENT_ID', '') # This client id is borrowed from async gradio app client
|
@@ -8,7 +9,9 @@ print(f'The value of AWS_CLIENT_ID is {client_id}')
|
|
8 |
user_pool_id = get_or_create_env_var('AWS_USER_POOL_ID', '')
|
9 |
print(f'The value of AWS_USER_POOL_ID is {user_pool_id}')
|
10 |
|
11 |
-
|
|
|
|
|
12 |
"""Authenticates a user against an AWS Cognito user pool.
|
13 |
|
14 |
Args:
|
@@ -24,6 +27,7 @@ def authenticate_user(username, password, user_pool_id=user_pool_id, client_id=c
|
|
24 |
client = boto3.client('cognito-idp') # Cognito Identity Provider client
|
25 |
|
26 |
try:
|
|
|
27 |
response = client.initiate_auth(
|
28 |
AuthFlow='USER_PASSWORD_AUTH',
|
29 |
AuthParameters={
|
@@ -45,4 +49,4 @@ def authenticate_user(username, password, user_pool_id=user_pool_id, client_id=c
|
|
45 |
return False
|
46 |
except Exception as e:
|
47 |
print(f"An error occurred: {e}")
|
48 |
-
return False
|
|
|
1 |
|
2 |
import boto3
|
3 |
+
import gradio as gr
|
4 |
from tools.helper_functions import get_or_create_env_var
|
5 |
|
6 |
client_id = get_or_create_env_var('AWS_CLIENT_ID', '') # This client id is borrowed from async gradio app client
|
|
|
9 |
user_pool_id = get_or_create_env_var('AWS_USER_POOL_ID', '')
|
10 |
print(f'The value of AWS_USER_POOL_ID is {user_pool_id}')
|
11 |
|
12 |
+
|
13 |
+
|
14 |
+
def authenticate_user(username:str, password:str, user_pool_id:str=user_pool_id, client_id:str=client_id):
|
15 |
"""Authenticates a user against an AWS Cognito user pool.
|
16 |
|
17 |
Args:
|
|
|
27 |
client = boto3.client('cognito-idp') # Cognito Identity Provider client
|
28 |
|
29 |
try:
|
30 |
+
|
31 |
response = client.initiate_auth(
|
32 |
AuthFlow='USER_PASSWORD_AUTH',
|
33 |
AuthParameters={
|
|
|
49 |
return False
|
50 |
except Exception as e:
|
51 |
print(f"An error occurred: {e}")
|
52 |
+
return False
|
tools/helper_functions.py
CHANGED
@@ -211,7 +211,15 @@ def wipe_logs(feedback_logs_loc, usage_logs_loc):
|
|
211 |
os.remove(usage_logs_loc)
|
212 |
except Exception as e:
|
213 |
print("Could not remove usage logs file", e)
|
214 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
215 |
async def get_connection_params(request: gr.Request):
|
216 |
base_folder = ""
|
217 |
|
@@ -223,29 +231,25 @@ async def get_connection_params(request: gr.Request):
|
|
223 |
#if 'context' in request_data:
|
224 |
# print("Request context dictionary:", request_data['context'])
|
225 |
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
# To get the underlying FastAPI items you would need to use await and some fancy @ stuff for a live query: https://fastapi.tiangolo.com/vi/reference/request/
|
231 |
#print("Request dictionary to object:", request.request.body())
|
232 |
print("Session hash:", request.session_hash)
|
233 |
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
if CUSTOM_CLOUDFRONT_HEADER_var and CUSTOM_CLOUDFRONT_HEADER_VALUE_var:
|
243 |
-
if CUSTOM_CLOUDFRONT_HEADER_var in request.headers:
|
244 |
-
supplied_cloudfront_custom_value = request.headers[CUSTOM_CLOUDFRONT_HEADER_var]
|
245 |
-
if supplied_cloudfront_custom_value == CUSTOM_CLOUDFRONT_HEADER_VALUE_var:
|
246 |
-
print("Custom Cloudfront header found:", supplied_cloudfront_custom_value)
|
247 |
else:
|
248 |
-
|
|
|
249 |
|
250 |
# Get output save folder from 1 - username passed in from direct Cognito login, 2 - Cognito ID header passed through a Lambda authenticator, 3 - the session hash.
|
251 |
|
|
|
211 |
os.remove(usage_logs_loc)
|
212 |
except Exception as e:
|
213 |
print("Could not remove usage logs file", e)
|
214 |
+
|
215 |
+
# Retrieving or setting CUSTOM_HEADER
|
216 |
+
CUSTOM_HEADER = get_or_create_env_var('CUSTOM_HEADER', 'custom_header')
|
217 |
+
print(f'CUSTOM_HEADER found')
|
218 |
+
|
219 |
+
# Retrieving or setting CUSTOM_HEADER_VALUE
|
220 |
+
CUSTOM_HEADER_VALUE = get_or_create_env_var('CUSTOM_HEADER_VALUE', 'custom_header_value')
|
221 |
+
print(f'CUSTOM_HEADER_VALUE found')
|
222 |
+
|
223 |
async def get_connection_params(request: gr.Request):
|
224 |
base_folder = ""
|
225 |
|
|
|
231 |
#if 'context' in request_data:
|
232 |
# print("Request context dictionary:", request_data['context'])
|
233 |
|
234 |
+
print("Request headers dictionary:", request.headers)
|
235 |
+
print("All host elements", request.client)
|
236 |
+
print("IP address:", request.client.host)
|
237 |
+
print("Query parameters:", dict(request.query_params))
|
238 |
# To get the underlying FastAPI items you would need to use await and some fancy @ stuff for a live query: https://fastapi.tiangolo.com/vi/reference/request/
|
239 |
#print("Request dictionary to object:", request.request.body())
|
240 |
print("Session hash:", request.session_hash)
|
241 |
|
242 |
+
if CUSTOM_HEADER and CUSTOM_HEADER_VALUE:
|
243 |
+
if CUSTOM_HEADER in request.headers:
|
244 |
+
supplied_custom_header_value = request.headers[CUSTOM_HEADER]
|
245 |
+
if supplied_custom_header_value == CUSTOM_HEADER_VALUE:
|
246 |
+
print("Custom header supplied and matches CUSTOM_HEADER_VALUE")
|
247 |
+
else:
|
248 |
+
print("Custom header value does not match expected value.")
|
249 |
+
raise ValueError("Custom header value does not match expected value.")
|
|
|
|
|
|
|
|
|
|
|
250 |
else:
|
251 |
+
print("Custom header value not found.")
|
252 |
+
raise ValueError("Custom header value not found.")
|
253 |
|
254 |
# Get output save folder from 1 - username passed in from direct Cognito login, 2 - Cognito ID header passed through a Lambda authenticator, 3 - the session hash.
|
255 |
|