Spaces:
Sleeping
Sleeping
Commit
·
c9e23cb
1
Parent(s):
a03496e
Adapted the text join used for the review file to be more resilient to changes in image size. Added the option of using a client secret with the AWS Cognito login.
Browse files
- app.py +2 -2
- tools/auth.py +39 -9
- tools/file_conversion.py +52 -2
- tools/redaction_review.py +1 -1
app.py
CHANGED
@@ -458,9 +458,9 @@ with app:
|
|
458 |
then(fn = upload_file_to_s3, inputs=[usage_logs_state, usage_s3_logs_loc_state], outputs=[s3_logs_output_textbox])
|
459 |
|
460 |
# Get some environment variables and Launch the Gradio app
|
461 |
-
COGNITO_AUTH = get_or_create_env_var('COGNITO_AUTH', '
|
462 |
print(f'The value of COGNITO_AUTH is {COGNITO_AUTH}')
|
463 |
-
|
464 |
RUN_DIRECT_MODE = get_or_create_env_var('RUN_DIRECT_MODE', '0')
|
465 |
print(f'The value of RUN_DIRECT_MODE is {RUN_DIRECT_MODE}')
|
466 |
|
|
|
458 |
then(fn = upload_file_to_s3, inputs=[usage_logs_state, usage_s3_logs_loc_state], outputs=[s3_logs_output_textbox])
|
459 |
|
460 |
# Get some environment variables and Launch the Gradio app
|
461 |
+
COGNITO_AUTH = get_or_create_env_var('COGNITO_AUTH', '1')
|
462 |
print(f'The value of COGNITO_AUTH is {COGNITO_AUTH}')
|
463 |
+
1
|
464 |
RUN_DIRECT_MODE = get_or_create_env_var('RUN_DIRECT_MODE', '0')
|
465 |
print(f'The value of RUN_DIRECT_MODE is {RUN_DIRECT_MODE}')
|
466 |
|
tools/auth.py
CHANGED
@@ -1,15 +1,31 @@
|
|
1 |
|
2 |
import boto3
|
3 |
import gradio as gr
|
|
|
|
|
|
|
4 |
from tools.helper_functions import get_or_create_env_var
|
5 |
|
6 |
-
client_id = get_or_create_env_var('AWS_CLIENT_ID', '')
|
7 |
print(f'The value of AWS_CLIENT_ID is {client_id}')
|
8 |
|
9 |
-
|
|
|
|
|
|
|
10 |
print(f'The value of AWS_USER_POOL_ID is {user_pool_id}')
|
11 |
|
12 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
"""Authenticates a user against an AWS Cognito user pool.
|
14 |
|
15 |
Args:
|
@@ -17,6 +33,7 @@ def authenticate_user(username:str, password:str, user_pool_id:str=user_pool_id,
|
|
17 |
client_id (str): The ID of the Cognito user pool client.
|
18 |
username (str): The username of the user.
|
19 |
password (str): The password of the user.
|
|
|
20 |
|
21 |
Returns:
|
22 |
bool: True if the user is authenticated, False otherwise.
|
@@ -24,15 +41,28 @@ def authenticate_user(username:str, password:str, user_pool_id:str=user_pool_id,
|
|
24 |
|
25 |
client = boto3.client('cognito-idp') # Cognito Identity Provider client
|
26 |
|
|
|
|
|
|
|
27 |
try:
|
28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
response = client.initiate_auth(
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
|
|
36 |
)
|
37 |
|
38 |
# If successful, you'll receive an AuthenticationResult in the response
|
|
|
1 |
|
2 |
import boto3
|
3 |
import gradio as gr
|
4 |
+
import hmac
|
5 |
+
import hashlib
|
6 |
+
import base64
|
7 |
from tools.helper_functions import get_or_create_env_var
|
8 |
|
9 |
+
client_id = get_or_create_env_var('AWS_CLIENT_ID', '3qs30degqvip8ade8iv44c4edf')
|
10 |
print(f'The value of AWS_CLIENT_ID is {client_id}')
|
11 |
|
12 |
+
# Secret of the Cognito app client, read from the AWS_CLIENT_SECRET environment
# variable (falling back to the default given here).
# NOTE(review): a real-looking secret is hard-coded as the default below. It is
# kept unchanged so existing deployments keep working, but it should be rotated
# and supplied only through the environment.
client_secret = get_or_create_env_var('AWS_CLIENT_SECRET', 'cvgd27dihp88jktc71lmjaq2kgntjdkt6703m63mdfjv9j58mqo')
# Never echo the secret itself: stdout usually ends up in logs. Report only
# whether a value is configured.
print(f"AWS_CLIENT_SECRET is {'set' if client_secret else 'not set'}")
|
14 |
+
|
15 |
+
user_pool_id = get_or_create_env_var('AWS_USER_POOL_ID', 'eu-west-2_7Jhnih7D1')
|
16 |
print(f'The value of AWS_USER_POOL_ID is {user_pool_id}')
|
17 |
|
18 |
+
def calculate_secret_hash(client_id: str, client_secret: str, username: str) -> str:
    """Compute the ``SECRET_HASH`` value sent alongside Cognito auth parameters.

    The value is the Base64-encoded HMAC-SHA256 digest of the message
    ``username + client_id``, keyed with ``client_secret``.

    Args:
        client_id: The ID of the Cognito user pool app client.
        client_secret: The secret of that app client (used as the HMAC key).
        username: The username being authenticated.

    Returns:
        The Base64-encoded digest as a text string.
    """
    # Coerce before concatenating so that non-str inputs are handled the same
    # way as the key below, instead of raising TypeError on ``+``.
    message = f"{username}{client_id}"
    digest = hmac.new(
        str(client_secret).encode('utf-8'),
        msg=message.encode('utf-8'),
        digestmod=hashlib.sha256,
    ).digest()
    return base64.b64encode(digest).decode()
|
27 |
+
|
28 |
+
def authenticate_user(username:str, password:str, user_pool_id:str=user_pool_id, client_id:str=client_id, client_secret:str=client_secret):
|
29 |
"""Authenticates a user against an AWS Cognito user pool.
|
30 |
|
31 |
Args:
|
|
|
33 |
client_id (str): The ID of the Cognito user pool client.
|
34 |
username (str): The username of the user.
|
35 |
password (str): The password of the user.
|
36 |
+
client_secret (str): The client secret of the app client
|
37 |
|
38 |
Returns:
|
39 |
bool: True if the user is authenticated, False otherwise.
|
|
|
41 |
|
42 |
client = boto3.client('cognito-idp') # Cognito Identity Provider client
|
43 |
|
44 |
+
# Compute the secret hash
|
45 |
+
secret_hash = calculate_secret_hash(client_id, client_secret, username)
|
46 |
+
|
47 |
try:
|
48 |
|
49 |
+
# response = client.initiate_auth(
|
50 |
+
# AuthFlow='USER_PASSWORD_AUTH',
|
51 |
+
# AuthParameters={
|
52 |
+
# 'USERNAME': username,
|
53 |
+
# 'PASSWORD': password,
|
54 |
+
# },
|
55 |
+
# ClientId=client_id
|
56 |
+
# )
|
57 |
+
|
58 |
response = client.initiate_auth(
|
59 |
+
AuthFlow='USER_PASSWORD_AUTH',
|
60 |
+
AuthParameters={
|
61 |
+
'USERNAME': username,
|
62 |
+
'PASSWORD': password,
|
63 |
+
'SECRET_HASH': secret_hash
|
64 |
+
},
|
65 |
+
ClientId=client_id
|
66 |
)
|
67 |
|
68 |
# If successful, you'll receive an AuthenticationResult in the response
|
tools/file_conversion.py
CHANGED
@@ -650,6 +650,53 @@ def convert_text_pdf_to_img_pdf(in_file_path:str, out_text_file_path:List[str],
|
|
650 |
|
651 |
return out_message, out_file_paths
|
652 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
653 |
|
654 |
def convert_review_json_to_pandas_df(data:List[dict], text_join_data=pd.DataFrame) -> pd.DataFrame:
|
655 |
# Flatten the data
|
@@ -691,8 +738,11 @@ def convert_review_json_to_pandas_df(data:List[dict], text_join_data=pd.DataFram
|
|
691 |
text_join_data['page'] = text_join_data['page'].astype(str)
|
692 |
df['page'] = df['page'].astype(str)
|
693 |
text_join_data = text_join_data[['xmin', 'ymin', 'xmax', 'ymax', 'label', 'page', 'text']]
|
694 |
-
|
695 |
-
|
|
|
|
|
|
|
696 |
|
697 |
df = df.merge(text_join_data, left_on = ['xmin1', 'ymin1', 'xmax1', 'ymax1', 'label', 'page'], right_on = ['xmin', 'ymin', 'xmax', 'ymax', 'label', 'page'], how = "left", suffixes=("", "_y"))
|
698 |
|
|
|
650 |
|
651 |
return out_message, out_file_paths
|
652 |
|
653 |
+
# Example DataFrames
|
654 |
+
# df1 = pd.DataFrame({
|
655 |
+
# 'xmin': [10, 20, 30],
|
656 |
+
# 'xmax': [15, 25, 35],
|
657 |
+
# 'ymin': [40, 50, 60],
|
658 |
+
# 'ymax': [45, 55, 65],
|
659 |
+
# 'info1': ['A', 'B', 'C']
|
660 |
+
# })
|
661 |
+
|
662 |
+
# df2 = pd.DataFrame({
|
663 |
+
# 'xmin': [12, 18, 32],
|
664 |
+
# 'xmax': [14, 24, 34],
|
665 |
+
# 'ymin': [42, 48, 62],
|
666 |
+
# 'ymax': [44, 54, 66],
|
667 |
+
# 'info2': ['X', 'Y', 'Z']
|
668 |
+
# })
|
669 |
+
|
670 |
+
def join_values_within_threshold(df1, df2, threshold=5):
    """Left-join ``df2`` onto ``df1`` by approximately equal bounding boxes.

    A row of ``df2`` matches a row of ``df1`` when each of ``xmin``, ``xmax``,
    ``ymin`` and ``ymax`` differs by at most ``threshold``. When several rows
    of ``df2`` match the same box, only the first one (in ``df2`` order) is
    kept.

    Neither input frame is modified.

    Args:
        df1: Left frame; must contain ``xmin``, ``xmax``, ``ymin``, ``ymax``.
        df2: Right frame; must contain the same four coordinate columns.
        threshold: Maximum absolute difference allowed per coordinate.

    Returns:
        A DataFrame with one row per row of ``df1`` (in the same order),
        holding all ``df1`` columns followed by the matched ``df2`` columns.
        The matched ``df2`` coordinates are named ``xmin_y``, ``xmax_y``,
        ``ymin_y`` and ``ymax_y``; rows without a match hold NaN in the
        ``df2`` columns.
    """
    coord_cols = ['xmin', 'xmax', 'ymin', 'ymax']
    left_cols = [f'{col}_x' for col in coord_cols]

    # Only the distinct boxes of df1 take part in the cross join: it keeps the
    # intermediate frame small and stops df1's other columns from being
    # duplicated when the matches are merged back below.
    left_boxes = df1[coord_cols].drop_duplicates()
    pairs = pd.merge(left_boxes, df2, how='cross', suffixes=('_x', '_y'))

    # A pair matches only if every coordinate is within the threshold.
    within = pd.Series(True, index=pairs.index)
    for col in coord_cols:
        within &= (pairs[f'{col}_x'] - pairs[f'{col}_y']).abs() <= threshold

    # Keep the first df2 match for each df1 box.
    first_match = pairs[within].drop_duplicates(subset=left_cols)

    joined = pd.merge(df1, first_match, left_on=coord_cols, right_on=left_cols, how='left')

    # The *_x columns merely repeat df1's own coordinates.
    return joined.drop(columns=left_cols)
|
699 |
+
|
700 |
|
701 |
def convert_review_json_to_pandas_df(data:List[dict], text_join_data=pd.DataFrame) -> pd.DataFrame:
|
702 |
# Flatten the data
|
|
|
738 |
text_join_data['page'] = text_join_data['page'].astype(str)
|
739 |
df['page'] = df['page'].astype(str)
|
740 |
text_join_data = text_join_data[['xmin', 'ymin', 'xmax', 'ymax', 'label', 'page', 'text']]
|
741 |
+
# Round to the closest number divisible by 5
|
742 |
+
text_join_data[['xmin', 'ymin', 'xmax', 'ymax']] = (text_join_data[['xmin', 'ymin', 'xmax', 'ymax']].astype(float) / 5).round() * 5
|
743 |
+
text_join_data = text_join_data.drop_duplicates(['xmin', 'ymin', 'xmax', 'ymax', 'label', 'page'])
|
744 |
+
|
745 |
+
df[['xmin1', 'ymin1', 'xmax1', 'ymax1']] = (df[['xmin', 'ymin', 'xmax', 'ymax']].astype(float) / 5).round() * 5
|
746 |
|
747 |
df = df.merge(text_join_data, left_on = ['xmin1', 'ymin1', 'xmax1', 'ymax1', 'label', 'page'], right_on = ['xmin', 'ymin', 'xmax', 'ymax', 'label', 'page'], how = "left", suffixes=("", "_y"))
|
748 |
|
tools/redaction_review.py
CHANGED
@@ -41,7 +41,7 @@ def increase_page(number:int, image_annotator_object:AnnotatedImageData):
|
|
41 |
|
42 |
def update_zoom(current_zoom_level:int, annotate_current_page:int, decrease:bool=True):
|
43 |
if decrease == False:
|
44 |
-
if current_zoom_level >=
|
45 |
current_zoom_level -= 10
|
46 |
else:
|
47 |
if current_zoom_level < 100:
|
|
|
41 |
|
42 |
def update_zoom(current_zoom_level:int, annotate_current_page:int, decrease:bool=True):
|
43 |
if decrease == False:
|
44 |
+
if current_zoom_level >= 70:
|
45 |
current_zoom_level -= 10
|
46 |
else:
|
47 |
if current_zoom_level < 100:
|