seanpedrickcase committed on
Commit
c9e23cb
·
1 Parent(s): a03496e

Adapted text join options to review file to be more resilient to changes in image size. Added possibility of using client secret with AWS login

Browse files
Files changed (4) hide show
  1. app.py +2 -2
  2. tools/auth.py +39 -9
  3. tools/file_conversion.py +52 -2
  4. tools/redaction_review.py +1 -1
app.py CHANGED
@@ -458,9 +458,9 @@ with app:
458
  then(fn = upload_file_to_s3, inputs=[usage_logs_state, usage_s3_logs_loc_state], outputs=[s3_logs_output_textbox])
459
 
460
  # Get some environment variables and Launch the Gradio app
461
- COGNITO_AUTH = get_or_create_env_var('COGNITO_AUTH', '0')
462
  print(f'The value of COGNITO_AUTH is {COGNITO_AUTH}')
463
-
464
  RUN_DIRECT_MODE = get_or_create_env_var('RUN_DIRECT_MODE', '0')
465
  print(f'The value of RUN_DIRECT_MODE is {RUN_DIRECT_MODE}')
466
 
 
458
  then(fn = upload_file_to_s3, inputs=[usage_logs_state, usage_s3_logs_loc_state], outputs=[s3_logs_output_textbox])
459
 
460
  # Get some environment variables and Launch the Gradio app
461
# Runtime switches are looked up through get_or_create_env_var with the
# fallback values given here, and echoed so the active configuration is
# visible in the start-up log.
COGNITO_AUTH = get_or_create_env_var('COGNITO_AUTH', '1')
print(f'The value of COGNITO_AUTH is {COGNITO_AUTH}')

RUN_DIRECT_MODE = get_or_create_env_var('RUN_DIRECT_MODE', '0')
print(f'The value of RUN_DIRECT_MODE is {RUN_DIRECT_MODE}')
466
 
tools/auth.py CHANGED
@@ -1,15 +1,31 @@
1
 
2
  import boto3
3
  import gradio as gr
 
 
 
4
  from tools.helper_functions import get_or_create_env_var
5
 
6
- client_id = get_or_create_env_var('AWS_CLIENT_ID', '') # This client id is borrowed from async gradio app client
7
  print(f'The value of AWS_CLIENT_ID is {client_id}')
8
 
9
- user_pool_id = get_or_create_env_var('AWS_USER_POOL_ID', '')
 
 
 
10
  print(f'The value of AWS_USER_POOL_ID is {user_pool_id}')
11
 
12
- def authenticate_user(username:str, password:str, user_pool_id:str=user_pool_id, client_id:str=client_id):
 
 
 
 
 
 
 
 
 
 
13
  """Authenticates a user against an AWS Cognito user pool.
14
 
15
  Args:
@@ -17,6 +33,7 @@ def authenticate_user(username:str, password:str, user_pool_id:str=user_pool_id,
17
  client_id (str): The ID of the Cognito user pool client.
18
  username (str): The username of the user.
19
  password (str): The password of the user.
 
20
 
21
  Returns:
22
  bool: True if the user is authenticated, False otherwise.
@@ -24,15 +41,28 @@ def authenticate_user(username:str, password:str, user_pool_id:str=user_pool_id,
24
 
25
  client = boto3.client('cognito-idp') # Cognito Identity Provider client
26
 
 
 
 
27
  try:
28
 
 
 
 
 
 
 
 
 
 
29
  response = client.initiate_auth(
30
- AuthFlow='USER_PASSWORD_AUTH',
31
- AuthParameters={
32
- 'USERNAME': username,
33
- 'PASSWORD': password,
34
- },
35
- ClientId=client_id
 
36
  )
37
 
38
  # If successful, you'll receive an AuthenticationResult in the response
 
1
 
2
  import boto3
3
  import gradio as gr
4
+ import hmac
5
+ import hashlib
6
+ import base64
7
  from tools.helper_functions import get_or_create_env_var
8
 
9
# Cognito app-client settings. Identifiers and secrets must be supplied by the
# deployment environment; no real credential is embedded as a fallback, so a
# missing variable resolves to an empty string rather than to a shared account.
client_id = get_or_create_env_var('AWS_CLIENT_ID', '')
print(f'The value of AWS_CLIENT_ID is {client_id}')

client_secret = get_or_create_env_var('AWS_CLIENT_SECRET', '')
# Only report whether a secret is configured; never write the secret itself
# to stdout, where it would end up in logs.
print(f"AWS_CLIENT_SECRET is {'set' if client_secret else 'not set'}")

user_pool_id = get_or_create_env_var('AWS_USER_POOL_ID', '')
print(f'The value of AWS_USER_POOL_ID is {user_pool_id}')
17
 
18
def calculate_secret_hash(client_id, client_secret, username):
    """Return the Base64 text of HMAC-SHA256(client_secret, username + client_id).

    Args:
        client_id: App client id; appended to ``username`` to form the message.
        client_secret: App client secret; used as the HMAC key.
        username: Name of the user being authenticated.

    Returns:
        str: The Base64-encoded digest, suitable for passing as the
        ``SECRET_HASH`` authentication parameter.
    """
    # The signed message is the username immediately followed by the client id.
    payload = str(username + client_id).encode('utf-8')
    key_bytes = str(client_secret).encode('utf-8')

    mac = hmac.new(key_bytes, digestmod=hashlib.sha256)
    mac.update(payload)

    return base64.b64encode(mac.digest()).decode()
27
+
28
+ def authenticate_user(username:str, password:str, user_pool_id:str=user_pool_id, client_id:str=client_id, client_secret:str=client_secret):
29
  """Authenticates a user against an AWS Cognito user pool.
30
 
31
  Args:
 
33
  client_id (str): The ID of the Cognito user pool client.
34
  username (str): The username of the user.
35
  password (str): The password of the user.
36
+ client_secret (str): The client secret of the app client
37
 
38
  Returns:
39
  bool: True if the user is authenticated, False otherwise.
 
41
 
42
  client = boto3.client('cognito-idp') # Cognito Identity Provider client
43
 
44
+ # Compute the secret hash
45
+ secret_hash = calculate_secret_hash(client_id, client_secret, username)
46
+
47
  try:
48
 
49
+ # response = client.initiate_auth(
50
+ # AuthFlow='USER_PASSWORD_AUTH',
51
+ # AuthParameters={
52
+ # 'USERNAME': username,
53
+ # 'PASSWORD': password,
54
+ # },
55
+ # ClientId=client_id
56
+ # )
57
+
58
  response = client.initiate_auth(
59
+ AuthFlow='USER_PASSWORD_AUTH',
60
+ AuthParameters={
61
+ 'USERNAME': username,
62
+ 'PASSWORD': password,
63
+ 'SECRET_HASH': secret_hash
64
+ },
65
+ ClientId=client_id
66
  )
67
 
68
  # If successful, you'll receive an AuthenticationResult in the response
tools/file_conversion.py CHANGED
@@ -650,6 +650,53 @@ def convert_text_pdf_to_img_pdf(in_file_path:str, out_text_file_path:List[str],
650
 
651
  return out_message, out_file_paths
652
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
653
 
654
  def convert_review_json_to_pandas_df(data:List[dict], text_join_data=pd.DataFrame) -> pd.DataFrame:
655
  # Flatten the data
@@ -691,8 +738,11 @@ def convert_review_json_to_pandas_df(data:List[dict], text_join_data=pd.DataFram
691
  text_join_data['page'] = text_join_data['page'].astype(str)
692
  df['page'] = df['page'].astype(str)
693
  text_join_data = text_join_data[['xmin', 'ymin', 'xmax', 'ymax', 'label', 'page', 'text']]
694
- text_join_data[['xmin', 'ymin', 'xmax', 'ymax']] = text_join_data[['xmin', 'ymin', 'xmax', 'ymax']].astype(float).round(0)
695
- df[['xmin1', 'ymin1', 'xmax1', 'ymax1']] = df[['xmin', 'ymin', 'xmax', 'ymax']].astype(float).round(0)
 
 
 
696
 
697
  df = df.merge(text_join_data, left_on = ['xmin1', 'ymin1', 'xmax1', 'ymax1', 'label', 'page'], right_on = ['xmin', 'ymin', 'xmax', 'ymax', 'label', 'page'], how = "left", suffixes=("", "_y"))
698
 
 
650
 
651
  return out_message, out_file_paths
652
 
653
+ # Example DataFrames
654
+ # df1 = pd.DataFrame({
655
+ # 'xmin': [10, 20, 30],
656
+ # 'xmax': [15, 25, 35],
657
+ # 'ymin': [40, 50, 60],
658
+ # 'ymax': [45, 55, 65],
659
+ # 'info1': ['A', 'B', 'C']
660
+ # })
661
+
662
+ # df2 = pd.DataFrame({
663
+ # 'xmin': [12, 18, 32],
664
+ # 'xmax': [14, 24, 34],
665
+ # 'ymin': [42, 48, 62],
666
+ # 'ymax': [44, 54, 66],
667
+ # 'info2': ['X', 'Y', 'Z']
668
+ # })
669
+
670
def join_values_within_threshold(df1, df2, threshold=5):
    """Left-join ``df2`` onto ``df1`` where all four box coordinates nearly match.

    A row of ``df2`` matches a row of ``df1`` when each of ``xmin``, ``xmax``,
    ``ymin`` and ``ymax`` differs by at most ``threshold``. When several
    ``df2`` rows match the same ``df1`` box, only the first is kept.

    Args:
        df1: DataFrame with ``xmin``, ``xmax``, ``ymin``, ``ymax`` columns.
            Every row of ``df1`` appears in the result.
        df2: DataFrame with the same four coordinate columns plus any extra
            columns to bring across.
        threshold: Maximum absolute difference allowed per coordinate.

    Returns:
        pd.DataFrame: ``df1``'s columns followed by the matched ``df2``
        columns. ``df2`` columns whose names also exist in ``df1`` (including
        the coordinates) carry a ``_y`` suffix; unmatched rows hold NaN there.
        Neither input frame is modified.
    """
    coord_cols = ['xmin', 'xmax', 'ymin', 'ymax']
    left_keys = [f'{col}_x' for col in coord_cols]
    cross_key = '_cross_join_key'

    # Cross join through a temporary constant column. ``assign`` returns new
    # frames, so the helper column never leaks into the caller's DataFrames.
    merged = pd.merge(
        df1.assign(**{cross_key: 1}),
        df2.assign(**{cross_key: 1}),
        on=cross_key,
    ).drop(columns=[cross_key])

    # A pair qualifies only if every coordinate is within the threshold.
    within = pd.concat(
        [(merged[f'{col}_x'] - merged[f'{col}_y']).abs() <= threshold
         for col in coord_cols],
        axis=1,
    ).all(axis=1)
    filtered = merged[within]

    # Names under which df2's columns appear after the cross join: pandas
    # suffixes only those that collide with a df1 column.
    df2_cols = [f'{col}_y' if col in df1.columns else col for col in df2.columns]

    # Keep the first match per df1 box, carrying only df2's data so the
    # merge-back below does not duplicate df1's own columns.
    matches = filtered[left_keys + df2_cols].drop_duplicates(subset=left_keys)

    joined = df1.merge(matches, left_on=coord_cols, right_on=left_keys, how='left')
    return joined.drop(columns=left_keys)
699
+
700
 
701
  def convert_review_json_to_pandas_df(data:List[dict], text_join_data=pd.DataFrame) -> pd.DataFrame:
702
  # Flatten the data
 
738
  text_join_data['page'] = text_join_data['page'].astype(str)
739
  df['page'] = df['page'].astype(str)
740
  text_join_data = text_join_data[['xmin', 'ymin', 'xmax', 'ymax', 'label', 'page', 'text']]
741
+ # Round to the closest number divisible by 5
742
+ text_join_data[['xmin', 'ymin', 'xmax', 'ymax']] = (text_join_data[['xmin', 'ymin', 'xmax', 'ymax']].astype(float) / 5).round() * 5
743
+ text_join_data = text_join_data.drop_duplicates(['xmin', 'ymin', 'xmax', 'ymax', 'label', 'page'])
744
+
745
+ df[['xmin1', 'ymin1', 'xmax1', 'ymax1']] = (df[['xmin', 'ymin', 'xmax', 'ymax']].astype(float) / 5).round() * 5
746
 
747
  df = df.merge(text_join_data, left_on = ['xmin1', 'ymin1', 'xmax1', 'ymax1', 'label', 'page'], right_on = ['xmin', 'ymin', 'xmax', 'ymax', 'label', 'page'], how = "left", suffixes=("", "_y"))
748
 
tools/redaction_review.py CHANGED
@@ -41,7 +41,7 @@ def increase_page(number:int, image_annotator_object:AnnotatedImageData):
41
 
42
  def update_zoom(current_zoom_level:int, annotate_current_page:int, decrease:bool=True):
43
  if decrease == False:
44
- if current_zoom_level >= 50:
45
  current_zoom_level -= 10
46
  else:
47
  if current_zoom_level < 100:
 
41
 
42
  def update_zoom(current_zoom_level:int, annotate_current_page:int, decrease:bool=True):
43
  if decrease == False:
44
+ if current_zoom_level >= 70:
45
  current_zoom_level -= 10
46
  else:
47
  if current_zoom_level < 100: