seanpedrickcase committed on
Commit
bbf844d
·
1 Parent(s): 6a6aac2

Added examples to tops of various tabs to demonstrate basic functions (optional). Minor changes to example csv ocr output

Browse files
app.py CHANGED
@@ -1,5 +1,4 @@
1
  import os
2
- import time
3
 
4
  import gradio as gr
5
  import pandas as pd
@@ -95,6 +94,7 @@ from tools.config import (
95
  SAVE_LOGS_TO_DYNAMODB,
96
  SESSION_OUTPUT_FOLDER,
97
  SHOW_COSTS,
 
98
  SHOW_LANGUAGE_SELECTION,
99
  SHOW_WHOLE_DOCUMENT_TEXTRACT_CALL_OPTIONS,
100
  TABULAR_PII_DETECTION_MODELS,
@@ -206,8 +206,11 @@ pd.set_option("future.no_silent_downcasting", True)
206
  ensure_folder_exists(CONFIG_FOLDER)
207
  ensure_folder_exists(OUTPUT_FOLDER)
208
  ensure_folder_exists(INPUT_FOLDER)
209
- ensure_folder_exists(GRADIO_TEMP_DIR)
210
- ensure_folder_exists(MPLCONFIGDIR)
 
 
 
211
  ensure_folder_exists(FEEDBACK_LOGS_FOLDER)
212
  ensure_folder_exists(ACCESS_LOGS_FOLDER)
213
  ensure_folder_exists(USAGE_LOGS_FOLDER)
@@ -291,79 +294,116 @@ if DEFAULT_HANDWRITE_SIGNATURE_CHECKBOX:
291
  CHOSEN_COMPREHEND_ENTITIES.extend(custom_entities)
292
  FULL_COMPREHEND_ENTITY_LIST.extend(custom_entities)
293
 
294
- FILE_INPUT_HEIGHT = int(FILE_INPUT_HEIGHT)
295
-
296
-
297
- # Wrapper functions to add timing to deduplication functions
298
- def run_duplicate_analysis_with_timing(
299
- files,
300
- threshold,
301
- min_words,
302
- min_consecutive,
303
- greedy_match,
304
- combine_pages,
305
- output_folder,
306
- ):
307
- """
308
- Wrapper for run_duplicate_analysis that adds timing and returns time taken.
309
- """
310
- start_time = time.time()
311
- results_df, output_paths, full_data_by_file = run_duplicate_analysis(
312
- files=files,
313
- threshold=threshold,
314
- min_words=min_words,
315
- min_consecutive=min_consecutive,
316
- greedy_match=greedy_match,
317
- combine_pages=combine_pages,
318
- output_folder=output_folder,
319
- )
320
- end_time = time.time()
321
- processing_time = end_time - start_time
322
-
323
- # Store the time taken in a global variable for logging
324
- global duplicate_analysis_time_taken
325
- duplicate_analysis_time_taken = processing_time
326
-
327
- return results_df, output_paths, full_data_by_file
328
-
329
-
330
- def run_tabular_duplicate_detection_with_timing(
331
- files,
332
- threshold,
333
- min_words,
334
- text_columns,
335
- output_folder,
336
- do_initial_clean_dup,
337
- in_excel_tabular_sheets,
338
- remove_duplicate_rows,
339
- ):
340
- """
341
- Wrapper for run_tabular_duplicate_detection that adds timing and returns time taken.
342
- """
343
- start_time = time.time()
344
- results_df, output_paths, file_choices = run_tabular_duplicate_detection(
345
- files=files,
346
- threshold=threshold,
347
- min_words=min_words,
348
- text_columns=text_columns,
349
- output_folder=output_folder,
350
- do_initial_clean_dup=do_initial_clean_dup,
351
- in_excel_tabular_sheets=in_excel_tabular_sheets,
352
- remove_duplicate_rows=remove_duplicate_rows,
353
- )
354
- end_time = time.time()
355
- processing_time = end_time - start_time
356
-
357
- # Store the time taken in a global variable for logging
358
- global tabular_duplicate_analysis_time_taken
359
- tabular_duplicate_analysis_time_taken = processing_time
360
-
361
- return results_df, output_paths, file_choices
362
-
363
-
364
- # Initialize global variables for timing
365
- duplicate_analysis_time_taken = 0.0
366
- tabular_duplicate_analysis_time_taken = 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
367
 
368
  # Create the gradio interface
369
  app = gr.Blocks(
@@ -967,35 +1007,105 @@ with app:
967
  # REDACTION PDF/IMAGES TABLE
968
  ###
969
  with gr.Tab("Redact PDFs/images"):
970
- with gr.Accordion("Redact document", open=True):
971
- in_doc_files = gr.File(
972
- label="Choose a PDF document or image file (PDF, JPG, PNG)",
973
- file_count="multiple",
974
- file_types=[".pdf", ".jpg", ".png", ".json", ".zip"],
975
- height=FILE_INPUT_HEIGHT,
976
- )
977
 
978
- text_extract_method_radio = gr.Radio(
979
- label="""Choose text extraction method. Local options are lower quality but cost nothing - they may be worth a try if you are willing to spend some time reviewing outputs. AWS Textract has a cost per page - £2.66 ($3.50) per 1,000 pages with signature detection (default), £1.14 ($1.50) without. Change the settings in the tab below (AWS Textract signature detection) to change this.""",
980
- value=DEFAULT_TEXT_EXTRACTION_MODEL,
981
- choices=TEXT_EXTRACTION_MODELS,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
982
  )
983
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
984
  with gr.Accordion(
985
  "Enable AWS Textract signature detection (default is off)", open=False
986
  ):
987
- handwrite_signature_checkbox = gr.CheckboxGroup(
988
- label="AWS Textract extraction settings",
989
- choices=HANDWRITE_SIGNATURE_TEXTBOX_FULL_OPTIONS,
990
- value=DEFAULT_HANDWRITE_SIGNATURE_CHECKBOX,
991
- )
 
992
 
993
  with gr.Row(equal_height=True):
994
- pii_identification_method_drop = gr.Radio(
995
- label="""Choose personal information detection method. The local model is lower quality but costs nothing - it may be worth a try if you are willing to spend some time reviewing outputs, or if you are only interested in searching for custom search terms (see Redaction settings - custom deny list). AWS Comprehend has a cost of around £0.0075 ($0.01) per 10,000 characters.""",
996
- value=DEFAULT_PII_DETECTION_MODEL,
997
- choices=PII_DETECTION_MODELS,
998
- )
 
999
 
1000
  if SHOW_COSTS == "True":
1001
  with gr.Accordion(
@@ -1536,30 +1646,72 @@ with app:
1536
  "Search for duplicate pages/subdocuments in your ocr_output files. By default, this function will search for duplicate text across multiple pages, and then join consecutive matching pages together into matched 'subdocuments'. The results can be reviewed below, false positives removed, and then the verified results applied to a document you have loaded in on the 'Review redactions' tab."
1537
  )
1538
 
1539
- with gr.Accordion("Step 1: Configure and run analysis", open=True):
1540
- in_duplicate_pages = gr.File(
1541
- label="Upload one or multiple 'ocr_output.csv' files to find duplicate pages and subdocuments",
1542
- file_count="multiple",
1543
- height=FILE_INPUT_HEIGHT,
1544
- file_types=[".csv"],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1545
  )
1546
 
 
 
 
 
 
 
 
 
 
1547
  with gr.Accordion("Duplicate matching parameters", open=False):
1548
  with gr.Row():
1549
- duplicate_threshold_input = gr.Number(
1550
- value=DEFAULT_DUPLICATE_DETECTION_THRESHOLD,
1551
- label="Similarity threshold",
1552
- info="Score (0-1) to consider pages a match.",
1553
- )
1554
- min_word_count_input = gr.Number(
1555
- value=DEFAULT_MIN_WORD_COUNT,
1556
- label="Minimum word count",
1557
- info="Pages with fewer words than this value are ignored.",
1558
- )
1559
- combine_page_text_for_duplicates_bool = gr.Checkbox(
1560
- value=True,
1561
- label="Analyse duplicate text by page (off for by line)",
1562
- )
 
 
 
 
 
1563
 
1564
  gr.Markdown("#### Matching Strategy")
1565
  greedy_match_input = gr.Checkbox(
@@ -1653,14 +1805,62 @@ with app:
1653
  """Choose Word or a tabular data file (xlsx or csv) to redact. Note that when redacting complex Word files with e.g. images, some content/formatting will be removed, and it may not attempt to redact headers. You may prefer to convert the doc file to PDF in Word, and then run it through the first tab of this app (Print to PDF in print settings). Alternatively, an xlsx file output is provided when redacting docx files directly to allow for copying and pasting outputs back into the original document if preferred."""
1654
  )
1655
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1656
  with gr.Accordion("Redact Word or Excel/csv files", open=True):
1657
  with gr.Accordion("Upload docx, xlsx, or csv files", open=True):
1658
- in_data_files = gr.File(
1659
- label="Choose Excel or csv files",
1660
- file_count="multiple",
1661
- file_types=[".xlsx", ".xls", ".csv", ".parquet", ".docx"],
1662
- height=FILE_INPUT_HEIGHT,
1663
- )
 
1664
  with gr.Accordion("Redact open text", open=False):
1665
  in_text = gr.Textbox(
1666
  label="Enter open text",
@@ -1676,34 +1876,39 @@ with app:
1676
  allow_custom_value=True,
1677
  )
1678
 
1679
- in_colnames = gr.Dropdown(
1680
- choices=["Choose columns to anonymise"],
1681
- multiselect=True,
1682
- label="Select columns that you want to anonymise (showing columns present across all files).",
1683
- )
1684
-
1685
- pii_identification_method_drop_tabular = gr.Radio(
1686
- label="Choose PII detection method. AWS Comprehend has a cost of approximately $0.01 per 10,000 characters.",
1687
- value=DEFAULT_PII_DETECTION_MODEL,
1688
- choices=TABULAR_PII_DETECTION_MODELS,
1689
- )
 
 
 
1690
 
1691
  with gr.Accordion(
1692
  "Anonymisation output format - by default will replace PII with a blank space",
1693
  open=False,
1694
  ):
1695
  with gr.Row():
1696
- anon_strategy = gr.Radio(
1697
- choices=[
1698
- "replace with 'REDACTED'",
1699
- "replace with <ENTITY_NAME>",
1700
- "redact completely",
1701
- "hash",
1702
- "mask",
1703
- ],
1704
- label="Select an anonymisation method.",
1705
- value=DEFAULT_TABULAR_ANONYMISATION_STRATEGY,
1706
- ) # , "encrypt", "fake_first_name" are also available, but are not currently included as not that useful in current form
 
 
1707
  do_initial_clean = gr.Checkbox(
1708
  label="Do initial clean of text (remove URLs, HTML tags, and non-ASCII characters)",
1709
  value=DO_INITIAL_TABULAR_DATA_CLEAN,
@@ -1713,15 +1918,15 @@ with app:
1713
  "Redact text/data files", variant="primary"
1714
  )
1715
 
1716
- with gr.Row():
1717
- text_output_summary = gr.Textbox(label="Output result", lines=4)
1718
- text_output_file = gr.File(label="Output files")
1719
- text_tabular_files_done = gr.Number(
1720
- value=0,
1721
- label="Number of tabular files redacted",
1722
- interactive=False,
1723
- visible=False,
1724
- )
1725
 
1726
  ###
1727
  # TABULAR DUPLICATE DETECTION
@@ -1732,12 +1937,13 @@ with app:
1732
  )
1733
 
1734
  with gr.Accordion("Step 1: Upload files and configure analysis", open=True):
1735
- in_tabular_duplicate_files = gr.File(
1736
- label="Upload CSV, Excel, or Parquet files to find duplicate cells/rows. Note that the app will remove duplicates from later cells/files that are found in earlier cells/files and not vice versa.",
1737
- file_count="multiple",
1738
- file_types=[".csv", ".xlsx", ".xls", ".parquet"],
1739
- height=FILE_INPUT_HEIGHT,
1740
- )
 
1741
 
1742
  with gr.Row(equal_height=True):
1743
  tabular_duplicate_threshold = gr.Number(
@@ -1768,12 +1974,13 @@ with app:
1768
  allow_custom_value=True,
1769
  )
1770
 
1771
- tabular_text_columns = gr.Dropdown(
1772
- choices=DEFAULT_TEXT_COLUMNS,
1773
- multiselect=True,
1774
- label="Select specific columns to analyse (leave empty to analyse all text columns simultaneously - i.e. all text is joined together)",
1775
- info="If no columns selected, all text columns will combined together and analysed",
1776
- )
 
1777
 
1778
  find_tabular_duplicates_btn = gr.Button(
1779
  value="Find duplicate cells/rows", variant="primary"
@@ -1937,18 +2144,20 @@ with app:
1937
  )
1938
 
1939
  with gr.Accordion("Select entity types to redact", open=True):
1940
- in_redact_entities = gr.Dropdown(
1941
- value=CHOSEN_REDACT_ENTITIES,
1942
- choices=FULL_ENTITY_LIST,
1943
- multiselect=True,
1944
- label="Local PII identification model (click empty space in box for full list)",
1945
- )
1946
- in_redact_comprehend_entities = gr.Dropdown(
1947
- value=CHOSEN_COMPREHEND_ENTITIES,
1948
- choices=FULL_COMPREHEND_ENTITY_LIST,
1949
- multiselect=True,
1950
- label="AWS Comprehend PII identification model (click empty space in box for full list)",
1951
- )
 
 
1952
 
1953
  with gr.Row():
1954
  max_fuzzy_spelling_mistakes_num = gr.Number(
@@ -5013,6 +5222,7 @@ with app:
5013
  comprehend_query_number,
5014
  ],
5015
  api_name="redact_data",
 
5016
  )
5017
 
5018
  # If the output file count text box changes, keep going with redacting each data file until done
@@ -5053,6 +5263,7 @@ with app:
5053
  actual_time_taken_number,
5054
  comprehend_query_number,
5055
  ],
 
5056
  ).success(
5057
  fn=reveal_feedback_buttons,
5058
  outputs=[
@@ -5085,6 +5296,7 @@ with app:
5085
  actual_time_taken_number,
5086
  task_textbox,
5087
  ],
 
5088
  )
5089
 
5090
  # full_duplicated_data_df,
 
1
  import os
 
2
 
3
  import gradio as gr
4
  import pandas as pd
 
94
  SAVE_LOGS_TO_DYNAMODB,
95
  SESSION_OUTPUT_FOLDER,
96
  SHOW_COSTS,
97
+ SHOW_EXAMPLES,
98
  SHOW_LANGUAGE_SELECTION,
99
  SHOW_WHOLE_DOCUMENT_TEXTRACT_CALL_OPTIONS,
100
  TABULAR_PII_DETECTION_MODELS,
 
206
  ensure_folder_exists(CONFIG_FOLDER)
207
  ensure_folder_exists(OUTPUT_FOLDER)
208
  ensure_folder_exists(INPUT_FOLDER)
209
+ if GRADIO_TEMP_DIR:
210
+ ensure_folder_exists(GRADIO_TEMP_DIR)
211
+ if MPLCONFIGDIR:
212
+ ensure_folder_exists(MPLCONFIGDIR)
213
+
214
  ensure_folder_exists(FEEDBACK_LOGS_FOLDER)
215
  ensure_folder_exists(ACCESS_LOGS_FOLDER)
216
  ensure_folder_exists(USAGE_LOGS_FOLDER)
 
294
  CHOSEN_COMPREHEND_ENTITIES.extend(custom_entities)
295
  FULL_COMPREHEND_ENTITY_LIST.extend(custom_entities)
296
 
297
+ # Load some components outside of blocks context that are used for examples
298
+ ## Redaction examples
299
+ in_doc_files = gr.File(
300
+ label="Choose a PDF document or image file (PDF, JPG, PNG)",
301
+ file_count="multiple",
302
+ file_types=[".pdf", ".jpg", ".png", ".json", ".zip"],
303
+ height=FILE_INPUT_HEIGHT,
304
+ )
305
+
306
+ text_extract_method_radio = gr.Radio(
307
+ label="""Choose text extraction method. Local options are lower quality but cost nothing - they may be worth a try if you are willing to spend some time reviewing outputs. AWS Textract has a cost per page - £2.66 ($3.50) per 1,000 pages with signature detection (default), £1.14 ($1.50) without. Change the settings in the tab below (AWS Textract signature detection) to change this.""",
308
+ value=DEFAULT_TEXT_EXTRACTION_MODEL,
309
+ choices=TEXT_EXTRACTION_MODELS,
310
+ )
311
+
312
+ pii_identification_method_drop = gr.Radio(
313
+ label="""Choose personal information detection method. The local model is lower quality but costs nothing - it may be worth a try if you are willing to spend some time reviewing outputs, or if you are only interested in searching for custom search terms (see Redaction settings - custom deny list). AWS Comprehend has a cost of around £0.0075 ($0.01) per 10,000 characters.""",
314
+ value=DEFAULT_PII_DETECTION_MODEL,
315
+ choices=PII_DETECTION_MODELS,
316
+ )
317
+
318
+ handwrite_signature_checkbox = gr.CheckboxGroup(
319
+ label="AWS Textract extraction settings",
320
+ choices=HANDWRITE_SIGNATURE_TEXTBOX_FULL_OPTIONS,
321
+ value=DEFAULT_HANDWRITE_SIGNATURE_CHECKBOX,
322
+ )
323
+
324
+ in_redact_entities = gr.Dropdown(
325
+ value=CHOSEN_REDACT_ENTITIES,
326
+ choices=FULL_ENTITY_LIST,
327
+ multiselect=True,
328
+ label="Local PII identification model (click empty space in box for full list)",
329
+ )
330
+ in_redact_comprehend_entities = gr.Dropdown(
331
+ value=CHOSEN_COMPREHEND_ENTITIES,
332
+ choices=FULL_COMPREHEND_ENTITY_LIST,
333
+ multiselect=True,
334
+ label="AWS Comprehend PII identification model (click empty space in box for full list)",
335
+ )
336
+
337
+ ## Deduplication examples
338
+ in_duplicate_pages = gr.File(
339
+ label="Upload one or multiple 'ocr_output.csv' files to find duplicate pages and subdocuments",
340
+ file_count="multiple",
341
+ height=FILE_INPUT_HEIGHT,
342
+ file_types=[".csv"],
343
+ )
344
+
345
+ duplicate_threshold_input = gr.Number(
346
+ value=DEFAULT_DUPLICATE_DETECTION_THRESHOLD,
347
+ label="Similarity threshold",
348
+ info="Score (0-1) to consider pages a match.",
349
+ )
350
+
351
+ min_word_count_input = gr.Number(
352
+ value=DEFAULT_MIN_WORD_COUNT,
353
+ label="Minimum word count",
354
+ info="Pages with fewer words than this value are ignored.",
355
+ )
356
+
357
+ combine_page_text_for_duplicates_bool = gr.Checkbox(
358
+ value=True,
359
+ label="Analyse duplicate text by page (off for by line)",
360
+ )
361
+
362
+ ## Tabular examples
363
+ in_data_files = gr.File(
364
+ label="Choose Excel or csv files",
365
+ file_count="multiple",
366
+ file_types=[".xlsx", ".xls", ".csv", ".parquet", ".docx"],
367
+ height=FILE_INPUT_HEIGHT,
368
+ )
369
+
370
+ in_colnames = gr.Dropdown(
371
+ choices=["Choose columns to anonymise"],
372
+ multiselect=True,
373
+ allow_custom_value=True,
374
+ label="Select columns that you want to anonymise (showing columns present across all files).",
375
+ )
376
+
377
+ pii_identification_method_drop_tabular = gr.Radio(
378
+ label="Choose PII detection method. AWS Comprehend has a cost of approximately $0.01 per 10,000 characters.",
379
+ value=DEFAULT_PII_DETECTION_MODEL,
380
+ choices=TABULAR_PII_DETECTION_MODELS,
381
+ )
382
+
383
+ anon_strategy = gr.Radio(
384
+ choices=[
385
+ "replace with 'REDACTED'",
386
+ "replace with <ENTITY_NAME>",
387
+ "redact completely",
388
+ "hash",
389
+ "mask",
390
+ ],
391
+ label="Select an anonymisation method.",
392
+ value=DEFAULT_TABULAR_ANONYMISATION_STRATEGY,
393
+ ) # , "encrypt", "fake_first_name" are also available, but are not currently included as not that useful in current form
394
+
395
+ in_tabular_duplicate_files = gr.File(
396
+ label="Upload CSV, Excel, or Parquet files to find duplicate cells/rows. Note that the app will remove duplicates from later cells/files that are found in earlier cells/files and not vice versa.",
397
+ file_count="multiple",
398
+ file_types=[".csv", ".xlsx", ".xls", ".parquet"],
399
+ height=FILE_INPUT_HEIGHT,
400
+ )
401
+
402
+ tabular_text_columns = gr.Dropdown(
403
+ label="Choose columns to deduplicate",
404
+ multiselect=True,
405
+ allow_custom_value=True,
406
+ )
407
 
408
  # Create the gradio interface
409
  app = gr.Blocks(
 
1007
  # REDACTION PDF/IMAGES TABLE
1008
  ###
1009
  with gr.Tab("Redact PDFs/images"):
 
 
 
 
 
 
 
1010
 
1011
+ # Examples for PDF/image redaction
1012
+ if SHOW_EXAMPLES == "True":
1013
+ gr.Markdown(
1014
+ "### Try an example - Click on an example below and then the 'Extract text and redact document' button:"
1015
+ )
1016
+ redaction_examples = gr.Examples(
1017
+ examples=[
1018
+ [
1019
+ [
1020
+ "example_data/example_of_emails_sent_to_a_professor_before_applying.pdf"
1021
+ ],
1022
+ "Local model - selectable text",
1023
+ "Local",
1024
+ [],
1025
+ CHOSEN_REDACT_ENTITIES,
1026
+ CHOSEN_COMPREHEND_ENTITIES,
1027
+ [
1028
+ "example_data/example_of_emails_sent_to_a_professor_before_applying.pdf"
1029
+ ],
1030
+ ],
1031
+ [
1032
+ ["example_data/example_complaint_letter.jpg"],
1033
+ "Local OCR model - PDFs without selectable text",
1034
+ "Local",
1035
+ [],
1036
+ CHOSEN_REDACT_ENTITIES,
1037
+ CHOSEN_COMPREHEND_ENTITIES,
1038
+ ["example_data/example_complaint_letter.jpg"],
1039
+ ],
1040
+ [
1041
+ ["example_data/graduate-job-example-cover-letter.pdf"],
1042
+ "Local OCR model - PDFs without selectable text",
1043
+ "Local",
1044
+ [],
1045
+ ["TITLES", "PERSON", "DATE_TIME"],
1046
+ CHOSEN_COMPREHEND_ENTITIES,
1047
+ ["example_data/graduate-job-example-cover-letter.pdf"],
1048
+ ],
1049
+ [
1050
+ ["example_data/Partnership-Agreement-Toolkit_0_0.pdf"],
1051
+ "AWS Textract service - all PDF types",
1052
+ "AWS Comprehend",
1053
+ ["Extract handwriting", "Extract signatures"],
1054
+ CHOSEN_REDACT_ENTITIES,
1055
+ CHOSEN_COMPREHEND_ENTITIES,
1056
+ ["example_data/Partnership-Agreement-Toolkit_0_0.pdf"],
1057
+ ],
1058
+ ],
1059
+ inputs=[
1060
+ in_doc_files,
1061
+ text_extract_method_radio,
1062
+ pii_identification_method_drop,
1063
+ handwrite_signature_checkbox,
1064
+ in_redact_entities,
1065
+ in_redact_comprehend_entities,
1066
+ prepared_pdf_state,
1067
+ ],
1068
+ example_labels=[
1069
+ "PDF with selectable text redaction",
1070
+ "Image redaction with local OCR",
1071
+ "PDF redaction with custom entities (TITLES, PERSON, DATE_TIME)",
1072
+ "PDF redaction with AWS services and signature detection",
1073
+ ],
1074
  )
1075
 
1076
+ with gr.Accordion("Redact document", open=True):
1077
+ # in_doc_files = gr.File(
1078
+ # label="Choose a PDF document or image file (PDF, JPG, PNG)",
1079
+ # file_count="multiple",
1080
+ # file_types=[".pdf", ".jpg", ".png", ".json", ".zip"],
1081
+ # height=FILE_INPUT_HEIGHT,
1082
+ # )
1083
+ in_doc_files.render()
1084
+
1085
+ # text_extract_method_radio = gr.Radio(
1086
+ # label="""Choose text extraction method. Local options are lower quality but cost nothing - they may be worth a try if you are willing to spend some time reviewing outputs. AWS Textract has a cost per page - £2.66 ($3.50) per 1,000 pages with signature detection (default), £1.14 ($1.50) without. Change the settings in the tab below (AWS Textract signature detection) to change this.""",
1087
+ # value=DEFAULT_TEXT_EXTRACTION_MODEL,
1088
+ # choices=TEXT_EXTRACTION_MODELS,
1089
+ # )
1090
+ text_extract_method_radio.render()
1091
+
1092
  with gr.Accordion(
1093
  "Enable AWS Textract signature detection (default is off)", open=False
1094
  ):
1095
+ # handwrite_signature_checkbox = gr.CheckboxGroup(
1096
+ # label="AWS Textract extraction settings",
1097
+ # choices=HANDWRITE_SIGNATURE_TEXTBOX_FULL_OPTIONS,
1098
+ # value=DEFAULT_HANDWRITE_SIGNATURE_CHECKBOX,
1099
+ # )
1100
+ handwrite_signature_checkbox.render()
1101
 
1102
  with gr.Row(equal_height=True):
1103
+ # pii_identification_method_drop = gr.Radio(
1104
+ # label="""Choose personal information detection method. The local model is lower quality but costs nothing - it may be worth a try if you are willing to spend some time reviewing outputs, or if you are only interested in searching for custom search terms (see Redaction settings - custom deny list). AWS Comprehend has a cost of around £0.0075 ($0.01) per 10,000 characters.""",
1105
+ # value=DEFAULT_PII_DETECTION_MODEL,
1106
+ # choices=PII_DETECTION_MODELS,
1107
+ # )
1108
+ pii_identification_method_drop.render()
1109
 
1110
  if SHOW_COSTS == "True":
1111
  with gr.Accordion(
 
1646
  "Search for duplicate pages/subdocuments in your ocr_output files. By default, this function will search for duplicate text across multiple pages, and then join consecutive matching pages together into matched 'subdocuments'. The results can be reviewed below, false positives removed, and then the verified results applied to a document you have loaded in on the 'Review redactions' tab."
1647
  )
1648
 
1649
+ # Examples for duplicate page detection
1650
+ if SHOW_EXAMPLES == "True":
1651
+ gr.Markdown(
1652
+ "### Try an example - Click on an example below and then the 'Identify duplicate pages/subdocuments' button:"
1653
+ )
1654
+ duplicate_examples = gr.Examples(
1655
+ examples=[
1656
+ [
1657
+ [
1658
+ "example_data/example_outputs/doubled_output_joined.pdf_ocr_output.csv"
1659
+ ],
1660
+ 0.95,
1661
+ 10,
1662
+ True,
1663
+ ],
1664
+ [
1665
+ [
1666
+ "example_data/example_outputs/doubled_output_joined.pdf_ocr_output.csv"
1667
+ ],
1668
+ 0.95,
1669
+ 3,
1670
+ False,
1671
+ ],
1672
+ ],
1673
+ inputs=[
1674
+ in_duplicate_pages,
1675
+ duplicate_threshold_input,
1676
+ min_word_count_input,
1677
+ combine_page_text_for_duplicates_bool,
1678
+ ],
1679
+ example_labels=[
1680
+ "Find duplicate pages of text in document OCR outputs",
1681
+ "Find duplicate text lines in document OCR outputs",
1682
+ ],
1683
  )
1684
 
1685
+ with gr.Accordion("Step 1: Configure and run analysis", open=True):
1686
+ # in_duplicate_pages = gr.File(
1687
+ # label="Upload one or multiple 'ocr_output.csv' files to find duplicate pages and subdocuments",
1688
+ # file_count="multiple",
1689
+ # height=FILE_INPUT_HEIGHT,
1690
+ # file_types=[".csv"],
1691
+ # )
1692
+ in_duplicate_pages.render()
1693
+
1694
  with gr.Accordion("Duplicate matching parameters", open=False):
1695
  with gr.Row():
1696
+ # duplicate_threshold_input = gr.Number(
1697
+ # value=DEFAULT_DUPLICATE_DETECTION_THRESHOLD,
1698
+ # label="Similarity threshold",
1699
+ # info="Score (0-1) to consider pages a match.",
1700
+ # )
1701
+ duplicate_threshold_input.render()
1702
+
1703
+ # min_word_count_input = gr.Number(
1704
+ # value=DEFAULT_MIN_WORD_COUNT,
1705
+ # label="Minimum word count",
1706
+ # info="Pages with fewer words than this value are ignored.",
1707
+ # )
1708
+ min_word_count_input.render()
1709
+
1710
+ # combine_page_text_for_duplicates_bool = gr.Checkbox(
1711
+ # value=True,
1712
+ # label="Analyse duplicate text by page (off for by line)",
1713
+ # )
1714
+ combine_page_text_for_duplicates_bool.render()
1715
 
1716
  gr.Markdown("#### Matching Strategy")
1717
  greedy_match_input = gr.Checkbox(
 
1805
  """Choose Word or a tabular data file (xlsx or csv) to redact. Note that when redacting complex Word files with e.g. images, some content/formatting will be removed, and it may not attempt to redact headers. You may prefer to convert the doc file to PDF in Word, and then run it through the first tab of this app (Print to PDF in print settings). Alternatively, an xlsx file output is provided when redacting docx files directly to allow for copying and pasting outputs back into the original document if preferred."""
1806
  )
1807
 
1808
+ # Examples for Word/Excel/csv redaction and tabular duplicate detection
1809
+ if SHOW_EXAMPLES == "True":
1810
+ gr.Markdown(
1811
+ "### Try an example - Click on an example below and then the 'Redact text/data files' button for redaction, or the 'Find duplicate cells/rows' button for duplicate detection:"
1812
+ )
1813
+ tabular_examples = gr.Examples(
1814
+ examples=[
1815
+ [
1816
+ ["example_data/combined_case_notes.csv"],
1817
+ ["Case Note", "Client"],
1818
+ "Local",
1819
+ "replace with 'REDACTED'",
1820
+ ["example_data/combined_case_notes.csv"],
1821
+ ["Case Note"],
1822
+ ],
1823
+ [
1824
+ ["example_data/Bold minimalist professional cover letter.docx"],
1825
+ [],
1826
+ "Local",
1827
+ "replace with 'REDACTED'",
1828
+ [],
1829
+ [],
1830
+ ],
1831
+ [
1832
+ ["example_data/Lambeth_2030-Our_Future_Our_Lambeth.pdf.csv"],
1833
+ ["text"],
1834
+ "Local",
1835
+ "replace with 'REDACTED'",
1836
+ ["example_data/Lambeth_2030-Our_Future_Our_Lambeth.pdf.csv"],
1837
+ ["text"],
1838
+ ],
1839
+ ],
1840
+ inputs=[
1841
+ in_data_files,
1842
+ in_colnames,
1843
+ pii_identification_method_drop_tabular,
1844
+ anon_strategy,
1845
+ in_tabular_duplicate_files,
1846
+ tabular_text_columns,
1847
+ ],
1848
+ example_labels=[
1849
+ "CSV file redaction with specific columns - remove text",
1850
+ "Word document redaction - replace with REDACTED",
1851
+ "Tabular duplicate detection in CSV files",
1852
+ ],
1853
+ )
1854
+
1855
  with gr.Accordion("Redact Word or Excel/csv files", open=True):
1856
  with gr.Accordion("Upload docx, xlsx, or csv files", open=True):
1857
+ # in_data_files = gr.File(
1858
+ # label="Choose Excel or csv files",
1859
+ # file_count="multiple",
1860
+ # file_types=[".xlsx", ".xls", ".csv", ".parquet", ".docx"],
1861
+ # height=FILE_INPUT_HEIGHT,
1862
+ # )
1863
+ in_data_files.render()
1864
  with gr.Accordion("Redact open text", open=False):
1865
  in_text = gr.Textbox(
1866
  label="Enter open text",
 
1876
  allow_custom_value=True,
1877
  )
1878
 
1879
+ # in_colnames = gr.Dropdown(
1880
+ # choices=["Choose columns to anonymise"],
1881
+ # multiselect=True,
1882
+ # allow_custom_value=True,
1883
+ # label="Select columns that you want to anonymise (showing columns present across all files).",
1884
+ # )
1885
+ in_colnames.render()
1886
+
1887
+ # pii_identification_method_drop_tabular = gr.Radio(
1888
+ # label="Choose PII detection method. AWS Comprehend has a cost of approximately $0.01 per 10,000 characters.",
1889
+ # value=DEFAULT_PII_DETECTION_MODEL,
1890
+ # choices=TABULAR_PII_DETECTION_MODELS,
1891
+ # )
1892
+ pii_identification_method_drop_tabular.render()
1893
 
1894
  with gr.Accordion(
1895
  "Anonymisation output format - by default will replace PII with a blank space",
1896
  open=False,
1897
  ):
1898
  with gr.Row():
1899
+ # anon_strategy = gr.Radio(
1900
+ # choices=[
1901
+ # "replace with 'REDACTED'",
1902
+ # "replace with <ENTITY_NAME>",
1903
+ # "redact completely",
1904
+ # "hash",
1905
+ # "mask",
1906
+ # ],
1907
+ # label="Select an anonymisation method.",
1908
+ # value=DEFAULT_TABULAR_ANONYMISATION_STRATEGY,
1909
+ # ) # , "encrypt", "fake_first_name" are also available, but are not currently included as not that useful in current form
1910
+ anon_strategy.render()
1911
+
1912
  do_initial_clean = gr.Checkbox(
1913
  label="Do initial clean of text (remove URLs, HTML tags, and non-ASCII characters)",
1914
  value=DO_INITIAL_TABULAR_DATA_CLEAN,
 
1918
  "Redact text/data files", variant="primary"
1919
  )
1920
 
1921
+ with gr.Row():
1922
+ text_output_summary = gr.Textbox(label="Output result", lines=4)
1923
+ text_output_file = gr.File(label="Output files")
1924
+ text_tabular_files_done = gr.Number(
1925
+ value=0,
1926
+ label="Number of tabular files redacted",
1927
+ interactive=False,
1928
+ visible=False,
1929
+ )
1930
 
1931
  ###
1932
  # TABULAR DUPLICATE DETECTION
 
1937
  )
1938
 
1939
  with gr.Accordion("Step 1: Upload files and configure analysis", open=True):
1940
+ # in_tabular_duplicate_files = gr.File(
1941
+ # label="Upload CSV, Excel, or Parquet files to find duplicate cells/rows. Note that the app will remove duplicates from later cells/files that are found in earlier cells/files and not vice versa.",
1942
+ # file_count="multiple",
1943
+ # file_types=[".csv", ".xlsx", ".xls", ".parquet"],
1944
+ # height=FILE_INPUT_HEIGHT,
1945
+ # )
1946
+ in_tabular_duplicate_files.render()
1947
 
1948
  with gr.Row(equal_height=True):
1949
  tabular_duplicate_threshold = gr.Number(
 
1974
  allow_custom_value=True,
1975
  )
1976
 
1977
+ # tabular_text_columns = gr.Dropdown(
1978
+ # choices=DEFAULT_TEXT_COLUMNS,
1979
+ # multiselect=True,
1980
+ # label="Select specific columns to analyse (leave empty to analyse all text columns simultaneously - i.e. all text is joined together)",
1981
+ # info="If no columns selected, all text columns will combined together and analysed",
1982
+ # )
1983
+ tabular_text_columns.render()
1984
 
1985
  find_tabular_duplicates_btn = gr.Button(
1986
  value="Find duplicate cells/rows", variant="primary"
 
2144
  )
2145
 
2146
  with gr.Accordion("Select entity types to redact", open=True):
2147
+ # in_redact_entities = gr.Dropdown(
2148
+ # value=CHOSEN_REDACT_ENTITIES,
2149
+ # choices=FULL_ENTITY_LIST,
2150
+ # multiselect=True,
2151
+ # label="Local PII identification model (click empty space in box for full list)",
2152
+ # )
2153
+ # in_redact_comprehend_entities = gr.Dropdown(
2154
+ # value=CHOSEN_COMPREHEND_ENTITIES,
2155
+ # choices=FULL_COMPREHEND_ENTITY_LIST,
2156
+ # multiselect=True,
2157
+ # label="AWS Comprehend PII identification model (click empty space in box for full list)",
2158
+ # )
2159
+ in_redact_entities.render()
2160
+ in_redact_comprehend_entities.render()
2161
 
2162
  with gr.Row():
2163
  max_fuzzy_spelling_mistakes_num = gr.Number(
 
5222
  comprehend_query_number,
5223
  ],
5224
  api_name="redact_data",
5225
+ show_progress_on=[text_output_summary],
5226
  )
5227
 
5228
  # If the output file count text box changes, keep going with redacting each data file until done
 
5263
  actual_time_taken_number,
5264
  comprehend_query_number,
5265
  ],
5266
+ show_progress_on=[text_output_summary],
5267
  ).success(
5268
  fn=reveal_feedback_buttons,
5269
  outputs=[
 
5296
  actual_time_taken_number,
5297
  task_textbox,
5298
  ],
5299
+ show_progress_on=[results_df_preview],
5300
  )
5301
 
5302
  # full_duplicated_data_df,
example_data/example_outputs/Partnership-Agreement-Toolkit_0_0.pdf_ocr_output.csv CHANGED
@@ -1,277 +1,277 @@
1
- page,text,left,top,width,height
2
- 1,Partnership Agreement,0.516078,0.027879,0.440784,0.032424
3
- 1,SisterCities,0.169804,0.033333,0.238431,0.028182
4
- 1,INTERNATIONAL,0.170196,0.06697,0.237647,0.008788
5
- 1,Toolkit,0.830588,0.07303,0.126667,0.025152
6
- 1,Connect globally. Thrive locally.,0.169804,0.08697,0.238824,0.01303
7
- 1,Types of Affiliations,0.117255,0.157576,0.241961,0.02
8
- 1,Sister City Relationship,0.117647,0.187273,0.196863,0.013939
9
- 1,"A Sister City relationship is formed when the mayor or highest elected official (or, if elections",0.117255,0.211212,0.738824,0.013636
10
- 1,"do not take place, highest appointed official) from a U.S. community and a community in",0.117647,0.227273,0.70902,0.013939
11
- 1,another country or territory sign a formal agreement on behalf of their communities endorsing a,0.117647,0.243636,0.761961,0.013636
12
- 1,"""sister city/sister cities"" relationship. Sister city agreements shall be considered active/valid",0.118039,0.259697,0.731373,0.013939
13
- 1,unless otherwise indicated by one or both of the respective communities.,0.118039,0.276061,0.58549,0.013636
14
- 1,Sister Cities International shall formally recognize only those relationships by cities/members in,0.118039,0.299697,0.758824,0.013636
15
- 1,good standing (i.e. who are current on membership dues) in its Membership Directory or on its,0.117647,0.316061,0.754902,0.013636
16
- 1,"website. However, Sister Cities International shall not assert as invalid or otherwise impugn the",0.116863,0.332121,0.760784,0.013636
17
- 1,legitimacy of those relationships formed by non-members.,0.118039,0.348485,0.466275,0.013636
18
- 1,Friendship City,0.118039,0.372121,0.127059,0.013939
19
- 1,"A Friendship City or Friendship Cities relationship is often formed by cities as a ""stepping",0.117255,0.395758,0.714118,0.013636
20
- 1,"stone"" to a more formal ""Sister City"" agreement. Typically Friendship City agreements are",0.117647,0.411515,0.720392,0.014242
21
- 1,referred to as such in the formal documents that are signed. Sister Cities International shall,0.118039,0.428182,0.72549,0.013636
22
- 1,recognize Friendship City relationships by members in its Membership Directory and website.,0.118039,0.444242,0.747843,0.013636
23
- 1,As per Sister Cities International Board of Directors:,0.117255,0.467879,0.413333,0.013636
24
- 1,Sister Cities International will recognize a new sister cities affiliation between a,0.169412,0.492121,0.626667,0.013333
25
- 1,"U.S. and an international community, even though another affiliation may exist",0.169412,0.507879,0.625098,0.013636
26
- 1,"between that international community and a different U.S. community, only if a",0.169412,0.524545,0.62902,0.013636
27
- 1,cooperative agreement among all involved communities is filed with Sister Cities,0.16902,0.540606,0.643137,0.013636
28
- 1,"International. If a cooperative agreement is denied, or no response to the request",0.170196,0.556667,0.647843,0.013333
29
- 1,"is received within a reasonable amount of time, Sister Cities International will",0.169412,0.57303,0.612157,0.012727
30
- 1,recognize the partnership as a friendship city and it will be delineated as such,0.169412,0.589091,0.621176,0.013636
31
- 1,with a symbol in the membership directories.,0.168627,0.605455,0.358824,0.013333
32
- 1,The cooperative agreement must be sent by the Mayor/County,0.168627,0.628788,0.509412,0.013939
33
- 1,"Executive/Governor of the requesting community, and must be sent to the",0.169804,0.645152,0.595294,0.014242
34
- 1,Mayor/County Executive/Governor of each of the existing partnership,0.169804,0.661212,0.555294,0.013636
35
- 1,communities. Although the Mayor/County Executive/Governor may request input,0.16902,0.677879,0.647451,0.013636
36
- 1,"from, or may be given input by, the sister cities program, it is up to the discretion",0.168627,0.693939,0.647059,0.013939
37
- 1,of the Mayor/County Executive/Governor to sign the cooperative agreement.,0.16902,0.709697,0.612941,0.013939
38
- 1,Although Sister Cities International will help with the cooperative agreement,0.168627,0.726364,0.605882,0.013636
39
- 1,"process, it is up to the requesting community to get the agreement signed. Sister",0.169412,0.742121,0.650196,0.013939
40
- 1,"Cities International will not, in any way, force a community to ""share"" and sign",0.16902,0.758182,0.623922,0.014242
41
- 1,the cooperative agreement.,0.168627,0.774848,0.219216,0.013333
42
- 1,"To place a relationship into Emeritus status, the mayor or highest elected official of the U.S.",0.117255,0.798485,0.736471,0.013939
43
- 1,community must write a letter to the mayor of the foreign city indicating that they wish to,0.118039,0.814545,0.70902,0.013636
44
- 1,"remain sister cities, but understand that the relationship will remain inactive until such time as",0.118039,0.831212,0.747451,0.013333
45
- 1,both cities are able to sustain an active relationship. Sister Cities International should be,0.118039,0.847273,0.705098,0.013636
46
- 1,informed in writing by the mayor of the U.S. city of the situation. Sister Cities International will,0.118039,0.863333,0.746275,0.013636
47
- 2,Partnership Agreement,0.516078,0.027879,0.440784,0.032424
48
- 2,SisterCities,0.169804,0.033333,0.238824,0.028182
49
- 2,INTERNATIONAL,0.170196,0.06697,0.237647,0.008788
50
- 2,Toolkit,0.83098,0.072727,0.127059,0.025455
51
- 2,Connect globally. Thrive locally.,0.169804,0.08697,0.239216,0.01303
52
- 2,then place the partnership into Emeritus Status and will reflect this status in directories and all,0.117255,0.132424,0.751373,0.013333
53
- 2,lists of sister city programs.,0.118039,0.148788,0.218431,0.013333
54
- 2,"If a community wishes to terminate a sister city relationship, then a letter from the mayor or",0.118431,0.172424,0.732549,0.013333
55
- 2,highest elected official of the U.S. city should be sent to the mayor of the sister city. Sister,0.118039,0.188485,0.721569,0.013636
56
- 2,Cities International should be informed of this action in writing by the mayor of the U.S. city,0.118039,0.204848,0.72902,0.013333
57
- 2,and Sister Cities International will then remove the partnership from its directories and all lists,0.117647,0.221212,0.746275,0.013333
58
- 2,of sister city programs. We do not recommend terminating a relationship simply because it is,0.117647,0.237273,0.743529,0.013333
59
- 2,"dormant. Many partnerships wax and wane over the years, and in many cases a dormant",0.117647,0.253939,0.713333,0.013333
60
- 2,partnership may be reinvigorated by local members years after it has been inactive.,0.118039,0.269697,0.664314,0.013636
61
- 2,General Guidelines,0.118039,0.295152,0.231765,0.016061
62
- 2,In order for a sister city/county/state partnership to be recognized by Sister Cities International,0.118431,0.324242,0.754902,0.013636
63
- 2,"(SCI), the two communities must sign formal documents which clearly endorse the link. This",0.118039,0.340606,0.74,0.013636
64
- 2,presumes several key items: that the U.S. community is already a member of SCI and has,0.118039,0.35697,0.718039,0.013636
65
- 2,followed proper procedures (e.g. passed a city council resolution declaring the intent to twin,0.117255,0.373333,0.737647,0.013636
66
- 2,with the specific city); that both communities share a mutual commitment to the relationship;,0.117255,0.389394,0.740784,0.013636
67
- 2,and that both have secured the necessary support structure to build a lasting relationship. You,0.117647,0.405455,0.758039,0.013333
68
- 2,should check with your local sister city program to see if they have any additional requirements,0.117647,0.421818,0.760784,0.013636
69
- 2,before pursuing a sister city relationship.,0.118039,0.437879,0.323137,0.013636
70
- 2,"SCI often refers to these agreements as a ""Sister City Agreement"" or ""Memorandum of",0.118039,0.461515,0.696863,0.013939
71
- 2,"Understanding."" However, as the following examples show, the actual name and format of",0.118039,0.477576,0.729804,0.013636
72
- 2,your documents is left up to you.,0.117255,0.494242,0.262745,0.013636
73
- 2,A few things to keep in mind as you draft your agreement:,0.117255,0.517879,0.463137,0.013636
74
- 2,"Your agreement can range from the ceremonial, with language focusing on each city's",0.176471,0.542121,0.69098,0.013939
75
- 2,"commitment to fostering understanding, cooperation, and mutual benefit to the precise,",0.176471,0.558485,0.701961,0.013333
76
- 2,"with particular areas of interest, specific programs/activities, or more concrete goals",0.176078,0.574848,0.673725,0.013636
77
- 2,related to anything from numbers of exchanges to economic development.,0.176863,0.591212,0.596863,0.013636
78
- 2,"Don't try to include everything you plan to do. Some specifics, like particular areas of",0.177255,0.620303,0.681176,0.013939
79
- 2,"interest or participating institutions are good to include. However, there's no need to",0.176471,0.636667,0.675686,0.013636
80
- 2,include all the programs you plan to do if it makes the document too lengthy or limits,0.176863,0.652727,0.678824,0.013939
81
- 2,the scope of projects. This is a formal document to establish the relationship; specific,0.176078,0.668788,0.684706,0.013636
82
- 2,"tasks, responsibilities, or other nuts-and-bolts text related to implementation or",0.176078,0.685455,0.635686,0.013333
83
- 2,administration of the partnership can be expressed more fully in a separate,0.176471,0.701212,0.600392,0.013636
84
- 2,memorandum between the respective sister city committees. Your partnership,0.177255,0.717576,0.626667,0.013636
85
- 2,agreement is a historical document and should not be dated or limited by being aligned,0.176471,0.733636,0.699216,0.013636
86
- 2,with very specific tasks.,0.176078,0.750606,0.190196,0.013333
87
- 2,Work with your counterparts. Remember that this is signed by both cities. You should,0.176078,0.779697,0.68549,0.013636
88
- 2,share drafts of your agreement with your international partners and solicit feedback on,0.176471,0.795758,0.691765,0.013333
89
- 2,what they'd like to see in the agreement. Be flexible to cultural or municipal priorities.,0.176471,0.811818,0.679216,0.013939
90
- 2,Ask your counterparts to translate the agreement if it is drafted in English. It is,0.176078,0.841515,0.623137,0.013636
91
- 2,important for the citizens of your partner community to be able to read and understand,0.176863,0.857576,0.693725,0.013939
92
- 2,the commitment their city has made. Have someone in your own community who,0.176078,0.873939,0.649804,0.013636
93
- 3,Partnership Agreement,0.516078,0.027879,0.441176,0.032121
94
- 3,SisterCities,0.169804,0.033333,0.239216,0.028182
95
- 3,INTERNATIONAL,0.170196,0.06697,0.237255,0.008788
96
- 3,Toolkit,0.83098,0.07303,0.126667,0.025152
97
- 3,Connect globally. Thrive locally.,0.169804,0.08697,0.239216,0.01303
98
- 3,speaks that language check the foreign-language version to make sure it mirrors what,0.176471,0.132424,0.688235,0.013333
99
- 3,you have in your own agreement.,0.176471,0.148788,0.264706,0.013333
100
- 3,Keep it to one page. Ceremonial documents such as these partnership agreements,0.176863,0.178485,0.66549,0.013636
101
- 3,work best if they can be posted in their entirety.,0.176078,0.194545,0.380392,0.013636
102
- 3,Most sister city agreements include some acknowledgement of the founding principles,0.177255,0.224242,0.694902,0.013636
103
- 3,"of the sister city movement- to promote peace through mutual respect, understanding,",0.176471,0.240303,0.698431,0.013333
104
- 3,and cooperation.,0.176471,0.25697,0.13451,0.013333
105
- 3,Consider using official letterhead and/or other embellishments such as city seals or,0.176863,0.286061,0.665882,0.013333
106
- 3,logos to reflect your enhance the document. Sister city agreements are often posted at,0.176863,0.302121,0.695686,0.013636
107
- 3,city hall or other municipal offices and should reflect their historical importance,0.176471,0.318485,0.630588,0.013333
108
- 3,Look at other agreements your city has signed. These agreements may give you an idea,0.177255,0.347879,0.705098,0.013636
109
- 3,"of what is acceptable or possible, and they may be in an easily replicable format. If you",0.176471,0.364242,0.695686,0.013636
110
- 3,"cannot access older agreements please contact Sister Cities International, we may",0.176863,0.380303,0.663137,0.013636
111
- 3,"have them on file, although we do not have copies of all partnership agreements.",0.176863,0.396667,0.64549,0.013636
112
- 3,Documents must be signed by the top elected official of both communities.,0.177255,0.426364,0.601569,0.013333
113
- 3,"Check with your mayor, city council, town clerk, et al. to make sure that the agreement",0.176863,0.455758,0.694118,0.013636
114
- 3,"is OK with them. The mayor is the one putting his or her name on the paper, and you",0.176863,0.471818,0.677255,0.013333
115
- 3,don't want to spend time developing an agreement which will never be signed.,0.176863,0.488182,0.629412,0.013636
116
- 3,Official documents are usually signed during a formal ceremony recognizing the,0.176863,0.517576,0.638431,0.013636
117
- 3,partnership. Be sure both communities receive a signed set of the official documents,0.177255,0.533939,0.683922,0.013636
118
- 3,for their records.,0.176078,0.550606,0.131373,0.010606
119
- 3,Remember to send your signed agreement to Sister Cities International. After we,0.177255,0.579697,0.645098,0.013636
120
- 3,receive your agreement we will post the relationship in the City Directory and make sure,0.176863,0.595758,0.703137,0.013636
121
- 3,it is included in our Annual Membership Directory.,0.176863,0.612121,0.398039,0.013333
122
- 3,Remember that each city's sister city program is independent and can impose requirements,0.118431,0.640606,0.736471,0.013939
123
- 3,"like the establishment of a committee, a review period, sustainability/funding plan, among",0.118039,0.65697,0.715686,0.013636
124
- 3,"others, before sanctioning a sister city agreement. Check with your local program or mayor's",0.117647,0.672727,0.743529,0.014242
125
- 3,office to see if this is the case.,0.117647,0.689091,0.241176,0.011515
126
- 3,On the following pages you'll find a series of partnership agreements to give you an idea of,0.118039,0.717879,0.728627,0.013939
127
- 3,"what is possible. While you should feel free to use some of the formatting and language, we",0.117255,0.734242,0.73451,0.013636
128
- 3,encourage you to make your agreement your own and be creative with what you produce. If,0.117647,0.750606,0.737647,0.013636
129
- 3,you are unsure about your agreement or want advice you can always solicit feedback by,0.117647,0.766667,0.708627,0.013636
130
- 3,sending it to our Membership Director at [email protected] or contacting us at (202),0.117647,0.782727,0.732157,0.013636
131
- 3,347-8630.,0.117647,0.799394,0.080392,0.010303
132
- 4,Partnership Agreement,0.516471,0.027879,0.440784,0.032727
133
- 4,SisterCities,0.169412,0.033333,0.239608,0.028485
134
- 4,INTERNATIONAL,0.170196,0.066667,0.238431,0.009091
135
- 4,Toolkit,0.830588,0.072727,0.127843,0.025758
136
- 4,Connect globally. Thrive locally.,0.169412,0.08697,0.239608,0.013333
137
- 4,"jull bubzig 2000 3,312",0.378039,0.291212,0.32549,0.019394
138
- 4,ABU DHABI MUNICIPALITY & TOWN PLANNING,0.376471,0.316667,0.327451,0.016667
139
- 4,AN AGREEMENT FOR THE ESTABLISHMENT OF,0.260784,0.373636,0.52549,0.012727
140
- 4,SISTER CITIES RELATIONSHIP,0.337647,0.393636,0.342745,0.012121
141
- 4,BETWEEN,0.454902,0.413636,0.110588,0.011212
142
- 4,THE CITY OF ABU DHABI ( U. A.E),0.337255,0.432727,0.375686,0.013939
143
- 4,AND,0.487843,0.452727,0.048235,0.011212
144
- 4,"HOUSTON, TEXAS ( U.S.A)",0.385882,0.471515,0.298039,0.014848
145
- 4,"The Sister City Program, administered by Sister Cities International, was initiated",0.221961,0.525455,0.597255,0.01303
146
- 4,By the President of the United States of America in 1956 to encourage greater,0.222745,0.539394,0.561961,0.012727
147
- 4,Friendship and understanding between the United States and other nations through,0.222745,0.553333,0.608235,0.012727
148
- 4,Direct personal contact: and,0.222745,0.567576,0.20549,0.012424
149
- 4,"In order to foster those goals, the people of Abu Dhabi and Houston, in a gesture of",0.222353,0.594242,0.603529,0.012424
150
- 4,"Friendship and goodwill, agree to collaborate for the mutual benefit of their",0.222745,0.608182,0.547843,0.01303
151
- 4,"Communities by exploring education, economic and cultural opportunities.",0.222353,0.622121,0.541961,0.012121
152
- 4,"Abu Dhabi and Houston, sharing a common interest in energy, technology and",0.221569,0.648788,0.574118,0.012424
153
- 4,"medicine, and the desire to promote mutual understanding among our citizens do",0.222353,0.66303,0.588235,0.012121
154
- 4,"hereby proclaim themselves Sister Cities beginning on the 13th day of March 2001,",0.221961,0.673636,0.594118,0.015758
155
- 4,the date of Houston City Council resolution estatblishing the Sister City,0.221961,0.690303,0.519608,0.01303
156
- 4,relationship became effective.,0.221569,0.705152,0.217647,0.012424
157
- 4,"Signed on this 26 of October 2002, in duplicate in the Arabic and English",0.221569,0.732121,0.533333,0.01303
158
- 4,"Languages, both text being equally authentic.",0.221961,0.746667,0.328627,0.012727
159
- 4,A,0.344314,0.768485,0.084706,0.030303
160
- 4,Sheikh Mohammed bin Butti AI Hamed,0.245882,0.806364,0.366275,0.010909
161
- 4,Lee P.Brown,0.729412,0.806364,0.118824,0.010303
162
- 4,Mayor of Houston,0.704706,0.823333,0.166667,0.012424
163
- 4,Chairman of Abu Dhabi Municipality,0.24549,0.823636,0.342353,0.012727
164
- 4,&Town Planning,0.324314,0.841212,0.155686,0.012424
165
- 5,Partnership Agreement,0.516078,0.027879,0.441176,0.032424
166
- 5,SisterCities,0.169412,0.033333,0.239608,0.028485
167
- 5,INTERNATIONAL,0.17098,0.066667,0.237255,0.009091
168
- 5,Toolkit,0.83098,0.072727,0.127059,0.025758
169
- 5,Connect globally. Thrive locally.,0.169412,0.08697,0.239216,0.013333
170
- 5,THE CITY OF NEW YORK,0.438824,0.262121,0.240784,0.009697
171
- 5,OFFICE OF THE MAYOR,0.450196,0.27697,0.220392,0.009697
172
- 5,"NEW YORK, N.Y. 10007",0.461176,0.29303,0.196863,0.010303
173
- 5,THE NEW YORK CITY-LONDON SISTER CITY PARTNERSHIP,0.267451,0.355758,0.582745,0.011818
174
- 5,Memorandum of Understanding,0.420392,0.371212,0.274902,0.013333
175
- 5,The Sister City partnership between New York City and London will foster mutually,0.201176,0.402121,0.674118,0.014242
176
- 5,beneficial solutions to common challenges for these two great cosmopolitan entities.,0.201176,0.417273,0.66902,0.013636
177
- 5,"Consequently, the Sister City relationship between the two will be one of the most",0.201176,0.432727,0.652549,0.015152
178
- 5,"important in their network of global partnerships, as it strives to:",0.201176,0.448182,0.50902,0.015455
179
- 5,Encourage and publicize existing exchanges between London and New York City so,0.230588,0.480303,0.671373,0.015152
180
- 5,that they can flourish to benefit a wider cross-section of the citizens of both;,0.230588,0.496061,0.602353,0.015152
181
- 5,"Support and promote the development of new social, economic, academic and",0.230196,0.512424,0.618431,0.015455
182
- 5,community programs to encourage both cities' citizens to share their experiences as a,0.229804,0.527879,0.678039,0.014848
183
- 5,medium for learning from one another;,0.229804,0.543636,0.309412,0.013939
184
- 5,Generate an improvement of the operation of the cities' various government agencies,0.229804,0.56,0.676078,0.014545
185
- 5,by serving as a conduit of information;,0.22902,0.575758,0.307843,0.014848
186
- 5,"Identify themes, common to both, that can generate new initiatives to further and",0.229412,0.591818,0.640784,0.015152
187
- 5,"nurture the increasingly powerful financial, social and cultural relationships between",0.22902,0.607576,0.671373,0.014242
188
- 5,the cities;,0.22902,0.624545,0.076471,0.012424
189
- 5,Promote key mayoral priorities relevant to both London and New York City;,0.228627,0.639394,0.608627,0.015152
190
- 5,Provide financial or in kind support to community-led programs that advance the,0.228627,0.656061,0.641569,0.013636
191
- 5,aims of the Sister City partnership;,0.22902,0.672121,0.275294,0.013636
192
- 5,"With the above purposes in mind, the Mayor of the City of New York and the Mayor of",0.198824,0.702424,0.697647,0.014848
193
- 5,London solemnly confirm that these two cities are united by an official partnership by the,0.198824,0.718182,0.710196,0.014545
194
- 5,protocol of this Memorandum of Understanding.,0.198431,0.733939,0.384314,0.015152
195
- 5,This agreement will go into effect from the date of signatures.,0.310196,0.780606,0.488235,0.014545
196
- 5,Thedder Rudolph W. Giuliani,0.178824,0.795455,0.244314,0.100909
197
- 5,Signed in March of 2001,0.455686,0.796364,0.19451,0.013636
198
- 5,Ken Mayor Livingstone,0.672157,0.877576,0.132941,0.029091
199
- 5,Mayor,0.311373,0.894848,0.053333,0.012727
200
- 5,New York City,0.287843,0.909091,0.121176,0.013333
201
- 5,London,0.701961,0.909091,0.061569,0.010606
202
- 6,Partnership Agreement,0.515686,0.027576,0.441961,0.03303
203
- 6,SisterCities,0.169412,0.03303,0.24,0.028182
204
- 6,INTERNATIONAL,0.169804,0.066667,0.238431,0.009091
205
- 6,Toolkit,0.83098,0.072727,0.127451,0.025758
206
- 6,Connect globally. Thrive locally.,0.169412,0.08697,0.239608,0.013333
207
- 6,CHIC OF STATE,0.247451,0.190606,0.141961,0.036364
208
- 6,City of Long Beach,0.388627,0.196667,0.476471,0.066364
209
- 6,California,0.551373,0.257273,0.136471,0.033333
210
- 6,Sister City Agreement,0.321961,0.305455,0.378431,0.035152
211
- 6,between the,0.464706,0.352727,0.084314,0.009697
212
- 6,City of Long Beach,0.38,0.378485,0.252549,0.01697
213
- 6,"California, USA",0.4,0.397576,0.21098,0.016061
214
- 6,and the,0.48,0.415152,0.053333,0.009091
215
- 6,City of San Pablo de Manta,0.321569,0.428788,0.369804,0.01697
216
- 6,"Ecuador, South America",0.347451,0.447879,0.317255,0.015152
217
- 6,"In accordance with the authorization and approval expressed by the City of Long Beach,",0.261569,0.482121,0.536863,0.012121
218
- 6,"California, USA, and the City of San Pablo de Manta, Ecundor, South America, it is declared",0.217647,0.492727,0.581176,0.01303
219
- 6,"that a ""Sister City Agreement between the two cities is hereby established for the following",0.217647,0.502727,0.581569,0.012121
220
- 6,purposes:,0.216863,0.516061,0.058039,0.009394
221
- 6,(1) to promote and expand the effective and mutually beneficial cooperation between,0.278824,0.532727,0.520392,0.012424
222
- 6,the people of Long Beach and the people of San Pablo de Manta; and,0.218039,0.543636,0.40549,0.012424
223
- 6,"(2) to promote international goodwill, understanding, and expanded business",0.279216,0.56303,0.520784,0.012424
224
- 6,"relations between the two cities and their respective nations by the exchange of people, ideas, and",0.218039,0.573636,0.581569,0.012121
225
- 6,"information in a unide variety of economic, social, cultural, municipal, environmental,",0.218039,0.584242,0.581176,0.012121
226
- 6,"professional, technical, youth, and other endeavors; and",0.217647,0.594848,0.333333,0.012121
227
- 6,"(3) to foster and encourage charitable, scientific, trade and commerce, literary and",0.279608,0.613939,0.520784,0.012727
228
- 6,educational activities between the two cities;,0.218039,0.625455,0.265882,0.009697
229
- 6,This Sister City Agreement shall be officially established and shall become effective when,0.263137,0.644545,0.536863,0.012727
230
- 6,"this document has been duly executed by the Mayor of Long Beach, California, USA, and the",0.218824,0.654848,0.581961,0.012424
231
- 6,"Mayor of San Pablo de Manta, Ecundor, South America.",0.218431,0.665758,0.338824,0.012121
232
- 6,STATE OFFICE,0.276471,0.713636,0.050588,0.048788
233
- 6,Beverly 0 Neill,0.587451,0.736667,0.121961,0.013636
234
- 6,"Mayor, City of Long Beach",0.542353,0.751212,0.21098,0.013636
235
- 6,"California, USA",0.582745,0.765758,0.125098,0.01303
236
- 6,10.2aulus,0.490588,0.771818,0.220392,0.062424
237
- 6,Ing. Jorge O. Zambrano Cedeño,0.527059,0.825152,0.242745,0.013333
238
- 6,"Mayor, City of San Pablo de Manta",0.505098,0.839394,0.277647,0.013636
239
- 6,"Ecuador, South America",0.551765,0.854242,0.188235,0.011818
240
- 6,"Dated: September 19, 2000",0.544706,0.883333,0.202745,0.01303
241
- 7,Partnership Agreement,0.516078,0.027879,0.441176,0.032424
242
- 7,SisterCities,0.169412,0.03303,0.24,0.028485
243
- 7,INTERNATIONAL,0.170196,0.066667,0.237647,0.009091
244
- 7,Toolkit,0.83098,0.072727,0.127451,0.025758
245
- 7,Connect globally. Thrive locally.,0.169412,0.08697,0.239216,0.013333
246
- 7,REAFFIRMATION OF SISTER CITIES DECLARATION,0.324706,0.165152,0.483529,0.013939
247
- 7,adopted by,0.2,0.213333,0.080392,0.013636
248
- 7,THE HONORABLE RICHARD M. DALEY,0.396078,0.214242,0.335686,0.012424
249
- 7,MAYOR OF CHICAGO,0.472549,0.231212,0.18549,0.011515
250
- 7,and,0.199608,0.260909,0.026275,0.010606
251
- 7,THE HONORABLE ZHANG RONGMAO,0.401961,0.261212,0.323137,0.011212
252
- 7,MAYOR OF SHENYANG,0.463529,0.273636,0.202353,0.011212
253
- 7,ON,0.551765,0.298182,0.026667,0.011515
254
- 7,"JUNE 5, 1995",0.500392,0.323636,0.128235,0.014848
255
- 7,"On this the tenth anniversary of the signing of a sister city agreement, in order to further",0.255686,0.36303,0.67098,0.015152
256
- 7,the traditional links of friendship between Chicago and Shenyang and to reaffirm their mutual,0.198824,0.378788,0.727843,0.015455
257
- 7,"aspiration to work in unison for the benefit of their cities and nations, the Honorable Mayor",0.199608,0.394848,0.727843,0.014848
258
- 7,"Richard M. Daley, Mayor of the City of Chicago, and the Honorable Zhang Rongmao, Mayor",0.199216,0.411212,0.727451,0.014242
259
- 7,"of the City of Shenyang, on this fifth day of June 1995, do hereby acknowledge and reaffirm the",0.199216,0.42697,0.72549,0.014848
260
- 7,sister cities agreement between the City of Chicago and the City of Shenyang.,0.199608,0.443636,0.57451,0.014242
261
- 7,"The City of Chicago and the City of Shenyang on the basis of friendly cooperation,",0.256078,0.473939,0.665098,0.015152
262
- 7,equality and mutual benefit will continue to develop a sister cities relationship to promote and,0.2,0.490303,0.724706,0.014242
263
- 7,broaden economic cooperation and cultural exchanges between the two cities.,0.199216,0.506061,0.57451,0.014242
264
- 7,The two cities do hereby declare their interest in exploring the establishment of business,0.255294,0.537273,0.668235,0.015455
265
- 7,and trade relations between Chicago and Shenyang.,0.198824,0.554545,0.387843,0.013636
266
- 7,"In addition, exchanges will be promoted in the area of the arts such as exhibits, music,",0.254118,0.583939,0.666667,0.015455
267
- 7,dance and other cultural activities.,0.198431,0.601212,0.256471,0.010606
268
- 7,"In addition, exchanges will be promoted in education and the establishment of contacts",0.254118,0.630303,0.668627,0.015758
269
- 7,within educational institutions encouraged.,0.198824,0.647273,0.32,0.014242
270
- 7,"In addition, we declare our intention to promote exchanges in such fields as science and",0.253725,0.678182,0.668627,0.014848
271
- 7,"technology, sports, health, youth and any areas that will contribute to the prosperity and the",0.198039,0.693636,0.722745,0.015152
272
- 7,further development of friendship between the people of our two cities.,0.194902,0.711515,0.525098,0.013636
273
- 7,3h.5.,0.593725,0.750606,0.218039,0.06303
274
- 7,THE HONORABLE ZHANG RONGMAO,0.588627,0.819394,0.287843,0.011818
275
- 7,THE HONORABLE RICHARD M. DALEY,0.197255,0.821515,0.303529,0.010606
276
- 7,MAYOR OF SHENYANG,0.587451,0.835455,0.177647,0.010303
277
- 7,MAYOR OF CHICAGO,0.195686,0.835758,0.164706,0.010606
 
1
+ page,text,left,top,width,height,line
2
+ 1,Partnership Agreement,0.516078,0.027879,0.440784,0.032424,1
3
+ 1,SisterCities,0.169804,0.033333,0.238431,0.028182,2
4
+ 1,INTERNATIONAL,0.170196,0.06697,0.237647,0.008788,3
5
+ 1,Toolkit,0.830588,0.07303,0.126667,0.025152,4
6
+ 1,Connect globally. Thrive locally.,0.169804,0.08697,0.238824,0.01303,5
7
+ 1,Types of Affiliations,0.117255,0.157576,0.241961,0.02,6
8
+ 1,Sister City Relationship,0.117647,0.187273,0.196863,0.013939,7
9
+ 1,"A Sister City relationship is formed when the mayor or highest elected official (or, if elections",0.117255,0.211212,0.738824,0.013636,8
10
+ 1,"do not take place, highest appointed official) from a U.S. community and a community in",0.117647,0.227273,0.70902,0.013939,9
11
+ 1,another country or territory sign a formal agreement on behalf of their communities endorsing a,0.117647,0.243636,0.761961,0.013636,10
12
+ 1,"""sister city/sister cities"" relationship. Sister city agreements shall be considered active/valid",0.118039,0.259697,0.731373,0.013939,11
13
+ 1,unless otherwise indicated by one or both of the respective communities.,0.118039,0.276061,0.58549,0.013636,12
14
+ 1,Sister Cities International shall formally recognize only those relationships by cities/members in,0.118039,0.299697,0.758824,0.013636,13
15
+ 1,good standing (i.e. who are current on membership dues) in its Membership Directory or on its,0.117647,0.316061,0.754902,0.013636,14
16
+ 1,"website. However, Sister Cities International shall not assert as invalid or otherwise impugn the",0.116863,0.332121,0.760784,0.013636,15
17
+ 1,legitimacy of those relationships formed by non-members.,0.118039,0.348485,0.466275,0.013636,16
18
+ 1,Friendship City,0.118039,0.372121,0.127059,0.013939,17
19
+ 1,"A Friendship City or Friendship Cities relationship is often formed by cities as a ""stepping",0.117255,0.395758,0.714118,0.013636,18
20
+ 1,"stone"" to a more formal ""Sister City"" agreement. Typically Friendship City agreements are",0.117647,0.411515,0.720392,0.014242,19
21
+ 1,referred to as such in the formal documents that are signed. Sister Cities International shall,0.118039,0.428182,0.72549,0.013636,20
22
+ 1,recognize Friendship City relationships by members in its Membership Directory and website.,0.118039,0.444242,0.747843,0.013636,21
23
+ 1,As per Sister Cities International Board of Directors:,0.117255,0.467879,0.413333,0.013636,22
24
+ 1,Sister Cities International will recognize a new sister cities affiliation between a,0.169412,0.492121,0.626667,0.013333,23
25
+ 1,"U.S. and an international community, even though another affiliation may exist",0.169412,0.507879,0.625098,0.013636,24
26
+ 1,"between that international community and a different U.S. community, only if a",0.169412,0.524545,0.62902,0.013636,25
27
+ 1,cooperative agreement among all involved communities is filed with Sister Cities,0.16902,0.540606,0.643137,0.013636,26
28
+ 1,"International. If a cooperative agreement is denied, or no response to the request",0.170196,0.556667,0.647843,0.013333,27
29
+ 1,"is received within a reasonable amount of time, Sister Cities International will",0.169412,0.57303,0.612157,0.012727,28
30
+ 1,recognize the partnership as a friendship city and it will be delineated as such,0.169412,0.589091,0.621176,0.013636,29
31
+ 1,with a symbol in the membership directories.,0.168627,0.605455,0.358824,0.013333,30
32
+ 1,The cooperative agreement must be sent by the Mayor/County,0.168627,0.628788,0.509412,0.013939,31
33
+ 1,"Executive/Governor of the requesting community, and must be sent to the",0.169804,0.645152,0.595294,0.014242,32
34
+ 1,Mayor/County Executive/Governor of each of the existing partnership,0.169804,0.661212,0.555294,0.013636,33
35
+ 1,communities. Although the Mayor/County Executive/Governor may request input,0.16902,0.677879,0.647451,0.013636,34
36
+ 1,"from, or may be given input by, the sister cities program, it is up to the discretion",0.168627,0.693939,0.647059,0.013939,35
37
+ 1,of the Mayor/County Executive/Governor to sign the cooperative agreement.,0.16902,0.709697,0.612941,0.013939,36
38
+ 1,Although Sister Cities International will help with the cooperative agreement,0.168627,0.726364,0.605882,0.013636,37
39
+ 1,"process, it is up to the requesting community to get the agreement signed. Sister",0.169412,0.742121,0.650196,0.013939,38
40
+ 1,"Cities International will not, in any way, force a community to ""share"" and sign",0.16902,0.758182,0.623922,0.014242,39
41
+ 1,the cooperative agreement.,0.168627,0.774848,0.219216,0.013333,40
42
+ 1,"To place a relationship into Emeritus status, the mayor or highest elected official of the U.S.",0.117255,0.798485,0.736471,0.013939,41
43
+ 1,community must write a letter to the mayor of the foreign city indicating that they wish to,0.118039,0.814545,0.70902,0.013636,42
44
+ 1,"remain sister cities, but understand that the relationship will remain inactive until such time as",0.118039,0.831212,0.747451,0.013333,43
45
+ 1,both cities are able to sustain an active relationship. Sister Cities International should be,0.118039,0.847273,0.705098,0.013636,44
46
+ 1,informed in writing by the mayor of the U.S. city of the situation. Sister Cities International will,0.118039,0.863333,0.746275,0.013636,45
47
+ 2,Partnership Agreement,0.516078,0.027879,0.440784,0.032424,1
48
+ 2,SisterCities,0.169804,0.033333,0.238824,0.028182,2
49
+ 2,INTERNATIONAL,0.170196,0.06697,0.237647,0.008788,3
50
+ 2,Toolkit,0.83098,0.072727,0.127059,0.025455,4
51
+ 2,Connect globally. Thrive locally.,0.169804,0.08697,0.239216,0.01303,5
52
+ 2,then place the partnership into Emeritus Status and will reflect this status in directories and all,0.117255,0.132424,0.751373,0.013333,6
53
+ 2,lists of sister city programs.,0.118039,0.148788,0.218431,0.013333,7
54
+ 2,"If a community wishes to terminate a sister city relationship, then a letter from the mayor or",0.118431,0.172424,0.732549,0.013333,8
55
+ 2,highest elected official of the U.S. city should be sent to the mayor of the sister city. Sister,0.118039,0.188485,0.721569,0.013636,9
56
+ 2,Cities International should be informed of this action in writing by the mayor of the U.S. city,0.118039,0.204848,0.72902,0.013333,10
57
+ 2,and Sister Cities International will then remove the partnership from its directories and all lists,0.117647,0.221212,0.746275,0.013333,11
58
+ 2,of sister city programs. We do not recommend terminating a relationship simply because it is,0.117647,0.237273,0.743529,0.013333,12
59
+ 2,"dormant. Many partnerships wax and wane over the years, and in many cases a dormant",0.117647,0.253939,0.713333,0.013333,13
60
+ 2,partnership may be reinvigorated by local members years after it has been inactive.,0.118039,0.269697,0.664314,0.013636,14
61
+ 2,General Guidelines,0.118039,0.295152,0.231765,0.016061,15
62
+ 2,In order for a sister city/county/state partnership to be recognized by Sister Cities International,0.118431,0.324242,0.754902,0.013636,16
63
+ 2,"(SCI), the two communities must sign formal documents which clearly endorse the link. This",0.118039,0.340606,0.74,0.013636,17
64
+ 2,presumes several key items: that the U.S. community is already a member of SCI and has,0.118039,0.35697,0.718039,0.013636,18
65
+ 2,followed proper procedures (e.g. passed a city council resolution declaring the intent to twin,0.117255,0.373333,0.737647,0.013636,19
66
+ 2,with the specific city); that both communities share a mutual commitment to the relationship;,0.117255,0.389394,0.740784,0.013636,20
67
+ 2,and that both have secured the necessary support structure to build a lasting relationship. You,0.117647,0.405455,0.758039,0.013333,21
68
+ 2,should check with your local sister city program to see if they have any additional requirements,0.117647,0.421818,0.760784,0.013636,22
69
+ 2,before pursuing a sister city relationship.,0.118039,0.437879,0.323137,0.013636,23
70
+ 2,"SCI often refers to these agreements as a ""Sister City Agreement"" or ""Memorandum of",0.118039,0.461515,0.696863,0.013939,24
71
+ 2,"Understanding."" However, as the following examples show, the actual name and format of",0.118039,0.477576,0.729804,0.013636,25
72
+ 2,your documents is left up to you.,0.117255,0.494242,0.262745,0.013636,26
73
+ 2,A few things to keep in mind as you draft your agreement:,0.117255,0.517879,0.463137,0.013636,27
74
+ 2,"Your agreement can range from the ceremonial, with language focusing on each city's",0.176471,0.542121,0.69098,0.013939,28
75
+ 2,"commitment to fostering understanding, cooperation, and mutual benefit to the precise,",0.176471,0.558485,0.701961,0.013333,29
76
+ 2,"with particular areas of interest, specific programs/activities, or more concrete goals",0.176078,0.574848,0.673725,0.013636,30
77
+ 2,related to anything from numbers of exchanges to economic development.,0.176863,0.591212,0.596863,0.013636,31
78
+ 2,"Don't try to include everything you plan to do. Some specifics, like particular areas of",0.177255,0.620303,0.681176,0.013939,32
79
+ 2,"interest or participating institutions are good to include. However, there's no need to",0.176471,0.636667,0.675686,0.013636,33
80
+ 2,include all the programs you plan to do if it makes the document too lengthy or limits,0.176863,0.652727,0.678824,0.013939,34
81
+ 2,the scope of projects. This is a formal document to establish the relationship; specific,0.176078,0.668788,0.684706,0.013636,35
82
+ 2,"tasks, responsibilities, or other nuts-and-bolts text related to implementation or",0.176078,0.685455,0.635686,0.013333,36
83
+ 2,administration of the partnership can be expressed more fully in a separate,0.176471,0.701212,0.600392,0.013636,37
84
+ 2,memorandum between the respective sister city committees. Your partnership,0.177255,0.717576,0.626667,0.013636,38
85
+ 2,agreement is a historical document and should not be dated or limited by being aligned,0.176471,0.733636,0.699216,0.013636,39
86
+ 2,with very specific tasks.,0.176078,0.750606,0.190196,0.013333,40
87
+ 2,Work with your counterparts. Remember that this is signed by both cities. You should,0.176078,0.779697,0.68549,0.013636,41
88
+ 2,share drafts of your agreement with your international partners and solicit feedback on,0.176471,0.795758,0.691765,0.013333,42
89
+ 2,what they'd like to see in the agreement. Be flexible to cultural or municipal priorities.,0.176471,0.811818,0.679216,0.013939,43
90
+ 2,Ask your counterparts to translate the agreement if it is drafted in English. It is,0.176078,0.841515,0.623137,0.013636,44
91
+ 2,important for the citizens of your partner community to be able to read and understand,0.176863,0.857576,0.693725,0.013939,1
92
+ 2,the commitment their city has made. Have someone in your own community who,0.176078,0.873939,0.649804,0.013636,2
93
+ 3,Partnership Agreement,0.516078,0.027879,0.441176,0.032121,3
94
+ 3,SisterCities,0.169804,0.033333,0.239216,0.028182,4
95
+ 3,INTERNATIONAL,0.170196,0.06697,0.237255,0.008788,5
96
+ 3,Toolkit,0.83098,0.07303,0.126667,0.025152,6
97
+ 3,Connect globally. Thrive locally.,0.169804,0.08697,0.239216,0.01303,7
98
+ 3,speaks that language check the foreign-language version to make sure it mirrors what,0.176471,0.132424,0.688235,0.013333,8
99
+ 3,you have in your own agreement.,0.176471,0.148788,0.264706,0.013333,9
100
+ 3,Keep it to one page. Ceremonial documents such as these partnership agreements,0.176863,0.178485,0.66549,0.013636,10
101
+ 3,work best if they can be posted in their entirety.,0.176078,0.194545,0.380392,0.013636,11
102
+ 3,Most sister city agreements include some acknowledgement of the founding principles,0.177255,0.224242,0.694902,0.013636,12
103
+ 3,"of the sister city movement- to promote peace through mutual respect, understanding,",0.176471,0.240303,0.698431,0.013333,13
104
+ 3,and cooperation.,0.176471,0.25697,0.13451,0.013333,14
105
+ 3,Consider using official letterhead and/or other embellishments such as city seals or,0.176863,0.286061,0.665882,0.013333,15
106
+ 3,logos to reflect your enhance the document. Sister city agreements are often posted at,0.176863,0.302121,0.695686,0.013636,16
107
+ 3,city hall or other municipal offices and should reflect their historical importance,0.176471,0.318485,0.630588,0.013333,17
108
+ 3,Look at other agreements your city has signed. These agreements may give you an idea,0.177255,0.347879,0.705098,0.013636,18
109
+ 3,"of what is acceptable or possible, and they may be in an easily replicable format. If you",0.176471,0.364242,0.695686,0.013636,19
110
+ 3,"cannot access older agreements please contact Sister Cities International, we may",0.176863,0.380303,0.663137,0.013636,20
111
+ 3,"have them on file, although we do not have copies of all partnership agreements.",0.176863,0.396667,0.64549,0.013636,21
112
+ 3,Documents must be signed by the top elected official of both communities.,0.177255,0.426364,0.601569,0.013333,22
113
+ 3,"Check with your mayor, city council, town clerk, et al. to make sure that the agreement",0.176863,0.455758,0.694118,0.013636,23
114
+ 3,"is OK with them. The mayor is the one putting his or her name on the paper, and you",0.176863,0.471818,0.677255,0.013333,24
115
+ 3,don't want to spend time developing an agreement which will never be signed.,0.176863,0.488182,0.629412,0.013636,25
116
+ 3,Official documents are usually signed during a formal ceremony recognizing the,0.176863,0.517576,0.638431,0.013636,26
117
+ 3,partnership. Be sure both communities receive a signed set of the official documents,0.177255,0.533939,0.683922,0.013636,27
118
+ 3,for their records.,0.176078,0.550606,0.131373,0.010606,28
119
+ 3,Remember to send your signed agreement to Sister Cities International. After we,0.177255,0.579697,0.645098,0.013636,29
120
+ 3,receive your agreement we will post the relationship in the City Directory and make sure,0.176863,0.595758,0.703137,0.013636,30
121
+ 3,it is included in our Annual Membership Directory.,0.176863,0.612121,0.398039,0.013333,31
122
+ 3,Remember that each city's sister city program is independent and can impose requirements,0.118431,0.640606,0.736471,0.013939,32
123
+ 3,"like the establishment of a committee, a review period, sustainability/funding plan, among",0.118039,0.65697,0.715686,0.013636,33
124
+ 3,"others, before sanctioning a sister city agreement. Check with your local program or mayor's",0.117647,0.672727,0.743529,0.014242,34
125
+ 3,office to see if this is the case.,0.117647,0.689091,0.241176,0.011515,35
126
+ 3,On the following pages you'll find a series of partnership agreements to give you an idea of,0.118039,0.717879,0.728627,0.013939,36
127
+ 3,"what is possible. While you should feel free to use some of the formatting and language, we",0.117255,0.734242,0.73451,0.013636,37
128
+ 3,encourage you to make your agreement your own and be creative with what you produce. If,0.117647,0.750606,0.737647,0.013636,38
129
+ 3,you are unsure about your agreement or want advice you can always solicit feedback by,0.117647,0.766667,0.708627,0.013636,39
130
+ 3,sending it to our Membership Director at [email protected] or contacting us at (202),0.117647,0.782727,0.732157,0.013636,40
131
+ 3,347-8630.,0.117647,0.799394,0.080392,0.010303,41
132
+ 4,Partnership Agreement,0.516471,0.027879,0.440784,0.032727,1
133
+ 4,SisterCities,0.169412,0.033333,0.239608,0.028485,2
134
+ 4,INTERNATIONAL,0.170196,0.066667,0.238431,0.009091,3
135
+ 4,Toolkit,0.830588,0.072727,0.127843,0.025758,4
136
+ 4,Connect globally. Thrive locally.,0.169412,0.08697,0.239608,0.013333,5
137
+ 4,"jull bubzig 2000 3,312",0.378039,0.291212,0.32549,0.019394,6
138
+ 4,ABU DHABI MUNICIPALITY & TOWN PLANNING,0.376471,0.316667,0.327451,0.016667,7
139
+ 4,AN AGREEMENT FOR THE ESTABLISHMENT OF,0.260784,0.373636,0.52549,0.012727,8
140
+ 4,SISTER CITIES RELATIONSHIP,0.337647,0.393636,0.342745,0.012121,9
141
+ 4,BETWEEN,0.454902,0.413636,0.110588,0.011212,10
142
+ 4,THE CITY OF ABU DHABI ( U. A.E),0.337255,0.432727,0.375686,0.013939,11
143
+ 4,AND,0.487843,0.452727,0.048235,0.011212,12
144
+ 4,"HOUSTON, TEXAS ( U.S.A)",0.385882,0.471515,0.298039,0.014848,13
145
+ 4,"The Sister City Program, administered by Sister Cities International, was initiated",0.221961,0.525455,0.597255,0.01303,14
146
+ 4,By the President of the United States of America in 1956 to encourage greater,0.222745,0.539394,0.561961,0.012727,15
147
+ 4,Friendship and understanding between the United States and other nations through,0.222745,0.553333,0.608235,0.012727,16
148
+ 4,Direct personal contact: and,0.222745,0.567576,0.20549,0.012424,17
149
+ 4,"In order to foster those goals, the people of Abu Dhabi and Houston, in a gesture of",0.222353,0.594242,0.603529,0.012424,18
150
+ 4,"Friendship and goodwill, agree to collaborate for the mutual benefit of their",0.222745,0.608182,0.547843,0.01303,19
151
+ 4,"Communities by exploring education, economic and cultural opportunities.",0.222353,0.622121,0.541961,0.012121,20
152
+ 4,"Abu Dhabi and Houston, sharing a common interest in energy, technology and",0.221569,0.648788,0.574118,0.012424,21
153
+ 4,"medicine, and the desire to promote mutual understanding among our citizens do",0.222353,0.66303,0.588235,0.012121,22
154
+ 4,"hereby proclaim themselves Sister Cities beginning on the 13th day of March 2001,",0.221961,0.673636,0.594118,0.015758,23
155
+ 4,the date of Houston City Council resolution estatblishing the Sister City,0.221961,0.690303,0.519608,0.01303,24
156
+ 4,relationship became effective.,0.221569,0.705152,0.217647,0.012424,25
157
+ 4,"Signed on this 26 of October 2002, in duplicate in the Arabic and English",0.221569,0.732121,0.533333,0.01303,26
158
+ 4,"Languages, both text being equally authentic.",0.221961,0.746667,0.328627,0.012727,27
159
+ 4,A,0.344314,0.768485,0.084706,0.030303,28
160
+ 4,Sheikh Mohammed bin Butti AI Hamed,0.245882,0.806364,0.366275,0.010909,29
161
+ 4,Lee P.Brown,0.729412,0.806364,0.118824,0.010303,30
162
+ 4,Mayor of Houston,0.704706,0.823333,0.166667,0.012424,31
163
+ 4,Chairman of Abu Dhabi Municipality,0.24549,0.823636,0.342353,0.012727,32
164
+ 4,&Town Planning,0.324314,0.841212,0.155686,0.012424,33
165
+ 5,Partnership Agreement,0.516078,0.027879,0.441176,0.032424,1
166
+ 5,SisterCities,0.169412,0.033333,0.239608,0.028485,2
167
+ 5,INTERNATIONAL,0.17098,0.066667,0.237255,0.009091,3
168
+ 5,Toolkit,0.83098,0.072727,0.127059,0.025758,4
169
+ 5,Connect globally. Thrive locally.,0.169412,0.08697,0.239216,0.013333,5
170
+ 5,THE CITY OF NEW YORK,0.438824,0.262121,0.240784,0.009697,6
171
+ 5,OFFICE OF THE MAYOR,0.450196,0.27697,0.220392,0.009697,7
172
+ 5,"NEW YORK, N.Y. 10007",0.461176,0.29303,0.196863,0.010303,8
173
+ 5,THE NEW YORK CITY-LONDON SISTER CITY PARTNERSHIP,0.267451,0.355758,0.582745,0.011818,9
174
+ 5,Memorandum of Understanding,0.420392,0.371212,0.274902,0.013333,10
175
+ 5,The Sister City partnership between New York City and London will foster mutually,0.201176,0.402121,0.674118,0.014242,11
176
+ 5,beneficial solutions to common challenges for these two great cosmopolitan entities.,0.201176,0.417273,0.66902,0.013636,12
177
+ 5,"Consequently, the Sister City relationship between the two will be one of the most",0.201176,0.432727,0.652549,0.015152,13
178
+ 5,"important in their network of global partnerships, as it strives to:",0.201176,0.448182,0.50902,0.015455,14
179
+ 5,Encourage and publicize existing exchanges between London and New York City so,0.230588,0.480303,0.671373,0.015152,15
180
+ 5,that they can flourish to benefit a wider cross-section of the citizens of both;,0.230588,0.496061,0.602353,0.015152,16
181
+ 5,"Support and promote the development of new social, economic, academic and",0.230196,0.512424,0.618431,0.015455,17
182
+ 5,community programs to encourage both cities' citizens to share their experiences as a,0.229804,0.527879,0.678039,0.014848,18
183
+ 5,medium for learning from one another;,0.229804,0.543636,0.309412,0.013939,19
184
+ 5,Generate an improvement of the operation of the cities' various government agencies,0.229804,0.56,0.676078,0.014545,20
185
+ 5,by serving as a conduit of information;,0.22902,0.575758,0.307843,0.014848,21
186
+ 5,"Identify themes, common to both, that can generate new initiatives to further and",0.229412,0.591818,0.640784,0.015152,22
187
+ 5,"nurture the increasingly powerful financial, social and cultural relationships between",0.22902,0.607576,0.671373,0.014242,23
188
+ 5,the cities;,0.22902,0.624545,0.076471,0.012424,24
189
+ 5,Promote key mayoral priorities relevant to both London and New York City;,0.228627,0.639394,0.608627,0.015152,25
190
+ 5,Provide financial or in kind support to community-led programs that advance the,0.228627,0.656061,0.641569,0.013636,26
191
+ 5,aims of the Sister City partnership;,0.22902,0.672121,0.275294,0.013636,27
192
+ 5,"With the above purposes in mind, the Mayor of the City of New York and the Mayor of",0.198824,0.702424,0.697647,0.014848,28
193
+ 5,London solemnly confirm that these two cities are united by an official partnership by the,0.198824,0.718182,0.710196,0.014545,29
194
+ 5,protocol of this Memorandum of Understanding.,0.198431,0.733939,0.384314,0.015152,30
195
+ 5,This agreement will go into effect from the date of signatures.,0.310196,0.780606,0.488235,0.014545,31
196
+ 5,Thedder Rudolph W. Giuliani,0.178824,0.795455,0.244314,0.100909,32
197
+ 5,Signed in March of 2001,0.455686,0.796364,0.19451,0.013636,33
198
+ 5,Ken Mayor Livingstone,0.672157,0.877576,0.132941,0.029091,34
199
+ 5,Mayor,0.311373,0.894848,0.053333,0.012727,35
200
+ 5,New York City,0.287843,0.909091,0.121176,0.013333,36
201
+ 5,London,0.701961,0.909091,0.061569,0.010606,37
202
+ 6,Partnership Agreement,0.515686,0.027576,0.441961,0.03303,1
203
+ 6,SisterCities,0.169412,0.03303,0.24,0.028182,2
204
+ 6,INTERNATIONAL,0.169804,0.066667,0.238431,0.009091,3
205
+ 6,Toolkit,0.83098,0.072727,0.127451,0.025758,4
206
+ 6,Connect globally. Thrive locally.,0.169412,0.08697,0.239608,0.013333,5
207
+ 6,CHIC OF STATE,0.247451,0.190606,0.141961,0.036364,6
208
+ 6,City of Long Beach,0.388627,0.196667,0.476471,0.066364,7
209
+ 6,California,0.551373,0.257273,0.136471,0.033333,8
210
+ 6,Sister City Agreement,0.321961,0.305455,0.378431,0.035152,9
211
+ 6,between the,0.464706,0.352727,0.084314,0.009697,10
212
+ 6,City of Long Beach,0.38,0.378485,0.252549,0.01697,11
213
+ 6,"California, USA",0.4,0.397576,0.21098,0.016061,12
214
+ 6,and the,0.48,0.415152,0.053333,0.009091,13
215
+ 6,City of San Pablo de Manta,0.321569,0.428788,0.369804,0.01697,14
216
+ 6,"Ecuador, South America",0.347451,0.447879,0.317255,0.015152,15
217
+ 6,"In accordance with the authorization and approval expressed by the City of Long Beach,",0.261569,0.482121,0.536863,0.012121,16
218
+ 6,"California, USA, and the City of San Pablo de Manta, Ecundor, South America, it is declared",0.217647,0.492727,0.581176,0.01303,17
219
+ 6,"that a ""Sister City Agreement between the two cities is hereby established for the following",0.217647,0.502727,0.581569,0.012121,18
220
+ 6,purposes:,0.216863,0.516061,0.058039,0.009394,19
221
+ 6,(1) to promote and expand the effective and mutually beneficial cooperation between,0.278824,0.532727,0.520392,0.012424,20
222
+ 6,the people of Long Beach and the people of San Pablo de Manta; and,0.218039,0.543636,0.40549,0.012424,21
223
+ 6,"(2) to promote international goodwill, understanding, and expanded business",0.279216,0.56303,0.520784,0.012424,22
224
+ 6,"relations between the two cities and their respective nations by the exchange of people, ideas, and",0.218039,0.573636,0.581569,0.012121,23
225
+ 6,"information in a unide variety of economic, social, cultural, municipal, environmental,",0.218039,0.584242,0.581176,0.012121,24
226
+ 6,"professional, technical, youth, and other endeavors; and",0.217647,0.594848,0.333333,0.012121,25
227
+ 6,"(3) to foster and encourage charitable, scientific, trade and commerce, literary and",0.279608,0.613939,0.520784,0.012727,26
228
+ 6,educational activities between the two cities;,0.218039,0.625455,0.265882,0.009697,27
229
+ 6,This Sister City Agreement shall be officially established and shall become effective when,0.263137,0.644545,0.536863,0.012727,28
230
+ 6,"this document has been duly executed by the Mayor of Long Beach, California, USA, and the",0.218824,0.654848,0.581961,0.012424,29
231
+ 6,"Mayor of San Pablo de Manta, Ecundor, South America.",0.218431,0.665758,0.338824,0.012121,30
232
+ 6,STATE OFFICE,0.276471,0.713636,0.050588,0.048788,31
233
+ 6,Beverly 0 Neill,0.587451,0.736667,0.121961,0.013636,32
234
+ 6,"Mayor, City of Long Beach",0.542353,0.751212,0.21098,0.013636,33
235
+ 6,"California, USA",0.582745,0.765758,0.125098,0.01303,34
236
+ 6,10.2aulus,0.490588,0.771818,0.220392,0.062424,35
237
+ 6,Ing. Jorge O. Zambrano Cedeño,0.527059,0.825152,0.242745,0.013333,36
238
+ 6,"Mayor, City of San Pablo de Manta",0.505098,0.839394,0.277647,0.013636,37
239
+ 6,"Ecuador, South America",0.551765,0.854242,0.188235,0.011818,38
240
+ 6,"Dated: September 19, 2000",0.544706,0.883333,0.202745,0.01303,39
241
+ 7,Partnership Agreement,0.516078,0.027879,0.441176,0.032424,1
242
+ 7,SisterCities,0.169412,0.03303,0.24,0.028485,2
243
+ 7,INTERNATIONAL,0.170196,0.066667,0.237647,0.009091,3
244
+ 7,Toolkit,0.83098,0.072727,0.127451,0.025758,4
245
+ 7,Connect globally. Thrive locally.,0.169412,0.08697,0.239216,0.013333,5
246
+ 7,REAFFIRMATION OF SISTER CITIES DECLARATION,0.324706,0.165152,0.483529,0.013939,6
247
+ 7,adopted by,0.2,0.213333,0.080392,0.013636,7
248
+ 7,THE HONORABLE RICHARD M. DALEY,0.396078,0.214242,0.335686,0.012424,8
249
+ 7,MAYOR OF CHICAGO,0.472549,0.231212,0.18549,0.011515,9
250
+ 7,and,0.199608,0.260909,0.026275,0.010606,10
251
+ 7,THE HONORABLE ZHANG RONGMAO,0.401961,0.261212,0.323137,0.011212,11
252
+ 7,MAYOR OF SHENYANG,0.463529,0.273636,0.202353,0.011212,12
253
+ 7,ON,0.551765,0.298182,0.026667,0.011515,13
254
+ 7,"JUNE 5, 1995",0.500392,0.323636,0.128235,0.014848,14
255
+ 7,"On this the tenth anniversary of the signing of a sister city agreement, in order to further",0.255686,0.36303,0.67098,0.015152,15
256
+ 7,the traditional links of friendship between Chicago and Shenyang and to reaffirm their mutual,0.198824,0.378788,0.727843,0.015455,16
257
+ 7,"aspiration to work in unison for the benefit of their cities and nations, the Honorable Mayor",0.199608,0.394848,0.727843,0.014848,17
258
+ 7,"Richard M. Daley, Mayor of the City of Chicago, and the Honorable Zhang Rongmao, Mayor",0.199216,0.411212,0.727451,0.014242,18
259
+ 7,"of the City of Shenyang, on this fifth day of June 1995, do hereby acknowledge and reaffirm the",0.199216,0.42697,0.72549,0.014848,19
260
+ 7,sister cities agreement between the City of Chicago and the City of Shenyang.,0.199608,0.443636,0.57451,0.014242,20
261
+ 7,"The City of Chicago and the City of Shenyang on the basis of friendly cooperation,",0.256078,0.473939,0.665098,0.015152,21
262
+ 7,equality and mutual benefit will continue to develop a sister cities relationship to promote and,0.2,0.490303,0.724706,0.014242,22
263
+ 7,broaden economic cooperation and cultural exchanges between the two cities.,0.199216,0.506061,0.57451,0.014242,23
264
+ 7,The two cities do hereby declare their interest in exploring the establishment of business,0.255294,0.537273,0.668235,0.015455,24
265
+ 7,and trade relations between Chicago and Shenyang.,0.198824,0.554545,0.387843,0.013636,25
266
+ 7,"In addition, exchanges will be promoted in the area of the arts such as exhibits, music,",0.254118,0.583939,0.666667,0.015455,26
267
+ 7,dance and other cultural activities.,0.198431,0.601212,0.256471,0.010606,27
268
+ 7,"In addition, exchanges will be promoted in education and the establishment of contacts",0.254118,0.630303,0.668627,0.015758,28
269
+ 7,within educational institutions encouraged.,0.198824,0.647273,0.32,0.014242,29
270
+ 7,"In addition, we declare our intention to promote exchanges in such fields as science and",0.253725,0.678182,0.668627,0.014848,30
271
+ 7,"technology, sports, health, youth and any areas that will contribute to the prosperity and the",0.198039,0.693636,0.722745,0.015152,31
272
+ 7,further development of friendship between the people of our two cities.,0.194902,0.711515,0.525098,0.013636,32
273
+ 7,3h.5.,0.593725,0.750606,0.218039,0.06303,33
274
+ 7,THE HONORABLE ZHANG RONGMAO,0.588627,0.819394,0.287843,0.011818,34
275
+ 7,THE HONORABLE RICHARD M. DALEY,0.197255,0.821515,0.303529,0.010606,35
276
+ 7,MAYOR OF SHENYANG,0.587451,0.835455,0.177647,0.010303,36
277
+ 7,MAYOR OF CHICAGO,0.195686,0.835758,0.164706,0.010606,37
example_data/example_outputs/doubled_output_joined.pdf_ocr_output.csv CHANGED
The diff for this file is too large to render. See raw diff
 
tools/config.py CHANGED
@@ -161,11 +161,9 @@ if OUTPUT_FOLDER == "TEMP" or INPUT_FOLDER == "TEMP":
161
  INPUT_FOLDER = temp_dir + "/"
162
 
163
  GRADIO_TEMP_DIR = get_or_create_env_var(
164
- "GRADIO_TEMP_DIR", "tmp/gradio_tmp/"
165
  ) # Default Gradio temp folder
166
- MPLCONFIGDIR = get_or_create_env_var(
167
- "MPLCONFIGDIR", "tmp/matplotlib_cache/"
168
- ) # Matplotlib cache folder
169
 
170
  ###
171
  # LOGGING OPTIONS
@@ -545,6 +543,8 @@ except Exception as e:
545
  # Get some environment variables and Launch the Gradio app
546
  COGNITO_AUTH = get_or_create_env_var("COGNITO_AUTH", "0")
547
 
 
 
548
  RUN_DIRECT_MODE = get_or_create_env_var("RUN_DIRECT_MODE", "0")
549
 
550
  # Direct mode configuration options
@@ -574,7 +574,7 @@ ROOT_PATH = get_or_create_env_var("ROOT_PATH", "")
574
 
575
  DEFAULT_CONCURRENCY_LIMIT = int(get_or_create_env_var("DEFAULT_CONCURRENCY_LIMIT", "3"))
576
 
577
- FILE_INPUT_HEIGHT = get_or_create_env_var("FILE_INPUT_HEIGHT", "200")
578
 
579
  ### ALLOW LIST
580
 
 
161
  INPUT_FOLDER = temp_dir + "/"
162
 
163
  GRADIO_TEMP_DIR = get_or_create_env_var(
164
+ "GRADIO_TEMP_DIR", ""
165
  ) # Default Gradio temp folder
166
+ MPLCONFIGDIR = get_or_create_env_var("MPLCONFIGDIR", "") # Matplotlib cache folder
 
 
167
 
168
  ###
169
  # LOGGING OPTIONS
 
543
  # Get some environment variables and Launch the Gradio app
544
  COGNITO_AUTH = get_or_create_env_var("COGNITO_AUTH", "0")
545
 
546
+ SHOW_EXAMPLES = get_or_create_env_var("SHOW_EXAMPLES", "True")
547
+
548
  RUN_DIRECT_MODE = get_or_create_env_var("RUN_DIRECT_MODE", "0")
549
 
550
  # Direct mode configuration options
 
574
 
575
  DEFAULT_CONCURRENCY_LIMIT = int(get_or_create_env_var("DEFAULT_CONCURRENCY_LIMIT", "3"))
576
 
577
+ FILE_INPUT_HEIGHT = int(get_or_create_env_var("FILE_INPUT_HEIGHT", "200"))
578
 
579
  ### ALLOW LIST
580
 
tools/data_anonymise.py CHANGED
@@ -515,8 +515,6 @@ def anonymise_files_with_open_text(
515
  if isinstance(out_message, str):
516
  out_message = [out_message]
517
 
518
- # print("log_files_output_paths:",log_files_output_paths)
519
-
520
  if isinstance(log_files_output_paths, str):
521
  log_files_output_paths = list()
522
 
 
515
  if isinstance(out_message, str):
516
  out_message = [out_message]
517
 
 
 
518
  if isinstance(log_files_output_paths, str):
519
  log_files_output_paths = list()
520
 
tools/file_conversion.py CHANGED
@@ -87,9 +87,6 @@ def is_pdf(filename):
87
  return filename.lower().endswith(".pdf")
88
 
89
 
90
- ## Convert pdf to image if necessary
91
-
92
-
93
  def check_image_size_and_reduce(out_path: str, image: Image):
94
  """
95
  Check if a given image size is above around 4.5mb, and reduce size if necessary. 5mb is the maximum possible to submit to AWS Textract.
@@ -297,7 +294,6 @@ def process_file_for_image_creation(
297
 
298
  # Check if the file is a PDF
299
  elif file_extension == ".pdf":
300
- # print(f"{file_path} is a PDF file. Converting to image set")
301
 
302
  # Run your function for processing PDF files here
303
  img_path, image_sizes_width, image_sizes_height, all_img_details = (
@@ -653,8 +649,8 @@ def word_level_ocr_output_to_dataframe(ocr_results: dict) -> pd.DataFrame:
653
  def prepare_image_or_pdf(
654
  file_paths: List[str],
655
  text_extract_method: str,
656
- all_line_level_ocr_results_df: pd.DataFrame,
657
- all_page_line_level_ocr_results_with_words_df: pd.DataFrame,
658
  latest_file_completed: int = 0,
659
  out_message: List[str] = list(),
660
  first_loop_state: bool = False,
 
87
  return filename.lower().endswith(".pdf")
88
 
89
 
 
 
 
90
  def check_image_size_and_reduce(out_path: str, image: Image):
91
  """
92
  Check if a given image size is above around 4.5mb, and reduce size if necessary. 5mb is the maximum possible to submit to AWS Textract.
 
294
 
295
  # Check if the file is a PDF
296
  elif file_extension == ".pdf":
 
297
 
298
  # Run your function for processing PDF files here
299
  img_path, image_sizes_width, image_sizes_height, all_img_details = (
 
649
  def prepare_image_or_pdf(
650
  file_paths: List[str],
651
  text_extract_method: str,
652
+ all_line_level_ocr_results_df: pd.DataFrame = None,
653
+ all_page_line_level_ocr_results_with_words_df: pd.DataFrame = None,
654
  latest_file_completed: int = 0,
655
  out_message: List[str] = list(),
656
  first_loop_state: bool = False,