Commit
·
bbf844d
1
Parent(s):
6a6aac2
Added examples to tops of various tabs to demonstrate basic functions (optional). Minor changes to example csv ocr output
Browse files
app.py
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
import os
|
2 |
-
import time
|
3 |
|
4 |
import gradio as gr
|
5 |
import pandas as pd
|
@@ -95,6 +94,7 @@ from tools.config import (
|
|
95 |
SAVE_LOGS_TO_DYNAMODB,
|
96 |
SESSION_OUTPUT_FOLDER,
|
97 |
SHOW_COSTS,
|
|
|
98 |
SHOW_LANGUAGE_SELECTION,
|
99 |
SHOW_WHOLE_DOCUMENT_TEXTRACT_CALL_OPTIONS,
|
100 |
TABULAR_PII_DETECTION_MODELS,
|
@@ -206,8 +206,11 @@ pd.set_option("future.no_silent_downcasting", True)
|
|
206 |
ensure_folder_exists(CONFIG_FOLDER)
|
207 |
ensure_folder_exists(OUTPUT_FOLDER)
|
208 |
ensure_folder_exists(INPUT_FOLDER)
|
209 |
-
|
210 |
-
ensure_folder_exists(
|
|
|
|
|
|
|
211 |
ensure_folder_exists(FEEDBACK_LOGS_FOLDER)
|
212 |
ensure_folder_exists(ACCESS_LOGS_FOLDER)
|
213 |
ensure_folder_exists(USAGE_LOGS_FOLDER)
|
@@ -291,79 +294,116 @@ if DEFAULT_HANDWRITE_SIGNATURE_CHECKBOX:
|
|
291 |
CHOSEN_COMPREHEND_ENTITIES.extend(custom_entities)
|
292 |
FULL_COMPREHEND_ENTITY_LIST.extend(custom_entities)
|
293 |
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
|
333 |
-
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
|
339 |
-
|
340 |
-
|
341 |
-
|
342 |
-
|
343 |
-
|
344 |
-
|
345 |
-
|
346 |
-
|
347 |
-
|
348 |
-
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
|
364 |
-
|
365 |
-
|
366 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
367 |
|
368 |
# Create the gradio interface
|
369 |
app = gr.Blocks(
|
@@ -967,35 +1007,105 @@ with app:
|
|
967 |
# REDACTION PDF/IMAGES TABLE
|
968 |
###
|
969 |
with gr.Tab("Redact PDFs/images"):
|
970 |
-
with gr.Accordion("Redact document", open=True):
|
971 |
-
in_doc_files = gr.File(
|
972 |
-
label="Choose a PDF document or image file (PDF, JPG, PNG)",
|
973 |
-
file_count="multiple",
|
974 |
-
file_types=[".pdf", ".jpg", ".png", ".json", ".zip"],
|
975 |
-
height=FILE_INPUT_HEIGHT,
|
976 |
-
)
|
977 |
|
978 |
-
|
979 |
-
|
980 |
-
|
981 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
982 |
)
|
983 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
984 |
with gr.Accordion(
|
985 |
"Enable AWS Textract signature detection (default is off)", open=False
|
986 |
):
|
987 |
-
handwrite_signature_checkbox = gr.CheckboxGroup(
|
988 |
-
|
989 |
-
|
990 |
-
|
991 |
-
)
|
|
|
992 |
|
993 |
with gr.Row(equal_height=True):
|
994 |
-
pii_identification_method_drop = gr.Radio(
|
995 |
-
|
996 |
-
|
997 |
-
|
998 |
-
)
|
|
|
999 |
|
1000 |
if SHOW_COSTS == "True":
|
1001 |
with gr.Accordion(
|
@@ -1536,30 +1646,72 @@ with app:
|
|
1536 |
"Search for duplicate pages/subdocuments in your ocr_output files. By default, this function will search for duplicate text across multiple pages, and then join consecutive matching pages together into matched 'subdocuments'. The results can be reviewed below, false positives removed, and then the verified results applied to a document you have loaded in on the 'Review redactions' tab."
|
1537 |
)
|
1538 |
|
1539 |
-
|
1540 |
-
|
1541 |
-
|
1542 |
-
|
1543 |
-
|
1544 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1545 |
)
|
1546 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1547 |
with gr.Accordion("Duplicate matching parameters", open=False):
|
1548 |
with gr.Row():
|
1549 |
-
duplicate_threshold_input = gr.Number(
|
1550 |
-
|
1551 |
-
|
1552 |
-
|
1553 |
-
)
|
1554 |
-
|
1555 |
-
|
1556 |
-
|
1557 |
-
|
1558 |
-
|
1559 |
-
|
1560 |
-
|
1561 |
-
|
1562 |
-
|
|
|
|
|
|
|
|
|
|
|
1563 |
|
1564 |
gr.Markdown("#### Matching Strategy")
|
1565 |
greedy_match_input = gr.Checkbox(
|
@@ -1653,14 +1805,62 @@ with app:
|
|
1653 |
"""Choose Word or a tabular data file (xlsx or csv) to redact. Note that when redacting complex Word files with e.g. images, some content/formatting will be removed, and it may not attempt to redact headers. You may prefer to convert the doc file to PDF in Word, and then run it through the first tab of this app (Print to PDF in print settings). Alternatively, an xlsx file output is provided when redacting docx files directly to allow for copying and pasting outputs back into the original document if preferred."""
|
1654 |
)
|
1655 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1656 |
with gr.Accordion("Redact Word or Excel/csv files", open=True):
|
1657 |
with gr.Accordion("Upload docx, xlsx, or csv files", open=True):
|
1658 |
-
in_data_files = gr.File(
|
1659 |
-
|
1660 |
-
|
1661 |
-
|
1662 |
-
|
1663 |
-
)
|
|
|
1664 |
with gr.Accordion("Redact open text", open=False):
|
1665 |
in_text = gr.Textbox(
|
1666 |
label="Enter open text",
|
@@ -1676,34 +1876,39 @@ with app:
|
|
1676 |
allow_custom_value=True,
|
1677 |
)
|
1678 |
|
1679 |
-
in_colnames = gr.Dropdown(
|
1680 |
-
|
1681 |
-
|
1682 |
-
|
1683 |
-
)
|
1684 |
-
|
1685 |
-
|
1686 |
-
|
1687 |
-
|
1688 |
-
|
1689 |
-
|
|
|
|
|
|
|
1690 |
|
1691 |
with gr.Accordion(
|
1692 |
"Anonymisation output format - by default will replace PII with a blank space",
|
1693 |
open=False,
|
1694 |
):
|
1695 |
with gr.Row():
|
1696 |
-
anon_strategy = gr.Radio(
|
1697 |
-
|
1698 |
-
|
1699 |
-
|
1700 |
-
|
1701 |
-
|
1702 |
-
|
1703 |
-
|
1704 |
-
|
1705 |
-
|
1706 |
-
) # , "encrypt", "fake_first_name" are also available, but are not currently included as not that useful in current form
|
|
|
|
|
1707 |
do_initial_clean = gr.Checkbox(
|
1708 |
label="Do initial clean of text (remove URLs, HTML tags, and non-ASCII characters)",
|
1709 |
value=DO_INITIAL_TABULAR_DATA_CLEAN,
|
@@ -1713,15 +1918,15 @@ with app:
|
|
1713 |
"Redact text/data files", variant="primary"
|
1714 |
)
|
1715 |
|
1716 |
-
|
1717 |
-
|
1718 |
-
|
1719 |
-
|
1720 |
-
|
1721 |
-
|
1722 |
-
|
1723 |
-
|
1724 |
-
|
1725 |
|
1726 |
###
|
1727 |
# TABULAR DUPLICATE DETECTION
|
@@ -1732,12 +1937,13 @@ with app:
|
|
1732 |
)
|
1733 |
|
1734 |
with gr.Accordion("Step 1: Upload files and configure analysis", open=True):
|
1735 |
-
in_tabular_duplicate_files = gr.File(
|
1736 |
-
|
1737 |
-
|
1738 |
-
|
1739 |
-
|
1740 |
-
)
|
|
|
1741 |
|
1742 |
with gr.Row(equal_height=True):
|
1743 |
tabular_duplicate_threshold = gr.Number(
|
@@ -1768,12 +1974,13 @@ with app:
|
|
1768 |
allow_custom_value=True,
|
1769 |
)
|
1770 |
|
1771 |
-
tabular_text_columns = gr.Dropdown(
|
1772 |
-
|
1773 |
-
|
1774 |
-
|
1775 |
-
|
1776 |
-
)
|
|
|
1777 |
|
1778 |
find_tabular_duplicates_btn = gr.Button(
|
1779 |
value="Find duplicate cells/rows", variant="primary"
|
@@ -1937,18 +2144,20 @@ with app:
|
|
1937 |
)
|
1938 |
|
1939 |
with gr.Accordion("Select entity types to redact", open=True):
|
1940 |
-
in_redact_entities = gr.Dropdown(
|
1941 |
-
|
1942 |
-
|
1943 |
-
|
1944 |
-
|
1945 |
-
)
|
1946 |
-
in_redact_comprehend_entities = gr.Dropdown(
|
1947 |
-
|
1948 |
-
|
1949 |
-
|
1950 |
-
|
1951 |
-
)
|
|
|
|
|
1952 |
|
1953 |
with gr.Row():
|
1954 |
max_fuzzy_spelling_mistakes_num = gr.Number(
|
@@ -5013,6 +5222,7 @@ with app:
|
|
5013 |
comprehend_query_number,
|
5014 |
],
|
5015 |
api_name="redact_data",
|
|
|
5016 |
)
|
5017 |
|
5018 |
# If the output file count text box changes, keep going with redacting each data file until done
|
@@ -5053,6 +5263,7 @@ with app:
|
|
5053 |
actual_time_taken_number,
|
5054 |
comprehend_query_number,
|
5055 |
],
|
|
|
5056 |
).success(
|
5057 |
fn=reveal_feedback_buttons,
|
5058 |
outputs=[
|
@@ -5085,6 +5296,7 @@ with app:
|
|
5085 |
actual_time_taken_number,
|
5086 |
task_textbox,
|
5087 |
],
|
|
|
5088 |
)
|
5089 |
|
5090 |
# full_duplicated_data_df,
|
|
|
1 |
import os
|
|
|
2 |
|
3 |
import gradio as gr
|
4 |
import pandas as pd
|
|
|
94 |
SAVE_LOGS_TO_DYNAMODB,
|
95 |
SESSION_OUTPUT_FOLDER,
|
96 |
SHOW_COSTS,
|
97 |
+
SHOW_EXAMPLES,
|
98 |
SHOW_LANGUAGE_SELECTION,
|
99 |
SHOW_WHOLE_DOCUMENT_TEXTRACT_CALL_OPTIONS,
|
100 |
TABULAR_PII_DETECTION_MODELS,
|
|
|
206 |
ensure_folder_exists(CONFIG_FOLDER)
|
207 |
ensure_folder_exists(OUTPUT_FOLDER)
|
208 |
ensure_folder_exists(INPUT_FOLDER)
|
209 |
+
if GRADIO_TEMP_DIR:
|
210 |
+
ensure_folder_exists(GRADIO_TEMP_DIR)
|
211 |
+
if MPLCONFIGDIR:
|
212 |
+
ensure_folder_exists(MPLCONFIGDIR)
|
213 |
+
|
214 |
ensure_folder_exists(FEEDBACK_LOGS_FOLDER)
|
215 |
ensure_folder_exists(ACCESS_LOGS_FOLDER)
|
216 |
ensure_folder_exists(USAGE_LOGS_FOLDER)
|
|
|
294 |
CHOSEN_COMPREHEND_ENTITIES.extend(custom_entities)
|
295 |
FULL_COMPREHEND_ENTITY_LIST.extend(custom_entities)
|
296 |
|
297 |
+
# Load some components outside of blocks context that are used for examples
|
298 |
+
## Redaction examples
|
299 |
+
in_doc_files = gr.File(
|
300 |
+
label="Choose a PDF document or image file (PDF, JPG, PNG)",
|
301 |
+
file_count="multiple",
|
302 |
+
file_types=[".pdf", ".jpg", ".png", ".json", ".zip"],
|
303 |
+
height=FILE_INPUT_HEIGHT,
|
304 |
+
)
|
305 |
+
|
306 |
+
text_extract_method_radio = gr.Radio(
|
307 |
+
label="""Choose text extraction method. Local options are lower quality but cost nothing - they may be worth a try if you are willing to spend some time reviewing outputs. AWS Textract has a cost per page - £2.66 ($3.50) per 1,000 pages with signature detection (default), £1.14 ($1.50) without. Change the settings in the tab below (AWS Textract signature detection) to change this.""",
|
308 |
+
value=DEFAULT_TEXT_EXTRACTION_MODEL,
|
309 |
+
choices=TEXT_EXTRACTION_MODELS,
|
310 |
+
)
|
311 |
+
|
312 |
+
pii_identification_method_drop = gr.Radio(
|
313 |
+
label="""Choose personal information detection method. The local model is lower quality but costs nothing - it may be worth a try if you are willing to spend some time reviewing outputs, or if you are only interested in searching for custom search terms (see Redaction settings - custom deny list). AWS Comprehend has a cost of around £0.0075 ($0.01) per 10,000 characters.""",
|
314 |
+
value=DEFAULT_PII_DETECTION_MODEL,
|
315 |
+
choices=PII_DETECTION_MODELS,
|
316 |
+
)
|
317 |
+
|
318 |
+
handwrite_signature_checkbox = gr.CheckboxGroup(
|
319 |
+
label="AWS Textract extraction settings",
|
320 |
+
choices=HANDWRITE_SIGNATURE_TEXTBOX_FULL_OPTIONS,
|
321 |
+
value=DEFAULT_HANDWRITE_SIGNATURE_CHECKBOX,
|
322 |
+
)
|
323 |
+
|
324 |
+
in_redact_entities = gr.Dropdown(
|
325 |
+
value=CHOSEN_REDACT_ENTITIES,
|
326 |
+
choices=FULL_ENTITY_LIST,
|
327 |
+
multiselect=True,
|
328 |
+
label="Local PII identification model (click empty space in box for full list)",
|
329 |
+
)
|
330 |
+
in_redact_comprehend_entities = gr.Dropdown(
|
331 |
+
value=CHOSEN_COMPREHEND_ENTITIES,
|
332 |
+
choices=FULL_COMPREHEND_ENTITY_LIST,
|
333 |
+
multiselect=True,
|
334 |
+
label="AWS Comprehend PII identification model (click empty space in box for full list)",
|
335 |
+
)
|
336 |
+
|
337 |
+
## Deduplication examples
|
338 |
+
in_duplicate_pages = gr.File(
|
339 |
+
label="Upload one or multiple 'ocr_output.csv' files to find duplicate pages and subdocuments",
|
340 |
+
file_count="multiple",
|
341 |
+
height=FILE_INPUT_HEIGHT,
|
342 |
+
file_types=[".csv"],
|
343 |
+
)
|
344 |
+
|
345 |
+
duplicate_threshold_input = gr.Number(
|
346 |
+
value=DEFAULT_DUPLICATE_DETECTION_THRESHOLD,
|
347 |
+
label="Similarity threshold",
|
348 |
+
info="Score (0-1) to consider pages a match.",
|
349 |
+
)
|
350 |
+
|
351 |
+
min_word_count_input = gr.Number(
|
352 |
+
value=DEFAULT_MIN_WORD_COUNT,
|
353 |
+
label="Minimum word count",
|
354 |
+
info="Pages with fewer words than this value are ignored.",
|
355 |
+
)
|
356 |
+
|
357 |
+
combine_page_text_for_duplicates_bool = gr.Checkbox(
|
358 |
+
value=True,
|
359 |
+
label="Analyse duplicate text by page (off for by line)",
|
360 |
+
)
|
361 |
+
|
362 |
+
## Tabular examples
|
363 |
+
in_data_files = gr.File(
|
364 |
+
label="Choose Excel or csv files",
|
365 |
+
file_count="multiple",
|
366 |
+
file_types=[".xlsx", ".xls", ".csv", ".parquet", ".docx"],
|
367 |
+
height=FILE_INPUT_HEIGHT,
|
368 |
+
)
|
369 |
+
|
370 |
+
in_colnames = gr.Dropdown(
|
371 |
+
choices=["Choose columns to anonymise"],
|
372 |
+
multiselect=True,
|
373 |
+
allow_custom_value=True,
|
374 |
+
label="Select columns that you want to anonymise (showing columns present across all files).",
|
375 |
+
)
|
376 |
+
|
377 |
+
pii_identification_method_drop_tabular = gr.Radio(
|
378 |
+
label="Choose PII detection method. AWS Comprehend has a cost of approximately $0.01 per 10,000 characters.",
|
379 |
+
value=DEFAULT_PII_DETECTION_MODEL,
|
380 |
+
choices=TABULAR_PII_DETECTION_MODELS,
|
381 |
+
)
|
382 |
+
|
383 |
+
anon_strategy = gr.Radio(
|
384 |
+
choices=[
|
385 |
+
"replace with 'REDACTED'",
|
386 |
+
"replace with <ENTITY_NAME>",
|
387 |
+
"redact completely",
|
388 |
+
"hash",
|
389 |
+
"mask",
|
390 |
+
],
|
391 |
+
label="Select an anonymisation method.",
|
392 |
+
value=DEFAULT_TABULAR_ANONYMISATION_STRATEGY,
|
393 |
+
) # , "encrypt", "fake_first_name" are also available, but are not currently included as not that useful in current form
|
394 |
+
|
395 |
+
in_tabular_duplicate_files = gr.File(
|
396 |
+
label="Upload CSV, Excel, or Parquet files to find duplicate cells/rows. Note that the app will remove duplicates from later cells/files that are found in earlier cells/files and not vice versa.",
|
397 |
+
file_count="multiple",
|
398 |
+
file_types=[".csv", ".xlsx", ".xls", ".parquet"],
|
399 |
+
height=FILE_INPUT_HEIGHT,
|
400 |
+
)
|
401 |
+
|
402 |
+
tabular_text_columns = gr.Dropdown(
|
403 |
+
label="Choose columns to deduplicate",
|
404 |
+
multiselect=True,
|
405 |
+
allow_custom_value=True,
|
406 |
+
)
|
407 |
|
408 |
# Create the gradio interface
|
409 |
app = gr.Blocks(
|
|
|
1007 |
# REDACTION PDF/IMAGES TABLE
|
1008 |
###
|
1009 |
with gr.Tab("Redact PDFs/images"):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1010 |
|
1011 |
+
# Examples for PDF/image redaction
|
1012 |
+
if SHOW_EXAMPLES == "True":
|
1013 |
+
gr.Markdown(
|
1014 |
+
"### Try an example - Click on an example below and then the 'Extract text and redact document' button:"
|
1015 |
+
)
|
1016 |
+
redaction_examples = gr.Examples(
|
1017 |
+
examples=[
|
1018 |
+
[
|
1019 |
+
[
|
1020 |
+
"example_data/example_of_emails_sent_to_a_professor_before_applying.pdf"
|
1021 |
+
],
|
1022 |
+
"Local model - selectable text",
|
1023 |
+
"Local",
|
1024 |
+
[],
|
1025 |
+
CHOSEN_REDACT_ENTITIES,
|
1026 |
+
CHOSEN_COMPREHEND_ENTITIES,
|
1027 |
+
[
|
1028 |
+
"example_data/example_of_emails_sent_to_a_professor_before_applying.pdf"
|
1029 |
+
],
|
1030 |
+
],
|
1031 |
+
[
|
1032 |
+
["example_data/example_complaint_letter.jpg"],
|
1033 |
+
"Local OCR model - PDFs without selectable text",
|
1034 |
+
"Local",
|
1035 |
+
[],
|
1036 |
+
CHOSEN_REDACT_ENTITIES,
|
1037 |
+
CHOSEN_COMPREHEND_ENTITIES,
|
1038 |
+
["example_data/example_complaint_letter.jpg"],
|
1039 |
+
],
|
1040 |
+
[
|
1041 |
+
["example_data/graduate-job-example-cover-letter.pdf"],
|
1042 |
+
"Local OCR model - PDFs without selectable text",
|
1043 |
+
"Local",
|
1044 |
+
[],
|
1045 |
+
["TITLES", "PERSON", "DATE_TIME"],
|
1046 |
+
CHOSEN_COMPREHEND_ENTITIES,
|
1047 |
+
["example_data/graduate-job-example-cover-letter.pdf"],
|
1048 |
+
],
|
1049 |
+
[
|
1050 |
+
["example_data/Partnership-Agreement-Toolkit_0_0.pdf"],
|
1051 |
+
"AWS Textract service - all PDF types",
|
1052 |
+
"AWS Comprehend",
|
1053 |
+
["Extract handwriting", "Extract signatures"],
|
1054 |
+
CHOSEN_REDACT_ENTITIES,
|
1055 |
+
CHOSEN_COMPREHEND_ENTITIES,
|
1056 |
+
["example_data/Partnership-Agreement-Toolkit_0_0.pdf"],
|
1057 |
+
],
|
1058 |
+
],
|
1059 |
+
inputs=[
|
1060 |
+
in_doc_files,
|
1061 |
+
text_extract_method_radio,
|
1062 |
+
pii_identification_method_drop,
|
1063 |
+
handwrite_signature_checkbox,
|
1064 |
+
in_redact_entities,
|
1065 |
+
in_redact_comprehend_entities,
|
1066 |
+
prepared_pdf_state,
|
1067 |
+
],
|
1068 |
+
example_labels=[
|
1069 |
+
"PDF with selectable text redaction",
|
1070 |
+
"Image redaction with local OCR",
|
1071 |
+
"PDF redaction with custom entities (TITLES, PERSON, DATE_TIME)",
|
1072 |
+
"PDF redaction with AWS services and signature detection",
|
1073 |
+
],
|
1074 |
)
|
1075 |
|
1076 |
+
with gr.Accordion("Redact document", open=True):
|
1077 |
+
# in_doc_files = gr.File(
|
1078 |
+
# label="Choose a PDF document or image file (PDF, JPG, PNG)",
|
1079 |
+
# file_count="multiple",
|
1080 |
+
# file_types=[".pdf", ".jpg", ".png", ".json", ".zip"],
|
1081 |
+
# height=FILE_INPUT_HEIGHT,
|
1082 |
+
# )
|
1083 |
+
in_doc_files.render()
|
1084 |
+
|
1085 |
+
# text_extract_method_radio = gr.Radio(
|
1086 |
+
# label="""Choose text extraction method. Local options are lower quality but cost nothing - they may be worth a try if you are willing to spend some time reviewing outputs. AWS Textract has a cost per page - £2.66 ($3.50) per 1,000 pages with signature detection (default), £1.14 ($1.50) without. Change the settings in the tab below (AWS Textract signature detection) to change this.""",
|
1087 |
+
# value=DEFAULT_TEXT_EXTRACTION_MODEL,
|
1088 |
+
# choices=TEXT_EXTRACTION_MODELS,
|
1089 |
+
# )
|
1090 |
+
text_extract_method_radio.render()
|
1091 |
+
|
1092 |
with gr.Accordion(
|
1093 |
"Enable AWS Textract signature detection (default is off)", open=False
|
1094 |
):
|
1095 |
+
# handwrite_signature_checkbox = gr.CheckboxGroup(
|
1096 |
+
# label="AWS Textract extraction settings",
|
1097 |
+
# choices=HANDWRITE_SIGNATURE_TEXTBOX_FULL_OPTIONS,
|
1098 |
+
# value=DEFAULT_HANDWRITE_SIGNATURE_CHECKBOX,
|
1099 |
+
# )
|
1100 |
+
handwrite_signature_checkbox.render()
|
1101 |
|
1102 |
with gr.Row(equal_height=True):
|
1103 |
+
# pii_identification_method_drop = gr.Radio(
|
1104 |
+
# label="""Choose personal information detection method. The local model is lower quality but costs nothing - it may be worth a try if you are willing to spend some time reviewing outputs, or if you are only interested in searching for custom search terms (see Redaction settings - custom deny list). AWS Comprehend has a cost of around £0.0075 ($0.01) per 10,000 characters.""",
|
1105 |
+
# value=DEFAULT_PII_DETECTION_MODEL,
|
1106 |
+
# choices=PII_DETECTION_MODELS,
|
1107 |
+
# )
|
1108 |
+
pii_identification_method_drop.render()
|
1109 |
|
1110 |
if SHOW_COSTS == "True":
|
1111 |
with gr.Accordion(
|
|
|
1646 |
"Search for duplicate pages/subdocuments in your ocr_output files. By default, this function will search for duplicate text across multiple pages, and then join consecutive matching pages together into matched 'subdocuments'. The results can be reviewed below, false positives removed, and then the verified results applied to a document you have loaded in on the 'Review redactions' tab."
|
1647 |
)
|
1648 |
|
1649 |
+
# Examples for duplicate page detection
|
1650 |
+
if SHOW_EXAMPLES == "True":
|
1651 |
+
gr.Markdown(
|
1652 |
+
"### Try an example - Click on an example below and then the 'Identify duplicate pages/subdocuments' button:"
|
1653 |
+
)
|
1654 |
+
duplicate_examples = gr.Examples(
|
1655 |
+
examples=[
|
1656 |
+
[
|
1657 |
+
[
|
1658 |
+
"example_data/example_outputs/doubled_output_joined.pdf_ocr_output.csv"
|
1659 |
+
],
|
1660 |
+
0.95,
|
1661 |
+
10,
|
1662 |
+
True,
|
1663 |
+
],
|
1664 |
+
[
|
1665 |
+
[
|
1666 |
+
"example_data/example_outputs/doubled_output_joined.pdf_ocr_output.csv"
|
1667 |
+
],
|
1668 |
+
0.95,
|
1669 |
+
3,
|
1670 |
+
False,
|
1671 |
+
],
|
1672 |
+
],
|
1673 |
+
inputs=[
|
1674 |
+
in_duplicate_pages,
|
1675 |
+
duplicate_threshold_input,
|
1676 |
+
min_word_count_input,
|
1677 |
+
combine_page_text_for_duplicates_bool,
|
1678 |
+
],
|
1679 |
+
example_labels=[
|
1680 |
+
"Find duplicate pages of text in document OCR outputs",
|
1681 |
+
"Find duplicate text lines in document OCR outputs",
|
1682 |
+
],
|
1683 |
)
|
1684 |
|
1685 |
+
with gr.Accordion("Step 1: Configure and run analysis", open=True):
|
1686 |
+
# in_duplicate_pages = gr.File(
|
1687 |
+
# label="Upload one or multiple 'ocr_output.csv' files to find duplicate pages and subdocuments",
|
1688 |
+
# file_count="multiple",
|
1689 |
+
# height=FILE_INPUT_HEIGHT,
|
1690 |
+
# file_types=[".csv"],
|
1691 |
+
# )
|
1692 |
+
in_duplicate_pages.render()
|
1693 |
+
|
1694 |
with gr.Accordion("Duplicate matching parameters", open=False):
|
1695 |
with gr.Row():
|
1696 |
+
# duplicate_threshold_input = gr.Number(
|
1697 |
+
# value=DEFAULT_DUPLICATE_DETECTION_THRESHOLD,
|
1698 |
+
# label="Similarity threshold",
|
1699 |
+
# info="Score (0-1) to consider pages a match.",
|
1700 |
+
# )
|
1701 |
+
duplicate_threshold_input.render()
|
1702 |
+
|
1703 |
+
# min_word_count_input = gr.Number(
|
1704 |
+
# value=DEFAULT_MIN_WORD_COUNT,
|
1705 |
+
# label="Minimum word count",
|
1706 |
+
# info="Pages with fewer words than this value are ignored.",
|
1707 |
+
# )
|
1708 |
+
min_word_count_input.render()
|
1709 |
+
|
1710 |
+
# combine_page_text_for_duplicates_bool = gr.Checkbox(
|
1711 |
+
# value=True,
|
1712 |
+
# label="Analyse duplicate text by page (off for by line)",
|
1713 |
+
# )
|
1714 |
+
combine_page_text_for_duplicates_bool.render()
|
1715 |
|
1716 |
gr.Markdown("#### Matching Strategy")
|
1717 |
greedy_match_input = gr.Checkbox(
|
|
|
1805 |
"""Choose Word or a tabular data file (xlsx or csv) to redact. Note that when redacting complex Word files with e.g. images, some content/formatting will be removed, and it may not attempt to redact headers. You may prefer to convert the doc file to PDF in Word, and then run it through the first tab of this app (Print to PDF in print settings). Alternatively, an xlsx file output is provided when redacting docx files directly to allow for copying and pasting outputs back into the original document if preferred."""
|
1806 |
)
|
1807 |
|
1808 |
+
# Examples for Word/Excel/csv redaction and tabular duplicate detection
|
1809 |
+
if SHOW_EXAMPLES == "True":
|
1810 |
+
gr.Markdown(
|
1811 |
+
"### Try an example - Click on an example below and then the 'Redact text/data files' button for redaction, or the 'Find duplicate cells/rows' button for duplicate detection:"
|
1812 |
+
)
|
1813 |
+
tabular_examples = gr.Examples(
|
1814 |
+
examples=[
|
1815 |
+
[
|
1816 |
+
["example_data/combined_case_notes.csv"],
|
1817 |
+
["Case Note", "Client"],
|
1818 |
+
"Local",
|
1819 |
+
"replace with 'REDACTED'",
|
1820 |
+
["example_data/combined_case_notes.csv"],
|
1821 |
+
["Case Note"],
|
1822 |
+
],
|
1823 |
+
[
|
1824 |
+
["example_data/Bold minimalist professional cover letter.docx"],
|
1825 |
+
[],
|
1826 |
+
"Local",
|
1827 |
+
"replace with 'REDACTED'",
|
1828 |
+
[],
|
1829 |
+
[],
|
1830 |
+
],
|
1831 |
+
[
|
1832 |
+
["example_data/Lambeth_2030-Our_Future_Our_Lambeth.pdf.csv"],
|
1833 |
+
["text"],
|
1834 |
+
"Local",
|
1835 |
+
"replace with 'REDACTED'",
|
1836 |
+
["example_data/Lambeth_2030-Our_Future_Our_Lambeth.pdf.csv"],
|
1837 |
+
["text"],
|
1838 |
+
],
|
1839 |
+
],
|
1840 |
+
inputs=[
|
1841 |
+
in_data_files,
|
1842 |
+
in_colnames,
|
1843 |
+
pii_identification_method_drop_tabular,
|
1844 |
+
anon_strategy,
|
1845 |
+
in_tabular_duplicate_files,
|
1846 |
+
tabular_text_columns,
|
1847 |
+
],
|
1848 |
+
example_labels=[
|
1849 |
+
"CSV file redaction with specific columns - remove text",
|
1850 |
+
"Word document redaction - replace with REDACTED",
|
1851 |
+
"Tabular duplicate detection in CSV files",
|
1852 |
+
],
|
1853 |
+
)
|
1854 |
+
|
1855 |
with gr.Accordion("Redact Word or Excel/csv files", open=True):
|
1856 |
with gr.Accordion("Upload docx, xlsx, or csv files", open=True):
|
1857 |
+
# in_data_files = gr.File(
|
1858 |
+
# label="Choose Excel or csv files",
|
1859 |
+
# file_count="multiple",
|
1860 |
+
# file_types=[".xlsx", ".xls", ".csv", ".parquet", ".docx"],
|
1861 |
+
# height=FILE_INPUT_HEIGHT,
|
1862 |
+
# )
|
1863 |
+
in_data_files.render()
|
1864 |
with gr.Accordion("Redact open text", open=False):
|
1865 |
in_text = gr.Textbox(
|
1866 |
label="Enter open text",
|
|
|
1876 |
allow_custom_value=True,
|
1877 |
)
|
1878 |
|
1879 |
+
# in_colnames = gr.Dropdown(
|
1880 |
+
# choices=["Choose columns to anonymise"],
|
1881 |
+
# multiselect=True,
|
1882 |
+
# allow_custom_value=True,
|
1883 |
+
# label="Select columns that you want to anonymise (showing columns present across all files).",
|
1884 |
+
# )
|
1885 |
+
in_colnames.render()
|
1886 |
+
|
1887 |
+
# pii_identification_method_drop_tabular = gr.Radio(
|
1888 |
+
# label="Choose PII detection method. AWS Comprehend has a cost of approximately $0.01 per 10,000 characters.",
|
1889 |
+
# value=DEFAULT_PII_DETECTION_MODEL,
|
1890 |
+
# choices=TABULAR_PII_DETECTION_MODELS,
|
1891 |
+
# )
|
1892 |
+
pii_identification_method_drop_tabular.render()
|
1893 |
|
1894 |
with gr.Accordion(
|
1895 |
"Anonymisation output format - by default will replace PII with a blank space",
|
1896 |
open=False,
|
1897 |
):
|
1898 |
with gr.Row():
|
1899 |
+
# anon_strategy = gr.Radio(
|
1900 |
+
# choices=[
|
1901 |
+
# "replace with 'REDACTED'",
|
1902 |
+
# "replace with <ENTITY_NAME>",
|
1903 |
+
# "redact completely",
|
1904 |
+
# "hash",
|
1905 |
+
# "mask",
|
1906 |
+
# ],
|
1907 |
+
# label="Select an anonymisation method.",
|
1908 |
+
# value=DEFAULT_TABULAR_ANONYMISATION_STRATEGY,
|
1909 |
+
# ) # , "encrypt", "fake_first_name" are also available, but are not currently included as not that useful in current form
|
1910 |
+
anon_strategy.render()
|
1911 |
+
|
1912 |
do_initial_clean = gr.Checkbox(
|
1913 |
label="Do initial clean of text (remove URLs, HTML tags, and non-ASCII characters)",
|
1914 |
value=DO_INITIAL_TABULAR_DATA_CLEAN,
|
|
|
1918 |
"Redact text/data files", variant="primary"
|
1919 |
)
|
1920 |
|
1921 |
+
with gr.Row():
|
1922 |
+
text_output_summary = gr.Textbox(label="Output result", lines=4)
|
1923 |
+
text_output_file = gr.File(label="Output files")
|
1924 |
+
text_tabular_files_done = gr.Number(
|
1925 |
+
value=0,
|
1926 |
+
label="Number of tabular files redacted",
|
1927 |
+
interactive=False,
|
1928 |
+
visible=False,
|
1929 |
+
)
|
1930 |
|
1931 |
###
|
1932 |
# TABULAR DUPLICATE DETECTION
|
|
|
1937 |
)
|
1938 |
|
1939 |
with gr.Accordion("Step 1: Upload files and configure analysis", open=True):
|
1940 |
+
# in_tabular_duplicate_files = gr.File(
|
1941 |
+
# label="Upload CSV, Excel, or Parquet files to find duplicate cells/rows. Note that the app will remove duplicates from later cells/files that are found in earlier cells/files and not vice versa.",
|
1942 |
+
# file_count="multiple",
|
1943 |
+
# file_types=[".csv", ".xlsx", ".xls", ".parquet"],
|
1944 |
+
# height=FILE_INPUT_HEIGHT,
|
1945 |
+
# )
|
1946 |
+
in_tabular_duplicate_files.render()
|
1947 |
|
1948 |
with gr.Row(equal_height=True):
|
1949 |
tabular_duplicate_threshold = gr.Number(
|
|
|
1974 |
allow_custom_value=True,
|
1975 |
)
|
1976 |
|
1977 |
+
# tabular_text_columns = gr.Dropdown(
|
1978 |
+
# choices=DEFAULT_TEXT_COLUMNS,
|
1979 |
+
# multiselect=True,
|
1980 |
+
# label="Select specific columns to analyse (leave empty to analyse all text columns simultaneously - i.e. all text is joined together)",
|
1981 |
+
# info="If no columns selected, all text columns will combined together and analysed",
|
1982 |
+
# )
|
1983 |
+
tabular_text_columns.render()
|
1984 |
|
1985 |
find_tabular_duplicates_btn = gr.Button(
|
1986 |
value="Find duplicate cells/rows", variant="primary"
|
|
|
2144 |
)
|
2145 |
|
2146 |
with gr.Accordion("Select entity types to redact", open=True):
|
2147 |
+
# in_redact_entities = gr.Dropdown(
|
2148 |
+
# value=CHOSEN_REDACT_ENTITIES,
|
2149 |
+
# choices=FULL_ENTITY_LIST,
|
2150 |
+
# multiselect=True,
|
2151 |
+
# label="Local PII identification model (click empty space in box for full list)",
|
2152 |
+
# )
|
2153 |
+
# in_redact_comprehend_entities = gr.Dropdown(
|
2154 |
+
# value=CHOSEN_COMPREHEND_ENTITIES,
|
2155 |
+
# choices=FULL_COMPREHEND_ENTITY_LIST,
|
2156 |
+
# multiselect=True,
|
2157 |
+
# label="AWS Comprehend PII identification model (click empty space in box for full list)",
|
2158 |
+
# )
|
2159 |
+
in_redact_entities.render()
|
2160 |
+
in_redact_comprehend_entities.render()
|
2161 |
|
2162 |
with gr.Row():
|
2163 |
max_fuzzy_spelling_mistakes_num = gr.Number(
|
|
|
5222 |
comprehend_query_number,
|
5223 |
],
|
5224 |
api_name="redact_data",
|
5225 |
+
show_progress_on=[text_output_summary],
|
5226 |
)
|
5227 |
|
5228 |
# If the output file count text box changes, keep going with redacting each data file until done
|
|
|
5263 |
actual_time_taken_number,
|
5264 |
comprehend_query_number,
|
5265 |
],
|
5266 |
+
show_progress_on=[text_output_summary],
|
5267 |
).success(
|
5268 |
fn=reveal_feedback_buttons,
|
5269 |
outputs=[
|
|
|
5296 |
actual_time_taken_number,
|
5297 |
task_textbox,
|
5298 |
],
|
5299 |
+
show_progress_on=[results_df_preview],
|
5300 |
)
|
5301 |
|
5302 |
# full_duplicated_data_df,
|
example_data/example_outputs/Partnership-Agreement-Toolkit_0_0.pdf_ocr_output.csv
CHANGED
@@ -1,277 +1,277 @@
|
|
1 |
-
page,text,left,top,width,height
|
2 |
-
1,Partnership Agreement,0.516078,0.027879,0.440784,0.032424
|
3 |
-
1,SisterCities,0.169804,0.033333,0.238431,0.028182
|
4 |
-
1,INTERNATIONAL,0.170196,0.06697,0.237647,0.008788
|
5 |
-
1,Toolkit,0.830588,0.07303,0.126667,0.025152
|
6 |
-
1,Connect globally. Thrive locally.,0.169804,0.08697,0.238824,0.01303
|
7 |
-
1,Types of Affiliations,0.117255,0.157576,0.241961,0.02
|
8 |
-
1,Sister City Relationship,0.117647,0.187273,0.196863,0.013939
|
9 |
-
1,"A Sister City relationship is formed when the mayor or highest elected official (or, if elections",0.117255,0.211212,0.738824,0.013636
|
10 |
-
1,"do not take place, highest appointed official) from a U.S. community and a community in",0.117647,0.227273,0.70902,0.013939
|
11 |
-
1,another country or territory sign a formal agreement on behalf of their communities endorsing a,0.117647,0.243636,0.761961,0.013636
|
12 |
-
1,"""sister city/sister cities"" relationship. Sister city agreements shall be considered active/valid",0.118039,0.259697,0.731373,0.013939
|
13 |
-
1,unless otherwise indicated by one or both of the respective communities.,0.118039,0.276061,0.58549,0.013636
|
14 |
-
1,Sister Cities International shall formally recognize only those relationships by cities/members in,0.118039,0.299697,0.758824,0.013636
|
15 |
-
1,good standing (i.e. who are current on membership dues) in its Membership Directory or on its,0.117647,0.316061,0.754902,0.013636
|
16 |
-
1,"website. However, Sister Cities International shall not assert as invalid or otherwise impugn the",0.116863,0.332121,0.760784,0.013636
|
17 |
-
1,legitimacy of those relationships formed by non-members.,0.118039,0.348485,0.466275,0.013636
|
18 |
-
1,Friendship City,0.118039,0.372121,0.127059,0.013939
|
19 |
-
1,"A Friendship City or Friendship Cities relationship is often formed by cities as a ""stepping",0.117255,0.395758,0.714118,0.013636
|
20 |
-
1,"stone"" to a more formal ""Sister City"" agreement. Typically Friendship City agreements are",0.117647,0.411515,0.720392,0.014242
|
21 |
-
1,referred to as such in the formal documents that are signed. Sister Cities International shall,0.118039,0.428182,0.72549,0.013636
|
22 |
-
1,recognize Friendship City relationships by members in its Membership Directory and website.,0.118039,0.444242,0.747843,0.013636
|
23 |
-
1,As per Sister Cities International Board of Directors:,0.117255,0.467879,0.413333,0.013636
|
24 |
-
1,Sister Cities International will recognize a new sister cities affiliation between a,0.169412,0.492121,0.626667,0.013333
|
25 |
-
1,"U.S. and an international community, even though another affiliation may exist",0.169412,0.507879,0.625098,0.013636
|
26 |
-
1,"between that international community and a different U.S. community, only if a",0.169412,0.524545,0.62902,0.013636
|
27 |
-
1,cooperative agreement among all involved communities is filed with Sister Cities,0.16902,0.540606,0.643137,0.013636
|
28 |
-
1,"International. If a cooperative agreement is denied, or no response to the request",0.170196,0.556667,0.647843,0.013333
|
29 |
-
1,"is received within a reasonable amount of time, Sister Cities International will",0.169412,0.57303,0.612157,0.012727
|
30 |
-
1,recognize the partnership as a friendship city and it will be delineated as such,0.169412,0.589091,0.621176,0.013636
|
31 |
-
1,with a symbol in the membership directories.,0.168627,0.605455,0.358824,0.013333
|
32 |
-
1,The cooperative agreement must be sent by the Mayor/County,0.168627,0.628788,0.509412,0.013939
|
33 |
-
1,"Executive/Governor of the requesting community, and must be sent to the",0.169804,0.645152,0.595294,0.014242
|
34 |
-
1,Mayor/County Executive/Governor of each of the existing partnership,0.169804,0.661212,0.555294,0.013636
|
35 |
-
1,communities. Although the Mayor/County Executive/Governor may request input,0.16902,0.677879,0.647451,0.013636
|
36 |
-
1,"from, or may be given input by, the sister cities program, it is up to the discretion",0.168627,0.693939,0.647059,0.013939
|
37 |
-
1,of the Mayor/County Executive/Governor to sign the cooperative agreement.,0.16902,0.709697,0.612941,0.013939
|
38 |
-
1,Although Sister Cities International will help with the cooperative agreement,0.168627,0.726364,0.605882,0.013636
|
39 |
-
1,"process, it is up to the requesting community to get the agreement signed. Sister",0.169412,0.742121,0.650196,0.013939
|
40 |
-
1,"Cities International will not, in any way, force a community to ""share"" and sign",0.16902,0.758182,0.623922,0.014242
|
41 |
-
1,the cooperative agreement.,0.168627,0.774848,0.219216,0.013333
|
42 |
-
1,"To place a relationship into Emeritus status, the mayor or highest elected official of the U.S.",0.117255,0.798485,0.736471,0.013939
|
43 |
-
1,community must write a letter to the mayor of the foreign city indicating that they wish to,0.118039,0.814545,0.70902,0.013636
|
44 |
-
1,"remain sister cities, but understand that the relationship will remain inactive until such time as",0.118039,0.831212,0.747451,0.013333
|
45 |
-
1,both cities are able to sustain an active relationship. Sister Cities International should be,0.118039,0.847273,0.705098,0.013636
|
46 |
-
1,informed in writing by the mayor of the U.S. city of the situation. Sister Cities International will,0.118039,0.863333,0.746275,0.013636
|
47 |
-
2,Partnership Agreement,0.516078,0.027879,0.440784,0.032424
|
48 |
-
2,SisterCities,0.169804,0.033333,0.238824,0.028182
|
49 |
-
2,INTERNATIONAL,0.170196,0.06697,0.237647,0.008788
|
50 |
-
2,Toolkit,0.83098,0.072727,0.127059,0.025455
|
51 |
-
2,Connect globally. Thrive locally.,0.169804,0.08697,0.239216,0.01303
|
52 |
-
2,then place the partnership into Emeritus Status and will reflect this status in directories and all,0.117255,0.132424,0.751373,0.013333
|
53 |
-
2,lists of sister city programs.,0.118039,0.148788,0.218431,0.013333
|
54 |
-
2,"If a community wishes to terminate a sister city relationship, then a letter from the mayor or",0.118431,0.172424,0.732549,0.013333
|
55 |
-
2,highest elected official of the U.S. city should be sent to the mayor of the sister city. Sister,0.118039,0.188485,0.721569,0.013636
|
56 |
-
2,Cities International should be informed of this action in writing by the mayor of the U.S. city,0.118039,0.204848,0.72902,0.013333
|
57 |
-
2,and Sister Cities International will then remove the partnership from its directories and all lists,0.117647,0.221212,0.746275,0.013333
|
58 |
-
2,of sister city programs. We do not recommend terminating a relationship simply because it is,0.117647,0.237273,0.743529,0.013333
|
59 |
-
2,"dormant. Many partnerships wax and wane over the years, and in many cases a dormant",0.117647,0.253939,0.713333,0.013333
|
60 |
-
2,partnership may be reinvigorated by local members years after it has been inactive.,0.118039,0.269697,0.664314,0.013636
|
61 |
-
2,General Guidelines,0.118039,0.295152,0.231765,0.016061
|
62 |
-
2,In order for a sister city/county/state partnership to be recognized by Sister Cities International,0.118431,0.324242,0.754902,0.013636
|
63 |
-
2,"(SCI), the two communities must sign formal documents which clearly endorse the link. This",0.118039,0.340606,0.74,0.013636
|
64 |
-
2,presumes several key items: that the U.S. community is already a member of SCI and has,0.118039,0.35697,0.718039,0.013636
|
65 |
-
2,followed proper procedures (e.g. passed a city council resolution declaring the intent to twin,0.117255,0.373333,0.737647,0.013636
|
66 |
-
2,with the specific city); that both communities share a mutual commitment to the relationship;,0.117255,0.389394,0.740784,0.013636
|
67 |
-
2,and that both have secured the necessary support structure to build a lasting relationship. You,0.117647,0.405455,0.758039,0.013333
|
68 |
-
2,should check with your local sister city program to see if they have any additional requirements,0.117647,0.421818,0.760784,0.013636
|
69 |
-
2,before pursuing a sister city relationship.,0.118039,0.437879,0.323137,0.013636
|
70 |
-
2,"SCI often refers to these agreements as a ""Sister City Agreement"" or ""Memorandum of",0.118039,0.461515,0.696863,0.013939
|
71 |
-
2,"Understanding."" However, as the following examples show, the actual name and format of",0.118039,0.477576,0.729804,0.013636
|
72 |
-
2,your documents is left up to you.,0.117255,0.494242,0.262745,0.013636
|
73 |
-
2,A few things to keep in mind as you draft your agreement:,0.117255,0.517879,0.463137,0.013636
|
74 |
-
2,"Your agreement can range from the ceremonial, with language focusing on each city's",0.176471,0.542121,0.69098,0.013939
|
75 |
-
2,"commitment to fostering understanding, cooperation, and mutual benefit to the precise,",0.176471,0.558485,0.701961,0.013333
|
76 |
-
2,"with particular areas of interest, specific programs/activities, or more concrete goals",0.176078,0.574848,0.673725,0.013636
|
77 |
-
2,related to anything from numbers of exchanges to economic development.,0.176863,0.591212,0.596863,0.013636
|
78 |
-
2,"Don't try to include everything you plan to do. Some specifics, like particular areas of",0.177255,0.620303,0.681176,0.013939
|
79 |
-
2,"interest or participating institutions are good to include. However, there's no need to",0.176471,0.636667,0.675686,0.013636
|
80 |
-
2,include all the programs you plan to do if it makes the document too lengthy or limits,0.176863,0.652727,0.678824,0.013939
|
81 |
-
2,the scope of projects. This is a formal document to establish the relationship; specific,0.176078,0.668788,0.684706,0.013636
|
82 |
-
2,"tasks, responsibilities, or other nuts-and-bolts text related to implementation or",0.176078,0.685455,0.635686,0.013333
|
83 |
-
2,administration of the partnership can be expressed more fully in a separate,0.176471,0.701212,0.600392,0.013636
|
84 |
-
2,memorandum between the respective sister city committees. Your partnership,0.177255,0.717576,0.626667,0.013636
|
85 |
-
2,agreement is a historical document and should not be dated or limited by being aligned,0.176471,0.733636,0.699216,0.013636
|
86 |
-
2,with very specific tasks.,0.176078,0.750606,0.190196,0.013333
|
87 |
-
2,Work with your counterparts. Remember that this is signed by both cities. You should,0.176078,0.779697,0.68549,0.013636
|
88 |
-
2,share drafts of your agreement with your international partners and solicit feedback on,0.176471,0.795758,0.691765,0.013333
|
89 |
-
2,what they'd like to see in the agreement. Be flexible to cultural or municipal priorities.,0.176471,0.811818,0.679216,0.013939
|
90 |
-
2,Ask your counterparts to translate the agreement if it is drafted in English. It is,0.176078,0.841515,0.623137,0.013636
|
91 |
-
2,important for the citizens of your partner community to be able to read and understand,0.176863,0.857576,0.693725,0.013939
|
92 |
-
2,the commitment their city has made. Have someone in your own community who,0.176078,0.873939,0.649804,0.013636
|
93 |
-
3,Partnership Agreement,0.516078,0.027879,0.441176,0.032121
|
94 |
-
3,SisterCities,0.169804,0.033333,0.239216,0.028182
|
95 |
-
3,INTERNATIONAL,0.170196,0.06697,0.237255,0.008788
|
96 |
-
3,Toolkit,0.83098,0.07303,0.126667,0.025152
|
97 |
-
3,Connect globally. Thrive locally.,0.169804,0.08697,0.239216,0.01303
|
98 |
-
3,speaks that language check the foreign-language version to make sure it mirrors what,0.176471,0.132424,0.688235,0.013333
|
99 |
-
3,you have in your own agreement.,0.176471,0.148788,0.264706,0.013333
|
100 |
-
3,Keep it to one page. Ceremonial documents such as these partnership agreements,0.176863,0.178485,0.66549,0.013636
|
101 |
-
3,work best if they can be posted in their entirety.,0.176078,0.194545,0.380392,0.013636
|
102 |
-
3,Most sister city agreements include some acknowledgement of the founding principles,0.177255,0.224242,0.694902,0.013636
|
103 |
-
3,"of the sister city movement- to promote peace through mutual respect, understanding,",0.176471,0.240303,0.698431,0.013333
|
104 |
-
3,and cooperation.,0.176471,0.25697,0.13451,0.013333
|
105 |
-
3,Consider using official letterhead and/or other embellishments such as city seals or,0.176863,0.286061,0.665882,0.013333
|
106 |
-
3,logos to reflect your enhance the document. Sister city agreements are often posted at,0.176863,0.302121,0.695686,0.013636
|
107 |
-
3,city hall or other municipal offices and should reflect their historical importance,0.176471,0.318485,0.630588,0.013333
|
108 |
-
3,Look at other agreements your city has signed. These agreements may give you an idea,0.177255,0.347879,0.705098,0.013636
|
109 |
-
3,"of what is acceptable or possible, and they may be in an easily replicable format. If you",0.176471,0.364242,0.695686,0.013636
|
110 |
-
3,"cannot access older agreements please contact Sister Cities International, we may",0.176863,0.380303,0.663137,0.013636
|
111 |
-
3,"have them on file, although we do not have copies of all partnership agreements.",0.176863,0.396667,0.64549,0.013636
|
112 |
-
3,Documents must be signed by the top elected official of both communities.,0.177255,0.426364,0.601569,0.013333
|
113 |
-
3,"Check with your mayor, city council, town clerk, et al. to make sure that the agreement",0.176863,0.455758,0.694118,0.013636
|
114 |
-
3,"is OK with them. The mayor is the one putting his or her name on the paper, and you",0.176863,0.471818,0.677255,0.013333
|
115 |
-
3,don't want to spend time developing an agreement which will never be signed.,0.176863,0.488182,0.629412,0.013636
|
116 |
-
3,Official documents are usually signed during a formal ceremony recognizing the,0.176863,0.517576,0.638431,0.013636
|
117 |
-
3,partnership. Be sure both communities receive a signed set of the official documents,0.177255,0.533939,0.683922,0.013636
|
118 |
-
3,for their records.,0.176078,0.550606,0.131373,0.010606
|
119 |
-
3,Remember to send your signed agreement to Sister Cities International. After we,0.177255,0.579697,0.645098,0.013636
|
120 |
-
3,receive your agreement we will post the relationship in the City Directory and make sure,0.176863,0.595758,0.703137,0.013636
|
121 |
-
3,it is included in our Annual Membership Directory.,0.176863,0.612121,0.398039,0.013333
|
122 |
-
3,Remember that each city's sister city program is independent and can impose requirements,0.118431,0.640606,0.736471,0.013939
|
123 |
-
3,"like the establishment of a committee, a review period, sustainability/funding plan, among",0.118039,0.65697,0.715686,0.013636
|
124 |
-
3,"others, before sanctioning a sister city agreement. Check with your local program or mayor's",0.117647,0.672727,0.743529,0.014242
|
125 |
-
3,office to see if this is the case.,0.117647,0.689091,0.241176,0.011515
|
126 |
-
3,On the following pages you'll find a series of partnership agreements to give you an idea of,0.118039,0.717879,0.728627,0.013939
|
127 |
-
3,"what is possible. While you should feel free to use some of the formatting and language, we",0.117255,0.734242,0.73451,0.013636
|
128 |
-
3,encourage you to make your agreement your own and be creative with what you produce. If,0.117647,0.750606,0.737647,0.013636
|
129 |
-
3,you are unsure about your agreement or want advice you can always solicit feedback by,0.117647,0.766667,0.708627,0.013636
|
130 |
-
3,sending it to our Membership Director at [email protected] or contacting us at (202),0.117647,0.782727,0.732157,0.013636
|
131 |
-
3,347-8630.,0.117647,0.799394,0.080392,0.010303
|
132 |
-
4,Partnership Agreement,0.516471,0.027879,0.440784,0.032727
|
133 |
-
4,SisterCities,0.169412,0.033333,0.239608,0.028485
|
134 |
-
4,INTERNATIONAL,0.170196,0.066667,0.238431,0.009091
|
135 |
-
4,Toolkit,0.830588,0.072727,0.127843,0.025758
|
136 |
-
4,Connect globally. Thrive locally.,0.169412,0.08697,0.239608,0.013333
|
137 |
-
4,"jull bubzig 2000 3,312",0.378039,0.291212,0.32549,0.019394
|
138 |
-
4,ABU DHABI MUNICIPALITY & TOWN PLANNING,0.376471,0.316667,0.327451,0.016667
|
139 |
-
4,AN AGREEMENT FOR THE ESTABLISHMENT OF,0.260784,0.373636,0.52549,0.012727
|
140 |
-
4,SISTER CITIES RELATIONSHIP,0.337647,0.393636,0.342745,0.012121
|
141 |
-
4,BETWEEN,0.454902,0.413636,0.110588,0.011212
|
142 |
-
4,THE CITY OF ABU DHABI ( U. A.E),0.337255,0.432727,0.375686,0.013939
|
143 |
-
4,AND,0.487843,0.452727,0.048235,0.011212
|
144 |
-
4,"HOUSTON, TEXAS ( U.S.A)",0.385882,0.471515,0.298039,0.014848
|
145 |
-
4,"The Sister City Program, administered by Sister Cities International, was initiated",0.221961,0.525455,0.597255,0.01303
|
146 |
-
4,By the President of the United States of America in 1956 to encourage greater,0.222745,0.539394,0.561961,0.012727
|
147 |
-
4,Friendship and understanding between the United States and other nations through,0.222745,0.553333,0.608235,0.012727
|
148 |
-
4,Direct personal contact: and,0.222745,0.567576,0.20549,0.012424
|
149 |
-
4,"In order to foster those goals, the people of Abu Dhabi and Houston, in a gesture of",0.222353,0.594242,0.603529,0.012424
|
150 |
-
4,"Friendship and goodwill, agree to collaborate for the mutual benefit of their",0.222745,0.608182,0.547843,0.01303
|
151 |
-
4,"Communities by exploring education, economic and cultural opportunities.",0.222353,0.622121,0.541961,0.012121
|
152 |
-
4,"Abu Dhabi and Houston, sharing a common interest in energy, technology and",0.221569,0.648788,0.574118,0.012424
|
153 |
-
4,"medicine, and the desire to promote mutual understanding among our citizens do",0.222353,0.66303,0.588235,0.012121
|
154 |
-
4,"hereby proclaim themselves Sister Cities beginning on the 13th day of March 2001,",0.221961,0.673636,0.594118,0.015758
|
155 |
-
4,the date of Houston City Council resolution estatblishing the Sister City,0.221961,0.690303,0.519608,0.01303
|
156 |
-
4,relationship became effective.,0.221569,0.705152,0.217647,0.012424
|
157 |
-
4,"Signed on this 26 of October 2002, in duplicate in the Arabic and English",0.221569,0.732121,0.533333,0.01303
|
158 |
-
4,"Languages, both text being equally authentic.",0.221961,0.746667,0.328627,0.012727
|
159 |
-
4,A,0.344314,0.768485,0.084706,0.030303
|
160 |
-
4,Sheikh Mohammed bin Butti AI Hamed,0.245882,0.806364,0.366275,0.010909
|
161 |
-
4,Lee P.Brown,0.729412,0.806364,0.118824,0.010303
|
162 |
-
4,Mayor of Houston,0.704706,0.823333,0.166667,0.012424
|
163 |
-
4,Chairman of Abu Dhabi Municipality,0.24549,0.823636,0.342353,0.012727
|
164 |
-
4,&Town Planning,0.324314,0.841212,0.155686,0.012424
|
165 |
-
5,Partnership Agreement,0.516078,0.027879,0.441176,0.032424
|
166 |
-
5,SisterCities,0.169412,0.033333,0.239608,0.028485
|
167 |
-
5,INTERNATIONAL,0.17098,0.066667,0.237255,0.009091
|
168 |
-
5,Toolkit,0.83098,0.072727,0.127059,0.025758
|
169 |
-
5,Connect globally. Thrive locally.,0.169412,0.08697,0.239216,0.013333
|
170 |
-
5,THE CITY OF NEW YORK,0.438824,0.262121,0.240784,0.009697
|
171 |
-
5,OFFICE OF THE MAYOR,0.450196,0.27697,0.220392,0.009697
|
172 |
-
5,"NEW YORK, N.Y. 10007",0.461176,0.29303,0.196863,0.010303
|
173 |
-
5,THE NEW YORK CITY-LONDON SISTER CITY PARTNERSHIP,0.267451,0.355758,0.582745,0.011818
|
174 |
-
5,Memorandum of Understanding,0.420392,0.371212,0.274902,0.013333
|
175 |
-
5,The Sister City partnership between New York City and London will foster mutually,0.201176,0.402121,0.674118,0.014242
|
176 |
-
5,beneficial solutions to common challenges for these two great cosmopolitan entities.,0.201176,0.417273,0.66902,0.013636
|
177 |
-
5,"Consequently, the Sister City relationship between the two will be one of the most",0.201176,0.432727,0.652549,0.015152
|
178 |
-
5,"important in their network of global partnerships, as it strives to:",0.201176,0.448182,0.50902,0.015455
|
179 |
-
5,Encourage and publicize existing exchanges between London and New York City so,0.230588,0.480303,0.671373,0.015152
|
180 |
-
5,that they can flourish to benefit a wider cross-section of the citizens of both;,0.230588,0.496061,0.602353,0.015152
|
181 |
-
5,"Support and promote the development of new social, economic, academic and",0.230196,0.512424,0.618431,0.015455
|
182 |
-
5,community programs to encourage both cities' citizens to share their experiences as a,0.229804,0.527879,0.678039,0.014848
|
183 |
-
5,medium for learning from one another;,0.229804,0.543636,0.309412,0.013939
|
184 |
-
5,Generate an improvement of the operation of the cities' various government agencies,0.229804,0.56,0.676078,0.014545
|
185 |
-
5,by serving as a conduit of information;,0.22902,0.575758,0.307843,0.014848
|
186 |
-
5,"Identify themes, common to both, that can generate new initiatives to further and",0.229412,0.591818,0.640784,0.015152
|
187 |
-
5,"nurture the increasingly powerful financial, social and cultural relationships between",0.22902,0.607576,0.671373,0.014242
|
188 |
-
5,the cities;,0.22902,0.624545,0.076471,0.012424
|
189 |
-
5,Promote key mayoral priorities relevant to both London and New York City;,0.228627,0.639394,0.608627,0.015152
|
190 |
-
5,Provide financial or in kind support to community-led programs that advance the,0.228627,0.656061,0.641569,0.013636
|
191 |
-
5,aims of the Sister City partnership;,0.22902,0.672121,0.275294,0.013636
|
192 |
-
5,"With the above purposes in mind, the Mayor of the City of New York and the Mayor of",0.198824,0.702424,0.697647,0.014848
|
193 |
-
5,London solemnly confirm that these two cities are united by an official partnership by the,0.198824,0.718182,0.710196,0.014545
|
194 |
-
5,protocol of this Memorandum of Understanding.,0.198431,0.733939,0.384314,0.015152
|
195 |
-
5,This agreement will go into effect from the date of signatures.,0.310196,0.780606,0.488235,0.014545
|
196 |
-
5,Thedder Rudolph W. Giuliani,0.178824,0.795455,0.244314,0.100909
|
197 |
-
5,Signed in March of 2001,0.455686,0.796364,0.19451,0.013636
|
198 |
-
5,Ken Mayor Livingstone,0.672157,0.877576,0.132941,0.029091
|
199 |
-
5,Mayor,0.311373,0.894848,0.053333,0.012727
|
200 |
-
5,New York City,0.287843,0.909091,0.121176,0.013333
|
201 |
-
5,London,0.701961,0.909091,0.061569,0.010606
|
202 |
-
6,Partnership Agreement,0.515686,0.027576,0.441961,0.03303
|
203 |
-
6,SisterCities,0.169412,0.03303,0.24,0.028182
|
204 |
-
6,INTERNATIONAL,0.169804,0.066667,0.238431,0.009091
|
205 |
-
6,Toolkit,0.83098,0.072727,0.127451,0.025758
|
206 |
-
6,Connect globally. Thrive locally.,0.169412,0.08697,0.239608,0.013333
|
207 |
-
6,CHIC OF STATE,0.247451,0.190606,0.141961,0.036364
|
208 |
-
6,City of Long Beach,0.388627,0.196667,0.476471,0.066364
|
209 |
-
6,California,0.551373,0.257273,0.136471,0.033333
|
210 |
-
6,Sister City Agreement,0.321961,0.305455,0.378431,0.035152
|
211 |
-
6,between the,0.464706,0.352727,0.084314,0.009697
|
212 |
-
6,City of Long Beach,0.38,0.378485,0.252549,0.01697
|
213 |
-
6,"California, USA",0.4,0.397576,0.21098,0.016061
|
214 |
-
6,and the,0.48,0.415152,0.053333,0.009091
|
215 |
-
6,City of San Pablo de Manta,0.321569,0.428788,0.369804,0.01697
|
216 |
-
6,"Ecuador, South America",0.347451,0.447879,0.317255,0.015152
|
217 |
-
6,"In accordance with the authorization and approval expressed by the City of Long Beach,",0.261569,0.482121,0.536863,0.012121
|
218 |
-
6,"California, USA, and the City of San Pablo de Manta, Ecundor, South America, it is declared",0.217647,0.492727,0.581176,0.01303
|
219 |
-
6,"that a ""Sister City Agreement between the two cities is hereby established for the following",0.217647,0.502727,0.581569,0.012121
|
220 |
-
6,purposes:,0.216863,0.516061,0.058039,0.009394
|
221 |
-
6,(1) to promote and expand the effective and mutually beneficial cooperation between,0.278824,0.532727,0.520392,0.012424
|
222 |
-
6,the people of Long Beach and the people of San Pablo de Manta; and,0.218039,0.543636,0.40549,0.012424
|
223 |
-
6,"(2) to promote international goodwill, understanding, and expanded business",0.279216,0.56303,0.520784,0.012424
|
224 |
-
6,"relations between the two cities and their respective nations by the exchange of people, ideas, and",0.218039,0.573636,0.581569,0.012121
|
225 |
-
6,"information in a unide variety of economic, social, cultural, municipal, environmental,",0.218039,0.584242,0.581176,0.012121
|
226 |
-
6,"professional, technical, youth, and other endeavors; and",0.217647,0.594848,0.333333,0.012121
|
227 |
-
6,"(3) to foster and encourage charitable, scientific, trade and commerce, literary and",0.279608,0.613939,0.520784,0.012727
|
228 |
-
6,educational activities between the two cities;,0.218039,0.625455,0.265882,0.009697
|
229 |
-
6,This Sister City Agreement shall be officially established and shall become effective when,0.263137,0.644545,0.536863,0.012727
|
230 |
-
6,"this document has been duly executed by the Mayor of Long Beach, California, USA, and the",0.218824,0.654848,0.581961,0.012424
|
231 |
-
6,"Mayor of San Pablo de Manta, Ecundor, South America.",0.218431,0.665758,0.338824,0.012121
|
232 |
-
6,STATE OFFICE,0.276471,0.713636,0.050588,0.048788
|
233 |
-
6,Beverly 0 Neill,0.587451,0.736667,0.121961,0.013636
|
234 |
-
6,"Mayor, City of Long Beach",0.542353,0.751212,0.21098,0.013636
|
235 |
-
6,"California, USA",0.582745,0.765758,0.125098,0.01303
|
236 |
-
6,10.2aulus,0.490588,0.771818,0.220392,0.062424
|
237 |
-
6,Ing. Jorge O. Zambrano Cedeño,0.527059,0.825152,0.242745,0.013333
|
238 |
-
6,"Mayor, City of San Pablo de Manta",0.505098,0.839394,0.277647,0.013636
|
239 |
-
6,"Ecuador, South America",0.551765,0.854242,0.188235,0.011818
|
240 |
-
6,"Dated: September 19, 2000",0.544706,0.883333,0.202745,0.01303
|
241 |
-
7,Partnership Agreement,0.516078,0.027879,0.441176,0.032424
|
242 |
-
7,SisterCities,0.169412,0.03303,0.24,0.028485
|
243 |
-
7,INTERNATIONAL,0.170196,0.066667,0.237647,0.009091
|
244 |
-
7,Toolkit,0.83098,0.072727,0.127451,0.025758
|
245 |
-
7,Connect globally. Thrive locally.,0.169412,0.08697,0.239216,0.013333
|
246 |
-
7,REAFFIRMATION OF SISTER CITIES DECLARATION,0.324706,0.165152,0.483529,0.013939
|
247 |
-
7,adopted by,0.2,0.213333,0.080392,0.013636
|
248 |
-
7,THE HONORABLE RICHARD M. DALEY,0.396078,0.214242,0.335686,0.012424
|
249 |
-
7,MAYOR OF CHICAGO,0.472549,0.231212,0.18549,0.011515
|
250 |
-
7,and,0.199608,0.260909,0.026275,0.010606
|
251 |
-
7,THE HONORABLE ZHANG RONGMAO,0.401961,0.261212,0.323137,0.011212
|
252 |
-
7,MAYOR OF SHENYANG,0.463529,0.273636,0.202353,0.011212
|
253 |
-
7,ON,0.551765,0.298182,0.026667,0.011515
|
254 |
-
7,"JUNE 5, 1995",0.500392,0.323636,0.128235,0.014848
|
255 |
-
7,"On this the tenth anniversary of the signing of a sister city agreement, in order to further",0.255686,0.36303,0.67098,0.015152
|
256 |
-
7,the traditional links of friendship between Chicago and Shenyang and to reaffirm their mutual,0.198824,0.378788,0.727843,0.015455
|
257 |
-
7,"aspiration to work in unison for the benefit of their cities and nations, the Honorable Mayor",0.199608,0.394848,0.727843,0.014848
|
258 |
-
7,"Richard M. Daley, Mayor of the City of Chicago, and the Honorable Zhang Rongmao, Mayor",0.199216,0.411212,0.727451,0.014242
|
259 |
-
7,"of the City of Shenyang, on this fifth day of June 1995, do hereby acknowledge and reaffirm the",0.199216,0.42697,0.72549,0.014848
|
260 |
-
7,sister cities agreement between the City of Chicago and the City of Shenyang.,0.199608,0.443636,0.57451,0.014242
|
261 |
-
7,"The City of Chicago and the City of Shenyang on the basis of friendly cooperation,",0.256078,0.473939,0.665098,0.015152
|
262 |
-
7,equality and mutual benefit will continue to develop a sister cities relationship to promote and,0.2,0.490303,0.724706,0.014242
|
263 |
-
7,broaden economic cooperation and cultural exchanges between the two cities.,0.199216,0.506061,0.57451,0.014242
|
264 |
-
7,The two cities do hereby declare their interest in exploring the establishment of business,0.255294,0.537273,0.668235,0.015455
|
265 |
-
7,and trade relations between Chicago and Shenyang.,0.198824,0.554545,0.387843,0.013636
|
266 |
-
7,"In addition, exchanges will be promoted in the area of the arts such as exhibits, music,",0.254118,0.583939,0.666667,0.015455
|
267 |
-
7,dance and other cultural activities.,0.198431,0.601212,0.256471,0.010606
|
268 |
-
7,"In addition, exchanges will be promoted in education and the establishment of contacts",0.254118,0.630303,0.668627,0.015758
|
269 |
-
7,within educational institutions encouraged.,0.198824,0.647273,0.32,0.014242
|
270 |
-
7,"In addition, we declare our intention to promote exchanges in such fields as science and",0.253725,0.678182,0.668627,0.014848
|
271 |
-
7,"technology, sports, health, youth and any areas that will contribute to the prosperity and the",0.198039,0.693636,0.722745,0.015152
|
272 |
-
7,further development of friendship between the people of our two cities.,0.194902,0.711515,0.525098,0.013636
|
273 |
-
7,3h.5.,0.593725,0.750606,0.218039,0.06303
|
274 |
-
7,THE HONORABLE ZHANG RONGMAO,0.588627,0.819394,0.287843,0.011818
|
275 |
-
7,THE HONORABLE RICHARD M. DALEY,0.197255,0.821515,0.303529,0.010606
|
276 |
-
7,MAYOR OF SHENYANG,0.587451,0.835455,0.177647,0.010303
|
277 |
-
7,MAYOR OF CHICAGO,0.195686,0.835758,0.164706,0.010606
|
|
|
1 |
+
page,text,left,top,width,height,line
|
2 |
+
1,Partnership Agreement,0.516078,0.027879,0.440784,0.032424,1
|
3 |
+
1,SisterCities,0.169804,0.033333,0.238431,0.028182,2
|
4 |
+
1,INTERNATIONAL,0.170196,0.06697,0.237647,0.008788,3
|
5 |
+
1,Toolkit,0.830588,0.07303,0.126667,0.025152,4
|
6 |
+
1,Connect globally. Thrive locally.,0.169804,0.08697,0.238824,0.01303,5
|
7 |
+
1,Types of Affiliations,0.117255,0.157576,0.241961,0.02,6
|
8 |
+
1,Sister City Relationship,0.117647,0.187273,0.196863,0.013939,7
|
9 |
+
1,"A Sister City relationship is formed when the mayor or highest elected official (or, if elections",0.117255,0.211212,0.738824,0.013636,8
|
10 |
+
1,"do not take place, highest appointed official) from a U.S. community and a community in",0.117647,0.227273,0.70902,0.013939,9
|
11 |
+
1,another country or territory sign a formal agreement on behalf of their communities endorsing a,0.117647,0.243636,0.761961,0.013636,10
|
12 |
+
1,"""sister city/sister cities"" relationship. Sister city agreements shall be considered active/valid",0.118039,0.259697,0.731373,0.013939,11
|
13 |
+
1,unless otherwise indicated by one or both of the respective communities.,0.118039,0.276061,0.58549,0.013636,12
|
14 |
+
1,Sister Cities International shall formally recognize only those relationships by cities/members in,0.118039,0.299697,0.758824,0.013636,13
|
15 |
+
1,good standing (i.e. who are current on membership dues) in its Membership Directory or on its,0.117647,0.316061,0.754902,0.013636,14
|
16 |
+
1,"website. However, Sister Cities International shall not assert as invalid or otherwise impugn the",0.116863,0.332121,0.760784,0.013636,15
|
17 |
+
1,legitimacy of those relationships formed by non-members.,0.118039,0.348485,0.466275,0.013636,16
|
18 |
+
1,Friendship City,0.118039,0.372121,0.127059,0.013939,17
|
19 |
+
1,"A Friendship City or Friendship Cities relationship is often formed by cities as a ""stepping",0.117255,0.395758,0.714118,0.013636,18
|
20 |
+
1,"stone"" to a more formal ""Sister City"" agreement. Typically Friendship City agreements are",0.117647,0.411515,0.720392,0.014242,19
|
21 |
+
1,referred to as such in the formal documents that are signed. Sister Cities International shall,0.118039,0.428182,0.72549,0.013636,20
|
22 |
+
1,recognize Friendship City relationships by members in its Membership Directory and website.,0.118039,0.444242,0.747843,0.013636,21
|
23 |
+
1,As per Sister Cities International Board of Directors:,0.117255,0.467879,0.413333,0.013636,22
|
24 |
+
1,Sister Cities International will recognize a new sister cities affiliation between a,0.169412,0.492121,0.626667,0.013333,23
|
25 |
+
1,"U.S. and an international community, even though another affiliation may exist",0.169412,0.507879,0.625098,0.013636,24
|
26 |
+
1,"between that international community and a different U.S. community, only if a",0.169412,0.524545,0.62902,0.013636,25
|
27 |
+
1,cooperative agreement among all involved communities is filed with Sister Cities,0.16902,0.540606,0.643137,0.013636,26
|
28 |
+
1,"International. If a cooperative agreement is denied, or no response to the request",0.170196,0.556667,0.647843,0.013333,27
|
29 |
+
1,"is received within a reasonable amount of time, Sister Cities International will",0.169412,0.57303,0.612157,0.012727,28
|
30 |
+
1,recognize the partnership as a friendship city and it will be delineated as such,0.169412,0.589091,0.621176,0.013636,29
|
31 |
+
1,with a symbol in the membership directories.,0.168627,0.605455,0.358824,0.013333,30
|
32 |
+
1,The cooperative agreement must be sent by the Mayor/County,0.168627,0.628788,0.509412,0.013939,31
|
33 |
+
1,"Executive/Governor of the requesting community, and must be sent to the",0.169804,0.645152,0.595294,0.014242,32
|
34 |
+
1,Mayor/County Executive/Governor of each of the existing partnership,0.169804,0.661212,0.555294,0.013636,33
|
35 |
+
1,communities. Although the Mayor/County Executive/Governor may request input,0.16902,0.677879,0.647451,0.013636,34
|
36 |
+
1,"from, or may be given input by, the sister cities program, it is up to the discretion",0.168627,0.693939,0.647059,0.013939,35
|
37 |
+
1,of the Mayor/County Executive/Governor to sign the cooperative agreement.,0.16902,0.709697,0.612941,0.013939,36
|
38 |
+
1,Although Sister Cities International will help with the cooperative agreement,0.168627,0.726364,0.605882,0.013636,37
|
39 |
+
1,"process, it is up to the requesting community to get the agreement signed. Sister",0.169412,0.742121,0.650196,0.013939,38
|
40 |
+
1,"Cities International will not, in any way, force a community to ""share"" and sign",0.16902,0.758182,0.623922,0.014242,39
|
41 |
+
1,the cooperative agreement.,0.168627,0.774848,0.219216,0.013333,40
|
42 |
+
1,"To place a relationship into Emeritus status, the mayor or highest elected official of the U.S.",0.117255,0.798485,0.736471,0.013939,41
|
43 |
+
1,community must write a letter to the mayor of the foreign city indicating that they wish to,0.118039,0.814545,0.70902,0.013636,42
|
44 |
+
1,"remain sister cities, but understand that the relationship will remain inactive until such time as",0.118039,0.831212,0.747451,0.013333,43
|
45 |
+
1,both cities are able to sustain an active relationship. Sister Cities International should be,0.118039,0.847273,0.705098,0.013636,44
|
46 |
+
1,informed in writing by the mayor of the U.S. city of the situation. Sister Cities International will,0.118039,0.863333,0.746275,0.013636,45
|
47 |
+
2,Partnership Agreement,0.516078,0.027879,0.440784,0.032424,1
|
48 |
+
2,SisterCities,0.169804,0.033333,0.238824,0.028182,2
|
49 |
+
2,INTERNATIONAL,0.170196,0.06697,0.237647,0.008788,3
|
50 |
+
2,Toolkit,0.83098,0.072727,0.127059,0.025455,4
|
51 |
+
2,Connect globally. Thrive locally.,0.169804,0.08697,0.239216,0.01303,5
|
52 |
+
2,then place the partnership into Emeritus Status and will reflect this status in directories and all,0.117255,0.132424,0.751373,0.013333,6
|
53 |
+
2,lists of sister city programs.,0.118039,0.148788,0.218431,0.013333,7
|
54 |
+
2,"If a community wishes to terminate a sister city relationship, then a letter from the mayor or",0.118431,0.172424,0.732549,0.013333,8
|
55 |
+
2,highest elected official of the U.S. city should be sent to the mayor of the sister city. Sister,0.118039,0.188485,0.721569,0.013636,9
|
56 |
+
2,Cities International should be informed of this action in writing by the mayor of the U.S. city,0.118039,0.204848,0.72902,0.013333,10
|
57 |
+
2,and Sister Cities International will then remove the partnership from its directories and all lists,0.117647,0.221212,0.746275,0.013333,11
|
58 |
+
2,of sister city programs. We do not recommend terminating a relationship simply because it is,0.117647,0.237273,0.743529,0.013333,12
|
59 |
+
2,"dormant. Many partnerships wax and wane over the years, and in many cases a dormant",0.117647,0.253939,0.713333,0.013333,13
|
60 |
+
2,partnership may be reinvigorated by local members years after it has been inactive.,0.118039,0.269697,0.664314,0.013636,14
|
61 |
+
2,General Guidelines,0.118039,0.295152,0.231765,0.016061,15
|
62 |
+
2,In order for a sister city/county/state partnership to be recognized by Sister Cities International,0.118431,0.324242,0.754902,0.013636,16
|
63 |
+
2,"(SCI), the two communities must sign formal documents which clearly endorse the link. This",0.118039,0.340606,0.74,0.013636,17
|
64 |
+
2,presumes several key items: that the U.S. community is already a member of SCI and has,0.118039,0.35697,0.718039,0.013636,18
|
65 |
+
2,followed proper procedures (e.g. passed a city council resolution declaring the intent to twin,0.117255,0.373333,0.737647,0.013636,19
|
66 |
+
2,with the specific city); that both communities share a mutual commitment to the relationship;,0.117255,0.389394,0.740784,0.013636,20
|
67 |
+
2,and that both have secured the necessary support structure to build a lasting relationship. You,0.117647,0.405455,0.758039,0.013333,21
|
68 |
+
2,should check with your local sister city program to see if they have any additional requirements,0.117647,0.421818,0.760784,0.013636,22
|
69 |
+
2,before pursuing a sister city relationship.,0.118039,0.437879,0.323137,0.013636,23
|
70 |
+
2,"SCI often refers to these agreements as a ""Sister City Agreement"" or ""Memorandum of",0.118039,0.461515,0.696863,0.013939,24
|
71 |
+
2,"Understanding."" However, as the following examples show, the actual name and format of",0.118039,0.477576,0.729804,0.013636,25
|
72 |
+
2,your documents is left up to you.,0.117255,0.494242,0.262745,0.013636,26
|
73 |
+
2,A few things to keep in mind as you draft your agreement:,0.117255,0.517879,0.463137,0.013636,27
|
74 |
+
2,"Your agreement can range from the ceremonial, with language focusing on each city's",0.176471,0.542121,0.69098,0.013939,28
|
75 |
+
2,"commitment to fostering understanding, cooperation, and mutual benefit to the precise,",0.176471,0.558485,0.701961,0.013333,29
|
76 |
+
2,"with particular areas of interest, specific programs/activities, or more concrete goals",0.176078,0.574848,0.673725,0.013636,30
|
77 |
+
2,related to anything from numbers of exchanges to economic development.,0.176863,0.591212,0.596863,0.013636,31
|
78 |
+
2,"Don't try to include everything you plan to do. Some specifics, like particular areas of",0.177255,0.620303,0.681176,0.013939,32
|
79 |
+
2,"interest or participating institutions are good to include. However, there's no need to",0.176471,0.636667,0.675686,0.013636,33
|
80 |
+
2,include all the programs you plan to do if it makes the document too lengthy or limits,0.176863,0.652727,0.678824,0.013939,34
|
81 |
+
2,the scope of projects. This is a formal document to establish the relationship; specific,0.176078,0.668788,0.684706,0.013636,35
|
82 |
+
2,"tasks, responsibilities, or other nuts-and-bolts text related to implementation or",0.176078,0.685455,0.635686,0.013333,36
|
83 |
+
2,administration of the partnership can be expressed more fully in a separate,0.176471,0.701212,0.600392,0.013636,37
|
84 |
+
2,memorandum between the respective sister city committees. Your partnership,0.177255,0.717576,0.626667,0.013636,38
|
85 |
+
2,agreement is a historical document and should not be dated or limited by being aligned,0.176471,0.733636,0.699216,0.013636,39
|
86 |
+
2,with very specific tasks.,0.176078,0.750606,0.190196,0.013333,40
|
87 |
+
2,Work with your counterparts. Remember that this is signed by both cities. You should,0.176078,0.779697,0.68549,0.013636,41
|
88 |
+
2,share drafts of your agreement with your international partners and solicit feedback on,0.176471,0.795758,0.691765,0.013333,42
|
89 |
+
2,what they'd like to see in the agreement. Be flexible to cultural or municipal priorities.,0.176471,0.811818,0.679216,0.013939,43
|
90 |
+
2,Ask your counterparts to translate the agreement if it is drafted in English. It is,0.176078,0.841515,0.623137,0.013636,44
|
91 |
+
2,important for the citizens of your partner community to be able to read and understand,0.176863,0.857576,0.693725,0.013939,1
|
92 |
+
2,the commitment their city has made. Have someone in your own community who,0.176078,0.873939,0.649804,0.013636,2
|
93 |
+
3,Partnership Agreement,0.516078,0.027879,0.441176,0.032121,3
|
94 |
+
3,SisterCities,0.169804,0.033333,0.239216,0.028182,4
|
95 |
+
3,INTERNATIONAL,0.170196,0.06697,0.237255,0.008788,5
|
96 |
+
3,Toolkit,0.83098,0.07303,0.126667,0.025152,6
|
97 |
+
3,Connect globally. Thrive locally.,0.169804,0.08697,0.239216,0.01303,7
|
98 |
+
3,speaks that language check the foreign-language version to make sure it mirrors what,0.176471,0.132424,0.688235,0.013333,8
|
99 |
+
3,you have in your own agreement.,0.176471,0.148788,0.264706,0.013333,9
|
100 |
+
3,Keep it to one page. Ceremonial documents such as these partnership agreements,0.176863,0.178485,0.66549,0.013636,10
|
101 |
+
3,work best if they can be posted in their entirety.,0.176078,0.194545,0.380392,0.013636,11
|
102 |
+
3,Most sister city agreements include some acknowledgement of the founding principles,0.177255,0.224242,0.694902,0.013636,12
|
103 |
+
3,"of the sister city movement- to promote peace through mutual respect, understanding,",0.176471,0.240303,0.698431,0.013333,13
|
104 |
+
3,and cooperation.,0.176471,0.25697,0.13451,0.013333,14
|
105 |
+
3,Consider using official letterhead and/or other embellishments such as city seals or,0.176863,0.286061,0.665882,0.013333,15
|
106 |
+
3,logos to reflect your enhance the document. Sister city agreements are often posted at,0.176863,0.302121,0.695686,0.013636,16
|
107 |
+
3,city hall or other municipal offices and should reflect their historical importance,0.176471,0.318485,0.630588,0.013333,17
|
108 |
+
3,Look at other agreements your city has signed. These agreements may give you an idea,0.177255,0.347879,0.705098,0.013636,18
|
109 |
+
3,"of what is acceptable or possible, and they may be in an easily replicable format. If you",0.176471,0.364242,0.695686,0.013636,19
|
110 |
+
3,"cannot access older agreements please contact Sister Cities International, we may",0.176863,0.380303,0.663137,0.013636,20
|
111 |
+
3,"have them on file, although we do not have copies of all partnership agreements.",0.176863,0.396667,0.64549,0.013636,21
|
112 |
+
3,Documents must be signed by the top elected official of both communities.,0.177255,0.426364,0.601569,0.013333,22
|
113 |
+
3,"Check with your mayor, city council, town clerk, et al. to make sure that the agreement",0.176863,0.455758,0.694118,0.013636,23
|
114 |
+
3,"is OK with them. The mayor is the one putting his or her name on the paper, and you",0.176863,0.471818,0.677255,0.013333,24
|
115 |
+
3,don't want to spend time developing an agreement which will never be signed.,0.176863,0.488182,0.629412,0.013636,25
|
116 |
+
3,Official documents are usually signed during a formal ceremony recognizing the,0.176863,0.517576,0.638431,0.013636,26
|
117 |
+
3,partnership. Be sure both communities receive a signed set of the official documents,0.177255,0.533939,0.683922,0.013636,27
|
118 |
+
3,for their records.,0.176078,0.550606,0.131373,0.010606,28
|
119 |
+
3,Remember to send your signed agreement to Sister Cities International. After we,0.177255,0.579697,0.645098,0.013636,29
|
120 |
+
3,receive your agreement we will post the relationship in the City Directory and make sure,0.176863,0.595758,0.703137,0.013636,30
|
121 |
+
3,it is included in our Annual Membership Directory.,0.176863,0.612121,0.398039,0.013333,31
|
122 |
+
3,Remember that each city's sister city program is independent and can impose requirements,0.118431,0.640606,0.736471,0.013939,32
|
123 |
+
3,"like the establishment of a committee, a review period, sustainability/funding plan, among",0.118039,0.65697,0.715686,0.013636,33
|
124 |
+
3,"others, before sanctioning a sister city agreement. Check with your local program or mayor's",0.117647,0.672727,0.743529,0.014242,34
|
125 |
+
3,office to see if this is the case.,0.117647,0.689091,0.241176,0.011515,35
|
126 |
+
3,On the following pages you'll find a series of partnership agreements to give you an idea of,0.118039,0.717879,0.728627,0.013939,36
|
127 |
+
3,"what is possible. While you should feel free to use some of the formatting and language, we",0.117255,0.734242,0.73451,0.013636,37
|
128 |
+
3,encourage you to make your agreement your own and be creative with what you produce. If,0.117647,0.750606,0.737647,0.013636,38
|
129 |
+
3,you are unsure about your agreement or want advice you can always solicit feedback by,0.117647,0.766667,0.708627,0.013636,39
|
130 |
+
3,sending it to our Membership Director at [email protected] or contacting us at (202),0.117647,0.782727,0.732157,0.013636,40
|
131 |
+
3,347-8630.,0.117647,0.799394,0.080392,0.010303,41
|
132 |
+
4,Partnership Agreement,0.516471,0.027879,0.440784,0.032727,1
|
133 |
+
4,SisterCities,0.169412,0.033333,0.239608,0.028485,2
|
134 |
+
4,INTERNATIONAL,0.170196,0.066667,0.238431,0.009091,3
|
135 |
+
4,Toolkit,0.830588,0.072727,0.127843,0.025758,4
|
136 |
+
4,Connect globally. Thrive locally.,0.169412,0.08697,0.239608,0.013333,5
|
137 |
+
4,"jull bubzig 2000 3,312",0.378039,0.291212,0.32549,0.019394,6
|
138 |
+
4,ABU DHABI MUNICIPALITY & TOWN PLANNING,0.376471,0.316667,0.327451,0.016667,7
|
139 |
+
4,AN AGREEMENT FOR THE ESTABLISHMENT OF,0.260784,0.373636,0.52549,0.012727,8
|
140 |
+
4,SISTER CITIES RELATIONSHIP,0.337647,0.393636,0.342745,0.012121,9
|
141 |
+
4,BETWEEN,0.454902,0.413636,0.110588,0.011212,10
|
142 |
+
4,THE CITY OF ABU DHABI ( U. A.E),0.337255,0.432727,0.375686,0.013939,11
|
143 |
+
4,AND,0.487843,0.452727,0.048235,0.011212,12
|
144 |
+
4,"HOUSTON, TEXAS ( U.S.A)",0.385882,0.471515,0.298039,0.014848,13
|
145 |
+
4,"The Sister City Program, administered by Sister Cities International, was initiated",0.221961,0.525455,0.597255,0.01303,14
|
146 |
+
4,By the President of the United States of America in 1956 to encourage greater,0.222745,0.539394,0.561961,0.012727,15
|
147 |
+
4,Friendship and understanding between the United States and other nations through,0.222745,0.553333,0.608235,0.012727,16
|
148 |
+
4,Direct personal contact: and,0.222745,0.567576,0.20549,0.012424,17
|
149 |
+
4,"In order to foster those goals, the people of Abu Dhabi and Houston, in a gesture of",0.222353,0.594242,0.603529,0.012424,18
|
150 |
+
4,"Friendship and goodwill, agree to collaborate for the mutual benefit of their",0.222745,0.608182,0.547843,0.01303,19
|
151 |
+
4,"Communities by exploring education, economic and cultural opportunities.",0.222353,0.622121,0.541961,0.012121,20
|
152 |
+
4,"Abu Dhabi and Houston, sharing a common interest in energy, technology and",0.221569,0.648788,0.574118,0.012424,21
|
153 |
+
4,"medicine, and the desire to promote mutual understanding among our citizens do",0.222353,0.66303,0.588235,0.012121,22
|
154 |
+
4,"hereby proclaim themselves Sister Cities beginning on the 13th day of March 2001,",0.221961,0.673636,0.594118,0.015758,23
|
155 |
+
4,the date of Houston City Council resolution estatblishing the Sister City,0.221961,0.690303,0.519608,0.01303,24
|
156 |
+
4,relationship became effective.,0.221569,0.705152,0.217647,0.012424,25
|
157 |
+
4,"Signed on this 26 of October 2002, in duplicate in the Arabic and English",0.221569,0.732121,0.533333,0.01303,26
|
158 |
+
4,"Languages, both text being equally authentic.",0.221961,0.746667,0.328627,0.012727,27
|
159 |
+
4,A,0.344314,0.768485,0.084706,0.030303,28
|
160 |
+
4,Sheikh Mohammed bin Butti AI Hamed,0.245882,0.806364,0.366275,0.010909,29
|
161 |
+
4,Lee P.Brown,0.729412,0.806364,0.118824,0.010303,30
|
162 |
+
4,Mayor of Houston,0.704706,0.823333,0.166667,0.012424,31
|
163 |
+
4,Chairman of Abu Dhabi Municipality,0.24549,0.823636,0.342353,0.012727,32
|
164 |
+
4,&Town Planning,0.324314,0.841212,0.155686,0.012424,33
|
165 |
+
5,Partnership Agreement,0.516078,0.027879,0.441176,0.032424,1
|
166 |
+
5,SisterCities,0.169412,0.033333,0.239608,0.028485,2
|
167 |
+
5,INTERNATIONAL,0.17098,0.066667,0.237255,0.009091,3
|
168 |
+
5,Toolkit,0.83098,0.072727,0.127059,0.025758,4
|
169 |
+
5,Connect globally. Thrive locally.,0.169412,0.08697,0.239216,0.013333,5
|
170 |
+
5,THE CITY OF NEW YORK,0.438824,0.262121,0.240784,0.009697,6
|
171 |
+
5,OFFICE OF THE MAYOR,0.450196,0.27697,0.220392,0.009697,7
|
172 |
+
5,"NEW YORK, N.Y. 10007",0.461176,0.29303,0.196863,0.010303,8
|
173 |
+
5,THE NEW YORK CITY-LONDON SISTER CITY PARTNERSHIP,0.267451,0.355758,0.582745,0.011818,9
|
174 |
+
5,Memorandum of Understanding,0.420392,0.371212,0.274902,0.013333,10
|
175 |
+
5,The Sister City partnership between New York City and London will foster mutually,0.201176,0.402121,0.674118,0.014242,11
|
176 |
+
5,beneficial solutions to common challenges for these two great cosmopolitan entities.,0.201176,0.417273,0.66902,0.013636,12
|
177 |
+
5,"Consequently, the Sister City relationship between the two will be one of the most",0.201176,0.432727,0.652549,0.015152,13
|
178 |
+
5,"important in their network of global partnerships, as it strives to:",0.201176,0.448182,0.50902,0.015455,14
|
179 |
+
5,Encourage and publicize existing exchanges between London and New York City so,0.230588,0.480303,0.671373,0.015152,15
|
180 |
+
5,that they can flourish to benefit a wider cross-section of the citizens of both;,0.230588,0.496061,0.602353,0.015152,16
|
181 |
+
5,"Support and promote the development of new social, economic, academic and",0.230196,0.512424,0.618431,0.015455,17
|
182 |
+
5,community programs to encourage both cities' citizens to share their experiences as a,0.229804,0.527879,0.678039,0.014848,18
|
183 |
+
5,medium for learning from one another;,0.229804,0.543636,0.309412,0.013939,19
|
184 |
+
5,Generate an improvement of the operation of the cities' various government agencies,0.229804,0.56,0.676078,0.014545,20
|
185 |
+
5,by serving as a conduit of information;,0.22902,0.575758,0.307843,0.014848,21
|
186 |
+
5,"Identify themes, common to both, that can generate new initiatives to further and",0.229412,0.591818,0.640784,0.015152,22
|
187 |
+
5,"nurture the increasingly powerful financial, social and cultural relationships between",0.22902,0.607576,0.671373,0.014242,23
|
188 |
+
5,the cities;,0.22902,0.624545,0.076471,0.012424,24
|
189 |
+
5,Promote key mayoral priorities relevant to both London and New York City;,0.228627,0.639394,0.608627,0.015152,25
|
190 |
+
5,Provide financial or in kind support to community-led programs that advance the,0.228627,0.656061,0.641569,0.013636,26
|
191 |
+
5,aims of the Sister City partnership;,0.22902,0.672121,0.275294,0.013636,27
|
192 |
+
5,"With the above purposes in mind, the Mayor of the City of New York and the Mayor of",0.198824,0.702424,0.697647,0.014848,28
|
193 |
+
5,London solemnly confirm that these two cities are united by an official partnership by the,0.198824,0.718182,0.710196,0.014545,29
|
194 |
+
5,protocol of this Memorandum of Understanding.,0.198431,0.733939,0.384314,0.015152,30
|
195 |
+
5,This agreement will go into effect from the date of signatures.,0.310196,0.780606,0.488235,0.014545,31
|
196 |
+
5,Thedder Rudolph W. Giuliani,0.178824,0.795455,0.244314,0.100909,32
|
197 |
+
5,Signed in March of 2001,0.455686,0.796364,0.19451,0.013636,33
|
198 |
+
5,Ken Mayor Livingstone,0.672157,0.877576,0.132941,0.029091,34
|
199 |
+
5,Mayor,0.311373,0.894848,0.053333,0.012727,35
|
200 |
+
5,New York City,0.287843,0.909091,0.121176,0.013333,36
|
201 |
+
5,London,0.701961,0.909091,0.061569,0.010606,37
|
202 |
+
6,Partnership Agreement,0.515686,0.027576,0.441961,0.03303,1
|
203 |
+
6,SisterCities,0.169412,0.03303,0.24,0.028182,2
|
204 |
+
6,INTERNATIONAL,0.169804,0.066667,0.238431,0.009091,3
|
205 |
+
6,Toolkit,0.83098,0.072727,0.127451,0.025758,4
|
206 |
+
6,Connect globally. Thrive locally.,0.169412,0.08697,0.239608,0.013333,5
|
207 |
+
6,CHIC OF STATE,0.247451,0.190606,0.141961,0.036364,6
|
208 |
+
6,City of Long Beach,0.388627,0.196667,0.476471,0.066364,7
|
209 |
+
6,California,0.551373,0.257273,0.136471,0.033333,8
|
210 |
+
6,Sister City Agreement,0.321961,0.305455,0.378431,0.035152,9
|
211 |
+
6,between the,0.464706,0.352727,0.084314,0.009697,10
|
212 |
+
6,City of Long Beach,0.38,0.378485,0.252549,0.01697,11
|
213 |
+
6,"California, USA",0.4,0.397576,0.21098,0.016061,12
|
214 |
+
6,and the,0.48,0.415152,0.053333,0.009091,13
|
215 |
+
6,City of San Pablo de Manta,0.321569,0.428788,0.369804,0.01697,14
|
216 |
+
6,"Ecuador, South America",0.347451,0.447879,0.317255,0.015152,15
|
217 |
+
6,"In accordance with the authorization and approval expressed by the City of Long Beach,",0.261569,0.482121,0.536863,0.012121,16
|
218 |
+
6,"California, USA, and the City of San Pablo de Manta, Ecundor, South America, it is declared",0.217647,0.492727,0.581176,0.01303,17
|
219 |
+
6,"that a ""Sister City Agreement between the two cities is hereby established for the following",0.217647,0.502727,0.581569,0.012121,18
|
220 |
+
6,purposes:,0.216863,0.516061,0.058039,0.009394,19
|
221 |
+
6,(1) to promote and expand the effective and mutually beneficial cooperation between,0.278824,0.532727,0.520392,0.012424,20
|
222 |
+
6,the people of Long Beach and the people of San Pablo de Manta; and,0.218039,0.543636,0.40549,0.012424,21
|
223 |
+
6,"(2) to promote international goodwill, understanding, and expanded business",0.279216,0.56303,0.520784,0.012424,22
|
224 |
+
6,"relations between the two cities and their respective nations by the exchange of people, ideas, and",0.218039,0.573636,0.581569,0.012121,23
|
225 |
+
6,"information in a unide variety of economic, social, cultural, municipal, environmental,",0.218039,0.584242,0.581176,0.012121,24
|
226 |
+
6,"professional, technical, youth, and other endeavors; and",0.217647,0.594848,0.333333,0.012121,25
|
227 |
+
6,"(3) to foster and encourage charitable, scientific, trade and commerce, literary and",0.279608,0.613939,0.520784,0.012727,26
|
228 |
+
6,educational activities between the two cities;,0.218039,0.625455,0.265882,0.009697,27
|
229 |
+
6,This Sister City Agreement shall be officially established and shall become effective when,0.263137,0.644545,0.536863,0.012727,28
|
230 |
+
6,"this document has been duly executed by the Mayor of Long Beach, California, USA, and the",0.218824,0.654848,0.581961,0.012424,29
|
231 |
+
6,"Mayor of San Pablo de Manta, Ecundor, South America.",0.218431,0.665758,0.338824,0.012121,30
|
232 |
+
6,STATE OFFICE,0.276471,0.713636,0.050588,0.048788,31
|
233 |
+
6,Beverly 0 Neill,0.587451,0.736667,0.121961,0.013636,32
|
234 |
+
6,"Mayor, City of Long Beach",0.542353,0.751212,0.21098,0.013636,33
|
235 |
+
6,"California, USA",0.582745,0.765758,0.125098,0.01303,34
|
236 |
+
6,10.2aulus,0.490588,0.771818,0.220392,0.062424,35
|
237 |
+
6,Ing. Jorge O. Zambrano Cedeño,0.527059,0.825152,0.242745,0.013333,36
|
238 |
+
6,"Mayor, City of San Pablo de Manta",0.505098,0.839394,0.277647,0.013636,37
|
239 |
+
6,"Ecuador, South America",0.551765,0.854242,0.188235,0.011818,38
|
240 |
+
6,"Dated: September 19, 2000",0.544706,0.883333,0.202745,0.01303,39
|
241 |
+
7,Partnership Agreement,0.516078,0.027879,0.441176,0.032424,1
|
242 |
+
7,SisterCities,0.169412,0.03303,0.24,0.028485,2
|
243 |
+
7,INTERNATIONAL,0.170196,0.066667,0.237647,0.009091,3
|
244 |
+
7,Toolkit,0.83098,0.072727,0.127451,0.025758,4
|
245 |
+
7,Connect globally. Thrive locally.,0.169412,0.08697,0.239216,0.013333,5
|
246 |
+
7,REAFFIRMATION OF SISTER CITIES DECLARATION,0.324706,0.165152,0.483529,0.013939,6
|
247 |
+
7,adopted by,0.2,0.213333,0.080392,0.013636,7
|
248 |
+
7,THE HONORABLE RICHARD M. DALEY,0.396078,0.214242,0.335686,0.012424,8
|
249 |
+
7,MAYOR OF CHICAGO,0.472549,0.231212,0.18549,0.011515,9
|
250 |
+
7,and,0.199608,0.260909,0.026275,0.010606,10
|
251 |
+
7,THE HONORABLE ZHANG RONGMAO,0.401961,0.261212,0.323137,0.011212,11
|
252 |
+
7,MAYOR OF SHENYANG,0.463529,0.273636,0.202353,0.011212,12
|
253 |
+
7,ON,0.551765,0.298182,0.026667,0.011515,13
|
254 |
+
7,"JUNE 5, 1995",0.500392,0.323636,0.128235,0.014848,14
|
255 |
+
7,"On this the tenth anniversary of the signing of a sister city agreement, in order to further",0.255686,0.36303,0.67098,0.015152,15
|
256 |
+
7,the traditional links of friendship between Chicago and Shenyang and to reaffirm their mutual,0.198824,0.378788,0.727843,0.015455,16
|
257 |
+
7,"aspiration to work in unison for the benefit of their cities and nations, the Honorable Mayor",0.199608,0.394848,0.727843,0.014848,17
|
258 |
+
7,"Richard M. Daley, Mayor of the City of Chicago, and the Honorable Zhang Rongmao, Mayor",0.199216,0.411212,0.727451,0.014242,18
|
259 |
+
7,"of the City of Shenyang, on this fifth day of June 1995, do hereby acknowledge and reaffirm the",0.199216,0.42697,0.72549,0.014848,19
|
260 |
+
7,sister cities agreement between the City of Chicago and the City of Shenyang.,0.199608,0.443636,0.57451,0.014242,20
|
261 |
+
7,"The City of Chicago and the City of Shenyang on the basis of friendly cooperation,",0.256078,0.473939,0.665098,0.015152,21
|
262 |
+
7,equality and mutual benefit will continue to develop a sister cities relationship to promote and,0.2,0.490303,0.724706,0.014242,22
|
263 |
+
7,broaden economic cooperation and cultural exchanges between the two cities.,0.199216,0.506061,0.57451,0.014242,23
|
264 |
+
7,The two cities do hereby declare their interest in exploring the establishment of business,0.255294,0.537273,0.668235,0.015455,24
|
265 |
+
7,and trade relations between Chicago and Shenyang.,0.198824,0.554545,0.387843,0.013636,25
|
266 |
+
7,"In addition, exchanges will be promoted in the area of the arts such as exhibits, music,",0.254118,0.583939,0.666667,0.015455,26
|
267 |
+
7,dance and other cultural activities.,0.198431,0.601212,0.256471,0.010606,27
|
268 |
+
7,"In addition, exchanges will be promoted in education and the establishment of contacts",0.254118,0.630303,0.668627,0.015758,28
|
269 |
+
7,within educational institutions encouraged.,0.198824,0.647273,0.32,0.014242,29
|
270 |
+
7,"In addition, we declare our intention to promote exchanges in such fields as science and",0.253725,0.678182,0.668627,0.014848,30
|
271 |
+
7,"technology, sports, health, youth and any areas that will contribute to the prosperity and the",0.198039,0.693636,0.722745,0.015152,31
|
272 |
+
7,further development of friendship between the people of our two cities.,0.194902,0.711515,0.525098,0.013636,32
|
273 |
+
7,3h.5.,0.593725,0.750606,0.218039,0.06303,33
|
274 |
+
7,THE HONORABLE ZHANG RONGMAO,0.588627,0.819394,0.287843,0.011818,34
|
275 |
+
7,THE HONORABLE RICHARD M. DALEY,0.197255,0.821515,0.303529,0.010606,35
|
276 |
+
7,MAYOR OF SHENYANG,0.587451,0.835455,0.177647,0.010303,36
|
277 |
+
7,MAYOR OF CHICAGO,0.195686,0.835758,0.164706,0.010606,37
|
example_data/example_outputs/doubled_output_joined.pdf_ocr_output.csv
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
tools/config.py
CHANGED
@@ -161,11 +161,9 @@ if OUTPUT_FOLDER == "TEMP" or INPUT_FOLDER == "TEMP":
|
|
161 |
INPUT_FOLDER = temp_dir + "/"
|
162 |
|
163 |
GRADIO_TEMP_DIR = get_or_create_env_var(
|
164 |
-
"GRADIO_TEMP_DIR", "
|
165 |
) # Default Gradio temp folder
|
166 |
-
MPLCONFIGDIR = get_or_create_env_var(
|
167 |
-
"MPLCONFIGDIR", "tmp/matplotlib_cache/"
|
168 |
-
) # Matplotlib cache folder
|
169 |
|
170 |
###
|
171 |
# LOGGING OPTIONS
|
@@ -545,6 +543,8 @@ except Exception as e:
|
|
545 |
# Get some environment variables and Launch the Gradio app
|
546 |
COGNITO_AUTH = get_or_create_env_var("COGNITO_AUTH", "0")
|
547 |
|
|
|
|
|
548 |
RUN_DIRECT_MODE = get_or_create_env_var("RUN_DIRECT_MODE", "0")
|
549 |
|
550 |
# Direct mode configuration options
|
@@ -574,7 +574,7 @@ ROOT_PATH = get_or_create_env_var("ROOT_PATH", "")
|
|
574 |
|
575 |
DEFAULT_CONCURRENCY_LIMIT = int(get_or_create_env_var("DEFAULT_CONCURRENCY_LIMIT", "3"))
|
576 |
|
577 |
-
FILE_INPUT_HEIGHT = get_or_create_env_var("FILE_INPUT_HEIGHT", "200")
|
578 |
|
579 |
### ALLOW LIST
|
580 |
|
|
|
161 |
INPUT_FOLDER = temp_dir + "/"
|
162 |
|
163 |
GRADIO_TEMP_DIR = get_or_create_env_var(
|
164 |
+
"GRADIO_TEMP_DIR", ""
|
165 |
) # Default Gradio temp folder
|
166 |
+
MPLCONFIGDIR = get_or_create_env_var("MPLCONFIGDIR", "") # Matplotlib cache folder
|
|
|
|
|
167 |
|
168 |
###
|
169 |
# LOGGING OPTIONS
|
|
|
543 |
# Get some environment variables and Launch the Gradio app
|
544 |
COGNITO_AUTH = get_or_create_env_var("COGNITO_AUTH", "0")
|
545 |
|
546 |
+
SHOW_EXAMPLES = get_or_create_env_var("SHOW_EXAMPLES", "True")
|
547 |
+
|
548 |
RUN_DIRECT_MODE = get_or_create_env_var("RUN_DIRECT_MODE", "0")
|
549 |
|
550 |
# Direct mode configuration options
|
|
|
574 |
|
575 |
DEFAULT_CONCURRENCY_LIMIT = int(get_or_create_env_var("DEFAULT_CONCURRENCY_LIMIT", "3"))
|
576 |
|
577 |
+
FILE_INPUT_HEIGHT = int(get_or_create_env_var("FILE_INPUT_HEIGHT", "200"))
|
578 |
|
579 |
### ALLOW LIST
|
580 |
|
tools/data_anonymise.py
CHANGED
@@ -515,8 +515,6 @@ def anonymise_files_with_open_text(
|
|
515 |
if isinstance(out_message, str):
|
516 |
out_message = [out_message]
|
517 |
|
518 |
-
# print("log_files_output_paths:",log_files_output_paths)
|
519 |
-
|
520 |
if isinstance(log_files_output_paths, str):
|
521 |
log_files_output_paths = list()
|
522 |
|
|
|
515 |
if isinstance(out_message, str):
|
516 |
out_message = [out_message]
|
517 |
|
|
|
|
|
518 |
if isinstance(log_files_output_paths, str):
|
519 |
log_files_output_paths = list()
|
520 |
|
tools/file_conversion.py
CHANGED
@@ -87,9 +87,6 @@ def is_pdf(filename):
|
|
87 |
return filename.lower().endswith(".pdf")
|
88 |
|
89 |
|
90 |
-
## Convert pdf to image if necessary
|
91 |
-
|
92 |
-
|
93 |
def check_image_size_and_reduce(out_path: str, image: Image):
|
94 |
"""
|
95 |
Check if a given image size is above around 4.5mb, and reduce size if necessary. 5mb is the maximum possible to submit to AWS Textract.
|
@@ -297,7 +294,6 @@ def process_file_for_image_creation(
|
|
297 |
|
298 |
# Check if the file is a PDF
|
299 |
elif file_extension == ".pdf":
|
300 |
-
# print(f"{file_path} is a PDF file. Converting to image set")
|
301 |
|
302 |
# Run your function for processing PDF files here
|
303 |
img_path, image_sizes_width, image_sizes_height, all_img_details = (
|
@@ -653,8 +649,8 @@ def word_level_ocr_output_to_dataframe(ocr_results: dict) -> pd.DataFrame:
|
|
653 |
def prepare_image_or_pdf(
|
654 |
file_paths: List[str],
|
655 |
text_extract_method: str,
|
656 |
-
all_line_level_ocr_results_df: pd.DataFrame,
|
657 |
-
all_page_line_level_ocr_results_with_words_df: pd.DataFrame,
|
658 |
latest_file_completed: int = 0,
|
659 |
out_message: List[str] = list(),
|
660 |
first_loop_state: bool = False,
|
|
|
87 |
return filename.lower().endswith(".pdf")
|
88 |
|
89 |
|
|
|
|
|
|
|
90 |
def check_image_size_and_reduce(out_path: str, image: Image):
|
91 |
"""
|
92 |
Check if a given image size is above around 4.5mb, and reduce size if necessary. 5mb is the maximum possible to submit to AWS Textract.
|
|
|
294 |
|
295 |
# Check if the file is a PDF
|
296 |
elif file_extension == ".pdf":
|
|
|
297 |
|
298 |
# Run your function for processing PDF files here
|
299 |
img_path, image_sizes_width, image_sizes_height, all_img_details = (
|
|
|
649 |
def prepare_image_or_pdf(
|
650 |
file_paths: List[str],
|
651 |
text_extract_method: str,
|
652 |
+
all_line_level_ocr_results_df: pd.DataFrame = None,
|
653 |
+
all_page_line_level_ocr_results_with_words_df: pd.DataFrame = None,
|
654 |
latest_file_completed: int = 0,
|
655 |
out_message: List[str] = list(),
|
656 |
first_loop_state: bool = False,
|