Merge pull request #9 from seanpedrick-case/dev
Browse files
App now correctly updates custom fuzzy recognisers
- requirements.txt +1 -1
- tools/file_redaction.py +8 -4
requirements.txt
CHANGED
|
@@ -21,7 +21,7 @@ python-levenshtein==0.26.1
|
|
| 21 |
spaczz==0.6.1
|
| 22 |
gradio_image_annotation==0.2.5
|
| 23 |
# The following version includes rotation and image zoom options - not currently working so reverting to original until fixed
|
| 24 |
-
#
|
| 25 |
rapidfuzz==3.12.1
|
| 26 |
numpy==1.26.4
|
| 27 |
awslambdaric==3.0.0
|
|
|
|
| 21 |
spaczz==0.6.1
|
| 22 |
gradio_image_annotation==0.2.5
|
| 23 |
# The following version includes rotation and image zoom options - not currently working so reverting to original until fixed
|
| 24 |
+
#https://github.com/seanpedrick-case/gradio_image_annotator/releases/download/v0.3.0/gradio_image_annotation-0.3.0-py3-none-any.whl
|
| 25 |
rapidfuzz==3.12.1
|
| 26 |
numpy==1.26.4
|
| 27 |
awslambdaric==3.0.0
|
tools/file_redaction.py
CHANGED
|
@@ -987,11 +987,12 @@ def redact_image_pdf(file_path:str,
|
|
| 987 |
#print("new_custom_recogniser:", new_custom_recogniser)
|
| 988 |
nlp_analyser.registry.add_recognizer(new_custom_recogniser)
|
| 989 |
|
| 990 |
-
nlp_analyser.registry.remove_recognizer("
|
| 991 |
new_custom_fuzzy_recogniser = CustomWordFuzzyRecognizer(supported_entities=["CUSTOM_FUZZY"], custom_list=custom_recogniser_word_list, spelling_mistakes_max=max_fuzzy_spelling_mistakes_num, search_whole_phrase=match_fuzzy_whole_phrase_bool)
|
| 992 |
#print("new_custom_recogniser:", new_custom_recogniser)
|
| 993 |
nlp_analyser.registry.add_recognizer(new_custom_fuzzy_recogniser)
|
| 994 |
|
|
|
|
| 995 |
image_analyser = CustomImageAnalyzerEngine(nlp_analyser)
|
| 996 |
|
| 997 |
if pii_identification_method == "AWS Comprehend" and comprehend_client == "":
|
|
@@ -1591,16 +1592,19 @@ def redact_text_pdf(
|
|
| 1591 |
new_custom_recogniser = custom_word_list_recogniser(custom_recogniser_word_list)
|
| 1592 |
nlp_analyser.registry.add_recognizer(new_custom_recogniser)
|
| 1593 |
|
| 1594 |
-
nlp_analyser.registry.remove_recognizer("
|
| 1595 |
new_custom_fuzzy_recogniser = CustomWordFuzzyRecognizer(supported_entities=["CUSTOM_FUZZY"], custom_list=custom_recogniser_word_list, spelling_mistakes_max=max_fuzzy_spelling_mistakes_num, search_whole_phrase=match_fuzzy_whole_phrase_bool)
|
| 1596 |
nlp_analyser.registry.add_recognizer(new_custom_fuzzy_recogniser)
|
| 1597 |
|
| 1598 |
# List all elements currently in the nlp_analyser registry
|
| 1599 |
#print("Current recognizers in nlp_analyser registry:")
|
| 1600 |
#for recognizer_name in nlp_analyser.registry.recognizers:
|
| 1601 |
-
|
|
|
|
|
|
|
|
|
|
| 1602 |
|
| 1603 |
-
#print("
|
| 1604 |
|
| 1605 |
tic = time.perf_counter()
|
| 1606 |
|
|
|
|
| 987 |
#print("new_custom_recogniser:", new_custom_recogniser)
|
| 988 |
nlp_analyser.registry.add_recognizer(new_custom_recogniser)
|
| 989 |
|
| 990 |
+
nlp_analyser.registry.remove_recognizer("CustomWordFuzzyRecognizer")
|
| 991 |
new_custom_fuzzy_recogniser = CustomWordFuzzyRecognizer(supported_entities=["CUSTOM_FUZZY"], custom_list=custom_recogniser_word_list, spelling_mistakes_max=max_fuzzy_spelling_mistakes_num, search_whole_phrase=match_fuzzy_whole_phrase_bool)
|
| 992 |
#print("new_custom_recogniser:", new_custom_recogniser)
|
| 993 |
nlp_analyser.registry.add_recognizer(new_custom_fuzzy_recogniser)
|
| 994 |
|
| 995 |
+
|
| 996 |
image_analyser = CustomImageAnalyzerEngine(nlp_analyser)
|
| 997 |
|
| 998 |
if pii_identification_method == "AWS Comprehend" and comprehend_client == "":
|
|
|
|
| 1592 |
new_custom_recogniser = custom_word_list_recogniser(custom_recogniser_word_list)
|
| 1593 |
nlp_analyser.registry.add_recognizer(new_custom_recogniser)
|
| 1594 |
|
| 1595 |
+
nlp_analyser.registry.remove_recognizer("CustomWordFuzzyRecognizer")
|
| 1596 |
new_custom_fuzzy_recogniser = CustomWordFuzzyRecognizer(supported_entities=["CUSTOM_FUZZY"], custom_list=custom_recogniser_word_list, spelling_mistakes_max=max_fuzzy_spelling_mistakes_num, search_whole_phrase=match_fuzzy_whole_phrase_bool)
|
| 1597 |
nlp_analyser.registry.add_recognizer(new_custom_fuzzy_recogniser)
|
| 1598 |
|
| 1599 |
# List all elements currently in the nlp_analyser registry
|
| 1600 |
#print("Current recognizers in nlp_analyser registry:")
|
| 1601 |
#for recognizer_name in nlp_analyser.registry.recognizers:
|
| 1602 |
+
#print(recognizer_name)
|
| 1603 |
+
#print(recognizer_name.name)
|
| 1604 |
+
|
| 1605 |
+
#print("Custom recogniser:", nlp_analyser.registry)
|
| 1606 |
|
| 1607 |
+
#print("custom_recogniser_word_list:", custom_recogniser_word_list)
|
| 1608 |
|
| 1609 |
tic = time.perf_counter()
|
| 1610 |
|