Commit
·
82b9d9d
1
Parent(s):
7917a26
App now correctly updates custom fuzzy recognisers
Browse files
- requirements.txt +1 -1
- tools/file_redaction.py +8 -4
requirements.txt
CHANGED
@@ -21,7 +21,7 @@ python-levenshtein==0.26.1
|
|
21 |
spaczz==0.6.1
|
22 |
gradio_image_annotation==0.2.5
|
23 |
# The following version includes rotation and image zoom options - not currently working so reverting to original until fixed
|
24 |
-
#
|
25 |
rapidfuzz==3.12.1
|
26 |
numpy==1.26.4
|
27 |
awslambdaric==3.0.0
|
|
|
21 |
spaczz==0.6.1
|
22 |
gradio_image_annotation==0.2.5
|
23 |
# The following version includes rotation and image zoom options - not currently working so reverting to original until fixed
|
24 |
+
#https://github.com/seanpedrick-case/gradio_image_annotator/releases/download/v0.3.0/gradio_image_annotation-0.3.0-py3-none-any.whl
|
25 |
rapidfuzz==3.12.1
|
26 |
numpy==1.26.4
|
27 |
awslambdaric==3.0.0
|
tools/file_redaction.py
CHANGED
@@ -987,11 +987,12 @@ def redact_image_pdf(file_path:str,
|
|
987 |
#print("new_custom_recogniser:", new_custom_recogniser)
|
988 |
nlp_analyser.registry.add_recognizer(new_custom_recogniser)
|
989 |
|
990 |
-
nlp_analyser.registry.remove_recognizer("
|
991 |
new_custom_fuzzy_recogniser = CustomWordFuzzyRecognizer(supported_entities=["CUSTOM_FUZZY"], custom_list=custom_recogniser_word_list, spelling_mistakes_max=max_fuzzy_spelling_mistakes_num, search_whole_phrase=match_fuzzy_whole_phrase_bool)
|
992 |
#print("new_custom_recogniser:", new_custom_recogniser)
|
993 |
nlp_analyser.registry.add_recognizer(new_custom_fuzzy_recogniser)
|
994 |
|
|
|
995 |
image_analyser = CustomImageAnalyzerEngine(nlp_analyser)
|
996 |
|
997 |
if pii_identification_method == "AWS Comprehend" and comprehend_client == "":
|
@@ -1591,16 +1592,19 @@ def redact_text_pdf(
|
|
1591 |
new_custom_recogniser = custom_word_list_recogniser(custom_recogniser_word_list)
|
1592 |
nlp_analyser.registry.add_recognizer(new_custom_recogniser)
|
1593 |
|
1594 |
-
nlp_analyser.registry.remove_recognizer("
|
1595 |
new_custom_fuzzy_recogniser = CustomWordFuzzyRecognizer(supported_entities=["CUSTOM_FUZZY"], custom_list=custom_recogniser_word_list, spelling_mistakes_max=max_fuzzy_spelling_mistakes_num, search_whole_phrase=match_fuzzy_whole_phrase_bool)
|
1596 |
nlp_analyser.registry.add_recognizer(new_custom_fuzzy_recogniser)
|
1597 |
|
1598 |
# List all elements currently in the nlp_analyser registry
|
1599 |
#print("Current recognizers in nlp_analyser registry:")
|
1600 |
#for recognizer_name in nlp_analyser.registry.recognizers:
|
1601 |
-
|
|
|
|
|
|
|
1602 |
|
1603 |
-
#print("
|
1604 |
|
1605 |
tic = time.perf_counter()
|
1606 |
|
|
|
987 |
#print("new_custom_recogniser:", new_custom_recogniser)
|
988 |
nlp_analyser.registry.add_recognizer(new_custom_recogniser)
|
989 |
|
990 |
+
nlp_analyser.registry.remove_recognizer("CustomWordFuzzyRecognizer")
|
991 |
new_custom_fuzzy_recogniser = CustomWordFuzzyRecognizer(supported_entities=["CUSTOM_FUZZY"], custom_list=custom_recogniser_word_list, spelling_mistakes_max=max_fuzzy_spelling_mistakes_num, search_whole_phrase=match_fuzzy_whole_phrase_bool)
|
992 |
#print("new_custom_recogniser:", new_custom_recogniser)
|
993 |
nlp_analyser.registry.add_recognizer(new_custom_fuzzy_recogniser)
|
994 |
|
995 |
+
|
996 |
image_analyser = CustomImageAnalyzerEngine(nlp_analyser)
|
997 |
|
998 |
if pii_identification_method == "AWS Comprehend" and comprehend_client == "":
|
|
|
1592 |
new_custom_recogniser = custom_word_list_recogniser(custom_recogniser_word_list)
|
1593 |
nlp_analyser.registry.add_recognizer(new_custom_recogniser)
|
1594 |
|
1595 |
+
nlp_analyser.registry.remove_recognizer("CustomWordFuzzyRecognizer")
|
1596 |
new_custom_fuzzy_recogniser = CustomWordFuzzyRecognizer(supported_entities=["CUSTOM_FUZZY"], custom_list=custom_recogniser_word_list, spelling_mistakes_max=max_fuzzy_spelling_mistakes_num, search_whole_phrase=match_fuzzy_whole_phrase_bool)
|
1597 |
nlp_analyser.registry.add_recognizer(new_custom_fuzzy_recogniser)
|
1598 |
|
1599 |
# List all elements currently in the nlp_analyser registry
|
1600 |
#print("Current recognizers in nlp_analyser registry:")
|
1601 |
#for recognizer_name in nlp_analyser.registry.recognizers:
|
1602 |
+
#print(recognizer_name)
|
1603 |
+
#print(recognizer_name.name)
|
1604 |
+
|
1605 |
+
#print("Custom recogniser:", nlp_analyser.registry)
|
1606 |
|
1607 |
+
#print("custom_recogniser_word_list:", custom_recogniser_word_list)
|
1608 |
|
1609 |
tic = time.perf_counter()
|
1610 |
|