seanpedrickcase committed on
Commit
82b9d9d
·
1 Parent(s): 7917a26

App now correctly updates custom fuzzy recognisers

Browse files
Files changed (2) hide show
  1. requirements.txt +1 -1
  2. tools/file_redaction.py +8 -4
requirements.txt CHANGED
@@ -21,7 +21,7 @@ python-levenshtein==0.26.1
21
  spaczz==0.6.1
22
  gradio_image_annotation==0.2.5
23
  # The following version includes rotation and image zoom options - not currently working so reverting to original until fixed
24
- #git+https://github.com/seanpedrick-case/gradio_image_annotator
25
  rapidfuzz==3.12.1
26
  numpy==1.26.4
27
  awslambdaric==3.0.0
 
21
  spaczz==0.6.1
22
  gradio_image_annotation==0.2.5
23
  # The following version includes rotation and image zoom options - not currently working so reverting to original until fixed
24
+ #https://github.com/seanpedrick-case/gradio_image_annotator/releases/download/v0.3.0/gradio_image_annotation-0.3.0-py3-none-any.whl
25
  rapidfuzz==3.12.1
26
  numpy==1.26.4
27
  awslambdaric==3.0.0
tools/file_redaction.py CHANGED
@@ -987,11 +987,12 @@ def redact_image_pdf(file_path:str,
987
  #print("new_custom_recogniser:", new_custom_recogniser)
988
  nlp_analyser.registry.add_recognizer(new_custom_recogniser)
989
 
990
- nlp_analyser.registry.remove_recognizer("CUSTOM_FUZZY")
991
  new_custom_fuzzy_recogniser = CustomWordFuzzyRecognizer(supported_entities=["CUSTOM_FUZZY"], custom_list=custom_recogniser_word_list, spelling_mistakes_max=max_fuzzy_spelling_mistakes_num, search_whole_phrase=match_fuzzy_whole_phrase_bool)
992
  #print("new_custom_recogniser:", new_custom_recogniser)
993
  nlp_analyser.registry.add_recognizer(new_custom_fuzzy_recogniser)
994
 
 
995
  image_analyser = CustomImageAnalyzerEngine(nlp_analyser)
996
 
997
  if pii_identification_method == "AWS Comprehend" and comprehend_client == "":
@@ -1591,16 +1592,19 @@ def redact_text_pdf(
1591
  new_custom_recogniser = custom_word_list_recogniser(custom_recogniser_word_list)
1592
  nlp_analyser.registry.add_recognizer(new_custom_recogniser)
1593
 
1594
- nlp_analyser.registry.remove_recognizer("CUSTOM_FUZZY")
1595
  new_custom_fuzzy_recogniser = CustomWordFuzzyRecognizer(supported_entities=["CUSTOM_FUZZY"], custom_list=custom_recogniser_word_list, spelling_mistakes_max=max_fuzzy_spelling_mistakes_num, search_whole_phrase=match_fuzzy_whole_phrase_bool)
1596
  nlp_analyser.registry.add_recognizer(new_custom_fuzzy_recogniser)
1597
 
1598
  # List all elements currently in the nlp_analyser registry
1599
  #print("Current recognizers in nlp_analyser registry:")
1600
  #for recognizer_name in nlp_analyser.registry.recognizers:
1601
- # print(recognizer_name)
 
 
 
1602
 
1603
- #print("Custom recogniser:", nlp_analyser.registry.)
1604
 
1605
  tic = time.perf_counter()
1606
 
 
987
  #print("new_custom_recogniser:", new_custom_recogniser)
988
  nlp_analyser.registry.add_recognizer(new_custom_recogniser)
989
 
990
+ nlp_analyser.registry.remove_recognizer("CustomWordFuzzyRecognizer")
991
  new_custom_fuzzy_recogniser = CustomWordFuzzyRecognizer(supported_entities=["CUSTOM_FUZZY"], custom_list=custom_recogniser_word_list, spelling_mistakes_max=max_fuzzy_spelling_mistakes_num, search_whole_phrase=match_fuzzy_whole_phrase_bool)
992
  #print("new_custom_recogniser:", new_custom_recogniser)
993
  nlp_analyser.registry.add_recognizer(new_custom_fuzzy_recogniser)
994
 
995
+
996
  image_analyser = CustomImageAnalyzerEngine(nlp_analyser)
997
 
998
  if pii_identification_method == "AWS Comprehend" and comprehend_client == "":
 
1592
  new_custom_recogniser = custom_word_list_recogniser(custom_recogniser_word_list)
1593
  nlp_analyser.registry.add_recognizer(new_custom_recogniser)
1594
 
1595
+ nlp_analyser.registry.remove_recognizer("CustomWordFuzzyRecognizer")
1596
  new_custom_fuzzy_recogniser = CustomWordFuzzyRecognizer(supported_entities=["CUSTOM_FUZZY"], custom_list=custom_recogniser_word_list, spelling_mistakes_max=max_fuzzy_spelling_mistakes_num, search_whole_phrase=match_fuzzy_whole_phrase_bool)
1597
  nlp_analyser.registry.add_recognizer(new_custom_fuzzy_recogniser)
1598
 
1599
  # List all elements currently in the nlp_analyser registry
1600
  #print("Current recognizers in nlp_analyser registry:")
1601
  #for recognizer_name in nlp_analyser.registry.recognizers:
1602
+ #print(recognizer_name)
1603
+ #print(recognizer_name.name)
1604
+
1605
+ #print("Custom recogniser:", nlp_analyser.registry)
1606
 
1607
+ #print("custom_recogniser_word_list:", custom_recogniser_word_list)
1608
 
1609
  tic = time.perf_counter()
1610