seanpedrickcase committed on
Commit
a63133d
·
1 Parent(s): 43287c3

Added some commentary to file conversion and redaction

Browse files
tools/file_conversion.py CHANGED
@@ -38,12 +38,15 @@ def convert_pdf_to_images(pdf_path, progress=Progress(track_tqdm=True)):
38
 
39
  # Get the number of pages in the PDF
40
  page_count = pdfinfo_from_path(pdf_path)['Pages']
 
41
 
42
  images = []
43
 
44
  # Open the PDF file
45
  for page_num in progress.tqdm(range(0,page_count), total=page_count, unit="pages", desc="Converting pages"):
46
 
 
 
47
  # Convert one page to image
48
  image = convert_from_path(pdf_path, first_page=page_num+1, last_page=page_num+1)
49
 
 
38
 
39
  # Get the number of pages in the PDF
40
  page_count = pdfinfo_from_path(pdf_path)['Pages']
41
+ print("Number of pages in PDF: ", str(page_count))
42
 
43
  images = []
44
 
45
  # Open the PDF file
46
  for page_num in progress.tqdm(range(0,page_count), total=page_count, unit="pages", desc="Converting pages"):
47
 
48
+ print("Current page: ", str(page_num))
49
+
50
  # Convert one page to image
51
  image = convert_from_path(pdf_path, first_page=page_num+1, last_page=page_num+1)
52
 
tools/file_redaction.py CHANGED
@@ -15,7 +15,9 @@ def redact_image_pdf(file_path:str, language:str, chosen_redact_entities:List[st
15
  take an path for an image of a document, then run this image through the Presidio ImageAnalyzer to get a redacted page back
16
  '''
17
 
18
- progress(0, desc="Converting pages to image")
 
 
19
 
20
  image_paths = process_file(file_path)
21
 
@@ -25,10 +27,14 @@ def redact_image_pdf(file_path:str, language:str, chosen_redact_entities:List[st
25
  images = []
26
  number_of_pages = len(image_paths)
27
 
28
- progress(0.1, desc="Redacting pages")
 
 
29
 
30
  for i in progress.tqdm(range(0,number_of_pages), total=number_of_pages, unit="pages", desc="Redacting pages"):
31
 
 
 
32
  # Get the image to redact using PIL lib (pillow)
33
  image = image_paths[i] #Image.open(image_paths[i])
34
 
 
15
  take an path for an image of a document, then run this image through the Presidio ImageAnalyzer to get a redacted page back
16
  '''
17
 
18
+ out_message = "Converting pages to image"
19
+ print(out_message)
20
+ progress(0, desc=out_message)
21
 
22
  image_paths = process_file(file_path)
23
 
 
27
  images = []
28
  number_of_pages = len(image_paths)
29
 
30
+ out_message = "Redacting pages"
31
+ print(out_message)
32
+ progress(0.1, desc=out_message)
33
 
34
  for i in progress.tqdm(range(0,number_of_pages), total=number_of_pages, unit="pages", desc="Redacting pages"):
35
 
36
+ print("Redacting page ", str(i + 1))
37
+
38
  # Get the image to redact using PIL lib (pillow)
39
  image = image_paths[i] #Image.open(image_paths[i])
40