Spaces:
Sleeping
Sleeping
Commit
·
a63133d
1
Parent(s):
43287c3
Added some commentary to file conversion and redaction
Browse files
- tools/file_conversion.py +3 -0
- tools/file_redaction.py +8 -2
tools/file_conversion.py
CHANGED
@@ -38,12 +38,15 @@ def convert_pdf_to_images(pdf_path, progress=Progress(track_tqdm=True)):
|
|
38 |
|
39 |
# Get the number of pages in the PDF
|
40 |
page_count = pdfinfo_from_path(pdf_path)['Pages']
|
|
|
41 |
|
42 |
images = []
|
43 |
|
44 |
# Open the PDF file
|
45 |
for page_num in progress.tqdm(range(0,page_count), total=page_count, unit="pages", desc="Converting pages"):
|
46 |
|
|
|
|
|
47 |
# Convert one page to image
|
48 |
image = convert_from_path(pdf_path, first_page=page_num+1, last_page=page_num+1)
|
49 |
|
|
|
38 |
|
39 |
# Get the number of pages in the PDF
|
40 |
page_count = pdfinfo_from_path(pdf_path)['Pages']
|
41 |
+
print("Number of pages in PDF: ", str(page_count))
|
42 |
|
43 |
images = []
|
44 |
|
45 |
# Open the PDF file
|
46 |
for page_num in progress.tqdm(range(0,page_count), total=page_count, unit="pages", desc="Converting pages"):
|
47 |
|
48 |
+
print("Current page: ", str(page_num))
|
49 |
+
|
50 |
# Convert one page to image
|
51 |
image = convert_from_path(pdf_path, first_page=page_num+1, last_page=page_num+1)
|
52 |
|
tools/file_redaction.py
CHANGED
@@ -15,7 +15,9 @@ def redact_image_pdf(file_path:str, language:str, chosen_redact_entities:List[st
|
|
15 |
take an path for an image of a document, then run this image through the Presidio ImageAnalyzer to get a redacted page back
|
16 |
'''
|
17 |
|
18 |
-
|
|
|
|
|
19 |
|
20 |
image_paths = process_file(file_path)
|
21 |
|
@@ -25,10 +27,14 @@ def redact_image_pdf(file_path:str, language:str, chosen_redact_entities:List[st
|
|
25 |
images = []
|
26 |
number_of_pages = len(image_paths)
|
27 |
|
28 |
-
|
|
|
|
|
29 |
|
30 |
for i in progress.tqdm(range(0,number_of_pages), total=number_of_pages, unit="pages", desc="Redacting pages"):
|
31 |
|
|
|
|
|
32 |
# Get the image to redact using PIL lib (pillow)
|
33 |
image = image_paths[i] #Image.open(image_paths[i])
|
34 |
|
|
|
15 |
take an path for an image of a document, then run this image through the Presidio ImageAnalyzer to get a redacted page back
|
16 |
'''
|
17 |
|
18 |
+
out_message = "Converting pages to image"
|
19 |
+
print(out_message)
|
20 |
+
progress(0, desc=out_message)
|
21 |
|
22 |
image_paths = process_file(file_path)
|
23 |
|
|
|
27 |
images = []
|
28 |
number_of_pages = len(image_paths)
|
29 |
|
30 |
+
out_message = "Redacting pages"
|
31 |
+
print(out_message)
|
32 |
+
progress(0.1, desc=out_message)
|
33 |
|
34 |
for i in progress.tqdm(range(0,number_of_pages), total=number_of_pages, unit="pages", desc="Redacting pages"):
|
35 |
|
36 |
+
print("Redacting page ", str(i + 1))
|
37 |
+
|
38 |
# Get the image to redact using PIL lib (pillow)
|
39 |
image = image_paths[i] #Image.open(image_paths[i])
|
40 |
|