Commit
·
15026f7
1
Parent(s):
d8c98c8
Correctly adjusted outputs for situations where the PDF mediabox size is different from the visible page size
Browse files- tools/file_redaction.py +38 -19
tools/file_redaction.py
CHANGED
@@ -225,30 +225,47 @@ def choose_and_run_redactor(file_paths:List[str], image_paths:List[str], languag
|
|
225 |
|
226 |
return out_message_out, out_file_paths, out_file_paths, latest_file_completed, log_files_output_paths, log_files_output_paths, estimated_time_taken_state, all_request_metadata_str
|
227 |
|
228 |
-
def redact_page_with_pymupdf(doc, annotations_on_page, page_no, scale=(1,1)):
|
229 |
|
230 |
page = doc.load_page(page_no)
|
231 |
-
|
|
|
|
|
|
|
232 |
|
233 |
#print("page_rect_height:", page.rect.height)
|
234 |
#print("page mediabox size:", page.mediabox[3] - page.mediabox[1])
|
235 |
|
236 |
for annot in annotations_on_page:
|
237 |
if isinstance(annot, CustomImageRecognizerResult):
|
238 |
-
|
239 |
-
scale_height = scale[1]
|
240 |
|
241 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
242 |
|
243 |
# Calculate scaled coordinates
|
244 |
-
x1 = annot.left * scale_width
|
245 |
-
new_y1 = (annot.top * scale_height) # Flip Y0 (since it starts from bottom)
|
246 |
-
x2 = (annot.left + annot.width) * scale_width # Calculate x1
|
247 |
-
new_y2 = ((annot.top + annot.height) * scale_height) # Calculate y1 correctly
|
248 |
|
249 |
rect = Rect(x1, new_y1, x2, new_y2) # Create the PyMuPDF Rect (y1, y0 are flipped)
|
250 |
|
251 |
else:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
252 |
#print("In the pikepdf conversion function")
|
253 |
# Extract the /Rect field
|
254 |
rect_field = annot["/Rect"]
|
@@ -258,8 +275,10 @@ def redact_page_with_pymupdf(doc, annotations_on_page, page_no, scale=(1,1)):
|
|
258 |
|
259 |
# Convert the Y-coordinates (flip using the page height)
|
260 |
x1, y1, x2, y2 = rect_coordinates
|
261 |
-
|
262 |
-
|
|
|
|
|
263 |
|
264 |
rect = Rect(x1, new_y1, x2, new_y2)
|
265 |
|
@@ -482,18 +501,18 @@ def redact_image_pdf(file_path:str, image_paths:List[str], language:str, chosen_
|
|
482 |
|
483 |
|
484 |
# Get the dimensions of the page in points with pymupdf to get relative scale
|
485 |
-
page = doc.load_page(i)
|
486 |
-
mu_page_rect = page.rect
|
487 |
#mu_page_width = mu_page_rect.width
|
488 |
-
mu_page_height = max(mu_page_rect.height, page.mediabox[3] - page.mediabox[1])
|
489 |
-
mu_page_width = max(mu_page_rect.width, page.mediabox[2] - page.mediabox[0])
|
490 |
#mu_page_height = mu_page_rect.height
|
491 |
|
492 |
# Calculate scaling factors between PIL image and pymupdf
|
493 |
-
scale_width = mu_page_width / page_width
|
494 |
-
scale_height = mu_page_height / page_height
|
495 |
|
496 |
-
scale = (scale_width, scale_height)
|
497 |
|
498 |
|
499 |
# Possibility to use different languages
|
@@ -583,7 +602,7 @@ def redact_image_pdf(file_path:str, image_paths:List[str], language:str, chosen_
|
|
583 |
|
584 |
## Apply annotations with pymupdf
|
585 |
else:
|
586 |
-
doc = redact_page_with_pymupdf(doc, merged_redaction_bboxes, i, scale)
|
587 |
|
588 |
#doc.save("image_redact.pdf")
|
589 |
|
|
|
225 |
|
226 |
return out_message_out, out_file_paths, out_file_paths, latest_file_completed, log_files_output_paths, log_files_output_paths, estimated_time_taken_state, all_request_metadata_str
|
227 |
|
228 |
+
def redact_page_with_pymupdf(doc, annotations_on_page, page_no, image = None):#, scale=(1,1)):
|
229 |
|
230 |
page = doc.load_page(page_no)
|
231 |
+
mediabox_height = page.mediabox[3] - page.mediabox[1]
|
232 |
+
mediabox_width = page.mediabox[2] - page.mediabox[0]
|
233 |
+
rect_height = page.rect.height
|
234 |
+
rect_width = page.rect.width
|
235 |
|
236 |
#print("page_rect_height:", page.rect.height)
|
237 |
#print("page mediabox size:", page.mediabox[3] - page.mediabox[1])
|
238 |
|
239 |
for annot in annotations_on_page:
|
240 |
if isinstance(annot, CustomImageRecognizerResult):
|
241 |
+
image_page_width, image_page_height = image.size
|
|
|
242 |
|
243 |
+
# Calculate scaling factors between PIL image and pymupdf
|
244 |
+
scale_width = rect_width / image_page_width
|
245 |
+
scale_height = rect_height / image_page_height
|
246 |
+
|
247 |
+
#scale_width = scale[0]
|
248 |
+
#scale_height = scale[1]
|
249 |
+
|
250 |
+
#print("scale:", scale)
|
251 |
|
252 |
# Calculate scaled coordinates
|
253 |
+
x1 = (annot.left * scale_width)# + page_x_adjust
|
254 |
+
new_y1 = (annot.top * scale_height)# - page_y_adjust # Flip Y0 (since it starts from bottom)
|
255 |
+
x2 = ((annot.left + annot.width) * scale_width)# + page_x_adjust # Calculate x1
|
256 |
+
new_y2 = ((annot.top + annot.height) * scale_height)# - page_y_adjust # Calculate y1 correctly
|
257 |
|
258 |
rect = Rect(x1, new_y1, x2, new_y2) # Create the PyMuPDF Rect (y1, y0 are flipped)
|
259 |
|
260 |
else:
|
261 |
+
# Calculate scaling factors
|
262 |
+
scale_height = rect_height / mediabox_height if mediabox_height else 1
|
263 |
+
scale_width = rect_width / mediabox_width if mediabox_width else 1
|
264 |
+
|
265 |
+
# Adjust coordinates based on scaling factors
|
266 |
+
page_x_adjust = (rect_width - mediabox_width) / 2 # Center adjustment
|
267 |
+
page_y_adjust = (rect_height - mediabox_height) / 2 # Center adjustment
|
268 |
+
|
269 |
#print("In the pikepdf conversion function")
|
270 |
# Extract the /Rect field
|
271 |
rect_field = annot["/Rect"]
|
|
|
275 |
|
276 |
# Convert the Y-coordinates (flip using the page height)
|
277 |
x1, y1, x2, y2 = rect_coordinates
|
278 |
+
x1 = x1 + page_x_adjust
|
279 |
+
new_y1 = (rect_height - y2) - page_y_adjust
|
280 |
+
x2 = x2 + page_x_adjust
|
281 |
+
new_y2 = (rect_height - y1) - page_y_adjust
|
282 |
|
283 |
rect = Rect(x1, new_y1, x2, new_y2)
|
284 |
|
|
|
501 |
|
502 |
|
503 |
# Get the dimensions of the page in points with pymupdf to get relative scale
|
504 |
+
#page = doc.load_page(i)
|
505 |
+
#mu_page_rect = page.rect
|
506 |
#mu_page_width = mu_page_rect.width
|
507 |
+
#mu_page_height = max(mu_page_rect.height, page.mediabox[3] - page.mediabox[1])
|
508 |
+
#mu_page_width = max(mu_page_rect.width, page.mediabox[2] - page.mediabox[0])
|
509 |
#mu_page_height = mu_page_rect.height
|
510 |
|
511 |
# Calculate scaling factors between PIL image and pymupdf
|
512 |
+
#scale_width = mu_page_width / page_width
|
513 |
+
#scale_height = mu_page_height / page_height
|
514 |
|
515 |
+
#scale = (scale_width, scale_height)
|
516 |
|
517 |
|
518 |
# Possibility to use different languages
|
|
|
602 |
|
603 |
## Apply annotations with pymupdf
|
604 |
else:
|
605 |
+
doc = redact_page_with_pymupdf(doc, merged_redaction_bboxes, i, image)#, scale)
|
606 |
|
607 |
#doc.save("image_redact.pdf")
|
608 |
|