Commit
·
8183bc4
1
Parent(s):
1d772de
Duplicate items should now be correctly removed from all_image_annotator
Browse files- tools/redaction_review.py +20 -15
tools/redaction_review.py
CHANGED
|
@@ -130,28 +130,33 @@ def update_annotator(image_annotator_object:AnnotatedImageData, page_num:int, re
|
|
| 130 |
if page_num_reported > page_max_reported:
|
| 131 |
page_num_reported = page_max_reported
|
| 132 |
|
| 133 |
-
|
| 134 |
|
| 135 |
# Remove duplicate elements that are blank
|
| 136 |
def remove_duplicate_images_with_blank_boxes(data: List[AnnotatedImageData]) -> List[AnnotatedImageData]:
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
for item in data:
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
|
| 151 |
image_annotator_object = remove_duplicate_images_with_blank_boxes(image_annotator_object)
|
| 152 |
|
| 153 |
-
|
| 154 |
-
|
| 155 |
|
| 156 |
out_image_annotator = image_annotator(
|
| 157 |
value = image_annotator_object[page_num_reported - 1],
|
|
|
|
| 130 |
if page_num_reported > page_max_reported:
|
| 131 |
page_num_reported = page_max_reported
|
| 132 |
|
| 133 |
+
from collections import defaultdict
|
| 134 |
|
| 135 |
# Remove duplicate entries for the same image, keeping one with non-empty boxes where available
|
| 136 |
def remove_duplicate_images_with_blank_boxes(data: List[AnnotatedImageData]) -> List[AnnotatedImageData]:
    """Collapse entries that share the same 'image' value into one entry.

    For each distinct 'image' value, the first entry whose 'boxes' value is
    truthy (non-empty) is kept. If every entry for that image has empty
    'boxes', the first entry for that image is kept instead.

    Args:
        data: Entries supporting ``entry['image']`` and ``entry['boxes']``.
            The 'image' value is used as a dict key, so it must be hashable.

    Returns:
        A new list with one entry per distinct 'image' value, ordered by the
        first appearance of each image in ``data``. The kept entries are the
        original objects, not copies.
    """
    # Group entries by image; a plain dict preserves first-seen order.
    entries_by_image = {}
    for entry in data:
        entries_by_image.setdefault(entry['image'], []).append(entry)

    deduplicated = []
    for candidates in entries_by_image.values():
        # Prefer an entry that actually carries boxes over a blank duplicate.
        with_boxes = [candidate for candidate in candidates if candidate['boxes']]
        deduplicated.append(with_boxes[0] if with_boxes else candidates[0])

    return deduplicated
|
| 155 |
|
| 156 |
image_annotator_object = remove_duplicate_images_with_blank_boxes(image_annotator_object)
|
| 157 |
|
| 158 |
+
print("image_annotator_object in update_annotator:", image_annotator_object)
|
| 159 |
+
print("image_annotator_object[page_num_reported - 1]:", image_annotator_object[page_num_reported - 1])
|
| 160 |
|
| 161 |
out_image_annotator = image_annotator(
|
| 162 |
value = image_annotator_object[page_num_reported - 1],
|