Commit
·
8183bc4
1
Parent(s):
1d772de
Now should correctly remove duplicate items from all_image_annotator
Browse files- tools/redaction_review.py +20 -15
tools/redaction_review.py
CHANGED
@@ -130,28 +130,33 @@ def update_annotator(image_annotator_object:AnnotatedImageData, page_num:int, re
|
|
130 |
if page_num_reported > page_max_reported:
|
131 |
page_num_reported = page_max_reported
|
132 |
|
133 |
-
|
134 |
|
135 |
# Remove duplicate elements that are blank
|
136 |
def remove_duplicate_images_with_blank_boxes(data: List[AnnotatedImageData]) -> List[AnnotatedImageData]:
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
for item in data:
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
150 |
|
151 |
image_annotator_object = remove_duplicate_images_with_blank_boxes(image_annotator_object)
|
152 |
|
153 |
-
|
154 |
-
|
155 |
|
156 |
out_image_annotator = image_annotator(
|
157 |
value = image_annotator_object[page_num_reported - 1],
|
|
|
130 |
if page_num_reported > page_max_reported:
|
131 |
page_num_reported = page_max_reported
|
132 |
|
133 |
+
from collections import defaultdict
|
134 |
|
135 |
# Remove duplicate elements that are blank
|
136 |
def remove_duplicate_images_with_blank_boxes(data: List[AnnotatedImageData]) -> List[AnnotatedImageData]:
    """Collapse entries that share the same 'image' value into one entry.

    For every distinct 'image' value, the first entry whose 'boxes' value is
    non-empty is kept. If none of the entries for that image has any boxes,
    the first entry seen for the image is kept instead. The output follows
    the order in which each image first appears in ``data``.

    Args:
        data: Dict-like entries. Each must have an 'image' key whose value is
            hashable (it is used as a dictionary key). The 'boxes' key is
            optional; a missing or empty value is treated as "no boxes".

    Returns:
        A new list holding exactly one entry per distinct image. ``data``
        itself is not modified.
    """
    # Entry currently selected for each image. A plain dict keeps insertion
    # order, so the result follows the first appearance of every image even
    # when the selected entry is swapped for a later one below.
    selected = {}

    for item in data:
        image = item['image']

        if image not in selected:
            selected[image] = item
            continue

        # Only upgrade a blank selection to the first entry that has boxes.
        # .get() is used so entries without a 'boxes' key count as blank
        # instead of raising KeyError.
        # NOTE(review): a later duplicate whose boxes differ from an already
        # selected non-empty entry is dropped, not merged - confirm intended.
        if item.get('boxes') and not selected[image].get('boxes'):
            selected[image] = item

    return list(selected.values())
|
155 |
|
156 |
image_annotator_object = remove_duplicate_images_with_blank_boxes(image_annotator_object)
|
157 |
|
158 |
+
print("image_annotator_object in update_annotator:", image_annotator_object)
|
159 |
+
print("image_annotator_object[page_num_reported - 1]:", image_annotator_object[page_num_reported - 1])
|
160 |
|
161 |
out_image_annotator = image_annotator(
|
162 |
value = image_annotator_object[page_num_reported - 1],
|