seanpedrickcase committed on
Commit
8183bc4
·
1 Parent(s): 1d772de

Now should correctly remove duplicate items from all_image_annotator

Browse files
Files changed (1) hide show
  1. tools/redaction_review.py +20 -15
tools/redaction_review.py CHANGED
@@ -130,28 +130,33 @@ def update_annotator(image_annotator_object:AnnotatedImageData, page_num:int, re
130
  if page_num_reported > page_max_reported:
131
  page_num_reported = page_max_reported
132
 
133
-
134
 
135
  # Remove duplicate elements that are blank
136
  def remove_duplicate_images_with_blank_boxes(data: List[AnnotatedImageData]) -> List[AnnotatedImageData]:
137
- seen_images = set()
138
- filtered_data = []
139
-
140
  for item in data:
141
- # Check if 'image' is unique
142
- if item['image'] not in seen_images:
143
- filtered_data.append(item)
144
- seen_images.add(item['image'])
145
- # If 'boxes' is empty but 'image' is unique, keep the entry
146
- elif item['boxes']:
147
- filtered_data.append(item)
148
-
149
- return filtered_data
 
 
 
 
 
 
150
 
151
  image_annotator_object = remove_duplicate_images_with_blank_boxes(image_annotator_object)
152
 
153
- #print("image_annotator_object in update_annotator:", image_annotator_object)
154
- #print("image_annotator_object[page_num_reported - 1]:", image_annotator_object[page_num_reported - 1])
155
 
156
  out_image_annotator = image_annotator(
157
  value = image_annotator_object[page_num_reported - 1],
 
130
  if page_num_reported > page_max_reported:
131
  page_num_reported = page_max_reported
132
 
133
+ from collections import defaultdict
134
 
135
  # Remove duplicate elements that are blank
136
  def remove_duplicate_images_with_blank_boxes(data: List[AnnotatedImageData]) -> List[AnnotatedImageData]:
137
+ # Group items by 'image'
138
+ image_groups = defaultdict(list)
 
139
  for item in data:
140
+ image_groups[item['image']].append(item)
141
+
142
+ # Process each group to remove duplicates
143
+ result = []
144
+ for image, items in image_groups.items():
145
+ # Filter items with non-empty boxes
146
+ non_empty_boxes = [item for item in items if item['boxes']]
147
+ if non_empty_boxes:
148
+ # Add only the first one with non-empty boxes
149
+ result.append(non_empty_boxes[0])
150
+ else:
151
+ # If all boxes are empty, add the first one
152
+ result.append(items[0])
153
+
154
+ return result
155
 
156
  image_annotator_object = remove_duplicate_images_with_blank_boxes(image_annotator_object)
157
 
158
+ print("image_annotator_object in update_annotator:", image_annotator_object)
159
+ print("image_annotator_object[page_num_reported - 1]:", image_annotator_object[page_num_reported - 1])
160
 
161
  out_image_annotator = image_annotator(
162
  value = image_annotator_object[page_num_reported - 1],