seanpedrickcase committed on
Commit
a9dcd2e
·
1 Parent(s): 0c2987b

Moved review components to give more space for the page. Extended zoom limits. Existing redaction labels should now appear in the new redaction box dropdown.

Browse files
Files changed (2) hide show
  1. app.py +10 -8
  2. tools/redaction_review.py +32 -48
app.py CHANGED
@@ -121,7 +121,7 @@ with app:
121
 
122
 
123
  ## Annotator zoom value
124
- annotator_zoom_number = gr.Number(label = "Current annotator zoom level", value=100, precision=0, visible=False)
125
  zoom_true_bool = gr.State(True)
126
  zoom_false_bool = gr.State(False)
127
 
@@ -203,7 +203,7 @@ with app:
203
 
204
  with gr.Accordion(label = "Review redaction file", open=True):
205
  output_review_files = gr.File(label="Review output files", file_count='multiple', height=file_input_height)
206
- upload_previous_review_file_btn = gr.Button("Review previously created redaction file (upload original PDF and ...review_file.csv)")
207
 
208
  with gr.Row():
209
  annotation_last_page_button = gr.Button("Previous page", scale = 3)
@@ -215,12 +215,10 @@ with app:
215
  annotate_zoom_out = gr.Button("Zoom out")
216
  with gr.Row():
217
  clear_all_redactions_on_page_btn = gr.Button("Clear all redactions on page", visible=False)
218
- annotation_button_apply = gr.Button("Apply revised redactions", variant="primary")
219
-
220
 
221
  with gr.Row():
222
 
223
- with gr.Column(scale=4):
224
 
225
  zoom_str = str(annotator_zoom_number) + '%'
226
 
@@ -242,9 +240,13 @@ with app:
242
  interactive=False
243
  )
244
 
245
- with gr.Column(scale=1):
246
- recogniser_entity_dropdown = gr.Dropdown(label="Redaction category", value="ALL", allow_custom_value=True)
247
- recogniser_entity_dataframe = gr.Dataframe(pd.DataFrame(data={"page":[], "label":[]}), col_count=2, type="pandas", label="Search results. Click to go to page")
 
 
 
 
248
 
249
  with gr.Row():
250
  annotation_last_page_button_bottom = gr.Button("Previous page", scale = 3)
 
121
 
122
 
123
  ## Annotator zoom value
124
+ annotator_zoom_number = gr.Number(label = "Current annotator zoom level", value=80, precision=0, visible=False)
125
  zoom_true_bool = gr.State(True)
126
  zoom_false_bool = gr.State(False)
127
 
 
203
 
204
  with gr.Accordion(label = "Review redaction file", open=True):
205
  output_review_files = gr.File(label="Review output files", file_count='multiple', height=file_input_height)
206
+ upload_previous_review_file_btn = gr.Button("Review previously created redaction file (upload original PDF and ...review_file.csv)", variant="primary")
207
 
208
  with gr.Row():
209
  annotation_last_page_button = gr.Button("Previous page", scale = 3)
 
215
  annotate_zoom_out = gr.Button("Zoom out")
216
  with gr.Row():
217
  clear_all_redactions_on_page_btn = gr.Button("Clear all redactions on page", visible=False)
 
 
218
 
219
  with gr.Row():
220
 
221
+ with gr.Column(scale=1):
222
 
223
  zoom_str = str(annotator_zoom_number) + '%'
224
 
 
240
  interactive=False
241
  )
242
 
243
+ with gr.Row():
244
+ annotation_button_apply = gr.Button("Apply revised redactions", variant="primary")
245
+
246
+ #with gr.Column(scale=1):
247
+ with gr.Row():
248
+ recogniser_entity_dropdown = gr.Dropdown(label="Redaction category", value="ALL", allow_custom_value=True)
249
+ recogniser_entity_dataframe = gr.Dataframe(pd.DataFrame(data={"page":[], "label":[]}), col_count=2, type="pandas", label="Search results. Click to go to page")
250
 
251
  with gr.Row():
252
  annotation_last_page_button_bottom = gr.Button("Previous page", scale = 3)
tools/redaction_review.py CHANGED
@@ -45,35 +45,11 @@ def update_zoom(current_zoom_level:int, annotate_current_page:int, decrease:bool
45
  if current_zoom_level >= 70:
46
  current_zoom_level -= 10
47
  else:
48
- if current_zoom_level < 100:
49
  current_zoom_level += 10
50
 
51
  return current_zoom_level, annotate_current_page
52
 
53
-
54
- # Remove duplicate elements that are blank
55
- # def remove_duplicate_images_with_blank_boxes(data: List[AnnotatedImageData]) -> List[AnnotatedImageData]:
56
- # # Group items by 'image'
57
- # image_groups = defaultdict(list)
58
- # for item in data:
59
- # image_groups[item['image']].append(item)
60
-
61
- # # Process each group to retain only the entry with non-empty boxes, if available
62
- # result = []
63
- # for image, items in image_groups.items():
64
- # # Filter items with non-empty boxes
65
- # non_empty_boxes = [item for item in items if item['boxes']]
66
- # if non_empty_boxes:
67
- # # Keep the first entry with non-empty boxes
68
- # result.append(non_empty_boxes[0])
69
- # else:
70
- # # If no non-empty boxes, keep the first item with empty boxes
71
- # result.append(items[0])
72
-
73
- # #print("result:", result)
74
-
75
- # return result
76
-
77
  def remove_duplicate_images_with_blank_boxes(data: List[dict]) -> List[dict]:
78
  '''
79
  Remove items from the annotator object where the same page exists twice.
@@ -97,35 +73,43 @@ def remove_duplicate_images_with_blank_boxes(data: List[dict]) -> List[dict]:
97
 
98
  return result
99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  def update_annotator(image_annotator_object:AnnotatedImageData, page_num:int, recogniser_entities_drop=gr.Dropdown(value="ALL", allow_custom_value=True), recogniser_dataframe_gr=gr.Dataframe(pd.DataFrame(data={"page":[], "label":[]})), zoom:int=100):
101
  '''
102
  Update a gradio_image_annotation object with new annotation data
103
- '''
104
- recogniser_entities = []
105
- recogniser_dataframe = pd.DataFrame()
106
 
107
  if recogniser_dataframe_gr.empty:
108
- try:
109
- review_dataframe = convert_review_json_to_pandas_df(image_annotator_object)[["page", "label"]]
110
- #print("review_dataframe['label']", review_dataframe["label"])
111
- recogniser_entities = review_dataframe["label"].unique().tolist()
112
- recogniser_entities.append("ALL")
113
- recogniser_entities = sorted(recogniser_entities)
114
-
115
- #print("recogniser_entities:", recogniser_entities)
116
-
117
- recogniser_dataframe_out = gr.Dataframe(review_dataframe)
118
- recogniser_dataframe_gr = gr.Dataframe(review_dataframe)
119
- recogniser_entities_drop = gr.Dropdown(value=recogniser_entities[0], choices=recogniser_entities, allow_custom_value=True, interactive=True)
120
- except Exception as e:
121
- print("Could not extract recogniser information:", e)
122
- recogniser_dataframe_out = recogniser_dataframe_gr
123
-
124
  else:
125
  review_dataframe = update_entities_df(recogniser_entities_drop, recogniser_dataframe_gr)
126
  recogniser_dataframe_out = gr.Dataframe(review_dataframe)
 
 
127
 
128
  zoom_str = str(zoom) + '%'
 
129
 
130
  if not image_annotator_object:
131
  page_num_reported = 1
@@ -134,8 +118,8 @@ def update_annotator(image_annotator_object:AnnotatedImageData, page_num:int, re
134
  image_annotator_object[page_num_reported - 1],
135
  boxes_alpha=0.1,
136
  box_thickness=1,
137
- #label_list=["Redaction"],
138
- #label_colors=[(0, 0, 0)],
139
  show_label=False,
140
  height=zoom_str,
141
  width=zoom_str,
@@ -179,8 +163,8 @@ def update_annotator(image_annotator_object:AnnotatedImageData, page_num:int, re
179
  value = image_annotator_object[page_num_reported - 1],
180
  boxes_alpha=0.1,
181
  box_thickness=1,
182
- #label_list=["Redaction"],
183
- #label_colors=[(0, 0, 0)],
184
  show_label=False,
185
  height=zoom_str,
186
  width=zoom_str,
 
45
  if current_zoom_level >= 70:
46
  current_zoom_level -= 10
47
  else:
48
+ if current_zoom_level < 110:
49
  current_zoom_level += 10
50
 
51
  return current_zoom_level, annotate_current_page
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  def remove_duplicate_images_with_blank_boxes(data: List[dict]) -> List[dict]:
54
  '''
55
  Remove items from the annotator object where the same page exists twice.
 
73
 
74
  return result
75
 
76
+ def get_recogniser_dataframe_out(image_annotator_object, recogniser_dataframe_gr):
77
+ try:
78
+ review_dataframe = convert_review_json_to_pandas_df(image_annotator_object)[["page", "label"]]
79
+ recogniser_entities = review_dataframe["label"].unique().tolist()
80
+ recogniser_entities.append("ALL")
81
+ recogniser_entities = sorted(recogniser_entities)
82
+
83
+ recogniser_dataframe_out = gr.Dataframe(review_dataframe)
84
+ recogniser_entities_drop = gr.Dropdown(value=recogniser_entities[0], choices=recogniser_entities, allow_custom_value=True, interactive=True)
85
+
86
+ except Exception as e:
87
+ print("Could not extract recogniser information:", e)
88
+ recogniser_dataframe_out = recogniser_dataframe_gr
89
+ recogniser_entities_drop = gr.Dropdown(value="", choices=[""], allow_custom_value=True, interactive=True)
90
+ recogniser_entities = ["Redaction"]
91
+
92
+ return recogniser_dataframe_out, recogniser_dataframe_out, recogniser_entities_drop, recogniser_entities
93
+
94
  def update_annotator(image_annotator_object:AnnotatedImageData, page_num:int, recogniser_entities_drop=gr.Dropdown(value="ALL", allow_custom_value=True), recogniser_dataframe_gr=gr.Dataframe(pd.DataFrame(data={"page":[], "label":[]})), zoom:int=100):
95
  '''
96
  Update a gradio_image_annotation object with new annotation data
97
+ '''
98
+ recogniser_entities_list = ["Redaction"]
99
+ recogniser_dataframe_out = pd.DataFrame()
100
 
101
  if recogniser_dataframe_gr.empty:
102
+ recogniser_dataframe_gr, recogniser_dataframe_out, recogniser_entities_drop, recogniser_entities_list = get_recogniser_dataframe_out(image_annotator_object, recogniser_dataframe_gr)
103
+ elif recogniser_dataframe_gr.iloc[0,0] == "":
104
+ recogniser_dataframe_gr, recogniser_dataframe_out, recogniser_entities_drop, recogniser_entities_list = get_recogniser_dataframe_out(image_annotator_object, recogniser_dataframe_gr)
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  else:
106
  review_dataframe = update_entities_df(recogniser_entities_drop, recogniser_dataframe_gr)
107
  recogniser_dataframe_out = gr.Dataframe(review_dataframe)
108
+ recogniser_entities_list = review_dataframe["label"].unique().tolist()
109
+ recogniser_entities_list = sorted(recogniser_entities_list)
110
 
111
  zoom_str = str(zoom) + '%'
112
+ recogniser_colour_list = [(0, 0, 0) for _ in range(len(recogniser_entities_list))]
113
 
114
  if not image_annotator_object:
115
  page_num_reported = 1
 
118
  image_annotator_object[page_num_reported - 1],
119
  boxes_alpha=0.1,
120
  box_thickness=1,
121
+ label_list=recogniser_entities_list,
122
+ label_colors=recogniser_colour_list,
123
  show_label=False,
124
  height=zoom_str,
125
  width=zoom_str,
 
163
  value = image_annotator_object[page_num_reported - 1],
164
  boxes_alpha=0.1,
165
  box_thickness=1,
166
+ label_list=recogniser_entities_list,
167
+ label_colors=recogniser_colour_list,
168
  show_label=False,
169
  height=zoom_str,
170
  width=zoom_str,