rmm committed on
Commit
6a02bc4
·
1 Parent(s): 3eaf0a5

feat: separated file input and metadata input functions

Browse files

- can't generate widgets within callbacks, they are not stable
- flow instead is:
1. normal flow: add file_uploader with callback
2. buffer files in the callback (st.session_state)
3. normal flow: add UI elements to get metadata, for each file in
buffer

Files changed (2) hide show
  1. src/input/input_handling.py +183 -24
  2. src/main.py +28 -1
src/input/input_handling.py CHANGED
@@ -26,7 +26,7 @@ allowed_image_types = ['jpg', 'jpeg', 'png', 'webp']
26
  # an arbitrary set of defaults so testing is less painful...
27
  # ideally we add in some randomization to the defaults
28
  spoof_metadata = {
29
- "latitude": 23.5,
30
  "longitude": 44,
31
  "author_email": "[email protected]",
32
  "date": None,
@@ -90,13 +90,20 @@ def check_inputs_are_set(empty_ok:bool=False, debug:bool=False) -> bool:
90
  return all([v is not None for v in vals])
91
 
92
 
93
- def process_one_file(file:UploadedFile) -> Tuple[np.ndarray, str, str, InputObservation]:
94
  # do all the non-UI calcs
95
  # add the UI elements
96
  # and in-line, do processing/validation of the inputs
97
  # - how to deal with the gathered data? a) push into session state, b) return all the elements needed?
98
 
99
- viewcontainer = st.sidebar
 
 
 
 
 
 
 
100
 
101
  # do all the non-UI calcs first
102
  ## get the bytes first, then convert into 1) image, 2) md5
@@ -118,7 +125,7 @@ def process_one_file(file:UploadedFile) -> Tuple[np.ndarray, str, str, InputObse
118
  # 3. Latitude Entry Box
119
  latitude = viewcontainer.text_input(
120
  "Latitude for " + filename,
121
- #spoof_metadata.get('latitude', ""),
122
  key=f"input_latitude_{ukey}")
123
  if latitude and not is_valid_number(latitude):
124
  viewcontainer.error("Please enter a valid latitude (numerical only).")
@@ -142,8 +149,8 @@ def process_one_file(file:UploadedFile) -> Tuple[np.ndarray, str, str, InputObse
142
  date_value = datetime.datetime.now().date()
143
 
144
  ## if not, give user the option to enter manually
145
- date_option = st.sidebar.date_input("Date for "+filename, value=date_value, key=f"input_date_{ukey}")
146
- time_option = st.sidebar.time_input("Time for "+filename, time_value, key=f"input_time_{ukey}")
147
 
148
  observation = InputObservation(image=file, latitude=latitude, longitude=longitude,
149
  author_email=author_email, date=image_datetime, time=None,
@@ -161,11 +168,50 @@ def process_one_file(file:UploadedFile) -> Tuple[np.ndarray, str, str, InputObse
161
  return the_data
162
 
163
 
 
 
 
 
 
 
 
 
 
 
164
 
 
 
165
 
166
- #
 
 
 
 
 
 
 
 
 
 
 
167
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
 
 
 
169
 
170
 
171
  def process_files():
@@ -184,8 +230,9 @@ def process_files():
184
  image_hashes = []
185
  filenames = []
186
 
187
- for file in uploaded_files:
188
- (image, image_hash, filename, observation) = process_one_file(file)
 
189
  # big old debug because of pain.
190
 
191
  filenames.append(filename)
@@ -201,9 +248,91 @@ def process_files():
201
  st.session_state.image_filenames = filenames
202
 
203
 
 
 
204
 
 
 
 
 
 
 
205
 
206
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
 
208
 
209
  def _setup_oneoff_inputs() -> None:
@@ -211,22 +340,47 @@ def _setup_oneoff_inputs() -> None:
211
  Add the UI input elements for which we have one each
212
 
213
  '''
214
- viewcontainer = st.sidebar
215
- viewcontainer.title("Input image and data")
216
-
217
- # 1. Input the author email
218
- author_email = viewcontainer.text_input("Author Email", spoof_metadata.get('author_email', ""),
219
- key="input_author_email")
220
- if author_email and not is_valid_email(author_email):
221
- viewcontainer.error("Please enter a valid email address.")
222
-
223
- # 2. Image Selector
224
- #uploaded_files = viewcontainer.file_uploader("Upload an image", type=allowed_image_types, accept_multiple_files=True)
 
 
 
 
 
 
225
 
226
- st.file_uploader("Upload one or more images", type=["png", 'jpg', 'jpeg', 'webp'],
227
- accept_multiple_files=True,
228
- key="file_uploader_data",
229
- on_change=process_files)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
 
231
 
232
 
@@ -239,6 +393,11 @@ def setup_input(
239
  '''
240
  _setup_oneoff_inputs()
241
  # amazingly we just have to add the uploader and its callback, and the rest is dynamic.
 
 
 
 
 
242
 
243
 
244
  def setup_input_monolithic(
 
26
  # an arbitrary set of defaults so testing is less painful...
27
  # ideally we add in some randomization to the defaults
28
  spoof_metadata = {
29
+ "latitude": 0.5,
30
  "longitude": 44,
31
  "author_email": "[email protected]",
32
  "date": None,
 
90
  return all([v is not None for v in vals])
91
 
92
 
93
+ def process_one_file(file:UploadedFile, ix:int=0) -> Tuple[np.ndarray, str, str, InputObservation]:
94
  # do all the non-UI calcs
95
  # add the UI elements
96
  # and in-line, do processing/validation of the inputs
97
  # - how to deal with the gathered data? a) push into session state, b) return all the elements needed?
98
 
99
+ #viewcontainer = st.sidebarif st.session_state.container_per_file_input_elems is None:
100
+ if st.session_state.container_metadata_inputs is not None:
101
+ viewcontainer = st.session_state.container_metadata_inputs
102
+ else:
103
+ viewcontainer = st.sidebar
104
+ msg = f"[W] `container_metadata_inputs` is None, using sidebar"
105
+ m_logger.warning(msg) ; print(msg)
106
+
107
 
108
  # do all the non-UI calcs first
109
  ## get the bytes first, then convert into 1) image, 2) md5
 
125
  # 3. Latitude Entry Box
126
  latitude = viewcontainer.text_input(
127
  "Latitude for " + filename,
128
+ spoof_metadata.get('latitude', 0) + ix,
129
  key=f"input_latitude_{ukey}")
130
  if latitude and not is_valid_number(latitude):
131
  viewcontainer.error("Please enter a valid latitude (numerical only).")
 
149
  date_value = datetime.datetime.now().date()
150
 
151
  ## if not, give user the option to enter manually
152
+ date_option = viewcontainer.date_input("Date for "+filename, value=date_value, key=f"input_date_{ukey}")
153
+ time_option = viewcontainer.time_input("Time for "+filename, time_value, key=f"input_time_{ukey}")
154
 
155
  observation = InputObservation(image=file, latitude=latitude, longitude=longitude,
156
  author_email=author_email, date=image_datetime, time=None,
 
168
  return the_data
169
 
170
 
171
def buffer_files():
    """Buffer the uploader's files in session state (file_uploader callback).

    Only data that needs no further user input is gathered here: the decoded
    image, its hash and the filename. The per-file metadata widgets are built
    by a separate function during the normal script flow, because widgets
    created dynamically inside a callback should be avoided (tl;dr: they
    disappear).

    Reads ``st.session_state.file_uploader_data`` and writes
    ``st.session_state.images`` (dict keyed by image hash), ``.files``,
    ``.image_hashes`` and ``.image_filenames``.

    Note (author's observation): UploadedFile objects carry a ``file_id`` that
    is unique per file, but it does not persist between sessions and seems to
    be just a random identifier.
    """
    # the uploader widget stores its current selection under its key
    uploaded_files = st.session_state.file_uploader_data

    names = []
    digests = []
    decoded_images = {}

    for position, upload in enumerate(uploaded_files):
        name: str = upload.name
        print(f"[D] processing {position}th file {name}. {upload.file_id} {upload.type} {upload.size}")
        # decoding and hashing both need the file bytes, so they are done together
        pixels, digest = load_file_and_hash(upload)

        names.append(name)
        digests.append(digest)
        decoded_images[digest] = pixels

    st.session_state.images = decoded_images
    st.session_state.files = uploaded_files
    st.session_state.image_hashes = digests
    st.session_state.image_filenames = names
204
+
205
+
206
def load_file_and_hash(file:UploadedFile) -> Tuple[np.ndarray, str]:
    """Read an uploaded file once and return its decoded image and MD5 hash.

    Both operations need the raw bytes, so they share a single read.

    Args:
        file: the uploaded file; must be a seekable binary stream
            (Streamlit documents UploadedFile as a BytesIO subclass).

    Returns:
        (image, image_hash): the image decoded by OpenCV in colour mode, and
        the hex MD5 digest of the raw bytes.
        NOTE(review): per the OpenCV docs, imdecode yields None rather than
        raising when the bytes are not a decodable image -- callers may want
        to check for that.
    """
    # Rewind before and after reading: the same file object is read again
    # later (e.g. for the image datetime), and a stream left at EOF would
    # yield no bytes there.
    file.seek(0)
    _bytes = file.read()
    file.seek(0)

    image_hash = hashlib.md5(_bytes).hexdigest()
    # frombuffer wraps the bytes without an intermediate bytearray copy
    image: np.ndarray = cv2.imdecode(np.frombuffer(_bytes, dtype=np.uint8), cv2.IMREAD_COLOR)

    return (image, image_hash)
214
+
215
 
216
 
217
  def process_files():
 
230
  image_hashes = []
231
  filenames = []
232
 
233
+ for ix, file in enumerate(uploaded_files):
234
+ print(f"[D] processing file {file.name}. {file.file_id} {file.type} {file.size}")
235
+ (image, image_hash, filename, observation) = process_one_file(file, ix)
236
  # big old debug because of pain.
237
 
238
  filenames.append(filename)
 
248
  st.session_state.image_filenames = filenames
249
 
250
 
251
def metadata_inputs_one_file(file:UploadedFile, ukey:str, dbg_ix:int=0) -> InputObservation:
    """Add the metadata input widgets for one uploaded file and collect them.

    The widgets are placed in an expander inside the container stored in
    ``st.session_state.container_metadata_inputs``; if that is None the
    sidebar is used instead (with a warning).

    Args:
        file: the uploaded image the metadata belongs to.
        ukey: unique suffix for the widget keys of this file.
        dbg_ix: debugging hack -- added to the default latitude so that each
            input group shows different data, to check persistence.

    Returns:
        An InputObservation built from the file, the shared author email held
        in session state, and the values currently in the widgets.
    """
    parent_container = st.session_state.container_metadata_inputs
    if parent_container is None:
        parent_container = st.sidebar
        msg = "[W] `container_metadata_inputs` is None, using sidebar"
        m_logger.warning(msg) ; print(msg)

    author_email = st.session_state["input_author_email"]
    filename = file.name
    image_datetime = get_image_datetime(file)

    # add the UI elements, grouped per file
    viewcontainer = parent_container.expander(f"Metadata for {file.name}", expanded=True)

    # TODO: use session state so any changes are persisted within session -- currently I think
    # we are going to take the defaults over and over again -- if the user adjusts coords, or
    # date, it will get lost.
    # - it is a bit complicated: if no values change, they persist (the widget definition:
    #   params, name, key, etc) even if the code is re-run. but if the value changes, it is lost.

    # 3. Latitude Entry Box
    latitude = viewcontainer.text_input(
        "Latitude for " + filename,
        spoof_metadata.get('latitude', 0) + dbg_ix,
        key=f"input_latitude_{ukey}")
    if latitude and not is_valid_number(latitude):
        viewcontainer.error("Please enter a valid latitude (numerical only).")
        m_logger.error(f"Invalid latitude entered: {latitude}.")

    # 4. Longitude Entry Box
    longitude = viewcontainer.text_input(
        "Longitude for " + filename,
        spoof_metadata.get('longitude', ""),
        key=f"input_longitude_{ukey}")
    if longitude and not is_valid_number(longitude):
        viewcontainer.error("Please enter a valid longitude (numerical only).")
        # report the longitude here (this used to log the latitude by mistake)
        m_logger.error(f"Invalid longitude entered: {longitude}.")

    # 5. Date/time
    ## first from image metadata, otherwise default to the current date/time
    if image_datetime is not None:
        stamp = datetime.datetime.strptime(image_datetime, '%Y:%m:%d %H:%M:%S')
    else:
        stamp = datetime.datetime.now()
    time_value = stamp.time()
    date_value = stamp.date()

    ## either way, give the user the option to adjust manually
    date_option = viewcontainer.date_input("Date for "+filename, value=date_value, key=f"input_date_{ukey}")
    time_option = viewcontainer.time_input("Time for "+filename, time_value, key=f"input_time_{ukey}")

    observation = InputObservation(image=file, latitude=latitude, longitude=longitude,
                                   author_email=author_email, date=image_datetime, time=None,
                                   date_option=date_option, time_option=time_option,
                                   uploaded_filename=file,
                                   )

    # TODO: pass in the hash to InputObservation, so it is done once only. (need to refactor the class a bit)
    return observation
313
+
314
+
315
+
316
def _setup_dynamic_inputs() -> None:
    """Build the per-file metadata inputs for every buffered upload.

    For each file held in ``st.session_state.files`` the metadata widgets are
    added (and their values validated) via ``metadata_inputs_one_file``, using
    the matching entry of ``st.session_state.image_hashes`` as the widget key.
    At the end of the cycle there is one observation object per file, stored
    in ``st.session_state.observations`` keyed by image hash.
    """
    # files and hashes are parallel lists kept in session state
    buffered_files = st.session_state.files
    buffered_hashes = st.session_state.image_hashes

    per_file_observations = {}
    for position, upload in enumerate(buffered_files):
        image_hash = buffered_hashes[position]
        # the position doubles as the debug offset for the default values
        per_file_observations[image_hash] = metadata_inputs_one_file(upload, image_hash, position)

    st.session_state.observations = per_file_observations
336
 
337
 
338
  def _setup_oneoff_inputs() -> None:
 
340
  Add the UI input elements for which we have one each
341
 
342
  '''
343
+ st.title("Input image and data")
344
+
345
+ # setup containers for consistent layout order with dynamic elements
346
+ #container_file_uploader = st.container(border=False, key="container_file_uploader")
347
+ container_file_uploader = st.session_state.container_file_uploader
348
+ # - a container for the dynamic input elements (this one matters)
349
+ #if "container_per_file_input_elems" not in st.session_state:
350
+ # if st.session_state.container_per_file_input_elems is None:
351
+ # #st.session_state.container_per_file_input_elems = None
352
+ # c = st.container(border=True, key="container_per_file_input_elems")
353
+ # with c:
354
+ # st.write("No files uploaded yet.")
355
+ # print(f"[D] initialised the container..... {id(c)} | {c=}")
356
+ # st.session_state.container_per_file_input_elems = c
357
+ # else:
358
+ # print(f"[D] already present, don't redo... {id(st.session_state.container_per_file_input_elems)} | {st.session_state.container_per_file_input_elems=}")
359
+
360
 
361
+ with container_file_uploader:
362
+ # 1. Input the author email
363
+ author_email = st.text_input("Author Email", spoof_metadata.get('author_email', ""),
364
+ key="input_author_email")
365
+ if author_email and not is_valid_email(author_email):
366
+ st.error("Please enter a valid email address.")
367
+
368
+ # 2. Image Selector
369
+ st.file_uploader("Upload one or more images", type=["png", 'jpg', 'jpeg', 'webp'],
370
+ accept_multiple_files=True,
371
+ key="file_uploader_data",
372
+ #on_change=process_files)
373
+ on_change=buffer_files)
374
+ if 1:
375
+
376
+ uploaded_files = st.session_state.file_uploader_data
377
+
378
+ for ix, file in enumerate(uploaded_files):
379
+ print(f"[DD] rechecking file {file.name}. {file.file_id} {file.type} {file.size}")
380
+ pass
381
+
382
+
383
+
384
 
385
 
386
 
 
393
  '''
394
  _setup_oneoff_inputs()
395
  # amazingly we just have to add the uploader and its callback, and the rest is dynamic.
396
+ # or not... the situation is more complex :(
397
+
398
+ # setup dynamic UI input elements, based on the data that is buffered in session_state
399
+ _setup_dynamic_inputs()
400
+
401
 
402
 
403
  def setup_input_monolithic(
src/main.py CHANGED
@@ -82,6 +82,15 @@ if "workflow_fsm" not in st.session_state:
82
  # create and init the state machine
83
  st.session_state.workflow_fsm = WorkflowFSM(FSM_STATES)
84
 
 
 
 
 
 
 
 
 
 
85
  def refresh_progress():
86
  with st.sidebar:
87
  tot = st.session_state.workflow_fsm.num_states - 1
@@ -100,8 +109,14 @@ if "progress" not in st.session_state:
100
  def dbg_show_obs_hashes():
101
  # a debug: we seem to be losing the whale classes?
102
  st.write(f"[D] num observations: {len(st.session_state.observations)}")
 
103
  for hash in st.session_state.observations.keys():
104
- st.markdown(f"- [D] observation {hash} has {len(st.session_state.observations[hash].top_predictions)} predictions")
 
 
 
 
 
105
 
106
 
107
  def main() -> None:
@@ -141,6 +156,16 @@ def main() -> None:
141
 
142
  # create a sidebar, and parse all the input (returned as `observations` object)
143
  with st.sidebar:
 
 
 
 
 
 
 
 
 
 
144
  setup_input(viewcontainer=st.sidebar)
145
 
146
 
@@ -281,6 +306,8 @@ def main() -> None:
281
  # we can enter the next state - visualising the results / review)
282
  # ok it doesn't if done programmatically. maybe interacting with the button? check docs.
283
  refresh_progress()
 
 
284
 
285
  elif st.session_state.workflow_fsm.is_in_state('ml_classification_completed'):
286
  # show the results, and allow manual validation
 
82
  # create and init the state machine
83
  st.session_state.workflow_fsm = WorkflowFSM(FSM_STATES)
84
 
85
# Make sure the session-state slots for the layout containers exist before use.
# They start as None; the file-uploader and metadata-input slots are assigned
# real containers when the sidebar is built in main().
for _container_key in (
    "container_per_file_input_elems",
    "container_file_uploader",
    "container_metadata_inputs",
):
    if _container_key not in st.session_state:
        st.session_state[_container_key] = None
93
+
94
  def refresh_progress():
95
  with st.sidebar:
96
  tot = st.session_state.workflow_fsm.num_states - 1
 
109
  def dbg_show_obs_hashes():
110
  # a debug: we seem to be losing the whale classes?
111
  st.write(f"[D] num observations: {len(st.session_state.observations)}")
112
+ s = ""
113
  for hash in st.session_state.observations.keys():
114
+ obs = st.session_state.observations[hash]
115
+ s += f"- [D] observation {hash} ({obs._inst_id}) has {len(obs.top_predictions)} predictions\n"
116
+
117
+ #st.markdown(f"- [D] observation {hash} has {len(st.session_state.observations[hash].top_predictions)} predictions")
118
+
119
+ st.markdown(s)
120
 
121
 
122
  def main() -> None:
 
156
 
157
  # create a sidebar, and parse all the input (returned as `observations` object)
158
  with st.sidebar:
159
+ st.divider()
160
+
161
+ st.markdown('<style>.st-key-container_file_uploader_id { border: 1px solid skyblue; border-radius: 5px; }</style>', unsafe_allow_html=True)
162
+ container_file_uploader = st.container(border=True, key="container_file_uploader_id")
163
+ st.session_state.container_file_uploader = container_file_uploader
164
+ st.markdown('<style>.st-key-container_metadata_inputs_id { border: 1px solid lightgreen; border-radius: 5px; }</style>', unsafe_allow_html=True)
165
+ container_metadata_inputs = st.container(border=True, key="container_metadata_inputs_id")
166
+ container_metadata_inputs.write("Metadata Inputs... wait for file upload ")
167
+ st.session_state.container_metadata_inputs = container_metadata_inputs
168
+
169
  setup_input(viewcontainer=st.sidebar)
170
 
171
 
 
306
  # we can enter the next state - visualising the results / review)
307
  # ok it doesn't if done programmatically. maybe interacting with the button? check docs.
308
  refresh_progress()
309
+ #TODO: validate this doesn't harm performance adversely.
310
+ st.rerun()
311
 
312
  elif st.session_state.workflow_fsm.is_in_state('ml_classification_completed'):
313
  # show the results, and allow manual validation