rmm committed on
Commit
fd18838
·
1 Parent(s): 4d0f7fd

feat: nearly complete input handling with stable state

Browse files

- The main bug was that every interaction with the UI caused the
file_uploader to be re-instantiated; all the inputs were then
re-parsed, the hashes recalculated, and the entered data lost.

- The solution is to use a callback, and to rely on the session state to
implicitly store the file_uploader return value (this is not well documented).

- On change of the file_uploader state, we dynamically generate
the input elements used to supply the metadata, and process them inline.
- TODO: the data is stable in the session_state, but the UI loses the
per-image elements on the next rerun: because the list of files hasn't
changed, the callback doesn't get triggered.
- Good: we don't overwrite our loaded data, and the ML/presentation
can continue, but...
- Bad: we don't redraw the elements. -> This probably needs more caching.

src/input/input_handling.py CHANGED
@@ -1,8 +1,11 @@
 
1
  import datetime
2
  import logging
 
3
 
4
  import streamlit as st
5
  from streamlit.delta_generator import DeltaGenerator
 
6
 
7
  import cv2
8
  import numpy as np
@@ -31,6 +34,47 @@ spoof_metadata = {
31
  }
32
 
33
  def check_inputs_are_set(empty_ok:bool=False, debug:bool=False) -> bool:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  """
35
  Checks if all expected inputs have been entered
36
 
@@ -65,9 +109,164 @@ def check_inputs_are_set(empty_ok:bool=False, debug:bool=False) -> bool:
65
 
66
  return all([v is not None for v in vals])
67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
 
 
 
69
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  def setup_input(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  viewcontainer: DeltaGenerator=None,
72
  _allowed_image_types: list=None, ) -> InputObservation:
73
  """
 
1
+ from typing import List, Tuple
2
  import datetime
3
  import logging
4
+ import hashlib
5
 
6
  import streamlit as st
7
  from streamlit.delta_generator import DeltaGenerator
8
+ from streamlit.runtime.uploaded_file_manager import UploadedFile
9
 
10
  import cv2
11
  import numpy as np
 
34
  }
35
 
36
  def check_inputs_are_set(empty_ok:bool=False, debug:bool=False) -> bool:
37
+ return check_inputs_are_set_by_hash(empty_ok=empty_ok, debug=debug)
38
+
39
+
40
def check_inputs_are_set_by_hash(empty_ok:bool=False, debug:bool=False) -> bool:
    """
    Checks if all expected inputs have been entered

    Implementation: via the Streamlit session state. For each hash in
    `st.session_state.image_hashes`, the keys `input_latitude_<hash>` and
    `input_longitude_<hash>` are looked up; a key that is missing from the
    session state is treated the same as a key whose value is None.

    Args:
        empty_ok (bool): If True, returns True if no inputs are set. Default is False.
        debug (bool): If True, prints and logs the status of each expected input key. Default is False.
    Returns:
        bool: True if all expected input keys are set, False otherwise.
    """
    hashes = st.session_state.image_hashes
    if len(hashes) == 0:
        # nothing uploaded yet: the caller decides whether that counts as "set"
        return empty_ok

    # NOTE: only the per-image coordinates are checked for now; the fuller set
    # would be: input_latitude, input_longitude, input_author_email,
    # input_date, input_time, input_image_selector
    stubs = ("input_latitude", "input_longitude")
    expected_keys = [f"{stub}_{image_hash}" for image_hash in hashes for stub in stubs]

    statuses = []
    for key in expected_keys:
        val = st.session_state[key] if key in st.session_state else None
        is_set = val is not None
        statuses.append(is_set)
        if debug:
            msg = f"{key:15}, {is_set:8}, {val}"
            m_logger.debug(msg)
            print(msg)

    return all(statuses)
75
+
76
+
77
+ def check_inputs_are_set_by_fname(empty_ok:bool=False, debug:bool=False) -> bool:
78
  """
79
  Checks if all expected inputs have been entered
80
 
 
109
 
110
  return all([v is not None for v in vals])
111
 
112
+
113
def process_one_file(file:UploadedFile) -> Tuple[np.ndarray, str, str, InputObservation]:
    """
    Process one uploaded image: decode it, add its metadata widgets to the
    sidebar, validate the entries inline, and build an observation.

    The widget keys are suffixed with the MD5 hash of the file contents, so
    each image gets its own set of session-state entries
    (`input_latitude_<hash>`, `input_longitude_<hash>`, `input_date_<hash>`,
    `input_time_<hash>`).

    Args:
        file (UploadedFile): One entry from the file uploader.
    Returns:
        tuple: (image, image_hash, filename, observation) where `image` is the
            array decoded by OpenCV, `image_hash` is the hex MD5 of the raw
            bytes, `filename` is `file.name`, and `observation` is the
            InputObservation built from the widget values.
    """
    viewcontainer = st.sidebar

    # do all the non-UI calcs first
    ## get the bytes first, then convert into 1) image, 2) md5
    _bytes = file.read()
    # reading leaves the stream at EOF; rewind so later consumers of `file`
    # (date extraction, InputObservation hashing) see the full content
    file.seek(0)
    image_hash = hashlib.md5(_bytes).hexdigest()
    image: np.ndarray = cv2.imdecode(np.frombuffer(_bytes, dtype=np.uint8), cv2.IMREAD_COLOR)
    filename: str = file.name
    image_datetime = get_image_datetime(file)
    m_logger.debug(f"image date extracted as {image_datetime} (from {file})")

    author_email = st.session_state["input_author_email"]

    # add the UI elements
    viewcontainer.title(f"Metadata for {filename}")
    ukey = image_hash

    # 3. Latitude Entry Box
    latitude = viewcontainer.text_input(
        "Latitude for " + filename,
        spoof_metadata.get('latitude', ""),
        key=f"input_latitude_{ukey}")
    if latitude and not is_valid_number(latitude):
        viewcontainer.error("Please enter a valid latitude (numerical only).")
        m_logger.error(f"Invalid latitude entered: {latitude}.")

    # 4. Longitude Entry Box
    longitude = viewcontainer.text_input(
        "Longitude for " + filename,
        spoof_metadata.get('longitude', ""),
        key=f"input_longitude_{ukey}")
    if longitude and not is_valid_number(longitude):
        viewcontainer.error("Please enter a valid longitude (numerical only).")
        # fixed: this branch used to report the latitude value
        m_logger.error(f"Invalid longitude entered: {longitude}.")

    # 5. Date/time
    ## default to the current moment (sampled once so date and time agree) ...
    now = datetime.datetime.now()
    date_value, time_value = now.date(), now.time()
    ## ... and prefer the image metadata when it is present and parseable
    if image_datetime is not None:
        try:
            parsed = datetime.datetime.strptime(image_datetime, '%Y:%m:%d %H:%M:%S')
        except ValueError:
            # a malformed timestamp should not abort the whole upload callback
            m_logger.warning(f"Could not parse image datetime {image_datetime!r}; using current time.")
        else:
            date_value, time_value = parsed.date(), parsed.time()

    ## either way, give the user the option to adjust manually
    date_option = viewcontainer.date_input("Date for " + filename, value=date_value, key=f"input_date_{ukey}")
    time_option = viewcontainer.time_input("Time for " + filename, time_value, key=f"input_time_{ukey}")

    # get_image_datetime was handed the same stream and may have moved its
    # position; rewind again before InputObservation reads it
    file.seek(0)
    observation = InputObservation(image=file, latitude=latitude, longitude=longitude,
                                   author_email=author_email, date=image_datetime, time=None,
                                   date_option=date_option, time_option=time_option,
                                   uploaded_filename=file,
                                   )

    # TODO: pass in the hash to InputObservation, so it is done once only. (need to refactor the class a bit)

    return (image, image_hash, filename, observation)
182
+
183
+
184
+
185
+
186
+ #
187
+
188
+
189
+
190
+
191
def process_files():
    """
    Callback for the file uploader: rebuild the per-image session state.

    Reads the uploaded files from `st.session_state.file_uploader_data`, runs
    process_one_file on each (which adds the UI elements and processes the
    inputs), and stores the results in the session state.

    Note: the session-state entries are overwritten, not extended.
    """
    # get files from state
    uploaded_files = st.session_state.file_uploader_data

    # one (image, image_hash, filename, observation) tuple per file, in upload order
    results = [process_one_file(uploaded) for uploaded in uploaded_files]

    # the dicts are keyed by hash, so a repeated hash keeps its first position
    # and its last value; the lists keep one entry per uploaded file
    st.session_state.images = {digest: img for (img, digest, _, _) in results}
    st.session_state.files = uploaded_files
    st.session_state.observations = {digest: obs for (_, digest, _, obs) in results}
    st.session_state.image_hashes = [digest for (_, digest, _, _) in results]
    st.session_state.image_filenames = [name for (_, _, name, _) in results]
222
+
223
 
224
+
225
+
226
+
227
 
228
+
229
def _setup_oneoff_inputs() -> None:
    '''
    Add the UI input elements for which we have one each

    These are the author email text box (session key "input_author_email")
    and the image uploader (session key "file_uploader_data"), whose
    on_change callback is process_files.
    '''
    sidebar = st.sidebar
    sidebar.title("Input image and data")

    # 1. Input the author email
    default_email = spoof_metadata.get('author_email', "")
    author_email = sidebar.text_input("Author Email", default_email, key="input_author_email")
    email_rejected = bool(author_email) and not is_valid_email(author_email)
    if email_rejected:
        sidebar.error("Please enter a valid email address.")

    # 2. Image Selector
    # NOTE: created via `st` rather than the sidebar handle, so its placement
    # depends on the container that is active at the call site
    accepted_types = ["png", 'jpg', 'jpeg', 'webp']
    st.file_uploader(
        "Upload one or more images",
        type=accepted_types,
        accept_multiple_files=True,
        key="file_uploader_data",
        on_change=process_files,
    )
250
+
251
+
252
+
253
  def setup_input(
254
+ viewcontainer: DeltaGenerator=None,
255
+ _allowed_image_types: list=None, ) -> None:
256
+ '''
257
+ Set up the input handling for the whale observation guidance tool
258
+
259
+ '''
260
+ _setup_oneoff_inputs()
261
+ # amazingly we just have to add the uploader and its callback, and the rest is dynamic.
262
+
263
+ # # check if the inputs are set
264
+ # if check_inputs_are_set(empty_ok=True):
265
+ # st.sidebar.success("All inputs are set.")
266
+ # else:
267
+ # st.sidebar.warning("Please fill in all the required inputs.")
268
+
269
+ def setup_input_monolithic(
270
  viewcontainer: DeltaGenerator=None,
271
  _allowed_image_types: list=None, ) -> InputObservation:
272
  """
src/input/input_observation.py CHANGED
@@ -44,6 +44,9 @@ class InputObservation:
44
  from_input(input):
45
  Creates an observation from another input observation.
46
  """
 
 
 
47
  def __init__(self, image=None, latitude=None, longitude=None,
48
  author_email=None, date=None, time=None, date_option=None, time_option=None,
49
  uploaded_filename=None):
@@ -56,8 +59,13 @@ class InputObservation:
56
  self.date_option = date_option
57
  self.time_option = time_option
58
  self.uploaded_filename = uploaded_filename
 
59
  self._top_predictions = []
60
 
 
 
 
 
61
  def set_top_predictions(self, top_predictions:list):
62
  self._top_predictions = top_predictions
63
 
@@ -66,6 +74,11 @@ class InputObservation:
66
  def top_predictions(self):
67
  return self._top_predictions
68
 
 
 
 
 
 
69
 
70
  def __str__(self):
71
  return f"Observation: {self.image}, {self.latitude}, {self.longitude}, {self.author_email}, {self.date}, {self.time}, {self.date_option}, {self.time_option}, {self.uploaded_filename}"
@@ -88,7 +101,8 @@ class InputObservation:
88
  return {
89
  #"image": self.image,
90
  "image_filename": self.uploaded_filename.name if self.uploaded_filename else None,
91
- "image_md5": hashlib.md5(self.uploaded_filename.read()).hexdigest() if self.uploaded_filename else generate_random_md5(),
 
92
  "latitude": self.latitude,
93
  "longitude": self.longitude,
94
  "author_email": self.author_email,
 
44
  from_input(input):
45
  Creates an observation from another input observation.
46
  """
47
+
48
+ _inst_count = 0
49
+
50
  def __init__(self, image=None, latitude=None, longitude=None,
51
  author_email=None, date=None, time=None, date_option=None, time_option=None,
52
  uploaded_filename=None):
 
59
  self.date_option = date_option
60
  self.time_option = time_option
61
  self.uploaded_filename = uploaded_filename
62
+ self._image_md5 = None
63
  self._top_predictions = []
64
 
65
+ InputObservation._inst_count += 1
66
+ self._inst_id = InputObservation._inst_count
67
+ self.assign_image_md5()
68
+
69
  def set_top_predictions(self, top_predictions:list):
70
  self._top_predictions = top_predictions
71
 
 
74
  def top_predictions(self):
75
  return self._top_predictions
76
 
77
+ # add a method to assign the image_md5 only once
78
def assign_image_md5(self):
    """
    Assign `self._image_md5` once; later calls leave an existing value alone.

    With an uploaded file, the hash is the hex MD5 of the file's full
    contents. The stream is rewound before reading, so a caller that has
    already consumed it does not cause the hash of an empty byte string to be
    stored, and rewound again afterwards so later readers see all the bytes.
    Without an uploaded file, a value from generate_random_md5() is used.
    """
    if self._image_md5:
        return

    stream = self.uploaded_filename
    if not stream:
        self._image_md5 = generate_random_md5()
        return

    stream.seek(0)
    self._image_md5 = hashlib.md5(stream.read()).hexdigest()
    stream.seek(0)
81
+
82
 
83
  def __str__(self):
84
  return f"Observation: {self.image}, {self.latitude}, {self.longitude}, {self.author_email}, {self.date}, {self.time}, {self.date_option}, {self.time_option}, {self.uploaded_filename}"
 
101
  return {
102
  #"image": self.image,
103
  "image_filename": self.uploaded_filename.name if self.uploaded_filename else None,
104
+ "image_md5": self._image_md5,
105
+ #"image_md5": hashlib.md5(self.uploaded_filename.read()).hexdigest() if self.uploaded_filename else generate_random_md5(),
106
  "latitude": self.latitude,
107
  "longitude": self.longitude,
108
  "author_email": self.author_email,
src/main.py CHANGED
@@ -97,6 +97,12 @@ if "progress" not in st.session_state:
97
  st.sidebar.button("Refresh Progress", on_click=refresh_progress)
98
 
99
 
 
 
 
 
 
 
100
 
101
  def main() -> None:
102
  """
@@ -134,7 +140,8 @@ def main() -> None:
134
  refresh_progress()
135
 
136
  # create a sidebar, and parse all the input (returned as `observations` object)
137
- setup_input(viewcontainer=st.sidebar)
 
138
 
139
 
140
  if 0:## WIP
@@ -250,6 +257,9 @@ def main() -> None:
250
  # 6. manual validation done -> enable the upload buttons
251
  #
252
  with tab_inference:
 
 
 
253
  add_classifier_header()
254
  # if we are before data_entry_validated, show the button, disabled.
255
  if not st.session_state.workflow_fsm.is_in_state_or_beyond('data_entry_validated'):
 
97
  st.sidebar.button("Refresh Progress", on_click=refresh_progress)
98
 
99
 
100
def dbg_show_obs_hashes():
    """
    Debug helper: write the number of observations held in the session
    state, then one markdown bullet per observation giving its hash and how
    many top predictions it has.

    Added while investigating whether the whale classes were being lost.
    """
    observations = st.session_state.observations
    st.write(f"[D] num observations: {len(observations)}")
    for image_hash, observation in observations.items():
        n_predictions = len(observation.top_predictions)
        st.markdown(f"- [D] observation {image_hash} has {n_predictions} predictions")
105
+
106
 
107
  def main() -> None:
108
  """
 
140
  refresh_progress()
141
 
142
  # create a sidebar, and parse all the input (returned as `observations` object)
143
+ with st.sidebar:
144
+ setup_input(viewcontainer=st.sidebar)
145
 
146
 
147
  if 0:## WIP
 
257
  # 6. manual validation done -> enable the upload buttons
258
  #
259
  with tab_inference:
260
+
261
+ dbg_show_obs_hashes()
262
+
263
  add_classifier_header()
264
  # if we are before data_entry_validated, show the button, disabled.
265
  if not st.session_state.workflow_fsm.is_in_state_or_beyond('data_entry_validated'):