Spaces:
Sleeping
feat: nearly complete input handling with stable state
Browse files
- main bug was that every interaction with the UI led to the
file_uploader being re-instantiated, and then all the inputs
got re-parsed, the hashes recalculated, and the data lost.
- solution is via callback, and using the session state to implicitly
store the file_uploader return value (not well documented)
- on change of the file_uploader state, we dynamically generate
the input elements to supply the metadata. And process them inline.
- TODO: the data is stable in the session_state, but on a rerun the UI loses
the per-file input elements: because the file list hasn't changed, the
callback doesn't get triggered, so nothing redraws them.
- Good: we don't overwrite our loaded data, and the ML/presentation
can continue, but...
- Bad: we don't redraw the elements. -> more caching I suppose.
- src/input/input_handling.py +199 -0
- src/input/input_observation.py +15 -1
- src/main.py +11 -1
@@ -1,8 +1,11 @@
|
|
|
|
1 |
import datetime
|
2 |
import logging
|
|
|
3 |
|
4 |
import streamlit as st
|
5 |
from streamlit.delta_generator import DeltaGenerator
|
|
|
6 |
|
7 |
import cv2
|
8 |
import numpy as np
|
@@ -31,6 +34,47 @@ spoof_metadata = {
|
|
31 |
}
|
32 |
|
33 |
def check_inputs_are_set(empty_ok:bool=False, debug:bool=False) -> bool:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
"""
|
35 |
Checks if all expected inputs have been entered
|
36 |
|
@@ -65,9 +109,164 @@ def check_inputs_are_set(empty_ok:bool=False, debug:bool=False) -> bool:
|
|
65 |
|
66 |
return all([v is not None for v in vals])
|
67 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
|
|
|
|
|
|
|
69 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
def setup_input(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
viewcontainer: DeltaGenerator=None,
|
72 |
_allowed_image_types: list=None, ) -> InputObservation:
|
73 |
"""
|
|
|
1 |
+
from typing import List, Tuple
|
2 |
import datetime
|
3 |
import logging
|
4 |
+
import hashlib
|
5 |
|
6 |
import streamlit as st
|
7 |
from streamlit.delta_generator import DeltaGenerator
|
8 |
+
from streamlit.runtime.uploaded_file_manager import UploadedFile
|
9 |
|
10 |
import cv2
|
11 |
import numpy as np
|
|
|
34 |
}
|
35 |
|
36 |
def check_inputs_are_set(empty_ok:bool=False, debug:bool=False) -> bool:
    """
    Report whether every expected per-image input has been entered.

    Thin entry point: both arguments are forwarded unchanged to
    `check_inputs_are_set_by_hash`, and its result is returned as-is.

    Args:
        empty_ok (bool): forwarded to the hash-based check. Default is False.
        debug (bool): forwarded to the hash-based check. Default is False.
    Returns:
        bool: whatever `check_inputs_are_set_by_hash` returns.
    """
    forwarded = {"empty_ok": empty_ok, "debug": debug}
    return check_inputs_are_set_by_hash(**forwarded)
|
38 |
+
|
39 |
+
|
40 |
+
def check_inputs_are_set_by_hash(empty_ok:bool=False, debug:bool=False) -> bool:
    """
    Checks if all expected inputs have been entered

    Implementation: via the Streamlit session state. For every hash listed in
    `st.session_state.image_hashes`, each expected key `<stub>_<hash>` is
    looked up; a key that is absent from the session state counts as unset.

    Args:
        empty_ok (bool): Value returned when there are no image hashes at all. Default is False.
        debug (bool): If True, prints and logs the status of each expected input key. Default is False.
    Returns:
        bool: True if every expected key is present with a value that is not None, False otherwise.
    """
    known_hashes = st.session_state.image_hashes
    if not len(known_hashes):
        return empty_ok

    # only the coordinates are checked for now; the wider candidate list was
    # "input_author_email", "input_date", "input_time", "input_image_selector".
    required_stubs = ("input_latitude", "input_longitude")
    expected_keys = [
        f"{stub}_{digest}"
        for digest in known_hashes
        for stub in required_stubs
    ]

    collected = []
    for widget_key in expected_keys:
        current = st.session_state[widget_key] if widget_key in st.session_state else None
        collected.append(current)
        if debug:
            line = f"{widget_key:15}, {(current is not None):8}, {current}"
            m_logger.debug(line)
            print(line)

    # NOTE(review): only None counts as "unset" here; an empty string from a
    # blank text box would pass -- confirm that is intended.
    return all(entry is not None for entry in collected)
|
75 |
+
|
76 |
+
|
77 |
+
def check_inputs_are_set_by_fname(empty_ok:bool=False, debug:bool=False) -> bool:
|
78 |
"""
|
79 |
Checks if all expected inputs have been entered
|
80 |
|
|
|
109 |
|
110 |
return all([v is not None for v in vals])
|
111 |
|
112 |
+
|
113 |
+
def process_one_file(file:UploadedFile) -> Tuple[np.ndarray, str, str, InputObservation]:
    """
    Decode one uploaded image, add its metadata widgets to the sidebar, and
    wrap everything in an observation.

    Steps: (1) non-UI calculations (bytes -> md5 + decoded image, filename,
    image datetime), (2) per-file sidebar widgets keyed by the md5 so the keys
    are unique per image, with inline validation of latitude/longitude,
    (3) construction of the `InputObservation`.

    Args:
        file (UploadedFile): one file taken from the file uploader.
    Returns:
        Tuple[np.ndarray, str, str, InputObservation]: the decoded image, the
        md5 hex digest of the file bytes, the filename, and the observation.
    """
    viewcontainer = st.sidebar

    # --- non-UI calculations ---
    # getvalue() hands back the whole buffer WITHOUT moving the stream
    # position. The previous read() left the stream exhausted, so anything
    # that read `file` afterwards (e.g. the md5 inside InputObservation) saw
    # zero bytes.
    _bytes = file.getvalue()
    image_hash = hashlib.md5(_bytes).hexdigest()
    image: np.ndarray = cv2.imdecode(np.asarray(bytearray(_bytes), dtype=np.uint8), 1)
    filename:str = file.name
    image_datetime = get_image_datetime(file)
    m_logger.debug(f"image date extracted as {image_datetime} (from {file})")

    author_email = st.session_state["input_author_email"]

    # --- UI elements ---
    viewcontainer.title(f"Metadata for {filename}")
    ukey = image_hash

    # 3. Latitude Entry Box
    latitude = viewcontainer.text_input(
        "Latitude for " + filename,
        spoof_metadata.get('latitude', ""),
        key=f"input_latitude_{ukey}")
    if latitude and not is_valid_number(latitude):
        viewcontainer.error("Please enter a valid latitude (numerical only).")
        m_logger.error(f"Invalid latitude entered: {latitude}.")

    # 4. Longitude Entry Box
    longitude = viewcontainer.text_input(
        "Longitude for " + filename,
        spoof_metadata.get('longitude', ""),
        key=f"input_longitude_{ukey}")
    if longitude and not is_valid_number(longitude):
        viewcontainer.error("Please enter a valid longitude (numerical only).")
        # fixed: this used to log the latitude value under a latitude message
        m_logger.error(f"Invalid longitude entered: {longitude}.")

    # 5. Date/time
    ## default to "now"; overridden below when the image metadata has a usable timestamp
    now = datetime.datetime.now()
    date_value = now.date()
    time_value = now.time()
    if image_datetime is not None:
        try:
            parsed = datetime.datetime.strptime(image_datetime, '%Y:%m:%d %H:%M:%S')
        except ValueError:
            # a malformed timestamp should not abort the whole upload callback
            m_logger.warning(f"Could not parse image datetime {image_datetime!r}; defaulting to current time.")
        else:
            date_value = parsed.date()
            time_value = parsed.time()

    ## the widgets let the user confirm or override the values
    date_option = viewcontainer.date_input("Date for "+filename, value=date_value, key=f"input_date_{ukey}")
    time_option = viewcontainer.time_input("Time for "+filename, time_value, key=f"input_time_{ukey}")

    observation = InputObservation(image=file, latitude=latitude, longitude=longitude,
                                   author_email=author_email, date=image_datetime, time=None,
                                   date_option=date_option, time_option=time_option,
                                   uploaded_filename=file,
                                   )

    # TODO: pass in the hash to InputObservation, so it is done once only. (need to refactor the class a bit)
    return (image, image_hash, filename, observation)
|
182 |
+
|
183 |
+
|
184 |
+
|
185 |
+
|
186 |
+
#
|
187 |
+
|
188 |
+
|
189 |
+
|
190 |
+
|
191 |
+
def process_files():
    """
    Callback for the file uploader: rebuild all per-image state.

    Reads the current uploads from `st.session_state.file_uploader_data`, runs
    `process_one_file` on each (which adds the UI elements and processes the
    inputs), and stores the results in the session state. The session-state
    entries are overwritten on every call, not extended.
    """
    uploads = st.session_state.file_uploader_data

    obs_by_hash = {}
    image_by_hash = {}
    hash_order = []
    name_order = []

    for upload in uploads:
        decoded, digest, fname, obs = process_one_file(upload)

        name_order.append(fname)
        hash_order.append(digest)

        # both lookups are keyed by the md5 digest of the file
        obs_by_hash[digest] = obs
        image_by_hash[digest] = decoded

    st.session_state.images = image_by_hash
    st.session_state.files = uploads
    st.session_state.observations = obs_by_hash
    st.session_state.image_hashes = hash_order
    st.session_state.image_filenames = name_order
|
222 |
+
|
223 |
|
224 |
+
|
225 |
+
|
226 |
+
|
227 |
|
228 |
+
|
229 |
+
def _setup_oneoff_inputs() -> None:
    """
    Add the UI input elements for which we have one each.

    That is: the sidebar title, the author-email text box (with inline
    validation), and the multi-file image uploader. The uploader stores its
    value under the session-state key "file_uploader_data" and calls
    `process_files` whenever that value changes.
    """
    sidebar = st.sidebar
    sidebar.title("Input image and data")

    # 1. Input the author email
    default_email = spoof_metadata.get('author_email', "")
    author_email = sidebar.text_input("Author Email", default_email,
                                      key="input_author_email")
    email_rejected = bool(author_email) and not is_valid_email(author_email)
    if email_rejected:
        sidebar.error("Please enter a valid email address.")

    # 2. Image Selector
    # the return value is deliberately ignored: the uploads are read back from
    # the session state (via `key`) inside the on_change callback.
    accepted_types = ["png", 'jpg', 'jpeg', 'webp']
    st.file_uploader(
        "Upload one or more images",
        type=accepted_types,
        accept_multiple_files=True,
        key="file_uploader_data",
        on_change=process_files,
    )
|
250 |
+
|
251 |
+
|
252 |
+
|
253 |
def setup_input(
    viewcontainer: DeltaGenerator=None,
    _allowed_image_types: list=None, ) -> None:
    """
    Set up the input handling for the whale observation guidance tool.

    Only the one-off inputs are created here; the per-file widgets are added
    dynamically by the uploader's callback.

    Args:
        viewcontainer (DeltaGenerator): currently not used by this function.
        _allowed_image_types (list): currently not used by this function.
    """
    # adding the uploader (with its callback) is all that is needed; the rest
    # is generated dynamically.
    _setup_oneoff_inputs()

    # Disabled for now: a sidebar success/warning message driven by
    # check_inputs_are_set(empty_ok=True).
|
268 |
+
|
269 |
+
def setup_input_monolithic(
|
270 |
viewcontainer: DeltaGenerator=None,
|
271 |
_allowed_image_types: list=None, ) -> InputObservation:
|
272 |
"""
|
@@ -44,6 +44,9 @@ class InputObservation:
|
|
44 |
from_input(input):
|
45 |
Creates an observation from another input observation.
|
46 |
"""
|
|
|
|
|
|
|
47 |
def __init__(self, image=None, latitude=None, longitude=None,
|
48 |
author_email=None, date=None, time=None, date_option=None, time_option=None,
|
49 |
uploaded_filename=None):
|
@@ -56,8 +59,13 @@ class InputObservation:
|
|
56 |
self.date_option = date_option
|
57 |
self.time_option = time_option
|
58 |
self.uploaded_filename = uploaded_filename
|
|
|
59 |
self._top_predictions = []
|
60 |
|
|
|
|
|
|
|
|
|
61 |
def set_top_predictions(self, top_predictions:list):
|
62 |
self._top_predictions = top_predictions
|
63 |
|
@@ -66,6 +74,11 @@ class InputObservation:
|
|
66 |
def top_predictions(self):
|
67 |
return self._top_predictions
|
68 |
|
|
|
|
|
|
|
|
|
|
|
69 |
|
70 |
def __str__(self):
|
71 |
return f"Observation: {self.image}, {self.latitude}, {self.longitude}, {self.author_email}, {self.date}, {self.time}, {self.date_option}, {self.time_option}, {self.uploaded_filename}"
|
@@ -88,7 +101,8 @@ class InputObservation:
|
|
88 |
return {
|
89 |
#"image": self.image,
|
90 |
"image_filename": self.uploaded_filename.name if self.uploaded_filename else None,
|
91 |
-
"image_md5":
|
|
|
92 |
"latitude": self.latitude,
|
93 |
"longitude": self.longitude,
|
94 |
"author_email": self.author_email,
|
|
|
44 |
from_input(input):
|
45 |
Creates an observation from another input observation.
|
46 |
"""
|
47 |
+
|
48 |
+
_inst_count = 0
|
49 |
+
|
50 |
def __init__(self, image=None, latitude=None, longitude=None,
|
51 |
author_email=None, date=None, time=None, date_option=None, time_option=None,
|
52 |
uploaded_filename=None):
|
|
|
59 |
self.date_option = date_option
|
60 |
self.time_option = time_option
|
61 |
self.uploaded_filename = uploaded_filename
|
62 |
+
self._image_md5 = None
|
63 |
self._top_predictions = []
|
64 |
|
65 |
+
InputObservation._inst_count += 1
|
66 |
+
self._inst_id = InputObservation._inst_count
|
67 |
+
self.assign_image_md5()
|
68 |
+
|
69 |
def set_top_predictions(self, top_predictions:list):
|
70 |
self._top_predictions = top_predictions
|
71 |
|
|
|
74 |
def top_predictions(self):
|
75 |
return self._top_predictions
|
76 |
|
77 |
+
# add a method to assign the image_md5 only once
|
78 |
+
def assign_image_md5(self):
    """
    Compute and store the image's md5 hex digest, once only.

    Does nothing when `self._image_md5` is already set. With an uploaded file
    the digest covers the file's complete contents; without one, a random md5
    from `generate_random_md5` is used.
    """
    if self._image_md5:
        return

    upload = self.uploaded_filename
    if not upload:
        self._image_md5 = generate_random_md5()
        return

    # A bare read() starts at the current stream position, so a file that was
    # already consumed elsewhere would hash as empty bytes (the same digest
    # for every image). Take the whole buffer instead.
    getvalue = getattr(upload, "getvalue", None)
    if callable(getvalue):
        payload = getvalue()
    else:
        upload.seek(0)
        payload = upload.read()
    self._image_md5 = hashlib.md5(payload).hexdigest()
|
81 |
+
|
82 |
|
83 |
def __str__(self):
|
84 |
return f"Observation: {self.image}, {self.latitude}, {self.longitude}, {self.author_email}, {self.date}, {self.time}, {self.date_option}, {self.time_option}, {self.uploaded_filename}"
|
|
|
101 |
return {
|
102 |
#"image": self.image,
|
103 |
"image_filename": self.uploaded_filename.name if self.uploaded_filename else None,
|
104 |
+
"image_md5": self._image_md5,
|
105 |
+
#"image_md5": hashlib.md5(self.uploaded_filename.read()).hexdigest() if self.uploaded_filename else generate_random_md5(),
|
106 |
"latitude": self.latitude,
|
107 |
"longitude": self.longitude,
|
108 |
"author_email": self.author_email,
|
@@ -97,6 +97,12 @@ if "progress" not in st.session_state:
|
|
97 |
st.sidebar.button("Refresh Progress", on_click=refresh_progress)
|
98 |
|
99 |
|
|
|
|
|
|
|
|
|
|
|
|
|
100 |
|
101 |
def main() -> None:
|
102 |
"""
|
@@ -134,7 +140,8 @@ def main() -> None:
|
|
134 |
refresh_progress()
|
135 |
|
136 |
# create a sidebar, and parse all the input (returned as `observations` object)
|
137 |
-
|
|
|
138 |
|
139 |
|
140 |
if 0:## WIP
|
@@ -250,6 +257,9 @@ def main() -> None:
|
|
250 |
# 6. manual validation done -> enable the upload buttons
|
251 |
#
|
252 |
with tab_inference:
|
|
|
|
|
|
|
253 |
add_classifier_header()
|
254 |
# if we are before data_entry_validated, show the button, disabled.
|
255 |
if not st.session_state.workflow_fsm.is_in_state_or_beyond('data_entry_validated'):
|
|
|
97 |
st.sidebar.button("Refresh Progress", on_click=refresh_progress)
|
98 |
|
99 |
|
100 |
+
def dbg_show_obs_hashes():
    """
    Debug aid: write the number of stored observations, then one markdown
    bullet per observation giving its hash and how many top predictions it
    holds. (Added while chasing apparently lost whale classes.)
    """
    stored = st.session_state.observations
    st.write(f"[D] num observations: {len(stored)}")
    for digest, obs in stored.items():
        n_preds = len(obs.top_predictions)
        st.markdown(f"- [D] observation {digest} has {n_preds} predictions")
|
105 |
+
|
106 |
|
107 |
def main() -> None:
|
108 |
"""
|
|
|
140 |
refresh_progress()
|
141 |
|
142 |
# create a sidebar, and parse all the input (returned as `observations` object)
|
143 |
+
with st.sidebar:
|
144 |
+
setup_input(viewcontainer=st.sidebar)
|
145 |
|
146 |
|
147 |
if 0:## WIP
|
|
|
257 |
# 6. manual validation done -> enable the upload buttons
|
258 |
#
|
259 |
with tab_inference:
|
260 |
+
|
261 |
+
dbg_show_obs_hashes()
|
262 |
+
|
263 |
add_classifier_header()
|
264 |
# if we are before data_entry_validated, show the button, disabled.
|
265 |
if not st.session_state.workflow_fsm.is_in_state_or_beyond('data_entry_validated'):
|