Spaces:

Saving-Willy
/

saving-willy-dev

Sleeping

rmm committed on Jan 29

Commit

fd18838

1 Parent(s): 4d0f7fd

feat: nearly complete input handling with stable state

- main bug was that every interaction with the UI led to the
file_uploader being re-instantiated, and then all the inputs
got re-parsed, the hashes recalculated, and the data lost.

- solution: register an on_change callback on the file_uploader and give it
a key, so that its return value is stored implicitly in the session state
under that key (this behaviour is not well documented)

- on change of the file_uploader state, we dynamically generate
the input elements to supply the metadata. And process them inline.
- TODO: the data is stable in the session_state, but the UI loses the
per-file input elements on the next rerun: the file list hasn't changed,
so the callback doesn't get triggered and nothing redraws them.
- Good: we don't overwrite our loaded data, and the ML/presentation
can continue, but...
- Bad: we don't redraw the elements. -> more caching I suppose.

Files changed (3) hide show

src/input/input_handling.py +199 -0
src/input/input_observation.py +15 -1
src/main.py +11 -1

src/input/input_handling.py CHANGED Viewed

@@ -1,8 +1,11 @@
 import datetime
 import logging
 import streamlit as st
 from streamlit.delta_generator import DeltaGenerator
 import cv2
 import numpy as np
@@ -31,6 +34,47 @@ spoof_metadata = {
 }
 def check_inputs_are_set(empty_ok:bool=False, debug:bool=False) -> bool:
     """
     Checks if all expected inputs have been entered
@@ -65,9 +109,164 @@ def check_inputs_are_set(empty_ok:bool=False, debug:bool=False) -> bool:
     return all([v is not None for v in vals])
 def setup_input(
     viewcontainer: DeltaGenerator=None,
     _allowed_image_types: list=None, ) -> InputObservation:
     """

+from typing import List, Tuple
 import datetime
 import logging
+import hashlib
 import streamlit as st
 from streamlit.delta_generator import DeltaGenerator
+from streamlit.runtime.uploaded_file_manager import UploadedFile
 import cv2
 import numpy as np
 }
 def check_inputs_are_set(empty_ok:bool=False, debug:bool=False) -> bool:
+    return check_inputs_are_set_by_hash(empty_ok=empty_ok, debug=debug)
def check_inputs_are_set_by_hash(empty_ok:bool=False, debug:bool=False) -> bool:
    """
    Checks if all expected inputs have been entered, for every uploaded image.

    Implementation: via the Streamlit session state. For each hash listed in
    `st.session_state.image_hashes`, the widget keys `<stub>_<hash>` are looked
    up; an input counts as "set" when its key exists and its value is not None.

    Args:
        empty_ok (bool): If True, returns True if no inputs are set. Default is False.
        debug (bool): If True, prints and logs the status of each expected input key. Default is False.

    Returns:
        bool: True if all expected input keys are set, False otherwise.
    """
    # `image_hashes` is written by process_files(); it may be absent before the
    # first upload, so treat a missing entry the same as "no images".
    image_hashes = st.session_state.get("image_hashes", [])
    if not image_hashes:
        return empty_ok

    # Only the per-image coordinates are checked for now.
    # TODO: extend with "input_author_email", "input_date", "input_time",
    #       "input_image_selector" once those are required.
    exp_input_key_stubs = ("input_latitude", "input_longitude")

    all_set = True
    for image_hash in image_hashes:
        for stub in exp_input_key_stubs:
            key = f"{stub}_{image_hash}"
            val = st.session_state.get(key)
            if val is None:
                # keep looping (no early return) so debug output covers every key
                all_set = False
            if debug:
                msg = f"{key:15}, {(val is not None):8}, {val}"
                m_logger.debug(msg)
                print(msg)

    return all_set
+def check_inputs_are_set_by_fname(empty_ok:bool=False, debug:bool=False) -> bool:
     """
     Checks if all expected inputs have been entered
     return all([v is not None for v in vals])
def process_one_file(file:UploadedFile) -> Tuple[np.ndarray, str, str, InputObservation]:
    """
    Process a single uploaded image: decode it, draw its metadata widgets in
    the sidebar, and bundle the entered values into an InputObservation.

    Widget keys are suffixed with the MD5 of the file contents, so the values
    for each image are stored under their own session-state entries.

    Args:
        file (UploadedFile): one entry of the file_uploader's return value.

    Returns:
        tuple: (image, image_hash, filename, observation), where `image` is the
            result of cv2.imdecode on the file bytes, `image_hash` is the hex
            MD5 digest of those bytes, `filename` is `file.name`, and
            `observation` is the InputObservation built from the widgets.
    """
    viewcontainer = st.sidebar

    # --- non-UI calculations first ---
    # getvalue() yields the full buffer without depending on (or moving) the
    # read position, so the hash is always that of the complete file.
    _bytes = file.getvalue()
    image_hash = hashlib.md5(_bytes).hexdigest()
    image: np.ndarray = cv2.imdecode(np.frombuffer(_bytes, dtype=np.uint8), cv2.IMREAD_COLOR)
    filename: str = file.name

    # Hand downstream readers a rewound stream.
    file.seek(0)
    image_datetime = get_image_datetime(file)
    m_logger.debug(f"image date extracted as {image_datetime} (from {file})")

    author_email = st.session_state["input_author_email"]

    # --- UI elements, validated in-line ---
    viewcontainer.title(f"Metadata for {filename}")
    ukey = image_hash

    # 3. Latitude Entry Box
    latitude = viewcontainer.text_input(
        "Latitude for " + filename,
        spoof_metadata.get('latitude', ""),
        key=f"input_latitude_{ukey}")
    if latitude and not is_valid_number(latitude):
        viewcontainer.error("Please enter a valid latitude (numerical only).")
        m_logger.error(f"Invalid latitude entered: {latitude}.")

    # 4. Longitude Entry Box
    longitude = viewcontainer.text_input(
        "Longitude for " + filename,
        spoof_metadata.get('longitude', ""),
        key=f"input_longitude_{ukey}")
    if longitude and not is_valid_number(longitude):
        viewcontainer.error("Please enter a valid longitude (numerical only).")
        m_logger.error(f"Invalid longitude entered: {longitude}.")

    # 5. Date/time
    ## first try the image metadata (parsed once), else default to "now"
    capture_dt = None
    if image_datetime is not None:
        try:
            capture_dt = datetime.datetime.strptime(image_datetime, '%Y:%m:%d %H:%M:%S')
        except (TypeError, ValueError):
            # a malformed metadata date should not abort the whole upload
            m_logger.warning(f"Could not parse image date {image_datetime!r}; defaulting to now.")
    if capture_dt is None:
        capture_dt = datetime.datetime.now()
    date_value = capture_dt.date()
    time_value = capture_dt.time()

    ## either way, the user can override manually
    date_option = viewcontainer.date_input("Date for "+filename, value=date_value, key=f"input_date_{ukey}")
    time_option = viewcontainer.time_input("Time for "+filename, time_value, key=f"input_time_{ukey}")

    # InputObservation reads the upload itself to compute its own md5, so it
    # must receive the stream positioned at the start.
    # TODO: pass in the hash to InputObservation, so it is done once only. (need to refactor the class a bit)
    file.seek(0)
    observation = InputObservation(image=file, latitude=latitude, longitude=longitude,
                                author_email=author_email, date=image_datetime, time=None,
                                date_option=date_option, time_option=time_option,
                                uploaded_filename=file,
                                )
    # leave the upload rewound for whoever reads it next from the session state
    file.seek(0)

    return (image, image_hash, filename, observation)
def process_files():
    """
    Callback for the file_uploader: rebuild all per-image state.

    Reads the uploader's current value from `st.session_state.file_uploader_data`,
    runs `process_one_file` on each entry (which also draws that file's input
    widgets), and then stores the collected results in the session state.

    Note: the session-state entries are overwritten, not extended.
    """
    pending_uploads = st.session_state.file_uploader_data

    # each result is (image, image_hash, filename, observation)
    results = [process_one_file(upload) for upload in pending_uploads]

    names_in_order = [fname for (_, _, fname, _) in results]
    hashes_in_order = [digest for (_, digest, _, _) in results]
    obs_by_hash = {digest: obs for (_, digest, _, obs) in results}
    pixels_by_hash = {digest: pixels for (pixels, digest, _, _) in results}

    st.session_state.images = pixels_by_hash
    st.session_state.files = pending_uploads
    st.session_state.observations = obs_by_hash
    st.session_state.image_hashes = hashes_in_order
    st.session_state.image_filenames = names_in_order
def _setup_oneoff_inputs() -> None:
    '''
    Draw the input widgets that exist exactly once (not per image):
    the author email box and the image uploader.

    The uploader is keyed as "file_uploader_data" and wired to `process_files`
    via on_change, so per-image widgets are generated from that callback.
    '''
    sidebar = st.sidebar
    sidebar.title("Input image and data")

    # 1. Author email, validated as soon as something has been typed
    default_email = spoof_metadata.get('author_email', "")
    entered_email = sidebar.text_input("Author Email", default_email, key="input_author_email")
    email_is_bad = bool(entered_email) and not is_valid_email(entered_email)
    if email_is_bad:
        sidebar.error("Please enter a valid email address.")

    # 2. Image selector; its value is read back from the session state by key
    accepted_types = ["png", 'jpg', 'jpeg', 'webp']
    st.file_uploader(
        "Upload one or more images",
        type=accepted_types,
        accept_multiple_files=True,
        key="file_uploader_data",
        on_change=process_files,
    )
 def setup_input(
+    viewcontainer: DeltaGenerator=None,
+    _allowed_image_types: list=None, ) -> None:
+    '''
+    Set up the input handling for the whale observation guidance tool
+    '''
+    _setup_oneoff_inputs()
+    # amazingly we just have to add the uploader and its callback, and the rest is dynamic.
+    # # check if the inputs are set
+    # if check_inputs_are_set(empty_ok=True):
+    #     st.sidebar.success("All inputs are set.")
+    # else:
+    #     st.sidebar.warning("Please fill in all the required inputs.")
+def setup_input_monolithic(
     viewcontainer: DeltaGenerator=None,
     _allowed_image_types: list=None, ) -> InputObservation:
     """

src/input/input_observation.py CHANGED Viewed

@@ -44,6 +44,9 @@ class InputObservation:
         from_input(input):
             Creates an observation from another input observation.
     """
     def __init__(self, image=None, latitude=None, longitude=None,
                  author_email=None, date=None, time=None, date_option=None, time_option=None,
                  uploaded_filename=None):
@@ -56,8 +59,13 @@ class InputObservation:
         self.date_option = date_option
         self.time_option = time_option
         self.uploaded_filename = uploaded_filename
         self._top_predictions = []
     def set_top_predictions(self, top_predictions:list):
         self._top_predictions = top_predictions
@@ -66,6 +74,11 @@ class InputObservation:
     def top_predictions(self):
         return self._top_predictions
     def __str__(self):
         return f"Observation: {self.image}, {self.latitude}, {self.longitude}, {self.author_email}, {self.date}, {self.time}, {self.date_option}, {self.time_option}, {self.uploaded_filename}"
@@ -88,7 +101,8 @@ class InputObservation:
         return {
             #"image": self.image,
             "image_filename": self.uploaded_filename.name if self.uploaded_filename else None,
-            "image_md5": hashlib.md5(self.uploaded_filename.read()).hexdigest() if self.uploaded_filename else generate_random_md5(),
             "latitude": self.latitude,
             "longitude": self.longitude,
             "author_email": self.author_email,

         from_input(input):
             Creates an observation from another input observation.
     """
+    _inst_count = 0
     def __init__(self, image=None, latitude=None, longitude=None,
                  author_email=None, date=None, time=None, date_option=None, time_option=None,
                  uploaded_filename=None):
         self.date_option = date_option
         self.time_option = time_option
         self.uploaded_filename = uploaded_filename
+        self._image_md5 = None
         self._top_predictions = []
+        InputObservation._inst_count += 1
+        self._inst_id = InputObservation._inst_count
+        self.assign_image_md5()
     def set_top_predictions(self, top_predictions:list):
         self._top_predictions = top_predictions
     def top_predictions(self):
         return self._top_predictions
+    # add a method to assign the image_md5 only once
+    def assign_image_md5(self):
+        if not self._image_md5:
+            self._image_md5 = hashlib.md5(self.uploaded_filename.read()).hexdigest() if self.uploaded_filename else generate_random_md5()
     def __str__(self):
         return f"Observation: {self.image}, {self.latitude}, {self.longitude}, {self.author_email}, {self.date}, {self.time}, {self.date_option}, {self.time_option}, {self.uploaded_filename}"
         return {
             #"image": self.image,
             "image_filename": self.uploaded_filename.name if self.uploaded_filename else None,
+            "image_md5": self._image_md5,
+            #"image_md5": hashlib.md5(self.uploaded_filename.read()).hexdigest() if self.uploaded_filename else generate_random_md5(),
             "latitude": self.latitude,
             "longitude": self.longitude,
             "author_email": self.author_email,

src/main.py CHANGED Viewed

@@ -97,6 +97,12 @@ if "progress" not in st.session_state:
         st.sidebar.button("Refresh Progress", on_click=refresh_progress)
 def main() -> None:
     """
@@ -134,7 +140,8 @@ def main() -> None:
     refresh_progress()
     # create a sidebar, and parse all the input (returned as `observations` object)
-    setup_input(viewcontainer=st.sidebar)
     if 0:## WIP
@@ -250,6 +257,9 @@ def main() -> None:
     # 6. manual validation done -> enable the upload buttons
     #
     with tab_inference:
         add_classifier_header()
         # if we are before data_entry_validated, show the button, disabled.
         if not st.session_state.workflow_fsm.is_in_state_or_beyond('data_entry_validated'):

         st.sidebar.button("Refresh Progress", on_click=refresh_progress)
def dbg_show_obs_hashes():
    """
    Debug helper: write the number of stored observations, then one line per
    observation with its hash and how many top predictions it holds.

    Shows zero observations if none have been stored in the session state yet.
    """
    # a debug: we seem to be losing the whale classes?
    observations = st.session_state.get("observations", {})
    st.write(f"[D] num observations: {len(observations)}")
    for image_hash, observation in observations.items():
        st.markdown(f"- [D] observation {image_hash} has {len(observation.top_predictions)} predictions")
 def main() -> None:
     """
     refresh_progress()
     # create a sidebar, and parse all the input (returned as `observations` object)
+    with st.sidebar:
+        setup_input(viewcontainer=st.sidebar)
     if 0:## WIP
     # 6. manual validation done -> enable the upload buttons
     #
     with tab_inference:
+        dbg_show_obs_hashes()
         add_classifier_header()
         # if we are before data_entry_validated, show the button, disabled.
         if not st.session_state.workflow_fsm.is_in_state_or_beyond('data_entry_validated'):