saving-willy-dev / src /input /input_handling.py
rmm
chore: cleanup input_handling - removing unused functions
cd85869
raw
history blame
12.1 kB
from typing import List, Tuple
import datetime
import logging
import hashlib
import streamlit as st
from streamlit.delta_generator import DeltaGenerator
from streamlit.runtime.uploaded_file_manager import UploadedFile
import cv2
import numpy as np
from input.input_observation import InputObservation
from input.input_validator import get_image_datetime, is_valid_email, is_valid_number
m_logger = logging.getLogger(__name__)
m_logger.setLevel(logging.INFO)
'''
A module to setup the input handling for the whale observation guidance tool
both the UI elements (setup_input_UI) and the validation functions.
'''
allowed_image_types = ['jpg', 'jpeg', 'png', 'webp']
# an arbitrary set of defaults so testing is less painful...
# ideally we add in some randomization to the defaults
spoof_metadata = {
"latitude": 0.5,
"longitude": 44,
"author_email": "[email protected]",
"date": None,
"time": None,
}
def check_inputs_are_set(empty_ok:bool=False, debug:bool=False) -> bool:
"""
Checks if all expected inputs have been entered
Implementation: via the Streamlit session state.
Args:
empty_ok (bool): If True, returns True if no inputs are set. Default is False.
debug (bool): If True, prints and logs the status of each expected input key. Default is False.
Returns:
bool: True if all expected input keys are set, False otherwise.
"""
image_hashes = st.session_state.image_hashes
if len(image_hashes) == 0:
return empty_ok
exp_input_key_stubs = ["input_latitude", "input_longitude", "input_date", "input_time"]
#exp_input_key_stubs = ["input_latitude", "input_longitude", "input_author_email", "input_date", "input_time",
vals = []
# the author_email is global/one-off - no hash extension.
if "input_author_email" in st.session_state:
val = st.session_state["input_author_email"]
vals.append(val)
if debug:
msg = f"{'input_author_email':15}, {(val is not None):8}, {val}"
m_logger.debug(msg)
print(msg)
for image_hash in image_hashes:
for stub in exp_input_key_stubs:
key = f"{stub}_{image_hash}"
val = None
if key in st.session_state:
val = st.session_state[key]
# handle cases where it is defined but empty
# if val is a string and empty, set to None
if isinstance(val, str) and not val:
val = None
# if val is a list and empty, set to None (not sure what UI elements would return a list?)
if isinstance(val, list) and not val:
val = None
# number 0 is ok - possibly. could be on the equator, e.g.
vals.append(val)
if debug:
msg = f"{key:15}, {(val is not None):8}, {val}"
m_logger.debug(msg)
print(msg)
return all([v is not None for v in vals])
def buffer_files():
# buffer info from the file_uploader that doesn't require further user input
# - the image, the hash, the filename
# a separate function takes care of per-file user inputs for metadata
# - this is necessary because dynamically producing more widgets should be
# avoided inside callbacks (tl;dr: they dissapear)
# - note that the UploadedFile objects have file_ids, which are unique to each file
# - these file_ids are not persistent between sessions, seem to just be random identifiers.
# get files from state
uploaded_files = st.session_state.file_uploader_data
filenames = []
images = {}
image_hashes = []
for ix, file in enumerate(uploaded_files):
filename:str = file.name
print(f"[D] processing {ix}th file {filename}. {file.file_id} {file.type} {file.size}")
# image to np and hash both require reading the file so do together
image, image_hash = load_file_and_hash(file)
filenames.append(filename)
image_hashes.append(image_hash)
images[image_hash] = image
st.session_state.images = images
st.session_state.files = uploaded_files
st.session_state.image_hashes = image_hashes
st.session_state.image_filenames = filenames
def load_file_and_hash(file:UploadedFile) -> Tuple[np.ndarray, str]:
# two operations that require reading the file done together for efficiency
# load the file, compute the hash, return the image and hash
_bytes = file.read()
image_hash = hashlib.md5(_bytes).hexdigest()
image: np.ndarray = cv2.imdecode(np.asarray(bytearray(_bytes), dtype=np.uint8), 1)
return (image, image_hash)
def metadata_inputs_one_file(file:UploadedFile, image_hash:str, dbg_ix:int=0) -> InputObservation:
# dbg_ix is a hack to have different data in each input group, checking persistence
if st.session_state.container_metadata_inputs is not None:
_viewcontainer = st.session_state.container_metadata_inputs
else:
_viewcontainer = st.sidebar
print(f"[W] `container_metadata_inputs` is None, using sidebar")
author_email = st.session_state["input_author_email"]
filename = file.name
image_datetime = get_image_datetime(file)
image = st.session_state.images.get(image_hash, None)
# add the UI elements
#viewcontainer.title(f"Metadata for {filename}")
viewcontainer = _viewcontainer.expander(f"Metadata for {file.name}", expanded=True)
# TODO: use session state so any changes are persisted within session -- currently I think
# we are going to take the defaults over and over again -- if the user adjusts coords, or date, it will get lost
# - it is a bit complicated, if no values change, they persist (the widget definition: params, name, key, etc)
# even if the code is re-run. but if the value changes, it is lost.
# 3. Latitude Entry Box
latitude = viewcontainer.text_input(
"Latitude for " + filename,
spoof_metadata.get('latitude', 0) + dbg_ix,
key=f"input_latitude_{image_hash}")
if latitude and not is_valid_number(latitude):
viewcontainer.error("Please enter a valid latitude (numerical only).")
m_logger.error(f"Invalid latitude entered: {latitude}.")
# 4. Longitude Entry Box
longitude = viewcontainer.text_input(
"Longitude for " + filename,
spoof_metadata.get('longitude', ""),
key=f"input_longitude_{image_hash}")
if longitude and not is_valid_number(longitude):
viewcontainer.error("Please enter a valid longitude (numerical only).")
m_logger.error(f"Invalid latitude entered: {latitude}.")
# 5. Date/time
## first from image metadata
if image_datetime is not None:
time_value = datetime.datetime.strptime(image_datetime, '%Y:%m:%d %H:%M:%S').time()
date_value = datetime.datetime.strptime(image_datetime, '%Y:%m:%d %H:%M:%S').date()
else:
time_value = datetime.datetime.now().time() # Default to current time
date_value = datetime.datetime.now().date()
## if not, give user the option to enter manually
date_option = viewcontainer.date_input("Date for "+filename, value=date_value, key=f"input_date_{image_hash}")
time_option = viewcontainer.time_input("Time for "+filename, time_value, key=f"input_time_{image_hash}")
observation = InputObservation(image=image, latitude=latitude, longitude=longitude,
author_email=author_email, date=image_datetime, time=None,
date_option=date_option, time_option=time_option,
uploaded_file=file, image_md5=image_hash
)
# TODO: pass in the hash to InputObservation, so it is done once only. (need to refactor the class a bit)
return observation
def _setup_dynamic_inputs() -> None:
# for each file uploaded,
# - add the UI elements for the metadata
# - validate the data
# end of cycle should have observation objects set for each file.
# - and these go into session state
# load the files from the session state
uploaded_files = st.session_state.files
hashes = st.session_state.image_hashes
#images = st.session_state.images
observations = {}
for ix, file in enumerate(uploaded_files):
hash = hashes[ix]
observation = metadata_inputs_one_file(file, hash, ix)
old_obs = st.session_state.observations.get(hash, None)
if old_obs is not None:
if old_obs == observation:
m_logger.debug(f"[D] {ix}th observation is the same as before. retaining")
observations[hash] = old_obs
else:
m_logger.debug(f"[D] {ix}th observation is different from before. updating")
observations[hash] = observation
observation.show_diff(old_obs)
else:
m_logger.debug(f"[D] {ix}th observation is new (image_hash not seen before). Storing")
observations[hash] = observation
st.session_state.observations = observations
def _setup_oneoff_inputs() -> None:
'''
Add the UI input elements for which we have one each
'''
st.title("Input image and data")
# setup containers for consistent layout order with dynamic elements
#container_file_uploader = st.container(border=False, key="container_file_uploader")
container_file_uploader = st.session_state.container_file_uploader
# - a container for the dynamic input elements (this one matters)
#if "container_per_file_input_elems" not in st.session_state:
# if st.session_state.container_per_file_input_elems is None:
# #st.session_state.container_per_file_input_elems = None
# c = st.container(border=True, key="container_per_file_input_elems")
# with c:
# st.write("No files uploaded yet.")
# print(f"[D] initialised the container..... {id(c)} | {c=}")
# st.session_state.container_per_file_input_elems = c
# else:
# print(f"[D] already present, don't redo... {id(st.session_state.container_per_file_input_elems)} | {st.session_state.container_per_file_input_elems=}")
with container_file_uploader:
# 1. Input the author email
author_email = st.text_input("Author Email", spoof_metadata.get('author_email', ""),
key="input_author_email")
if author_email and not is_valid_email(author_email):
st.error("Please enter a valid email address.")
# 2. Image Selector
st.file_uploader("Upload one or more images", type=["png", 'jpg', 'jpeg', 'webp'],
accept_multiple_files=True,
key="file_uploader_data",
on_change=buffer_files)
if 1:
uploaded_files = st.session_state.file_uploader_data
for ix, file in enumerate(uploaded_files):
print(f"[DD] rechecking file {file.name}. {file.file_id} {file.type} {file.size}")
pass
def setup_input() -> None:
'''
Set up the user input handling (files and metadata)
It provides input fields for an image upload, and author email.
Then for each uploaded image,
- it provides input fields for lat/lon, date-time.
- In the ideal case, the image metadata will be used to populate location and datetime.
Data is stored in the Streamlit session state for downstream processing,
nothing is returned
'''
# configure the author email and file_uploader (with callback to buffer files)
_setup_oneoff_inputs()
# setup dynamic UI input elements, based on the data that is buffered in session_state
_setup_dynamic_inputs()