saving-willy-dev / src /input /input_handling.py
rmm
feat: separated file input and metadata input functions
6a02bc4
raw
history blame
20.5 kB
from typing import List, Tuple
import datetime
import logging
import hashlib
import streamlit as st
from streamlit.delta_generator import DeltaGenerator
from streamlit.runtime.uploaded_file_manager import UploadedFile
import cv2
import numpy as np
from input.input_observation import InputObservation
from input.input_validator import get_image_datetime, is_valid_email, is_valid_number
# module-level logger; records below INFO are filtered out at this logger
m_logger = logging.getLogger(__name__)
m_logger.setLevel(logging.INFO)

'''
A module to setup the input handling for the whale observation guidance tool
both the UI elements (setup_input_UI) and the validation functions.
'''

# file extensions offered to the image uploader by default
allowed_image_types = ['jpg', 'jpeg', 'png', 'webp']

# An arbitrary set of defaults so that testing is less painful: the widgets
# in this module are pre-filled from here.
# Ideally some randomisation would be added to these defaults.
spoof_metadata = dict(
    latitude=0.5,
    longitude=44,
    author_email="[email protected]",
    date=None,
    time=None,
)
def _blank_to_none(val):
    # An empty string or an empty list means the widget exists but nothing was
    # entered, so treat it as unset. Everything else passes through untouched;
    # in particular the number 0 is a legitimate value (e.g. on the equator).
    if isinstance(val, (str, list)) and not val:
        return None
    return val


def check_inputs_are_set(empty_ok:bool=False, debug:bool=False) -> bool:
    """
    Checks if all expected inputs have been entered

    Implementation: via the Streamlit session state. For every hash in
    `st.session_state.image_hashes` the keys `input_latitude_<hash>`,
    `input_longitude_<hash>`, `input_date_<hash>` and `input_time_<hash>` are
    looked up; a missing key, an empty string or an empty list counts as unset.
    The one-off `input_author_email` key is checked the same way, but only if
    it is present in the session state (an absent key is not counted against
    the result).

    Args:
        empty_ok (bool): If True, returns True if no inputs are set. Default is False.
        debug (bool): If True, prints and logs the status of each expected input key. Default is False.
    Returns:
        bool: True if all expected input keys are set, False otherwise.
    """
    image_hashes = st.session_state.image_hashes
    if not image_hashes:
        # nothing uploaded: the caller decides whether that is acceptable
        return empty_ok

    exp_input_key_stubs = ["input_latitude", "input_longitude", "input_date", "input_time"]
    vals = []

    def _record(key, val):
        # collect the value and, when requested, report whether it is set
        vals.append(val)
        if debug:
            msg = f"{key:15}, {(val is not None):8}, {val}"
            m_logger.debug(msg)
            print(msg)

    # the author_email is global/one-off - no hash extension.
    # A blank email is normalised to None, exactly like the per-file fields.
    if "input_author_email" in st.session_state:
        _record("input_author_email", _blank_to_none(st.session_state["input_author_email"]))

    for image_hash in image_hashes:
        for stub in exp_input_key_stubs:
            key = f"{stub}_{image_hash}"
            val = st.session_state[key] if key in st.session_state else None
            _record(key, _blank_to_none(val))

    return all(v is not None for v in vals)
def process_one_file(file:UploadedFile, ix:int=0) -> Tuple[np.ndarray, str, str, InputObservation]:
    """
    Decode one uploaded file, render its metadata widgets and build an observation.

    The non-UI work is done first (read the bytes, compute the md5, decode the
    image, extract the image date-time), then the latitude / longitude / date /
    time widgets are added to the metadata container, each keyed by the image
    hash. Invalid latitude/longitude entries are reported in the UI and logged,
    but do not stop processing.

    Args:
        file (UploadedFile): the uploaded file to process.
        ix (int): added to the default latitude so that each file gets a
            different pre-filled value. Default is 0.
    Returns:
        tuple: (image, image_hash, filename, observation), where image is the
            result of cv2.imdecode and image_hash is the md5 hex digest of the
            file's bytes.
    """
    # choose where the widgets go; fall back to the sidebar if no container was registered
    if st.session_state.container_metadata_inputs is not None:
        viewcontainer = st.session_state.container_metadata_inputs
    else:
        viewcontainer = st.sidebar
        msg = "[W] `container_metadata_inputs` is None, using sidebar"
        m_logger.warning(msg) ; print(msg)

    # do all the non-UI calcs first
    ## get the bytes first, then convert into 1) image, 2) md5
    _bytes = file.read()
    image_hash = hashlib.md5(_bytes).hexdigest()
    image: np.ndarray = cv2.imdecode(np.asarray(bytearray(_bytes), dtype=np.uint8), 1)
    filename:str = file.name
    image_datetime = get_image_datetime(file)
    m_logger.debug(f"image date extracted as {image_datetime} (from {file})")

    author_email = st.session_state["input_author_email"]

    # add the UI elements
    viewcontainer.title(f"Metadata for {filename}")
    ukey = image_hash

    # 3. Latitude Entry Box
    latitude = viewcontainer.text_input(
        "Latitude for " + filename,
        spoof_metadata.get('latitude', 0) + ix,
        key=f"input_latitude_{ukey}")
    if latitude and not is_valid_number(latitude):
        viewcontainer.error("Please enter a valid latitude (numerical only).")
        m_logger.error(f"Invalid latitude entered: {latitude}.")

    # 4. Longitude Entry Box
    longitude = viewcontainer.text_input(
        "Longitude for " + filename,
        spoof_metadata.get('longitude', ""),
        key=f"input_longitude_{ukey}")
    if longitude and not is_valid_number(longitude):
        viewcontainer.error("Please enter a valid longitude (numerical only).")
        # report the longitude here (this used to log the latitude by mistake)
        m_logger.error(f"Invalid longitude entered: {longitude}.")

    # 5. Date/time
    ## first from image metadata; otherwise default to the current moment.
    ## The timestamp is resolved once so date and time always belong together.
    if image_datetime is not None:
        default_dt = datetime.datetime.strptime(image_datetime, '%Y:%m:%d %H:%M:%S')
    else:
        default_dt = datetime.datetime.now()
    time_value = default_dt.time()
    date_value = default_dt.date()

    ## either way, the user can adjust the values manually
    date_option = viewcontainer.date_input("Date for "+filename, value=date_value, key=f"input_date_{ukey}")
    time_option = viewcontainer.time_input("Time for "+filename, time_value, key=f"input_time_{ukey}")

    observation = InputObservation(image=file, latitude=latitude, longitude=longitude,
                                   author_email=author_email, date=image_datetime, time=None,
                                   date_option=date_option, time_option=time_option,
                                   uploaded_filename=file,
                                   )

    # TODO: pass in the hash to InputObservation, so it is done once only. (need to refactor the class a bit)
    the_data = (image, image_hash, filename, observation)
    return the_data
def buffer_files():
    """
    Buffer everything from the file uploader that needs no further user input.

    Registered as the `on_change` callback of the file uploader. For each
    uploaded file it stores the decoded image, the hash and the filename in
    the session state (overwriting `images`, `files`, `image_hashes` and
    `image_filenames`).

    A separate function takes care of the per-file metadata widgets: creating
    widgets dynamically inside a callback should be avoided (tl;dr: they
    disappear).

    Note: UploadedFile objects carry a `file_id`; these do not persist between
    sessions and seem to be just random identifiers.
    """
    uploads = st.session_state.file_uploader_data

    names = []
    hashes = []
    decoded_by_hash = {}

    for position, upload in enumerate(uploads):
        name: str = upload.name
        print(f"[D] processing {position}th file {name}. {upload.file_id} {upload.type} {upload.size}")
        # decoding and hashing both need the file's bytes, so they are done together
        decoded, digest = load_file_and_hash(upload)

        names.append(name)
        hashes.append(digest)
        decoded_by_hash[digest] = decoded

    st.session_state.images = decoded_by_hash
    st.session_state.files = uploads
    st.session_state.image_hashes = hashes
    st.session_state.image_filenames = names
def load_file_and_hash(file:UploadedFile) -> Tuple[np.ndarray, str]:
    """
    Read an uploaded file once and derive both the decoded image and its md5.

    Both operations need the file's bytes, so they share a single read.

    Args:
        file (UploadedFile): the uploaded file to read.
    Returns:
        tuple: (image, image_hash) - the result of cv2.imdecode (flag 1) and
            the md5 hex digest of the bytes that were read.
    """
    raw = file.read()
    digest = hashlib.md5(raw).hexdigest()
    # cv2.imdecode wants a flat uint8 array rather than a bytes object
    encoded = np.asarray(bytearray(raw), dtype=np.uint8)
    decoded: np.ndarray = cv2.imdecode(encoded, 1)
    return decoded, digest
def process_files():
    """
    Process every file currently held by the file uploader.

    Each file goes through `process_one_file`, which adds its UI elements and
    returns the image, hash, filename and observation. The collected results
    are then written to the session state.

    Note: the session state entries are overwritten here, not extended.
    """
    uploads = st.session_state.file_uploader_data

    per_hash_observation = {}
    per_hash_image = {}
    hashes = []
    names = []

    for position, upload in enumerate(uploads):
        print(f"[D] processing file {upload.name}. {upload.file_id} {upload.type} {upload.size}")
        decoded, digest, name, obs = process_one_file(upload, position)

        names.append(name)
        hashes.append(digest)
        per_hash_observation[digest] = obs
        per_hash_image[digest] = decoded

    st.session_state.images = per_hash_image
    st.session_state.files = uploads
    st.session_state.observations = per_hash_observation
    st.session_state.image_hashes = hashes
    st.session_state.image_filenames = names
def metadata_inputs_one_file(file:UploadedFile, ukey:str, dbg_ix:int=0) -> InputObservation:
    """
    Render the metadata widgets for one uploaded file and build its observation.

    The widgets (latitude, longitude, date, time) are placed in an expander
    inside the metadata container, and each widget key is suffixed with `ukey`.
    Invalid latitude/longitude entries are reported in the UI and logged, but
    do not stop processing.

    Args:
        file (UploadedFile): the uploaded file the metadata belongs to.
        ukey (str): unique suffix for the widget keys.
        dbg_ix (int): a hack to have different data in each input group
            (added to the default latitude), for checking persistence.
            Default is 0.
    Returns:
        InputObservation: built from the file and the current widget values.
    """
    # choose where the widgets go; fall back to the sidebar if no container was registered
    if st.session_state.container_metadata_inputs is not None:
        _viewcontainer = st.session_state.container_metadata_inputs
    else:
        _viewcontainer = st.sidebar
        print("[W] `container_metadata_inputs` is None, using sidebar")

    author_email = st.session_state["input_author_email"]
    filename = file.name
    image_datetime = get_image_datetime(file)

    # add the UI elements
    viewcontainer = _viewcontainer.expander(f"Metadata for {file.name}", expanded=True)

    # TODO: use session state so any changes are persisted within session -- currently I think
    # we are going to take the defaults over and over again -- if the user adjusts coords, or date, it will get lost
    # - it is a bit complicated, if no values change, they persist (the widget definition: params, name, key, etc)
    #   even if the code is re-run. but if the value changes, it is lost.

    # 3. Latitude Entry Box
    latitude = viewcontainer.text_input(
        "Latitude for " + filename,
        spoof_metadata.get('latitude', 0) + dbg_ix,
        key=f"input_latitude_{ukey}")
    if latitude and not is_valid_number(latitude):
        viewcontainer.error("Please enter a valid latitude (numerical only).")
        m_logger.error(f"Invalid latitude entered: {latitude}.")

    # 4. Longitude Entry Box
    longitude = viewcontainer.text_input(
        "Longitude for " + filename,
        spoof_metadata.get('longitude', ""),
        key=f"input_longitude_{ukey}")
    if longitude and not is_valid_number(longitude):
        viewcontainer.error("Please enter a valid longitude (numerical only).")
        # report the longitude here (this used to log the latitude by mistake)
        m_logger.error(f"Invalid longitude entered: {longitude}.")

    # 5. Date/time
    ## first from image metadata; otherwise default to the current moment.
    ## The timestamp is resolved once so date and time always belong together.
    if image_datetime is not None:
        default_dt = datetime.datetime.strptime(image_datetime, '%Y:%m:%d %H:%M:%S')
    else:
        default_dt = datetime.datetime.now()
    time_value = default_dt.time()
    date_value = default_dt.date()

    ## either way, the user can adjust the values manually
    date_option = viewcontainer.date_input("Date for "+filename, value=date_value, key=f"input_date_{ukey}")
    time_option = viewcontainer.time_input("Time for "+filename, time_value, key=f"input_time_{ukey}")

    observation = InputObservation(image=file, latitude=latitude, longitude=longitude,
                                   author_email=author_email, date=image_datetime, time=None,
                                   date_option=date_option, time_option=time_option,
                                   uploaded_filename=file,
                                   )

    # TODO: pass in the hash to InputObservation, so it is done once only. (need to refactor the class a bit)
    return observation
def _setup_dynamic_inputs() -> None:
    """
    Add the per-file metadata widgets for every buffered file.

    Reads the files and their hashes from the session state, pairs them up by
    position, and delegates the widgets and the observation building to
    `metadata_inputs_one_file`. At the end of the cycle one observation per
    file is stored in `st.session_state.observations`, keyed by image hash.
    """
    buffered_files = st.session_state.files
    buffered_hashes = st.session_state.image_hashes

    by_hash = {}
    for position, upload in enumerate(buffered_files):
        # files and hashes are matched by their index in the two lists
        image_hash = buffered_hashes[position]
        by_hash[image_hash] = metadata_inputs_one_file(upload, image_hash, position)

    st.session_state.observations = by_hash
def _setup_oneoff_inputs() -> None:
    '''
    Add the UI input elements of which there is exactly one each:
    the page title, the author email box and the image uploader.

    The email box and the uploader are placed in the container stored in
    `st.session_state.container_file_uploader`, which keeps the layout order
    consistent with the dynamically added elements.
    '''
    st.title("Input image and data")

    uploader_area = st.session_state.container_file_uploader

    with uploader_area:
        # 1. Input the author email
        entered_email = st.text_input(
            "Author Email",
            spoof_metadata.get('author_email', ""),
            key="input_author_email",
        )
        email_is_bad = bool(entered_email) and not is_valid_email(entered_email)
        if email_is_bad:
            st.error("Please enter a valid email address.")

        # 2. Image Selector; buffer_files runs whenever the selection changes
        st.file_uploader(
            "Upload one or more images",
            type=["png", 'jpg', 'jpeg', 'webp'],
            accept_multiple_files=True,
            key="file_uploader_data",
            on_change=buffer_files,
        )

        # debug aid: list what the uploader currently holds
        for upload in st.session_state.file_uploader_data:
            print(f"[DD] rechecking file {upload.name}. {upload.file_id} {upload.type} {upload.size}")
def setup_input(
    viewcontainer: DeltaGenerator=None,
    _allowed_image_types: list=None, ) -> None:
    '''
    Set up the input handling for the whale observation guidance tool

    Two stages: the one-off widgets (which include the file uploader and its
    callback), then the per-file widgets built from whatever is buffered in
    the session state. Adding just the uploader and its callback is not enough
    on its own, hence the second stage.

    Neither `viewcontainer` nor `_allowed_image_types` is used by this function.
    '''
    # stage 1: elements that exist exactly once
    _setup_oneoff_inputs()

    # stage 2: dynamic UI input elements, based on the data buffered in session_state
    _setup_dynamic_inputs()
def setup_input_monolithic(
    viewcontainer: DeltaGenerator=None,
    _allowed_image_types: list=None, ) -> InputObservation:
    """
    Sets up the input interface for uploading images and entering metadata.

    It provides input fields for an image upload, lat/lon, author email, and date-time.
    In the ideal case, the image metadata will be used to populate the datetime.
    All widgets are added to `viewcontainer`, and the uploader accepts the
    file types given in `_allowed_image_types`.

    The results are written to the session state: `images` and `observations`
    (both keyed by image md5), `files`, `image_hashes` and `image_filenames`.

    NOTE(review): the return annotation says InputObservation, but no return
    statement is visible in this function - confirm and align the annotation.

    Parameters:
        viewcontainer (DeltaGenerator, optional): The Streamlit container to use for the input interface. Defaults to st.sidebar.
        _allowed_image_types (list, optional): List of allowed image file types for upload. Defaults to allowed_image_types.
    """
    if viewcontainer is None:
        viewcontainer = st.sidebar

    if _allowed_image_types is None:
        _allowed_image_types = allowed_image_types

    viewcontainer.title("Input image and data")

    # 1. Input the author email
    author_email = viewcontainer.text_input("Author Email", spoof_metadata.get('author_email', ""))
    if author_email and not is_valid_email(author_email):
        viewcontainer.error("Please enter a valid email address.")

    # 2. Image Selector (honours the caller-supplied list of file types)
    uploaded_files = viewcontainer.file_uploader("Upload an image", type=_allowed_image_types, accept_multiple_files=True)

    observations = {}
    images = {}
    image_hashes = []
    filenames = []

    if uploaded_files is not None:
        for file in uploaded_files:
            viewcontainer.title(f"Metadata for {file.name}")

            # load image using cv2 format, so it is compatible with the ML models
            file_bytes = np.asarray(bytearray(file.read()), dtype=np.uint8)
            filename = file.name
            filenames.append(filename)
            image = cv2.imdecode(file_bytes, 1)

            # Extract image date-time
            image_datetime = get_image_datetime(file)
            m_logger.debug(f"image date extracted as {image_datetime} (from {file})")

            # 3. Latitude Entry Box
            latitude = viewcontainer.text_input(
                "Latitude for "+filename,
                spoof_metadata.get('latitude', ""),
                key=f"input_latitude_{filename}")
            if latitude and not is_valid_number(latitude):
                viewcontainer.error("Please enter a valid latitude (numerical only).")
                m_logger.error(f"Invalid latitude entered: {latitude}.")

            # 4. Longitude Entry Box
            longitude = viewcontainer.text_input(
                "Longitude for "+filename,
                spoof_metadata.get('longitude', ""),
                key=f"input_longitude_{filename}")
            if longitude and not is_valid_number(longitude):
                viewcontainer.error("Please enter a valid longitude (numerical only).")
                # report the longitude here (this used to log the latitude by mistake)
                m_logger.error(f"Invalid longitude entered: {longitude}.")

            # 5. Date/time
            ## first from image metadata; otherwise default to the current moment.
            ## The timestamp is resolved once so date and time always belong together.
            if image_datetime is not None:
                default_dt = datetime.datetime.strptime(image_datetime, '%Y:%m:%d %H:%M:%S')
            else:
                default_dt = datetime.datetime.now()
            time_value = default_dt.time()
            date_value = default_dt.date()

            ## either way, the user can adjust the values manually.
            ## These go into the chosen container, like every other widget here.
            date_option = viewcontainer.date_input("Date for "+filename, value=date_value)
            time_option = viewcontainer.time_input("Time for "+filename, time_value)

            observation = InputObservation(image=file, latitude=latitude, longitude=longitude,
                                           author_email=author_email, date=image_datetime, time=None,
                                           date_option=date_option, time_option=time_option)
            # the hash is taken from the observation's own dict representation
            image_hash = observation.to_dict()["image_md5"]
            observations[image_hash] = observation
            images[image_hash] = image
            image_hashes.append(image_hash)

    st.session_state.images = images
    st.session_state.files = uploaded_files
    st.session_state.observations = observations
    st.session_state.image_hashes = image_hashes
    st.session_state.image_filenames = filenames