Spaces:
Sleeping
Sleeping
from typing import List, Tuple | |
import datetime | |
import logging | |
import hashlib | |
import streamlit as st | |
from streamlit.delta_generator import DeltaGenerator | |
from streamlit.runtime.uploaded_file_manager import UploadedFile | |
import cv2 | |
import numpy as np | |
from input.input_observation import InputObservation | |
from input.input_validator import get_image_datetime, is_valid_email, is_valid_number | |
# Module-level logger; INFO and above are emitted.
m_logger = logging.getLogger(__name__)
m_logger.setLevel(logging.INFO)

"""
A module to setup the input handling for the whale observation guidance tool
both the UI elements (setup_input_UI) and the validation functions.
"""

# File extensions accepted by the image uploader.
allowed_image_types = ["jpg", "jpeg", "png", "webp"]

# An arbitrary set of defaults so testing is less painful.
# Ideally some randomization would be added to these defaults.
spoof_metadata = dict(
    latitude=0.5,
    longitude=44,
    author_email="[email protected]",
    date=None,
    time=None,
)
def check_inputs_are_set(empty_ok: bool = False, debug: bool = False) -> bool:
    """
    Report whether every expected input widget currently holds a value.

    The check is made against the Streamlit session state. The author email
    is a single global entry (`input_author_email`); latitude, longitude,
    date and time are looked up once per image, using keys of the form
    `<stub>_<image_hash>`.

    Args:
        empty_ok (bool): Value returned when no image hashes are registered.
            Default is False.
        debug (bool): If True, prints and logs the status of each expected
            input key. Default is False.

    Returns:
        bool: True if all collected values are not None, False otherwise.
    """
    state = st.session_state
    hashes = state.image_hashes
    if len(hashes) == 0:
        return empty_ok

    def _report(name, value) -> None:
        # Emit one status line per key, only when debugging was requested.
        if debug:
            msg = f"{name:15}, {(value is not None):8}, {value}"
            m_logger.debug(msg)
            print(msg)

    collected = []

    # The author email is global/one-off, so its key has no hash suffix.
    # It is only considered when the key is present in the session state.
    if "input_author_email" in state:
        email = state["input_author_email"]
        collected.append(email)
        _report("input_author_email", email)

    per_file_stubs = ("input_latitude", "input_longitude", "input_date", "input_time")
    for image_hash in hashes:
        for stub in per_file_stubs:
            key = f"{stub}_{image_hash}"
            value = state[key] if key in state else None
            # A defined-but-empty string or list counts as "not set".
            # The number 0 is deliberately accepted (e.g. on the equator).
            if isinstance(value, (str, list)) and not value:
                value = None
            collected.append(value)
            _report(key, value)

    return all(v is not None for v in collected)
def process_one_file(file: UploadedFile, ix: int = 0) -> Tuple[np.ndarray, str, str, InputObservation]:
    """
    Decode one uploaded image, render its metadata widgets and build an observation.

    The non-UI work (reading the bytes, hashing, decoding, extracting the image
    timestamp) is done first; the widgets are then added to the container held
    in `st.session_state.container_metadata_inputs`, or to the sidebar when
    that entry is None.

    Args:
        file (UploadedFile): The uploaded image file.
        ix (int): Offset added to the default latitude so that each file gets
            a different default value. Default is 0.

    Returns:
        Tuple[np.ndarray, str, str, InputObservation]: the decoded image, the
        md5 hex digest of the file bytes, the filename, and the observation
        built from the widget values.
    """
    if st.session_state.container_metadata_inputs is not None:
        viewcontainer = st.session_state.container_metadata_inputs
    else:
        viewcontainer = st.sidebar
        msg = "[W] `container_metadata_inputs` is None, using sidebar"
        m_logger.warning(msg)
        print(msg)

    # Non-UI calculations first: read the bytes once, then derive both the
    # md5 hash and the decoded image from them.
    _bytes = file.read()
    image_hash = hashlib.md5(_bytes).hexdigest()
    image: np.ndarray = cv2.imdecode(np.asarray(bytearray(_bytes), dtype=np.uint8), 1)
    filename: str = file.name
    image_datetime = get_image_datetime(file)
    m_logger.debug(f"image date extracted as {image_datetime} (from {file})")

    author_email = st.session_state["input_author_email"]

    # Add the UI elements; the image hash makes the widget keys unique per file.
    viewcontainer.title(f"Metadata for {filename}")
    ukey = image_hash

    # 3. Latitude Entry Box
    latitude = viewcontainer.text_input(
        "Latitude for " + filename,
        spoof_metadata.get('latitude', 0) + ix,
        key=f"input_latitude_{ukey}")
    if latitude and not is_valid_number(latitude):
        viewcontainer.error("Please enter a valid latitude (numerical only).")
        m_logger.error(f"Invalid latitude entered: {latitude}.")

    # 4. Longitude Entry Box
    longitude = viewcontainer.text_input(
        "Longitude for " + filename,
        spoof_metadata.get('longitude', ""),
        key=f"input_longitude_{ukey}")
    if longitude and not is_valid_number(longitude):
        viewcontainer.error("Please enter a valid longitude (numerical only).")
        # Report the longitude here (previously this logged the latitude).
        m_logger.error(f"Invalid longitude entered: {longitude}.")

    # 5. Date/time: prefer the timestamp from the image metadata, otherwise
    # default to the current moment. Parse/sample once so that the date and
    # the time always come from the same instant.
    if image_datetime is not None:
        initial_datetime = datetime.datetime.strptime(image_datetime, '%Y:%m:%d %H:%M:%S')
    else:
        initial_datetime = datetime.datetime.now()
    date_value = initial_datetime.date()
    time_value = initial_datetime.time()

    # Either way, the user can adjust the values manually.
    date_option = viewcontainer.date_input("Date for " + filename, value=date_value, key=f"input_date_{ukey}")
    time_option = viewcontainer.time_input("Time for " + filename, time_value, key=f"input_time_{ukey}")

    observation = InputObservation(image=file, latitude=latitude, longitude=longitude,
                                   author_email=author_email, date=image_datetime, time=None,
                                   date_option=date_option, time_option=time_option,
                                   uploaded_filename=file,
                                   )

    # TODO: pass in the hash to InputObservation, so it is done once only.
    # (need to refactor the class a bit)
    return (image, image_hash, filename, observation)
def buffer_files():
    """
    Buffer the uploader's files into the session state (file_uploader callback).

    Only the information that needs no further user input is gathered here:
    the decoded image, its hash and its filename. The per-file metadata
    widgets are created by a separate function, because dynamically producing
    widgets inside a callback should be avoided (they disappear).

    Files whose content hash was already seen in this batch are skipped: the
    hash is used to build per-file widget keys elsewhere in this module, so a
    repeated hash would produce repeated widget keys.

    Note: UploadedFile objects carry a `file_id`; these appear to be random
    identifiers and are not persistent between sessions.

    Writes to `st.session_state`:
        images (dict): decoded image per hash.
        files (list): the uploaded files kept, in the same order as
            `image_hashes`.
        image_hashes (list): md5 hex digests, one per kept file.
        image_filenames (list): filenames, one per kept file.
    """
    uploaded_files = st.session_state.file_uploader_data

    filenames = []
    images = {}
    image_hashes = []
    kept_files = []

    for ix, file in enumerate(uploaded_files):
        filename: str = file.name
        print(f"[D] processing {ix}th file {filename}. {file.file_id} {file.type} {file.size}")
        # Decoding and hashing both need the file bytes, so they are done together.
        image, image_hash = load_file_and_hash(file)

        if image_hash in images:
            # Same bytes uploaded twice (possibly under another name): keep
            # the first occurrence only so that hashes stay unique.
            m_logger.warning(
                f"Skipping {filename}: content is identical to an already uploaded file (md5 {image_hash})."
            )
            continue

        kept_files.append(file)
        filenames.append(filename)
        image_hashes.append(image_hash)
        images[image_hash] = image

    # Overwrite (not extend) the buffered state; all lists stay index-aligned.
    st.session_state.images = images
    st.session_state.files = kept_files
    st.session_state.image_hashes = image_hashes
    st.session_state.image_filenames = filenames
def load_file_and_hash(file: UploadedFile) -> Tuple[np.ndarray, str]:
    """
    Decode an uploaded image and compute the md5 of its bytes in one pass.

    Both operations need the complete file content, so they are done together.
    The content is taken with `getvalue()` rather than `read()`, so the
    result does not depend on the current stream position and the stream is
    not left at end-of-file for code that receives the same file object later.

    Args:
        file (UploadedFile): The uploaded image file.

    Returns:
        Tuple[np.ndarray, str]: the image decoded by OpenCV as a colour image,
        and the md5 hex digest of the file bytes. The image is None when
        OpenCV cannot decode the data.
    """
    raw_bytes = file.getvalue()
    image_hash = hashlib.md5(raw_bytes).hexdigest()
    # View the bytes as a uint8 buffer without copying them first.
    encoded = np.frombuffer(raw_bytes, dtype=np.uint8)
    image: np.ndarray = cv2.imdecode(encoded, cv2.IMREAD_COLOR)
    if image is None:
        m_logger.warning(f"Could not decode {file.name} as an image.")
    return (image, image_hash)
def process_files():
    """
    Process every file currently held by the uploader widget.

    For each file, `process_one_file` adds the UI elements and returns the
    decoded image, hash, filename and observation. The collected results
    overwrite (rather than extend) the corresponding session state entries.
    """
    current_uploads = st.session_state.file_uploader_data

    obs_by_hash = {}
    img_by_hash = {}
    hash_list = []
    name_list = []

    for position, uploaded in enumerate(current_uploads):
        print(f"[D] processing file {uploaded.name}. {uploaded.file_id} {uploaded.type} {uploaded.size}")
        result = process_one_file(uploaded, position)
        decoded, digest, fname, obs = result

        name_list.append(fname)
        hash_list.append(digest)
        obs_by_hash[digest] = obs
        img_by_hash[digest] = decoded

    # Publish everything in one place once all files were handled.
    st.session_state.images = img_by_hash
    st.session_state.files = current_uploads
    st.session_state.observations = obs_by_hash
    st.session_state.image_hashes = hash_list
    st.session_state.image_filenames = name_list
def metadata_inputs_one_file(file: UploadedFile, ukey: str, dbg_ix: int = 0) -> InputObservation:
    """
    Render the metadata widgets for one uploaded file and build its observation.

    The widgets are placed in an expander inside the container held in
    `st.session_state.container_metadata_inputs`, or inside the sidebar when
    that entry is None.

    Args:
        file (UploadedFile): The uploaded image file.
        ukey (str): Suffix that makes the widget keys unique for this file
            (the caller in this module passes the image hash).
        dbg_ix (int): A hack to have different data in each input group, to
            check persistence: it is added to the default latitude.
            Default is 0.

    Returns:
        InputObservation: built from the file, the author email found in the
        session state and the current widget values.
    """
    if st.session_state.container_metadata_inputs is not None:
        _viewcontainer = st.session_state.container_metadata_inputs
    else:
        _viewcontainer = st.sidebar
        print("[W] `container_metadata_inputs` is None, using sidebar")

    author_email = st.session_state["input_author_email"]
    filename = file.name
    image_datetime = get_image_datetime(file)

    # Add the UI elements, grouped in one expander per file.
    viewcontainer = _viewcontainer.expander(f"Metadata for {file.name}", expanded=True)

    # TODO: use session state so any changes are persisted within session.
    #  Currently the defaults are probably taken over and over again, so if
    #  the user adjusts coords or date, the change may get lost.
    #  It is a bit complicated: if no values change, they persist (the widget
    #  definition: params, name, key, etc) even if the code is re-run, but if
    #  the value changes, it is lost.

    # 3. Latitude Entry Box
    latitude = viewcontainer.text_input(
        "Latitude for " + filename,
        spoof_metadata.get('latitude', 0) + dbg_ix,
        key=f"input_latitude_{ukey}")
    if latitude and not is_valid_number(latitude):
        viewcontainer.error("Please enter a valid latitude (numerical only).")
        m_logger.error(f"Invalid latitude entered: {latitude}.")

    # 4. Longitude Entry Box
    longitude = viewcontainer.text_input(
        "Longitude for " + filename,
        spoof_metadata.get('longitude', ""),
        key=f"input_longitude_{ukey}")
    if longitude and not is_valid_number(longitude):
        viewcontainer.error("Please enter a valid longitude (numerical only).")
        # Report the longitude here (previously this logged the latitude).
        m_logger.error(f"Invalid longitude entered: {longitude}.")

    # 5. Date/time: prefer the timestamp from the image metadata, otherwise
    # default to the current moment. Parse/sample once so that the date and
    # the time always come from the same instant.
    if image_datetime is not None:
        initial_datetime = datetime.datetime.strptime(image_datetime, '%Y:%m:%d %H:%M:%S')
    else:
        initial_datetime = datetime.datetime.now()
    date_value = initial_datetime.date()
    time_value = initial_datetime.time()

    # Either way, the user can adjust the values manually.
    date_option = viewcontainer.date_input("Date for " + filename, value=date_value, key=f"input_date_{ukey}")
    time_option = viewcontainer.time_input("Time for " + filename, time_value, key=f"input_time_{ukey}")

    observation = InputObservation(image=file, latitude=latitude, longitude=longitude,
                                   author_email=author_email, date=image_datetime, time=None,
                                   date_option=date_option, time_option=time_option,
                                   uploaded_filename=file,
                                   )

    # TODO: pass in the hash to InputObservation, so it is done once only.
    # (need to refactor the class a bit)
    return observation
def _setup_dynamic_inputs() -> None:
    """
    Create the per-file metadata widgets for the files buffered in session state.

    For each buffered file the metadata UI elements are added via
    `metadata_inputs_one_file`, keyed by the hash stored at the same index in
    `st.session_state.image_hashes`. The resulting observations, keyed by
    hash, are stored in `st.session_state.observations`.
    """
    buffered_files = st.session_state.files
    buffered_hashes = st.session_state.image_hashes

    obs_by_hash = {}
    for position, uploaded in enumerate(buffered_files):
        # Files and hashes are parallel lists: same index, same file.
        ukey = buffered_hashes[position]
        obs_by_hash[ukey] = metadata_inputs_one_file(uploaded, ukey, position)

    st.session_state.observations = obs_by_hash
def _setup_oneoff_inputs() -> None:
    """
    Add the UI input elements for which there is exactly one instance:
    the author email field and the image uploader.

    Both are placed in the container stored in
    `st.session_state.container_file_uploader`, which keeps the layout order
    consistent with the dynamically created per-file elements.
    """
    st.title("Input image and data")

    uploader_container = st.session_state.container_file_uploader

    with uploader_container:
        # 1. Input the author email
        email = st.text_input(
            "Author Email",
            spoof_metadata.get('author_email', ""),
            key="input_author_email",
        )
        if email and not is_valid_email(email):
            st.error("Please enter a valid email address.")

        # 2. Image Selector; buffer_files runs whenever the selection changes.
        st.file_uploader(
            "Upload one or more images",
            type=["png", 'jpg', 'jpeg', 'webp'],
            accept_multiple_files=True,
            key="file_uploader_data",
            on_change=buffer_files,
        )

        # Debug trace of what the uploader currently holds.
        for uploaded in st.session_state.file_uploader_data:
            print(f"[DD] rechecking file {uploaded.name}. {uploaded.file_id} {uploaded.type} {uploaded.size}")
def setup_input(
    viewcontainer: DeltaGenerator=None,
    _allowed_image_types: list=None,
) -> None:
    """
    Set up the input handling for the whale observation guidance tool.

    First the one-off elements (author email, file uploader with its
    callback) are added; then the dynamic per-file elements are created from
    the data buffered in the session state.

    Neither argument is used by this function.
    """
    # Static part: adds the uploader and registers its callback.
    _setup_oneoff_inputs()

    # Dynamic part: one group of metadata widgets per buffered file.
    _setup_dynamic_inputs()
def setup_input_monolithic(
    viewcontainer: DeltaGenerator=None,
    _allowed_image_types: list=None, ) -> None:
    """
    Sets up the input interface for uploading images and entering metadata.

    It provides input fields for an image upload, lat/lon, author email, and
    date-time. When the image metadata contains a timestamp, it is used to
    pre-populate the date and time fields.

    The results are not returned; they are stored in `st.session_state` as
    `images`, `files`, `observations`, `image_hashes` and `image_filenames`.

    Parameters:
        viewcontainer (DeltaGenerator, optional): The Streamlit container to
            use for the input interface. Defaults to st.sidebar.
        _allowed_image_types (list, optional): List of allowed image file
            types for upload. Defaults to allowed_image_types.

    Returns:
        None
    """
    if viewcontainer is None:
        viewcontainer = st.sidebar

    if _allowed_image_types is None:
        _allowed_image_types = allowed_image_types

    viewcontainer.title("Input image and data")

    # 1. Input the author email
    author_email = viewcontainer.text_input("Author Email", spoof_metadata.get('author_email', ""))
    if author_email and not is_valid_email(author_email):
        viewcontainer.error("Please enter a valid email address.")

    # 2. Image Selector (honours the caller-supplied list of file types)
    uploaded_files = viewcontainer.file_uploader("Upload an image", type=_allowed_image_types, accept_multiple_files=True)
    observations = {}
    images = {}
    image_hashes = []
    filenames = []

    if uploaded_files is not None:
        for file in uploaded_files:
            viewcontainer.title(f"Metadata for {file.name}")

            # Load the image using the cv2 format, so it is compatible with the ML models.
            file_bytes = np.asarray(bytearray(file.read()), dtype=np.uint8)
            filename = file.name
            filenames.append(filename)
            image = cv2.imdecode(file_bytes, 1)

            # Extract the image date-time.
            image_datetime = get_image_datetime(file)
            m_logger.debug(f"image date extracted as {image_datetime} (from {uploaded_files})")

            # 3. Latitude Entry Box
            latitude = viewcontainer.text_input(
                "Latitude for " + filename,
                spoof_metadata.get('latitude', ""),
                key=f"input_latitude_{filename}")
            if latitude and not is_valid_number(latitude):
                viewcontainer.error("Please enter a valid latitude (numerical only).")
                m_logger.error(f"Invalid latitude entered: {latitude}.")

            # 4. Longitude Entry Box
            longitude = viewcontainer.text_input(
                "Longitude for " + filename,
                spoof_metadata.get('longitude', ""),
                key=f"input_longitude_{filename}")
            if longitude and not is_valid_number(longitude):
                viewcontainer.error("Please enter a valid longitude (numerical only).")
                # Report the longitude here (previously this logged the latitude).
                m_logger.error(f"Invalid longitude entered: {longitude}.")

            # 5. Date/time: prefer the timestamp from the image metadata,
            # otherwise default to the current moment. Parse/sample once so
            # the date and the time always come from the same instant.
            if image_datetime is not None:
                initial_datetime = datetime.datetime.strptime(image_datetime, '%Y:%m:%d %H:%M:%S')
            else:
                initial_datetime = datetime.datetime.now()
            date_value = initial_datetime.date()
            time_value = initial_datetime.time()

            # Either way, the user can adjust the values manually. These
            # widgets go into the same container as all the others
            # (st.sidebar when no container was supplied).
            date_option = viewcontainer.date_input("Date for " + filename, value=date_value)
            time_option = viewcontainer.time_input("Time for " + filename, time_value)

            observation = InputObservation(image=file, latitude=latitude, longitude=longitude,
                                           author_email=author_email, date=image_datetime, time=None,
                                           date_option=date_option, time_option=time_option)
            image_hash = observation.to_dict()["image_md5"]
            observations[image_hash] = observation
            images[image_hash] = image
            image_hashes.append(image_hash)

        # Overwrite (not extend) the session state with this run's results.
        st.session_state.images = images
        st.session_state.files = uploaded_files
        st.session_state.observations = observations
        st.session_state.image_hashes = image_hashes
        st.session_state.image_filenames = filenames