Spaces:

Saving-Willy
/

saving-willy-dev

Sleeping

saving-willy-dev / src /input /input_handling.py

rmm

feat: separated file input and metadata input functions

6a02bc4 3 months ago

20.5 kB

	from typing import List, Tuple
	import datetime
	import logging
	import hashlib

	import streamlit as st
	from streamlit.delta_generator import DeltaGenerator
	from streamlit.runtime.uploaded_file_manager import UploadedFile

	import cv2
	import numpy as np

	from input.input_observation import InputObservation
	from input.input_validator import get_image_datetime, is_valid_email, is_valid_number

	# Module-level logger. With the level set to INFO, the m_logger.debug(...)
	# calls made in this module are filtered out by this logger.
	m_logger = logging.getLogger(__name__)
	m_logger.setLevel(logging.INFO)

	# NOTE: this string literal comes after the imports, so Python evaluates it
	# as a bare expression statement; it is not stored as the module docstring.
	'''
	A module to setup the input handling for the whale observation guidance tool

	both the UI elements (setup_input_UI) and the validation functions.
	'''
	# Default list of image file extensions; setup_input_monolithic falls back
	# to it when no explicit list is passed.
	allowed_image_types = ['jpg', 'jpeg', 'png', 'webp']

	# an arbitrary set of defaults so testing is less painful...
	# ideally we add in some randomization to the defaults
	# The latitude, longitude and author_email entries are read via
	# spoof_metadata.get(...) to pre-fill the corresponding text inputs.
	spoof_metadata = {
	"latitude": 0.5,
	"longitude": 44,
	"author_email": "[email protected]",
	"date": None,
	"time": None,
	}

	def check_inputs_are_set(empty_ok:bool=False, debug:bool=False) -> bool:
	    """
	    Report whether every expected input currently holds a value.

	    Values are read from the Streamlit session state. The author email is
	    stored once under "input_author_email"; latitude, longitude, date and
	    time are stored per image under "<stub>_<image_hash>". An empty string
	    or an empty list counts as "not set" for all of them (the author email
	    included, so a cleared email box does not pass the check). A numeric 0
	    is accepted, since it can be a legitimate coordinate.

	    Args:
	        empty_ok (bool): Value returned when st.session_state.image_hashes
	            is empty. Default is False.
	        debug (bool): If True, prints and logs (at DEBUG level) the status
	            of each inspected key. Default is False.

	    Returns:
	        bool: True if every inspected input has a value, False otherwise.
	            The author email is only inspected when its key is present in
	            the session state; a missing per-image key counts as unset.
	    """

	    def _blank_to_none(value):
	        # a widget that is defined but empty ("" or []) is treated as unset
	        if isinstance(value, (str, list)) and not value:
	            return None
	        return value

	    def _report(key, value):
	        # same line goes to the logger and to stdout
	        msg = f"{key:15}, {(value is not None):8}, {value}"
	        m_logger.debug(msg)
	        print(msg)

	    image_hashes = st.session_state.image_hashes
	    if not image_hashes:
	        return empty_ok

	    exp_input_key_stubs = ["input_latitude", "input_longitude", "input_date", "input_time"]

	    vals = []

	    # the author_email is global/one-off - no hash extension.
	    if "input_author_email" in st.session_state:
	        val = _blank_to_none(st.session_state["input_author_email"])
	        vals.append(val)
	        if debug:
	            _report("input_author_email", val)

	    for image_hash in image_hashes:
	        for stub in exp_input_key_stubs:
	            key = f"{stub}_{image_hash}"
	            val = st.session_state[key] if key in st.session_state else None
	            val = _blank_to_none(val)

	            vals.append(val)
	            if debug:
	                _report(key, val)

	    return all(v is not None for v in vals)


	def process_one_file(file:UploadedFile, ix:int=0) -> Tuple[np.ndarray, str, str, InputObservation]:
	    """
	    Decode one uploaded file, render its metadata widgets, and build an observation.

	    The widgets are placed in st.session_state.container_metadata_inputs,
	    falling back to the sidebar (with a warning) when that entry is None.
	    Widget keys are suffixed with the md5 hash of the file contents.

	    Args:
	        file (UploadedFile): The uploaded image file.
	        ix (int): Added to the default latitude so that each file gets a
	            different pre-filled value. Default is 0.

	    Returns:
	        Tuple[np.ndarray, str, str, InputObservation]: the decoded image, the
	            md5 hex digest of the file bytes, the filename, and the
	            observation assembled from the widget values.

	    Raises:
	        ValueError: if get_image_datetime returns a string that does not
	            match the '%Y:%m:%d %H:%M:%S' format.
	    """
	    viewcontainer = st.session_state.container_metadata_inputs
	    if viewcontainer is None:
	        viewcontainer = st.sidebar
	        msg = "[W] `container_metadata_inputs` is None, using sidebar"
	        m_logger.warning(msg)
	        print(msg)

	    # do all the non-UI calcs first
	    # - image and md5 both need the raw bytes, so the shared helper does both
	    image, image_hash = load_file_and_hash(file)
	    filename:str = file.name
	    image_datetime = get_image_datetime(file)
	    m_logger.debug(f"image date extracted as {image_datetime} (from {file})")

	    author_email = st.session_state["input_author_email"]

	    # add the UI elements
	    viewcontainer.title(f"Metadata for {filename}")
	    ukey = image_hash

	    # 3. Latitude Entry Box
	    latitude = viewcontainer.text_input(
	        "Latitude for " + filename,
	        spoof_metadata.get('latitude', 0) + ix,
	        key=f"input_latitude_{ukey}")
	    if latitude and not is_valid_number(latitude):
	        viewcontainer.error("Please enter a valid latitude (numerical only).")
	        m_logger.error(f"Invalid latitude entered: {latitude}.")

	    # 4. Longitude Entry Box
	    longitude = viewcontainer.text_input(
	        "Longitude for " + filename,
	        spoof_metadata.get('longitude', ""),
	        key=f"input_longitude_{ukey}")
	    if longitude and not is_valid_number(longitude):
	        viewcontainer.error("Please enter a valid longitude (numerical only).")
	        m_logger.error(f"Invalid longitude entered: {longitude}.")

	    # 5. Date/time
	    # - defaults come from the image metadata when available, otherwise "now";
	    #   either way a single timestamp supplies both the date and the time
	    if image_datetime is not None:
	        default_dt = datetime.datetime.strptime(image_datetime, '%Y:%m:%d %H:%M:%S')
	    else:
	        default_dt = datetime.datetime.now()
	    date_value = default_dt.date()
	    time_value = default_dt.time()

	    # - the user can override the defaults manually
	    date_option = viewcontainer.date_input("Date for "+filename, value=date_value, key=f"input_date_{ukey}")
	    time_option = viewcontainer.time_input("Time for "+filename, time_value, key=f"input_time_{ukey}")

	    # NOTE(review): `uploaded_filename` receives the file object, not file.name
	    # - confirm against InputObservation.
	    # TODO: pass in the hash to InputObservation, so it is done once only. (need to refactor the class a bit)
	    observation = InputObservation(
	        image=file, latitude=latitude, longitude=longitude,
	        author_email=author_email, date=image_datetime, time=None,
	        date_option=date_option, time_option=time_option,
	        uploaded_filename=file,
	    )

	    return (image, image_hash, filename, observation)


	def buffer_files():
	    """
	    Buffer everything from the file uploader that needs no further user input.

	    For each file in st.session_state.file_uploader_data the decoded image,
	    its hash and its filename are collected, then written to the session
	    state (images, files, image_hashes, image_filenames), replacing whatever
	    was stored there before.

	    The per-file metadata widgets are created by a separate function:
	    producing widgets dynamically inside a callback should be avoided
	    (tl;dr: they disappear).

	    Note: UploadedFile objects carry a file_id that is unique per file, but
	    these ids do not persist between sessions and appear to be random.
	    """
	    uploads = st.session_state.file_uploader_data

	    names = []
	    digests = []
	    decoded_by_digest = {}

	    for position, upload in enumerate(uploads):
	        name:str = upload.name
	        print(f"[D] processing {position}th file {name}. {upload.file_id} {upload.type} {upload.size}")

	        # decoding and hashing both need the file contents, so one helper does both
	        decoded, digest = load_file_and_hash(upload)

	        names.append(name)
	        digests.append(digest)
	        decoded_by_digest[digest] = decoded

	    st.session_state.images = decoded_by_digest
	    st.session_state.files = uploads
	    st.session_state.image_hashes = digests
	    st.session_state.image_filenames = names


	def load_file_and_hash(file:UploadedFile) -> Tuple[np.ndarray, str]:
	    """
	    Read an uploaded file once and derive both the image and its hash.

	    Args:
	        file (UploadedFile): The uploaded file to read.

	    Returns:
	        Tuple[np.ndarray, str]: the result of cv2.imdecode on the file bytes
	            (colour read mode), and the md5 hex digest of those bytes.
	    """
	    raw = file.read()
	    digest = hashlib.md5(raw).hexdigest()

	    # NOTE(review): cv2.imdecode is documented to return an empty result for
	    # undecodable data - confirm callers cope with that.
	    pixel_buffer = np.asarray(bytearray(raw), dtype=np.uint8)
	    decoded: np.ndarray = cv2.imdecode(pixel_buffer, cv2.IMREAD_COLOR)

	    return (decoded, digest)



	def process_files():
	    """
	    Process every file currently held by the file uploader.

	    Each file goes through process_one_file, which adds the UI elements and
	    returns the image, hash, filename and observation. The collected results
	    are then written to the session state.

	    Note: the session state entries are overwritten, not extended.
	    """
	    uploads = st.session_state.file_uploader_data

	    obs_by_digest = {}
	    decoded_by_digest = {}
	    digests = []
	    names = []

	    for position, upload in enumerate(uploads):
	        print(f"[D] processing file {upload.name}. {upload.file_id} {upload.type} {upload.size}")
	        decoded, digest, name, obs = process_one_file(upload, position)

	        names.append(name)
	        digests.append(digest)
	        obs_by_digest[digest] = obs
	        decoded_by_digest[digest] = decoded

	    st.session_state.images = decoded_by_digest
	    st.session_state.files = uploads
	    st.session_state.observations = obs_by_digest
	    st.session_state.image_hashes = digests
	    st.session_state.image_filenames = names


	def metadata_inputs_one_file(file:UploadedFile, ukey:str, dbg_ix:int=0) -> InputObservation:
	    """
	    Render the metadata widgets for one uploaded file and build its observation.

	    The widgets live in an expander created inside
	    st.session_state.container_metadata_inputs, falling back to the sidebar
	    (with a printed warning) when that entry is None.

	    Args:
	        file (UploadedFile): The uploaded image file.
	        ukey (str): Suffix appended to every widget key for this file.
	        dbg_ix (int): A hack to have different data in each input group, for
	            checking persistence; added to the default latitude. Default is 0.

	    Returns:
	        InputObservation: the observation assembled from the widget values.

	    Raises:
	        ValueError: if get_image_datetime returns a string that does not
	            match the '%Y:%m:%d %H:%M:%S' format.
	    """
	    _viewcontainer = st.session_state.container_metadata_inputs
	    if _viewcontainer is None:
	        _viewcontainer = st.sidebar
	        print("[W] `container_metadata_inputs` is None, using sidebar")

	    author_email = st.session_state["input_author_email"]
	    filename = file.name
	    image_datetime = get_image_datetime(file)

	    # add the UI elements
	    viewcontainer = _viewcontainer.expander(f"Metadata for {file.name}", expanded=True)

	    # TODO: use session state so any changes are persisted within session -- currently I think
	    # we are going to take the defaults over and over again -- if the user adjusts coords, or date, it will get lost
	    # - it is a bit complicated, if no values change, they persist (the widget definition: params, name, key, etc)
	    #   even if the code is re-run. but if the value changes, it is lost.

	    # 3. Latitude Entry Box
	    latitude = viewcontainer.text_input(
	        "Latitude for " + filename,
	        spoof_metadata.get('latitude', 0) + dbg_ix,
	        key=f"input_latitude_{ukey}")
	    if latitude and not is_valid_number(latitude):
	        viewcontainer.error("Please enter a valid latitude (numerical only).")
	        m_logger.error(f"Invalid latitude entered: {latitude}.")

	    # 4. Longitude Entry Box
	    longitude = viewcontainer.text_input(
	        "Longitude for " + filename,
	        spoof_metadata.get('longitude', ""),
	        key=f"input_longitude_{ukey}")
	    if longitude and not is_valid_number(longitude):
	        viewcontainer.error("Please enter a valid longitude (numerical only).")
	        m_logger.error(f"Invalid longitude entered: {longitude}.")

	    # 5. Date/time
	    # - defaults come from the image metadata when available, otherwise "now";
	    #   either way a single timestamp supplies both the date and the time
	    if image_datetime is not None:
	        default_dt = datetime.datetime.strptime(image_datetime, '%Y:%m:%d %H:%M:%S')
	    else:
	        default_dt = datetime.datetime.now()
	    date_value = default_dt.date()
	    time_value = default_dt.time()

	    # - the user can override the defaults manually
	    date_option = viewcontainer.date_input("Date for "+filename, value=date_value, key=f"input_date_{ukey}")
	    time_option = viewcontainer.time_input("Time for "+filename, time_value, key=f"input_time_{ukey}")

	    # NOTE(review): `uploaded_filename` receives the file object, not file.name
	    # - confirm against InputObservation.
	    # TODO: pass in the hash to InputObservation, so it is done once only. (need to refactor the class a bit)
	    observation = InputObservation(
	        image=file, latitude=latitude, longitude=longitude,
	        author_email=author_email, date=image_datetime, time=None,
	        date_option=date_option, time_option=time_option,
	        uploaded_filename=file,
	    )

	    return observation



	def _setup_dynamic_inputs() -> None:
	    """
	    Add the per-file metadata UI elements for every buffered upload.

	    Files and their hashes are read from the session state (files,
	    image_hashes) and paired by position. Each file is handed to
	    metadata_inputs_one_file, and the resulting observations, keyed by hash,
	    are stored in st.session_state.observations.
	    """
	    buffered_files = st.session_state.files
	    digests = st.session_state.image_hashes

	    obs_by_digest = {}
	    for position, upload in enumerate(buffered_files):
	        digest = digests[position]
	        obs_by_digest[digest] = metadata_inputs_one_file(upload, digest, position)

	    st.session_state.observations = obs_by_digest


	def _setup_oneoff_inputs() -> None:
	    '''
	    Add the UI input elements for which we have one each

	    A page title is written first. Inside
	    st.session_state.container_file_uploader, the author email text input
	    (key "input_author_email") and the multi-file image uploader (key
	    "file_uploader_data") are then added. The uploader accepts the module's
	    allowed_image_types and calls buffer_files whenever its contents change.
	    '''
	    st.title("Input image and data")

	    # the container is created elsewhere so that the layout order stays
	    # consistent with the dynamic elements
	    container_file_uploader = st.session_state.container_file_uploader

	    with container_file_uploader:
	        # 1. Input the author email
	        author_email = st.text_input("Author Email", spoof_metadata.get('author_email', ""),
	                                     key="input_author_email")
	        if author_email and not is_valid_email(author_email):
	            st.error("Please enter a valid email address.")

	        # 2. Image Selector
	        # - use the shared extension list so the uploaders in this module agree
	        st.file_uploader("Upload one or more images", type=allowed_image_types,
	                         accept_multiple_files=True,
	                         key="file_uploader_data",
	                         on_change=buffer_files)

	        # debug trace of the files currently held by the uploader
	        for file in st.session_state.file_uploader_data:
	            print(f"[DD] rechecking file {file.name}. {file.file_id} {file.type} {file.size}")






	def setup_input(
	    viewcontainer: DeltaGenerator=None,
	    _allowed_image_types: list=None, ) -> None:
	    '''
	    Build the complete input UI for the whale observation guidance tool

	    Runs in two stages: the one-off elements first, then the dynamic
	    elements. Neither parameter is used by this function body.
	    '''
	    # stage 1: the uploader and its callback.
	    # (registering the callback alone turned out not to be enough - the
	    # situation is more complex)
	    _setup_oneoff_inputs()

	    # stage 2: dynamic UI input elements, driven by the data buffered in
	    # session_state
	    _setup_dynamic_inputs()



	def setup_input_monolithic(
	    viewcontainer: DeltaGenerator=None,
	    _allowed_image_types: list=None, ) -> None:
	    """
	    Sets up the input interface for uploading images and entering metadata.

	    It provides input fields for an image upload, lat/lon, author email, and
	    date-time, all placed in `viewcontainer`. The date and time fields are
	    pre-filled from the image metadata when get_image_datetime provides a
	    value, otherwise from the current time; lat/lon are pre-filled from
	    spoof_metadata.

	    Nothing is returned. The results are written to the session state:
	    images and observations (both keyed by image md5), files, image_hashes
	    and image_filenames.

	    Parameters:
	        viewcontainer (DeltaGenerator, optional): The Streamlit container to use for the input interface. Defaults to st.sidebar.
	        _allowed_image_types (list, optional): List of allowed image file types for upload. Defaults to allowed_image_types.

	    Raises:
	        ValueError: if get_image_datetime returns a string that does not
	            match the '%Y:%m:%d %H:%M:%S' format.
	    """
	    if viewcontainer is None:
	        viewcontainer = st.sidebar

	    if _allowed_image_types is None:
	        _allowed_image_types = allowed_image_types

	    viewcontainer.title("Input image and data")

	    # 1. Input the author email
	    author_email = viewcontainer.text_input("Author Email", spoof_metadata.get('author_email', ""))
	    if author_email and not is_valid_email(author_email):
	        viewcontainer.error("Please enter a valid email address.")

	    # 2. Image Selector
	    # - honour the caller-supplied extension list, not the module default
	    uploaded_files = viewcontainer.file_uploader("Upload an image", type=_allowed_image_types, accept_multiple_files=True)
	    if uploaded_files is None:
	        # NOTE(review): assumes the session state is only written when the
	        # uploader returned a value - confirm against the upstream file.
	        return

	    observations = {}
	    images = {}
	    image_hashes = []
	    filenames = []

	    for file in uploaded_files:
	        viewcontainer.title(f"Metadata for {file.name}")

	        # load image using cv2 format, so it is compatible with the ML models
	        file_bytes = np.asarray(bytearray(file.read()), dtype=np.uint8)
	        filename = file.name
	        filenames.append(filename)
	        image = cv2.imdecode(file_bytes, 1)

	        # Extract image date-time
	        image_datetime = get_image_datetime(file)
	        m_logger.debug(f"image date extracted as {image_datetime} (from {file})")

	        # 3. Latitude Entry Box
	        latitude = viewcontainer.text_input(
	            "Latitude for "+filename,
	            spoof_metadata.get('latitude', ""),
	            key=f"input_latitude_{filename}")
	        if latitude and not is_valid_number(latitude):
	            viewcontainer.error("Please enter a valid latitude (numerical only).")
	            m_logger.error(f"Invalid latitude entered: {latitude}.")

	        # 4. Longitude Entry Box
	        longitude = viewcontainer.text_input(
	            "Longitude for "+filename,
	            spoof_metadata.get('longitude', ""),
	            key=f"input_longitude_{filename}")
	        if longitude and not is_valid_number(longitude):
	            viewcontainer.error("Please enter a valid longitude (numerical only).")
	            m_logger.error(f"Invalid longitude entered: {longitude}.")

	        # 5. Date/time
	        # - defaults come from the image metadata when available, otherwise
	        #   "now"; a single timestamp supplies both the date and the time
	        if image_datetime is not None:
	            default_dt = datetime.datetime.strptime(image_datetime, '%Y:%m:%d %H:%M:%S')
	        else:
	            default_dt = datetime.datetime.now()
	        date_value = default_dt.date()
	        time_value = default_dt.time()

	        # - the user can override the defaults manually; these widgets go in
	        #   the same container as the rest of the form
	        date_option = viewcontainer.date_input("Date for "+filename, value=date_value)
	        time_option = viewcontainer.time_input("Time for "+filename, time_value)

	        observation = InputObservation(
	            image=file, latitude=latitude, longitude=longitude,
	            author_email=author_email, date=image_datetime, time=None,
	            date_option=date_option, time_option=time_option)

	        # the observation supplies the md5 that keys the per-image dicts
	        image_hash = observation.to_dict()["image_md5"]
	        observations[image_hash] = observation
	        images[image_hash] = image
	        image_hashes.append(image_hash)

	    st.session_state.images = images
	    st.session_state.files = uploaded_files
	    st.session_state.observations = observations
	    st.session_state.image_hashes = image_hashes
	    st.session_state.image_filenames = filenames