File size: 20,540 Bytes
fd18838
0e8c927
 
fd18838
0e8c927
 
 
fd18838
0e8c927
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6a02bc4
0e8c927
 
 
 
 
 
00bdefd
fd18838
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3eaf0a5
 
fd18838
 
3eaf0a5
 
 
 
 
 
 
 
 
 
fd18838
 
 
 
 
 
3eaf0a5
 
 
 
 
 
 
 
 
 
fd18838
 
 
 
 
 
 
 
 
 
 
6a02bc4
fd18838
 
 
 
 
6a02bc4
 
 
 
 
 
 
 
fd18838
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6a02bc4
fd18838
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6a02bc4
 
fd18838
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6a02bc4
 
 
 
 
 
 
 
 
 
fd18838
6a02bc4
 
fd18838
6a02bc4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fd18838
 
6a02bc4
 
 
 
 
 
 
 
 
fd18838
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6a02bc4
 
 
fd18838
00bdefd
fd18838
 
00bdefd
fd18838
 
 
 
 
 
 
 
 
 
6a02bc4
 
fd18838
6a02bc4
 
 
 
 
 
fd18838
 
6a02bc4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fd18838
 
 
 
 
 
 
6a02bc4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fd18838
6a02bc4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fd18838
 
 
0e8c927
fd18838
 
 
 
 
 
 
 
6a02bc4
 
 
 
 
fd18838
 
 
0e8c927
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
00bdefd
 
0e8c927
 
 
 
 
 
 
 
 
00bdefd
0e8c927
 
 
 
 
 
 
 
00bdefd
 
 
 
0e8c927
 
 
 
00bdefd
 
 
 
0e8c927
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1c0e2a5
 
 
 
0e8c927
 
 
1c0e2a5
 
00bdefd
 
0e8c927
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
from typing import List, Tuple
import datetime
import logging
import hashlib

import streamlit as st
from streamlit.delta_generator import DeltaGenerator
from streamlit.runtime.uploaded_file_manager import UploadedFile

import cv2
import numpy as np

from input.input_observation import InputObservation
from input.input_validator import get_image_datetime, is_valid_email, is_valid_number

# Module-level logger, named after this module so its records can be filtered by source.
m_logger = logging.getLogger(__name__)
# Threshold is INFO: the `m_logger.debug(...)` calls in this module are below it.
m_logger.setLevel(logging.INFO)

''' 
A module to setup the input handling for the whale observation guidance tool

both the UI elements (setup_input_UI) and the validation functions.
'''
# Image file extensions; used by `setup_input_monolithic` for the uploader's accepted types.
allowed_image_types = ['jpg', 'jpeg', 'png', 'webp']

# an arbitrary set of defaults so testing is less painful...
# ideally we add in some randomization to the defaults
# - "latitude", "longitude" and "author_email" pre-fill the corresponding text inputs;
#   "date" and "time" are not read anywhere in the visible code.
# NOTE(review): the "author_email" value looks like e-mail-obfuscation residue rather
# than a real address -- confirm the intended default.
spoof_metadata = {
    "latitude": 0.5,
    "longitude": 44,
    "author_email": "[email protected]",
    "date": None,
    "time": None,
}

def check_inputs_are_set(empty_ok:bool=False, debug:bool=False) -> bool:
    """
    Checks if all expected inputs have been entered

    Implementation: via the Streamlit session state.

    The one-off author email is read from the key `input_author_email`; the
    per-image inputs (latitude, longitude, date, time) are read from the keys
    `<stub>_<image_hash>` for every hash in `st.session_state.image_hashes`.

    A value counts as "not entered" when it is None, an empty string or an
    empty list. The number 0 is accepted (it could be a valid coordinate,
    e.g. on the equator). A per-image key that is absent from the session
    state counts as not entered; an absent author-email key is skipped.

    Args:
        empty_ok (bool): If True, returns True if no inputs are set. Default is False.
        debug (bool): If True, prints and logs the status of each expected input key. Default is False.
    Returns:
        bool: True if all expected input keys are set, False otherwise.
    """
    image_hashes = st.session_state.image_hashes
    if len(image_hashes) == 0:
        return empty_ok

    exp_input_key_stubs = ["input_latitude", "input_longitude", "input_date", "input_time"]

    def _entered(val):
        # handle values that are defined but empty: an empty string or an
        # empty list is treated the same as "nothing entered"
        if isinstance(val, (str, list)) and not val:
            return None
        return val

    def _report(key, val):
        # str(...) keeps the flag readable: formatting a bool directly with a
        # width spec renders it as the integer 1/0 instead of True/False
        msg = f"{key:15}, {str(val is not None):8}, {val}"
        m_logger.debug(msg)
        print(msg)

    vals = []
    # the author_email is global/one-off - no hash extension.
    if "input_author_email" in st.session_state:
        # normalised like the per-image values, so a cleared email field is
        # not reported as set
        val = _entered(st.session_state["input_author_email"])
        vals.append(val)
        if debug:
            _report("input_author_email", val)

    for image_hash in image_hashes:
        for stub in exp_input_key_stubs:
            key = f"{stub}_{image_hash}"
            val = None
            if key in st.session_state:
                val = _entered(st.session_state[key])

            vals.append(val)
            if debug:
                _report(key, val)

    return all(v is not None for v in vals)


def process_one_file(file:UploadedFile, ix:int=0) -> Tuple[np.ndarray, str, str, InputObservation]:
    """
    Decode one uploaded file, add its metadata widgets, and build its observation.

    The non-UI work (reading the bytes, MD5 hash, image decoding, extracting the
    image date-time) is done first; then the latitude / longitude / date / time
    widgets are added to the metadata container and their current values are
    packed into an InputObservation.

    Args:
        file (UploadedFile): the uploaded image file.
        ix (int): added to the default latitude, so that each file's widgets
            start out with different data. Default is 0.
    Returns:
        tuple: (image, image_hash, filename, observation), where `image` is the
            result of `cv2.imdecode`, `image_hash` the MD5 hex digest of the
            file's bytes and `filename` the name of the uploaded file.
    """
    if st.session_state.container_metadata_inputs is not None:
        viewcontainer = st.session_state.container_metadata_inputs
    else:
        viewcontainer = st.sidebar
        msg = "[W] `container_metadata_inputs` is None, using sidebar"
        m_logger.warning(msg)
        print(msg)

    # do all the non-UI calcs first
    ## get the bytes first, then convert into 1) image, 2) md5
    _bytes = file.read()
    # rewind, so that the later consumers of `file` (get_image_datetime,
    # InputObservation) start from the first byte; whether they rewind the
    # stream themselves is not visible from here
    file.seek(0)
    image_hash = hashlib.md5(_bytes).hexdigest()
    image: np.ndarray = cv2.imdecode(np.asarray(bytearray(_bytes), dtype=np.uint8), cv2.IMREAD_COLOR)
    filename:str = file.name
    image_datetime = get_image_datetime(file)
    m_logger.debug(f"image date extracted as {image_datetime} (from {file})")

    author_email = st.session_state["input_author_email"]

    # add the UI elements; the widget keys are made unique per image via the hash
    viewcontainer.title(f"Metadata for {filename}")
    ukey = image_hash

    # 3. Latitude Entry Box
    latitude = viewcontainer.text_input(
        "Latitude for " + filename,
        spoof_metadata.get('latitude', 0) + ix,
        key=f"input_latitude_{ukey}")
    if latitude and not is_valid_number(latitude):
        viewcontainer.error("Please enter a valid latitude (numerical only).")
        m_logger.error(f"Invalid latitude entered: {latitude}.")
    # 4. Longitude Entry Box
    longitude = viewcontainer.text_input(
        "Longitude for " + filename,
        spoof_metadata.get('longitude', ""),
        key=f"input_longitude_{ukey}")
    if longitude and not is_valid_number(longitude):
        viewcontainer.error("Please enter a valid longitude (numerical only).")
        m_logger.error(f"Invalid longitude entered: {longitude}.")

    # 5. Date/time
    ## first from image metadata, otherwise default to the current date/time
    ## (taken from one timestamp so date and time always belong together)
    if image_datetime is not None:
        default_dt = datetime.datetime.strptime(image_datetime, '%Y:%m:%d %H:%M:%S')
    else:
        default_dt = datetime.datetime.now()
    date_value = default_dt.date()
    time_value = default_dt.time()

    ## the user can override the defaults manually
    date_option = viewcontainer.date_input("Date for "+filename, value=date_value, key=f"input_date_{ukey}")
    time_option = viewcontainer.time_input("Time for "+filename, time_value, key=f"input_time_{ukey}")

    # TODO: pass in the hash to InputObservation, so it is done once only. (need to refactor the class a bit)
    observation = InputObservation(image=file, latitude=latitude, longitude=longitude,
                                author_email=author_email, date=image_datetime, time=None,
                                date_option=date_option, time_option=time_option,
                                uploaded_filename=file,
                                )

    return (image, image_hash, filename, observation)


def buffer_files():
    """
    Buffer everything from the file uploader that needs no further user input.

    For each file in `st.session_state.file_uploader_data` the decoded image,
    the hash and the filename are collected; the results are then written to
    the session state keys `images` (hash -> image), `files`, `image_hashes`
    and `image_filenames`, replacing whatever was stored there.

    The per-file metadata widgets are created by a separate function:
    dynamically producing widgets inside a callback should be avoided
    (tl;dr: they disappear).

    Note: the UploadedFile objects carry a `file_id`; these ids seem to be
    random identifiers and are not persistent between sessions.
    """
    state = st.session_state
    uploads = state.file_uploader_data

    names = []
    digests = []
    decoded = {}

    for position, upload in enumerate(uploads):
        name:str = upload.name
        print(f"[D] processing {position}th file {name}. {upload.file_id} {upload.type} {upload.size}")
        # decoding and hashing both need the file's bytes, so they are done in one go
        picture, digest = load_file_and_hash(upload)

        names.append(name)
        digests.append(digest)
        decoded[digest] = picture

    state.images = decoded
    state.files = uploads
    state.image_hashes = digests
    state.image_filenames = names

    
def load_file_and_hash(file:UploadedFile) -> Tuple[np.ndarray, str]:
    """
    Read an uploaded file once and derive both the decoded image and its hash.

    Both operations need the file's bytes, so they are done together.

    Args:
        file (UploadedFile): the uploaded image file.
    Returns:
        tuple: (image, image_hash) - the result of `cv2.imdecode` on the bytes
            (per the OpenCV documentation this is empty/None when the data
            cannot be decoded) and the MD5 hex digest of the bytes.
    """
    _bytes = file.read()
    # rewind: the same file object is used again after this call, so leave the
    # stream at the first byte instead of at the end
    file.seek(0)
    image_hash = hashlib.md5(_bytes).hexdigest()
    image: np.ndarray = cv2.imdecode(np.asarray(bytearray(_bytes), dtype=np.uint8), cv2.IMREAD_COLOR)

    return (image, image_hash)

    

def process_files():
    """
    Process every uploaded file and store the results in the session state.

    Each file in `st.session_state.file_uploader_data` is handed to
    `process_one_file`, which adds the UI elements and returns the image, hash,
    filename and observation. The collected results overwrite (they do not
    extend) the session state keys `images`, `files`, `observations`,
    `image_hashes` and `image_filenames`.
    """
    state = st.session_state
    uploads = state.file_uploader_data

    obs_by_hash = {}
    image_by_hash = {}
    digests = []
    names = []

    for position, upload in enumerate(uploads):
        print(f"[D] processing file {upload.name}. {upload.file_id} {upload.type} {upload.size}")
        picture, digest, name, obs = process_one_file(upload, position)

        names.append(name)
        digests.append(digest)
        obs_by_hash[digest] = obs
        image_by_hash[digest] = picture

    state.images = image_by_hash
    state.files = uploads
    state.observations = obs_by_hash
    state.image_hashes = digests
    state.image_filenames = names

        
def metadata_inputs_one_file(file:UploadedFile, ukey:str, dbg_ix:int=0) -> InputObservation:
    """
    Add the metadata widgets for one uploaded file and build its observation.

    The widgets (latitude, longitude, date, time) are placed in an expander
    inside the metadata container (or the sidebar, if that container is None).
    Their defaults come from `spoof_metadata` and from the date-time extracted
    from the image, falling back to the current date/time.

    Args:
        file (UploadedFile): the uploaded image file.
        ukey (str): suffix that makes the widget keys unique for this file.
        dbg_ix (int): a hack to have different data in each input group
            (checking persistence); it is added to the default latitude.
            Default is 0.
    Returns:
        InputObservation: built from the file and the current widget values.
    """
    if st.session_state.container_metadata_inputs is not None:
        _viewcontainer = st.session_state.container_metadata_inputs
    else:
        _viewcontainer = st.sidebar
        print("[W] `container_metadata_inputs` is None, using sidebar")

    author_email = st.session_state["input_author_email"]
    filename = file.name
    image_datetime = get_image_datetime(file)
    # add the UI elements, grouped in one expander per file
    viewcontainer = _viewcontainer.expander(f"Metadata for {file.name}", expanded=True)

    # TODO: use session state so any changes are persisted within session -- currently I think
    # we are going to take the defaults over and over again -- if the user adjusts coords, or date, it will get lost
    # - it is a bit complicated, if no values change, they persist (the widget definition: params, name, key, etc)
    #   even if the code is re-run. but if the value changes, it is lost.

    # 3. Latitude Entry Box
    latitude = viewcontainer.text_input(
        "Latitude for " + filename,
        spoof_metadata.get('latitude', 0) + dbg_ix,
        key=f"input_latitude_{ukey}")
    if latitude and not is_valid_number(latitude):
        viewcontainer.error("Please enter a valid latitude (numerical only).")
        m_logger.error(f"Invalid latitude entered: {latitude}.")
    # 4. Longitude Entry Box
    longitude = viewcontainer.text_input(
        "Longitude for " + filename,
        spoof_metadata.get('longitude', ""),
        key=f"input_longitude_{ukey}")
    if longitude and not is_valid_number(longitude):
        viewcontainer.error("Please enter a valid longitude (numerical only).")
        m_logger.error(f"Invalid longitude entered: {longitude}.")

    # 5. Date/time
    ## first from image metadata, otherwise default to the current date/time
    ## (taken from one timestamp so date and time always belong together)
    if image_datetime is not None:
        default_dt = datetime.datetime.strptime(image_datetime, '%Y:%m:%d %H:%M:%S')
    else:
        default_dt = datetime.datetime.now()
    date_value = default_dt.date()
    time_value = default_dt.time()

    ## the user can override the defaults manually
    date_option = viewcontainer.date_input("Date for "+filename, value=date_value, key=f"input_date_{ukey}")
    time_option = viewcontainer.time_input("Time for "+filename, time_value, key=f"input_time_{ukey}")

    # TODO: pass in the hash to InputObservation, so it is done once only. (need to refactor the class a bit)
    observation = InputObservation(image=file, latitude=latitude, longitude=longitude,
                                author_email=author_email, date=image_datetime, time=None,
                                date_option=date_option, time_option=time_option,
                                uploaded_filename=file,
                                )
    return observation
    

    
def _setup_dynamic_inputs() -> None:
    """
    Add the per-file metadata widgets and store the resulting observations.

    The files and hashes are taken from `st.session_state.files` and
    `st.session_state.image_hashes`; the n-th hash belongs to the n-th file.
    For every file the metadata UI elements are added via
    `metadata_inputs_one_file`, and the returned observations are written to
    `st.session_state.observations`, keyed by hash.
    """
    buffered_files = st.session_state.files
    buffered_hashes = st.session_state.image_hashes

    obs_by_hash = {}
    for position, upload in enumerate(buffered_files):
        digest = buffered_hashes[position]
        obs_by_hash[digest] = metadata_inputs_one_file(upload, digest, position)

    st.session_state.observations = obs_by_hash


def _setup_oneoff_inputs() -> None:
    '''
    Add the UI input elements of which there is exactly one each

    These are the page title, the author email input and the image uploader.
    The email input and the uploader are placed in the container stored in
    `st.session_state.container_file_uploader`; the uploader calls
    `buffer_files` whenever its selection changes.
    '''
    st.title("Input image and data")

    # a pre-made container gives a consistent layout order with the dynamic elements
    uploader_area = st.session_state.container_file_uploader

    with uploader_area:
        # 1. Input the author email
        email_value = st.text_input(
            "Author Email",
            spoof_metadata.get('author_email', ""),
            key="input_author_email",
        )
        if email_value:
            if not is_valid_email(email_value):
                st.error("Please enter a valid email address.")

        # 2. Image Selector
        st.file_uploader(
            "Upload one or more images",
            type=["png", 'jpg', 'jpeg', 'webp'],
            accept_multiple_files=True,
            key="file_uploader_data",
            on_change=buffer_files,
        )

    # debug output: list the files currently held by the uploader
    for upload in st.session_state.file_uploader_data:
        print(f"[DD] rechecking file {upload.name}. {upload.file_id} {upload.type} {upload.size}")
                                        
                                    
                                    
    

        
def setup_input(
    viewcontainer: DeltaGenerator=None,
    _allowed_image_types: list=None, ) -> None:
    '''
    Set up the input handling for the whale observation guidance tool

    Two steps, in this order:
    1. the one-off elements (`_setup_oneoff_inputs`);
    2. the dynamic, per-file elements (`_setup_dynamic_inputs`), which are
       based on the data that is buffered in the session state.

    Neither `viewcontainer` nor `_allowed_image_types` is used by this function.
    '''
    # step 1: static elements, including the uploader and its callback
    _setup_oneoff_inputs()

    # step 2: the uploader callback alone is not enough -- the per-file
    # widgets are (re)built here
    _setup_dynamic_inputs()
    
    

def setup_input_monolithic(
    viewcontainer: DeltaGenerator=None,
    _allowed_image_types: list=None, ) -> InputObservation:
    """
    Sets up the input interface for uploading images and entering metadata.

    It provides input fields for an image upload, lat/lon, author email, and date-time.
    The date-time found in the image metadata, if any, is used as the default
    for the date and time inputs; otherwise the current date/time is used.

    The results are written to the session state keys `images`, `files`,
    `observations`, `image_hashes` and `image_filenames`.

    Parameters:
        viewcontainer (DeltaGenerator, optional): The Streamlit container to use for the input interface. Defaults to st.sidebar.
        _allowed_image_types (list, optional): List of allowed image file types for upload. Defaults to allowed_image_types.

    Returns:
        Nothing is returned (despite the annotation); the observations are
        stored in `st.session_state.observations`, keyed by image hash.

    """
    if viewcontainer is None:
        viewcontainer = st.sidebar

    if _allowed_image_types is None:
        _allowed_image_types = allowed_image_types

    viewcontainer.title("Input image and data")

    # 1. Input the author email
    author_email = viewcontainer.text_input("Author Email", spoof_metadata.get('author_email', ""))
    if author_email and not is_valid_email(author_email):
        viewcontainer.error("Please enter a valid email address.")

    # 2. Image Selector -- honours the caller's list of allowed types
    uploaded_files = viewcontainer.file_uploader("Upload an image", type=_allowed_image_types, accept_multiple_files=True)
    observations = {}
    images = {}
    image_hashes = []
    filenames = []
    if uploaded_files is not None:
        for file in uploaded_files:

            viewcontainer.title(f"Metadata for {file.name}")

            # load image using cv2 format, so it is compatible with the ML models
            file_bytes = np.asarray(bytearray(file.read()), dtype=np.uint8)
            # rewind, so that the later consumers of `file` (get_image_datetime,
            # InputObservation) start from the first byte; whether they rewind
            # the stream themselves is not visible from here
            file.seek(0)
            filename = file.name
            filenames.append(filename)
            image = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)
            # Extract image date-time
            image_datetime = get_image_datetime(file)
            m_logger.debug(f"image date extracted as {image_datetime} (from {file})")

            # 3. Latitude Entry Box
            latitude = viewcontainer.text_input(
                "Latitude for "+filename,
                spoof_metadata.get('latitude', ""),
                key=f"input_latitude_{filename}")
            if latitude and not is_valid_number(latitude):
                viewcontainer.error("Please enter a valid latitude (numerical only).")
                m_logger.error(f"Invalid latitude entered: {latitude}.")
            # 4. Longitude Entry Box
            longitude = viewcontainer.text_input(
                "Longitude for "+filename,
                spoof_metadata.get('longitude', ""),
                key=f"input_longitude_{filename}")
            if longitude and not is_valid_number(longitude):
                viewcontainer.error("Please enter a valid longitude (numerical only).")
                m_logger.error(f"Invalid longitude entered: {longitude}.")

            # 5. Date/time
            ## first from image metadata, otherwise default to the current date/time
            ## (taken from one timestamp so date and time always belong together)
            if image_datetime is not None:
                default_dt = datetime.datetime.strptime(image_datetime, '%Y:%m:%d %H:%M:%S')
            else:
                default_dt = datetime.datetime.now()
            date_value = default_dt.date()
            time_value = default_dt.time()

            ## the user can override the defaults manually; these widgets go
            ## into the same container as all the other inputs
            date_option = viewcontainer.date_input("Date for "+filename, value=date_value)
            time_option = viewcontainer.time_input("Time for "+filename, time_value)

            observation = InputObservation(image=file, latitude=latitude, longitude=longitude,
                                        author_email=author_email, date=image_datetime, time=None,
                                        date_option=date_option, time_option=time_option)
            # the hash is taken from the observation, so that the keys used
            # here agree with the observation's own `image_md5`
            image_hash = observation.to_dict()["image_md5"]
            observations[image_hash] = observation
            images[image_hash] = image
            image_hashes.append(image_hash)

    st.session_state.images = images
    st.session_state.files = uploaded_files
    st.session_state.observations = observations
    st.session_state.image_hashes = image_hashes
    st.session_state.image_filenames = filenames