File size: 17,904 Bytes
f8bf7d4
90b0271
 
f8bf7d4
 
 
 
0e8c927
90b0271
 
f8bf7d4
4854d2c
 
3b7d130
 
 
0e8c927
 
00bdefd
0e8c927
 
 
00bdefd
4d0f7fd
 
 
0e8c927
f8bf7d4
 
 
 
b582a0e
 
f8bf7d4
f22fcc7
f8bf7d4
 
 
 
 
 
 
 
 
 
 
 
 
 
0e8c927
 
1c0e2a5
 
 
00bdefd
 
 
 
 
0e8c927
 
 
 
 
f8bf7d4
0e8c927
 
 
 
 
f8bf7d4
 
7a5f0ca
f8bf7d4
 
7a5f0ca
f8bf7d4
 
 
 
00bdefd
 
 
 
6a02bc4
 
 
 
 
 
 
 
 
00bdefd
 
4854d2c
00bdefd
 
 
 
4854d2c
 
 
 
 
 
 
00bdefd
fd18838
 
 
6a02bc4
fd18838
6a02bc4
 
 
 
 
 
fd18838
f8bf7d4
c3a2524
 
 
 
 
 
0e8c927
c3a2524
 
 
 
 
 
 
 
 
 
 
 
f8bf7d4
 
5c7e462
f8bf7d4
 
 
 
 
 
e3408e4
 
f8bf7d4
 
4854d2c
00bdefd
f8bf7d4
54319e9
fd18838
6a02bc4
 
 
 
 
 
 
 
 
 
fd18838
f8bf7d4
 
 
54319e9
0e8c927
f8bf7d4
 
 
0e8c927
 
f8bf7d4
0e8c927
f8bf7d4
 
 
 
 
4854d2c
f8bf7d4
 
 
 
 
 
 
 
0e8c927
f8bf7d4
 
 
 
 
0e8c927
f8bf7d4
 
 
 
 
0e8c927
f8bf7d4
 
 
 
 
 
 
e3408e4
f8bf7d4
e3408e4
 
 
 
 
f8bf7d4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0e8c927
f8bf7d4
 
4854d2c
 
 
 
 
 
 
 
 
 
3eaf0a5
4854d2c
00bdefd
4854d2c
 
 
 
 
 
 
 
4d0f7fd
00bdefd
 
 
 
 
4854d2c
 
 
 
 
 
 
 
 
 
 
 
 
fd18838
 
 
4854d2c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4d0f7fd
4854d2c
4d0f7fd
 
 
 
6a02bc4
 
4854d2c
4d0f7fd
4854d2c
 
 
 
 
 
 
 
 
 
 
 
 
 
4d0f7fd
 
4854d2c
4d0f7fd
4854d2c
 
 
 
 
 
 
 
 
 
4d0f7fd
 
4854d2c
4d0f7fd
4854d2c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
00bdefd
 
f8bf7d4
 
 
 
 
 
 
0e8c927
4854d2c
 
 
f8bf7d4
e3408e4
f8bf7d4
4854d2c
 
 
 
 
 
 
 
 
 
 
 
f8bf7d4
 
 
 
 
 
 
 
e3408e4
 
 
 
 
f8bf7d4
 
 
4f11b2f
f8bf7d4
 
 
0e8c927
f8bf7d4
 
0e8c927
f8bf7d4
 
00bdefd
 
 
c3a2524
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
import logging
import os

import pandas as pd
import streamlit as st
import folium
from streamlit_folium import st_folium

from transformers import pipeline
from transformers import AutoModelForImageClassification

from maps.obs_map import add_header_text as add_obs_map_header
from classifier.classifier_image import add_header_text as add_classifier_header
from datasets import disable_caching
disable_caching()

import whale_gallery as gallery
import whale_viewer as viewer
from input.input_handling import setup_input, check_inputs_are_set
from maps.alps_map import present_alps_map
from maps.obs_map import present_obs_map
from utils.st_logs import setup_logging, parse_log_buffer
from utils.workflow_state import WorkflowFSM, FSM_STATES
#from classifier.classifier_image import cetacean_classify
from classifier.classifier_image import cetacean_just_classify, cetacean_show_results_and_review, cetacean_show_results

from classifier.classifier_hotdog import hotdog_classify


# setup for the ML model on huggingface (our wrapper)
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
#classifier_revision = '0f9c15e2db4d64e7f622ade518854b488d8d35e6'
classifier_revision = 'main' # default/latest version
# and the dataset of observations (hf dataset in our space)
dataset_id = "Saving-Willy/temp_dataset"
data_files = "data/train-00000-of-00001.parquet"

USE_BASIC_MAP = False
DEV_SIDEBAR_LIB = True

# get a global var for logger accessor in this module
LOG_LEVEL = logging.DEBUG
g_logger = logging.getLogger(__name__)
g_logger.setLevel(LOG_LEVEL)

st.set_page_config(layout="wide")

# initialise various session state variables
# (streamlit re-runs the whole script on each interaction; these guards make
# the initialisation happen only once per browser session)

# log handler whose buffer backs the "Log" tab
if "handler" not in st.session_state:
    st.session_state['handler'] = setup_logging()

# hashes of uploaded images, used as stable observation keys
if "image_hashes" not in st.session_state:
    st.session_state.image_hashes = []

# TODO: ideally just use image_hashes, but need a unique key for the ui elements
# to track the user input phase; and these are created before the hash is generated. 
if "image_filenames" not in st.session_state:
    st.session_state.image_filenames = []

# observation objects, keyed by image hash
if "observations" not in st.session_state:
    st.session_state.observations = {}

# decoded image data, keyed by image hash
if "images" not in st.session_state:
    st.session_state.images = {}

# raw uploaded file objects
if "files" not in st.session_state:
    st.session_state.files = {}

# the observation payload prepared for upload/publication
if "public_observation" not in st.session_state:
    st.session_state.public_observation = {}

# per-image flag: has the whale classifier been run yet?
if "classify_whale_done" not in st.session_state:
    st.session_state.classify_whale_done = {}

# per-image top prediction from the cetacean classifier
if "whale_prediction1" not in st.session_state:
    st.session_state.whale_prediction1 = {}

# handle on the Log tab so other modules can write into it
if "tab_log" not in st.session_state:
    st.session_state.tab_log = None
    
if "workflow_fsm" not in st.session_state:
    # create and init the state machine
    st.session_state.workflow_fsm = WorkflowFSM(FSM_STATES)
    
# container placeholders, populated later by the input-handling module
if "container_per_file_input_elems" not in st.session_state:
    st.session_state.container_per_file_input_elems = None

if "container_file_uploader" not in st.session_state:
    st.session_state.container_file_uploader = None

if "container_metadata_inputs" not in st.session_state:
    st.session_state.container_metadata_inputs = None
    
def refresh_progress():
    """Redraw the sidebar progress indicator from the workflow FSM.

    Writes a one-line status (step index / total, current state name) and a
    progress bar into the two placeholders stored in
    ``st.session_state.disp_progress``.
    """
    with st.sidebar:
        fsm = st.session_state.workflow_fsm
        # the final FSM state is the terminal one, so total steps = states - 1
        tot = fsm.num_states - 1
        cur_i = fsm.current_state_index
        cur_t = fsm.current_state
        text_slot, bar_slot = st.session_state.disp_progress
        text_slot.markdown(f"*Progress: {cur_i}/{tot}. Current: {cur_t}.*")
        bar_slot.progress(cur_i/tot)
# add progress indicator to session_state
# NOTE(review): the guard key "progress" is never written anywhere visible in
# this file, while the values stored are under "disp_progress" — so this block
# appears to re-run on every script rerun. Confirm whether the guard should
# test "disp_progress" instead.
if "progress" not in st.session_state:
    with st.sidebar:
        # two placeholders: [0] status text, [1] progress bar (see refresh_progress)
        st.session_state.disp_progress = [st.empty(), st.empty()]
        # add button to sidebar, with the callback to refesh_progress
        st.sidebar.button("Refresh Progress", on_click=refresh_progress)
        
def dbg_show_obs_hashes():
    """Debug dump: one markdown bullet per stored observation.

    Shows the observation count, then for each observation its hash, instance
    id, and how many predictions it currently holds.
    """
    # a debug: we seem to be losing the whale classes?
    st.write(f"[D] num observations: {len(st.session_state.observations)}")
    bullets = [
        f"- [D] observation {obs_hash} ({obs._inst_id}) has {len(obs.top_predictions)} predictions\n"
        for obs_hash, obs in st.session_state.observations.items()
    ]
    st.markdown("".join(bullets))


def main() -> None:
    """
    Main entry point to set up the streamlit UI and run the application.

    The organisation is as follows:

    1. observation input (a new observation) is handled in the sidebar
    2. the rest of the interface is organised in tabs:
    
        - cetacean classifier
        - hotdog classifier
        - map to present the observations
        - table of recent log entries
        - gallery of whale images
    
    The majority of the tabs are instantiated from modules. Currently the two 
    classifiers are still in-line here.
    
    """

    g_logger.info("App started.")
    g_logger.warning(f"[D] Streamlit version: {st.__version__}. Python version: {os.sys.version}")

    # Streamlit app
    tab_inference, tab_hotdogs, tab_map, tab_coords, tab_log, tab_gallery = \
        st.tabs(["Cetecean classifier", "Hotdog classifier", "Map", "*:gray[Dev:coordinates]*", "Log", "Beautiful cetaceans"])
    st.session_state.tab_log = tab_log

    # put this early so the progress indicator is at the top (also refreshed at end)
    refresh_progress()    

    # create a sidebar, and parse all the input (returned as `observations` object)
    with st.sidebar:
        st.divider()
        
        # styled containers for the uploader and the per-file metadata inputs;
        # handles are stashed in session_state so input_handling can fill them
        st.markdown('<style>.st-key-container_file_uploader_id { border: 1px solid skyblue; border-radius: 5px; }</style>', unsafe_allow_html=True)
        container_file_uploader = st.container(border=True, key="container_file_uploader_id")
        st.session_state.container_file_uploader = container_file_uploader
        st.markdown('<style>.st-key-container_metadata_inputs_id { border: 1px solid lightgreen; border-radius: 5px; }</style>', unsafe_allow_html=True)
        container_metadata_inputs = st.container(border=True, key="container_metadata_inputs_id")
        container_metadata_inputs.write("Metadata Inputs... wait for file upload ")
        st.session_state.container_metadata_inputs = container_metadata_inputs

        setup_input(viewcontainer=st.sidebar)

        
    if 0:## WIP
        # goal of this code is to allow the user to override the ML prediction, before transmitting an observations
        predicted_class = st.sidebar.selectbox("Predicted Class", viewer.WHALE_CLASSES)
        override_prediction = st.sidebar.checkbox("Override Prediction")

        if override_prediction:
            overridden_class = st.sidebar.selectbox("Override Class", viewer.WHALE_CLASSES)
            st.session_state.observations['class_overriden'] = overridden_class
        else:
            st.session_state.observations['class_overriden'] = None


    with tab_map:
        # visual structure: a couple of toggles at the top, then the map including a
        # dropdown for tileset selection.
        add_obs_map_header()
        tab_map_ui_cols = st.columns(2)
        with tab_map_ui_cols[0]:
            show_db_points = st.toggle("Show Points from DB", True)
        with tab_map_ui_cols[1]:
            dbg_show_extra = st.toggle("Show Extra points (test)", False)
            
        if show_db_points:
            # show a nicer map, observations marked, tileset selectable.
            st_observation = present_obs_map(
                dataset_id=dataset_id, data_files=data_files,
                dbg_show_extra=dbg_show_extra)
            
        else:
            # development map.
            st_observation = present_alps_map()
            

    with tab_log:
        # render the in-memory log buffer newest-first
        handler = st.session_state['handler']
        if handler is not None:
            records = parse_log_buffer(handler.buffer)
            st.dataframe(records[::-1], use_container_width=True,)
            st.info(f"Length of records: {len(records)}")
        else:
            st.error("⚠️ No log handler found!")

        
        
    with tab_coords:
        # the goal of this tab is to allow selection of the new observation's location by map click/adjust.
        st.markdown("Coming later! :construction:")
        st.markdown(
            f"""*The goal is to allow interactive definition for the coordinates of a new
            observation, by click/drag points on the map.*""")
        

        st.write("Click on the map to capture a location.")
        #m = folium.Map(location=visp_loc, zoom_start=7)
        mm = folium.Map(location=[39.949610, -75.150282], zoom_start=16)
        folium.Marker( [39.949610, -75.150282], popup="Liberty Bell", tooltip="Liberty Bell"
    ).add_to(mm)

        st_data2 = st_folium(mm, width=725)
        st.write("below the map...")
        if st_data2['last_clicked'] is not None:
            print(st_data2)
            st.info(st_data2['last_clicked'])


    with tab_gallery:
        # here we make a container to allow filtering css properties 
        # specific to the gallery (otherwise we get side effects)
        tg_cont = st.container(key="swgallery")
        with tg_cont:
            gallery.render_whale_gallery(n_cols=4)
        

    # state handling re data_entry phases
    # 0. no data entered yet -> display the file uploader thing
    # 1. we have some images, but not all the metadata fields are done -> validate button shown, disabled
    # 2. all data entered -> validate button enabled
    # 3. validation button pressed, validation done -> enable the inference button. 
    #    - at this point do we also want to disable changes to the metadata selectors?
    #    anyway, simple first. 

    if st.session_state.workflow_fsm.is_in_state('doing_data_entry'):
        # can we advance state? - only when all inputs are set for all uploaded files
        all_inputs_set = check_inputs_are_set(debug=True, empty_ok=False)
        if all_inputs_set:
            st.session_state.workflow_fsm.complete_current_state()
            # -> data_entry_complete
        else: 
            # button, disabled; no state change yet.
            st.sidebar.button(":gray[*Validate*]", disabled=True, help="Please fill in all fields.")
            
    
    if st.session_state.workflow_fsm.is_in_state('data_entry_complete'):
        # can we advance state? - only when the validate button is pressed
        if st.sidebar.button(":white_check_mark:[**Validate**]"):
            # create a dictionary with the submitted observation
            tab_log.info(f"{st.session_state.observations}")
            df = pd.DataFrame(st.session_state.observations, index=[0])
            with tab_coords:
                st.table(df)
            # there doesn't seem to be any actual validation here?? TODO: find validator function (each element is validated by the input box, but is there something at the whole image level?)
            # hmm, maybe it should actually just be "I'm done with data entry"
            st.session_state.workflow_fsm.complete_current_state()
            # -> data_entry_validated
    
    # state handling re inference phases (tab_inference)
    # 3. validation button pressed, validation done -> enable the inference button.
    # 4. inference button pressed -> ML started. | let's cut this one out, since it would only
    #      make sense if we did it as an async action
    # 5. ML done -> show results, and manual validation options
    # 6. manual validation done -> enable the upload buttons
    # 
    with tab_inference:
        
        dbg_show_obs_hashes()

        add_classifier_header()
        # if we are before data_entry_validated, show the button, disabled.
        if not st.session_state.workflow_fsm.is_in_state_or_beyond('data_entry_validated'):
            tab_inference.button(":gray[*Identify with cetacean classifier*]", disabled=True, 
                                help="Please validate inputs before proceeding", 
                                key="button_infer_ceteans")
        
        if st.session_state.workflow_fsm.is_in_state('data_entry_validated'):
            # show the button, enabled. If pressed, we start the ML model (And advance state)
            if tab_inference.button("Identify with cetacean classifier"):
                cetacean_classifier = AutoModelForImageClassification.from_pretrained(
                    "Saving-Willy/cetacean-classifier", 
                    revision=classifier_revision, 
                    trust_remote_code=True)

                cetacean_just_classify(cetacean_classifier)
                st.session_state.workflow_fsm.complete_current_state()
                # trigger a refresh too (refreshing the prog indicator means the script reruns and 
                # we can enter the next state - visualising the results / review)
                # ok it doesn't if done programmatically. maybe interacting with the button? check docs.
                refresh_progress()
                #TODO: validate this doesn't harm performance adversely.
                st.rerun()
        
        elif st.session_state.workflow_fsm.is_in_state('ml_classification_completed'):
            # show the results, and allow manual validation
            s = ""
            for k, v in st.session_state.whale_prediction1.items():
                s += f"* Image {k}: {v}\n"
                
            st.markdown("""
                        ### Inference Results and manual validation/adjustment
                        :construction: for now we just show the num images processed.
                        """)
            st.markdown(s)
            # add a button to advance the state
            if st.button("mock: manual validation done."):
                st.session_state.workflow_fsm.complete_current_state()
                # -> manual_inspection_completed
            
            cetacean_show_results_and_review()

        elif st.session_state.workflow_fsm.is_in_state('manual_inspection_completed'):
            # show the ML results, and allow the user to upload the observation
            st.markdown("""
                        ### Inference Results (after manual validation)
                        :construction: for now we just show the button.
                        """)
            
            
            if st.button("(nooop) Upload observation to THE INTERNET!"):
                st.session_state.workflow_fsm.complete_current_state()
                # -> data_uploaded

            cetacean_show_results()
        
        elif st.session_state.workflow_fsm.is_in_state('data_uploaded'):
            # the data has been sent. Lets show the observations again
            # but no buttons to upload (or greyed out ok)
            st.markdown("""
                        ### Observation(s) uploaded
                        :construction: for now we just show the observations.
                        """)
            df = pd.DataFrame(st.session_state.observations, index=[0])
            st.table(df)

            # didn't decide what the next state is here - I think we are in the terminal state.
            #st.session_state.workflow_fsm.complete_current_state()

    # inside the inference tab, on button press we call the model (on huggingface hub)
    # which will be run locally. 
    # - the model predicts the top 3 most likely species from the input image
    # - these species are shown
    # - the user can override the species prediction using the dropdown 
    # - an observation is uploaded if the user chooses.

    # inside the hotdog tab, on button press we call a 2nd model (totally unrelated at present, just for demo
    # purposes, an hotdog image classifier) which will be run locally.
    # - this model predicts if the image is a hotdog or not, and returns probabilities
    # - the input image is the same as for the ceteacean classifier - defined in the sidebar
    tab_hotdogs.title("Hot Dog? Or Not?")
    tab_hotdogs.write("""
                *Run alternative classifer on input images. Here we are using
                a binary classifier - hotdog or not - from
                huggingface.co/julien-c/hotdog-not-hotdog.*""")

    if tab_hotdogs.button("Get Hotdog Prediction"):   
        
        pipeline_hot_dog = pipeline(task="image-classification", model="julien-c/hotdog-not-hotdog")

        # BUG FIX: session_state has no `image` attribute — the key initialised
        # at module level is `images` (a dict), so the old check
        # `st.session_state.image is None` raised on every press. Test the dict
        # for emptiness instead.
        if not st.session_state.images:
            st.info("Please upload an image first.")
            #st.info(str(observations.to_dict()))
            
        else:
            hotdog_classify(pipeline_hot_dog, tab_hotdogs)
            
            
    # after all other processing, we can show the stage/state
    refresh_progress()


# standard script entry-point guard: run the app only when executed directly
if __name__ == "__main__":
    main()