File size: 12,108 Bytes
f8bf7d4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
import datetime
import os
import json
import logging
import tempfile
import pandas as pd
import streamlit as st
import folium
from streamlit_folium import st_folium
from huggingface_hub import HfApi
#from datasets import load_dataset
#from fix_tabrender import js_show_zeroheight_iframe

import whale_viewer as sw_wv
import input_handling as sw_inp
import alps_map as sw_am
import whale_gallery as sw_wg
import obs_map as sw_map
import st_logs as sw_logs



from transformers import pipeline
from transformers import AutoModelForImageClassification

# setup for the ML model on huggingface (our wrapper)
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
# and the dataset of observations (hf dataset in our space)
dataset_id = "Saving-Willy/Happywhale-kaggle"
data_files = "data/train-00000-of-00001.parquet"

USE_BASIC_MAP = False
DEV_SIDEBAR_LIB = True

# module-level logger for this file
LOG_LEVEL = logging.DEBUG
g_logger = logging.getLogger(__name__)
g_logger.setLevel(LOG_LEVEL)

st.set_page_config(layout="wide")


# initialise session state variables.
# the log handler is special-cased: it is only created when missing, since
# sw_logs.setup_logging() has side effects (attaches a handler).
if "handler" not in st.session_state:
    st.session_state['handler'] = sw_logs.setup_logging()

# the remaining keys are plain defaults, set only on first run
for _key, _default in (
    ("full_data", {}),
    ("classify_whale_done", False),
    ("whale_prediction1", None),
    ("image", None),
    ("tab_log", None),
):
    if _key not in st.session_state:
        st.session_state[_key] = _default

def metadata2md(data=None):
    """Render observation metadata as a markdown bullet list.

    Args:
        data: optional mapping of metadata keys to values; when omitted,
            the current observation in ``st.session_state.full_data`` is
            used (backward-compatible with the original no-arg call).

    Returns:
        str: markdown with one ``- **key**: value`` bullet per entry,
        preceded by a newline; an empty mapping yields just the newline.
    """
    if data is None:
        data = st.session_state.full_data
    return "\n" + "".join(f"- **{key}**: {value}\n" for key, value in data.items())


def push_observation(tab_log=None):
    """Upload the current observation to the huggingface dataset repo.

    Serialises ``st.session_state.full_data`` to JSON, writes it to a
    temporary file, and uploads it under ``metadata/<author_email>/<image_md5>.json``.

    Args:
        tab_log: optional streamlit container to receive log messages.
            Falls back to the shared one in session state.
            (Fix: the parameter used to be unconditionally overwritten.)
    """
    # we get the data from session state: the dict of collected metadata.
    # first, lets do an info display (popup)
    metadata_str = json.dumps(st.session_state.full_data)

    st.toast(f"Uploading observation: {metadata_str}", icon="🦭")
    if tab_log is None:
        tab_log = st.session_state.tab_log
    if tab_log is not None:
        tab_log.info(f"Uploading observation: {metadata_str}")

    # get huggingface api
    api = HfApi()

    # delete=False so upload_file below can re-open the file by name;
    # we remove it explicitly afterwards (fix: it used to leak).
    f = tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False)
    try:
        f.write(metadata_str)
        f.close()
        st.info(f"temp file: {f.name} with metadata written...")

        path_in_repo = f"metadata/{st.session_state.full_data['author_email']}/{st.session_state.full_data['image_md5']}.json"
        msg = f"fname: {f.name} | path: {path_in_repo}"
        print(msg)
        st.warning(msg)
        rv = api.upload_file(
            path_or_fileobj=f.name,
            path_in_repo=path_in_repo,
            repo_id=dataset_id,  # consistency: same repo as the module-level constant
            repo_type="dataset",
        )
        print(rv)
        msg = f"data attempted tx to repo happy walrus: {rv}"
        g_logger.info(msg)
        st.info(msg)
    finally:
        os.remove(f.name)

if __name__ == "__main__":

    g_logger.info("App started.")

    #g_logger.debug("debug message")
    #g_logger.info("info message")
    #g_logger.warning("warning message")

    # Streamlit app
    # top-level layout: one tab per major feature.
    # NOTE(review): "Cetecean" in the first label looks like a typo for
    # "Cetacean" — user-facing string, left unchanged here.
    #tab_gallery, tab_inference, tab_hotdogs, tab_map, tab_data, tab_log = st.tabs(["Cetecean classifier", "Hotdog classifier", "Map", "Data", "Log", "Beautiful cetaceans"])
    tab_inference, tab_hotdogs, tab_map, tab_data, tab_log, tab_gallery = st.tabs(["Cetecean classifier", "Hotdog classifier", "Map", "Data", "Log", "Beautiful cetaceans"])
    # keep a handle on the log tab in session state so functions defined
    # outside this scope (e.g. push_observation) can write status to it
    st.session_state.tab_log = tab_log


    # create a sidebar, and parse all the input (returned as `observation` object)
    observation = sw_inp.setup_input(viewcontainer=st.sidebar)

        
    if 0:## WIP -- disabled block, kept for reference
        # goal of this code is to allow the user to override the ML prediction, before transmitting an observation
        predicted_class = st.sidebar.selectbox("Predicted Class", sw_wv.WHALE_CLASSES)
        override_prediction = st.sidebar.checkbox("Override Prediction")

        if override_prediction:
            overridden_class = st.sidebar.selectbox("Override Class", sw_wv.WHALE_CLASSES)
            st.session_state.full_data['class_overriden'] = overridden_class
        else:
            st.session_state.full_data['class_overriden'] = None

    with tab_map:
        # layout: two toggle controls at the top; the map itself carries
        # its own tileset dropdown.
        col_left, col_right = st.columns(2)
        with col_left:
            show_db_points = st.toggle("Show Points from DB", True)
        with col_right:
            dbg_show_extra = st.toggle("Show Extra points (test)", False)

        if not show_db_points:
            # development map (no observations)
            st_data = sw_am.present_alps_map()
        else:
            # nicer map: observations marked, tileset selectable
            st_data = sw_map.present_obs_map(
                dataset_id=dataset_id, data_files=data_files,
                dbg_show_extra=dbg_show_extra)


    with tab_log:
        # show the in-memory log buffer as a table, newest entries first
        log_handler = st.session_state['handler']
        if log_handler is None:
            st.error("⚠️ No log handler found!")
        else:
            records = sw_logs.parse_log_buffer(log_handler.buffer)
            st.dataframe(records[::-1], use_container_width=True,)
            st.info(f"Length of records: {len(records)}")

        
    with tab_data:
        # eventual goal of this tab: let the user pick the new observation's
        # location by clicking / adjusting a point on the map.
        st.markdown("Coming later hope! :construction:")
        st.write("Click on the map to capture a location.")

        liberty_bell = [39.949610, -75.150282]
        mm = folium.Map(location=liberty_bell, zoom_start=16)
        folium.Marker(
            liberty_bell, popup="Liberty Bell", tooltip="Liberty Bell",
        ).add_to(mm)

        st_data2 = st_folium(mm, width=725)
        st.write("below the map...")
        # echo the click payload (debug) when the user has clicked somewhere
        if st_data2['last_clicked'] is not None:
            print(st_data2)
            st.info(st_data2['last_clicked'])

    with tab_gallery:
        # dedicated keyed container so gallery-specific css filtering stays
        # scoped here (avoids side effects on the rest of the page)
        gallery_container = st.container(key="swgallery")
        with gallery_container:
            sw_wg.render_whale_gallery(n_cols=4)


    # display the sidebar data once the user validates it
    if st.sidebar.button("Validate"):
        # merge the sidebar observation into the accumulated session data
        submitted_data = observation.to_dict()
        st.session_state.full_data.update(submitted_data)

        tab_log.info(f"{st.session_state.full_data}")

        # and render the submitted fields as a one-row table in the data tab
        with tab_data:
            st.table(pd.DataFrame(submitted_data, index=[0]))


        
    # inside the inference tab, on button press we call the model (on huggingface hub)
    # which will be run locally.
    # - the model predicts the top 3 most likely species from the input image
    # - these species are shown
    # - the user can override the species prediction using the dropdown 
    # - an observation is uploaded if the user chooses.

    if tab_inference.button("Identify with cetacean classifier"):
        # load the classifier wrapper from the hub (pinned revision for
        # reproducibility); inference runs locally
        cetacean_classifier = AutoModelForImageClassification.from_pretrained(
            "Saving-Willy/cetacean-classifier",
            revision='0f9c15e2db4d64e7f622ade518854b488d8d35e6',
            trust_remote_code=True)

        if st.session_state.image is None:
            # TODO: cleaner design to disable the button until data input done?
            st.info("Please upload an image first.")
        else:
            # run classifier model on `image`, and persistently store the output
            out = cetacean_classifier(st.session_state.image) # get top 3 matches
            st.session_state.whale_prediction1 = out['predictions'][0]
            st.session_state.classify_whale_done = True
            msg = f"[D]2 classify_whale_done: {st.session_state.classify_whale_done}, whale_prediction1: {st.session_state.whale_prediction1}"
            st.info(msg)
            g_logger.info(msg)

            # dropdown for selecting/overriding the species prediction
            if not st.session_state.classify_whale_done:
                # NOTE(review): unreachable here, since the flag was set True
                # just above. Fixed a latent AttributeError regardless: tab
                # objects have no `.sidebar` attribute, so this now targets
                # st.sidebar directly.
                selected_class = st.sidebar.selectbox("Species", sw_wv.WHALE_CLASSES, index=None, placeholder="Species not yet identified...", disabled=True)
            else:
                pred1 = st.session_state.whale_prediction1
                # preselect the model's top prediction if it is a known class
                print(f"[D] pred1: {pred1}")
                ix = sw_wv.WHALE_CLASSES.index(pred1) if pred1 in sw_wv.WHALE_CLASSES else None
                selected_class = tab_inference.selectbox("Species", sw_wv.WHALE_CLASSES, index=ix)

            st.session_state.full_data['predicted_class'] = selected_class
            if selected_class != st.session_state.whale_prediction1:
                # the user picked a species different from the model's top-1
                st.session_state.full_data['class_overriden'] = selected_class

            btn = st.button("Upload observation to THE INTERNET!", on_click=push_observation)
            # TODO: the metadata only fills properly if `validate` was clicked.
            tab_inference.markdown(metadata2md())

            msg = f"[D] full data after inference: {st.session_state.full_data}"
            g_logger.debug(msg)
            print(msg)
            # TODO: add a link to more info on the model, next to the button.

            whale_classes = out['predictions'][:]
            # render images for the top 3 (that is what the model api returns)
            with tab_inference:
                st.markdown("## Species detected")
                for i in range(len(whale_classes)):
                    sw_wv.display_whale(whale_classes, i)


        
    # inside the hotdog tab, on button press we call a 2nd model (totally unrelated at present, just for demo
    # purposes, an hotdog image classifier) which will be run locally.
    # - this model predicts if the image is a hotdog or not, and returns probabilities
    # - the input image is the same as for the ceteacean classifier - defined in the sidebar

    if tab_hotdogs.button("Get Hotdog Prediction"):   

        # fix: bind to a dedicated name instead of rebinding the imported
        # `pipeline` factory function (the original shadowed it)
        hotdog_classifier = pipeline(task="image-classification", model="julien-c/hotdog-not-hotdog")
        tab_hotdogs.title("Hot Dog? Or Not?")

        if st.session_state.image is None:
            st.info("Please upload an image first.")
            st.info(str(observation.to_dict()))
            
        else:
            col1, col2 = tab_hotdogs.columns(2)

            # display the image (use cached version, no need to reread)
            col1.image(st.session_state.image, use_column_width=True)
            # and then run inference on the image
            predictions = hotdog_classifier(st.session_state.image)

            col2.header("Probabilities")
            # record the top (first) prediction into the session data
            first = True
            for p in predictions:
                col2.subheader(f"{ p['label'] }: { round(p['score'] * 100, 1)}%")
                if first:
                    st.session_state.full_data['predicted_class'] = p['label']
                    st.session_state.full_data['predicted_score'] = round(p['score'] * 100, 1)
                    first = False
            
            tab_hotdogs.write(f"Session Data: {json.dumps(st.session_state.full_data)}")