import logging
import os
import pandas as pd
import streamlit as st
import folium
from streamlit_folium import st_folium
from transformers import pipeline, AutoModelForImageClassification
from maps.obs_map import add_header_text as add_obs_map_header
from classifier.classifier_image import add_header_text as add_classifier_header
from datasets import disable_caching
disable_caching()
import whale_gallery as gallery
import whale_viewer as viewer
from input.input_handling import setup_input, check_inputs_are_set
from maps.alps_map import present_alps_map
from maps.obs_map import present_obs_map
from utils.st_logs import setup_logging, parse_log_buffer
from utils.workflow_state import WorkflowFSM, FSM_STATES
from classifier.classifier_image import cetacean_just_classify, cetacean_show_results_and_review, cetacean_show_results
from classifier.classifier_hotdog import hotdog_classify
# setup for the ML model on huggingface (our wrapper)
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
#classifier_revision = '0f9c15e2db4d64e7f622ade518854b488d8d35e6'
classifier_revision = 'main' # default/latest version
# and the dataset of observations (hf dataset in our space)
dataset_id = "Saving-Willy/temp_dataset"
data_files = "data/train-00000-of-00001.parquet"
USE_BASIC_MAP = False
DEV_SIDEBAR_LIB = True
# set up a module-global logger
LOG_LEVEL = logging.DEBUG
g_logger = logging.getLogger(__name__)
g_logger.setLevel(LOG_LEVEL)
st.set_page_config(layout="wide")
# initialise various session state variables
if "handler" not in st.session_state:
st.session_state['handler'] = setup_logging()
if "image_hashes" not in st.session_state:
st.session_state.image_hashes = []
# TODO: ideally just use image_hashes, but need a unique key for the ui elements
# to track the user input phase; and these are created before the hash is generated.
if "image_filenames" not in st.session_state:
st.session_state.image_filenames = []
if "observations" not in st.session_state:
st.session_state.observations = {}
if "images" not in st.session_state:
st.session_state.images = {}
if "files" not in st.session_state:
st.session_state.files = {}
if "public_observation" not in st.session_state:
st.session_state.public_observation = {}
if "classify_whale_done" not in st.session_state:
st.session_state.classify_whale_done = {}
if "whale_prediction1" not in st.session_state:
st.session_state.whale_prediction1 = {}
if "tab_log" not in st.session_state:
st.session_state.tab_log = None
if "workflow_fsm" not in st.session_state:
# create and init the state machine
st.session_state.workflow_fsm = WorkflowFSM(FSM_STATES)
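    # the interface this module assumes of WorkflowFSM (see utils.workflow_state
    # for the real API): num_states, current_state_index, current_state,
    # is_in_state(name), is_in_state_or_beyond(name), complete_current_state()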
if "container_per_file_input_elems" not in st.session_state:
st.session_state.container_per_file_input_elems = None
if "container_file_uploader" not in st.session_state:
st.session_state.container_file_uploader = None
if "container_metadata_inputs" not in st.session_state:
st.session_state.container_metadata_inputs = None
def refresh_progress():
with st.sidebar:
tot = st.session_state.workflow_fsm.num_states - 1
cur_i = st.session_state.workflow_fsm.current_state_index
cur_t = st.session_state.workflow_fsm.current_state
st.session_state.disp_progress[0].markdown(f"*Progress: {cur_i}/{tot}. Current: {cur_t}.*")
st.session_state.disp_progress[1].progress(cur_i/tot)
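# usage note: current_state_index presumably runs 0..num_states-1, so cur_i/tot
# reaches 1.0 in the terminal state (assumes num_states > 1, otherwise the
# division above raises ZeroDivisionError).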
# add the progress indicator placeholders to session_state.
# NB: the guard key "progress" is never actually set, so this block runs on
# every rerun and the placeholders are recreated each time - element handles
# do not survive Streamlit reruns, so that appears to be the behaviour we want.
if "progress" not in st.session_state:
    with st.sidebar:
        st.session_state.disp_progress = [st.empty(), st.empty()]
# add button to sidebar, with the callback to refresh_progress
st.sidebar.button("Refresh Progress", on_click=refresh_progress)
def dbg_show_obs_hashes():
    # debug helper: we seem to be losing the whale classes somewhere?
    st.write(f"[D] num observations: {len(st.session_state.observations)}")
    s = ""
    for obs_hash in st.session_state.observations.keys():
        obs = st.session_state.observations[obs_hash]
        s += f"- [D] observation {obs_hash} ({obs._inst_id}) has {len(obs.top_predictions)} predictions\n"
#st.markdown(f"- [D] observation {hash} has {len(st.session_state.observations[hash].top_predictions)} predictions")
st.markdown(s)
def main() -> None:
"""
Main entry point to set up the streamlit UI and run the application.
    The organisation is as follows:
    1. observation input (a new observation) is handled in the sidebar
    2. the rest of the interface is organised in tabs:
       - cetacean classifier
       - hotdog classifier
       - map to present the observations
       - table of recent log entries
       - gallery of whale images
    Most of the tabs are instantiated from modules; currently the two
    classifiers are still in-line here.
"""
g_logger.info("App started.")
g_logger.warning(f"[D] Streamlit version: {st.__version__}. Python version: {os.sys.version}")
#g_logger.debug("debug message")
#g_logger.info("info message")
#g_logger.warning("warning message")
# Streamlit app
tab_inference, tab_hotdogs, tab_map, tab_coords, tab_log, tab_gallery = \
st.tabs(["Cetecean classifier", "Hotdog classifier", "Map", "*:gray[Dev:coordinates]*", "Log", "Beautiful cetaceans"])
st.session_state.tab_log = tab_log
# put this early so the progress indicator is at the top (also refreshed at end)
refresh_progress()
# create a sidebar, and parse all the input (returned as `observations` object)
with st.sidebar:
st.divider()
st.markdown('<style>.st-key-container_file_uploader_id { border: 1px solid skyblue; border-radius: 5px; }</style>', unsafe_allow_html=True)
container_file_uploader = st.container(border=True, key="container_file_uploader_id")
st.session_state.container_file_uploader = container_file_uploader
st.markdown('<style>.st-key-container_metadata_inputs_id { border: 1px solid lightgreen; border-radius: 5px; }</style>', unsafe_allow_html=True)
container_metadata_inputs = st.container(border=True, key="container_metadata_inputs_id")
container_metadata_inputs.write("Metadata Inputs... wait for file upload ")
st.session_state.container_metadata_inputs = container_metadata_inputs
setup_input(viewcontainer=st.sidebar)
    if 0: ## WIP
        # goal of this code is to allow the user to override the ML prediction, before transmitting an observation
predicted_class = st.sidebar.selectbox("Predicted Class", viewer.WHALE_CLASSES)
override_prediction = st.sidebar.checkbox("Override Prediction")
if override_prediction:
overridden_class = st.sidebar.selectbox("Override Class", viewer.WHALE_CLASSES)
            st.session_state.observations['class_overridden'] = overridden_class
else:
            st.session_state.observations['class_overridden'] = None
with tab_map:
        # visual structure: a couple of toggles at the top, then the map, including a
        # dropdown for tileset selection.
add_obs_map_header()
tab_map_ui_cols = st.columns(2)
with tab_map_ui_cols[0]:
show_db_points = st.toggle("Show Points from DB", True)
with tab_map_ui_cols[1]:
dbg_show_extra = st.toggle("Show Extra points (test)", False)
if show_db_points:
# show a nicer map, observations marked, tileset selectable.
st_observation = present_obs_map(
dataset_id=dataset_id, data_files=data_files,
dbg_show_extra=dbg_show_extra)
else:
# development map.
st_observation = present_alps_map()
with tab_log:
handler = st.session_state['handler']
if handler is not None:
records = parse_log_buffer(handler.buffer)
st.dataframe(records[::-1], use_container_width=True,)
st.info(f"Length of records: {len(records)}")
else:
st.error("⚠️ No log handler found!")
with tab_coords:
        # the goal of this tab is to allow selection of the new observation's location by map click/adjust.
st.markdown("Coming later! :construction:")
        st.markdown(
            """*The goal is to allow interactive definition of the coordinates of a new
            observation, by clicking/dragging points on the map.*""")
st.write("Click on the map to capture a location.")
#m = folium.Map(location=visp_loc, zoom_start=7)
mm = folium.Map(location=[39.949610, -75.150282], zoom_start=16)
folium.Marker( [39.949610, -75.150282], popup="Liberty Bell", tooltip="Liberty Bell"
).add_to(mm)
st_data2 = st_folium(mm, width=725)
st.write("below the map...")
if st_data2['last_clicked'] is not None:
print(st_data2)
st.info(st_data2['last_clicked'])
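        # note: st_folium returns a dict of map-interaction state; 'last_clicked'
        # is None until the user clicks, then (in current streamlit-folium
        # releases) a mapping like {'lat': 39.9496, 'lng': -75.1503}.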
with tab_gallery:
# here we make a container to allow filtering css properties
# specific to the gallery (otherwise we get side effects)
tg_cont = st.container(key="swgallery")
with tg_cont:
gallery.render_whale_gallery(n_cols=4)
# state handling re data_entry phases
# 0. no data entered yet -> display the file uploader thing
# 1. we have some images, but not all the metadata fields are done -> validate button shown, disabled
# 2. all data entered -> validate button enabled
# 3. validation button pressed, validation done -> enable the inference button.
# - at this point do we also want to disable changes to the metadata selectors?
# anyway, simple first.
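    # for reference, the FSM state names used below (the canonical list is
    # FSM_STATES in utils.workflow_state; order here is inferred from this file):
    #   doing_data_entry -> data_entry_complete -> data_entry_validated ->
    #   ml_classification_completed -> manual_inspection_completed -> data_uploaded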
if st.session_state.workflow_fsm.is_in_state('doing_data_entry'):
# can we advance state? - only when all inputs are set for all uploaded files
all_inputs_set = check_inputs_are_set(debug=True, empty_ok=False)
if all_inputs_set:
st.session_state.workflow_fsm.complete_current_state()
# -> data_entry_complete
else:
# button, disabled; no state change yet.
st.sidebar.button(":gray[*Validate*]", disabled=True, help="Please fill in all fields.")
if st.session_state.workflow_fsm.is_in_state('data_entry_complete'):
# can we advance state? - only when the validate button is pressed
if st.sidebar.button(":white_check_mark:[**Validate**]"):
# create a dictionary with the submitted observation
tab_log.info(f"{st.session_state.observations}")
df = pd.DataFrame(st.session_state.observations, index=[0])
with tab_coords:
st.table(df)
            # TODO: there doesn't seem to be any actual validation here - find the validator
            # function. (each element is validated by its input box, but is there anything
            # at the whole-image level?)
# hmm, maybe it should actually just be "I'm done with data entry"
st.session_state.workflow_fsm.complete_current_state()
# -> data_entry_validated
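    # a sketch of the kind of whole-observation check the TODO above asks for
    # (hypothetical - the field names are assumptions, not the real observation API):
    #
    #   def observation_is_complete(obs) -> bool:
    #       required = ("latitude", "longitude", "date", "author_email")
    #       return all(getattr(obs, f, None) not in (None, "") for f in required)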
# state handling re inference phases (tab_inference)
# 3. validation button pressed, validation done -> enable the inference button.
# 4. inference button pressed -> ML started. | let's cut this one out, since it would only
# make sense if we did it as an async action
# 5. ML done -> show results, and manual validation options
# 6. manual validation done -> enable the upload buttons
#
with tab_inference:
dbg_show_obs_hashes()
add_classifier_header()
# if we are before data_entry_validated, show the button, disabled.
if not st.session_state.workflow_fsm.is_in_state_or_beyond('data_entry_validated'):
tab_inference.button(":gray[*Identify with cetacean classifier*]", disabled=True,
help="Please validate inputs before proceeding",
key="button_infer_ceteans")
if st.session_state.workflow_fsm.is_in_state('data_entry_validated'):
# show the button, enabled. If pressed, we start the ML model (And advance state)
if tab_inference.button("Identify with cetacean classifier"):
cetacean_classifier = AutoModelForImageClassification.from_pretrained(
"Saving-Willy/cetacean-classifier",
revision=classifier_revision,
trust_remote_code=True)
cetacean_just_classify(cetacean_classifier)
st.session_state.workflow_fsm.complete_current_state()
                # trigger a refresh too (refreshing the progress indicator means the script
                # reruns and we can enter the next state - visualising the results / review).
                # NB: that doesn't happen when done programmatically; maybe interacting with the button does? check docs.
refresh_progress()
                #TODO: validate this doesn't adversely affect performance.
st.rerun()
elif st.session_state.workflow_fsm.is_in_state('ml_classification_completed'):
# show the results, and allow manual validation
s = ""
for k, v in st.session_state.whale_prediction1.items():
s += f"* Image {k}: {v}\n"
st.markdown("""
### Inference Results and manual validation/adjustment
:construction: for now we just show the num images processed.
""")
st.markdown(s)
# add a button to advance the state
if st.button("mock: manual validation done."):
st.session_state.workflow_fsm.complete_current_state()
# -> manual_inspection_completed
cetacean_show_results_and_review()
elif st.session_state.workflow_fsm.is_in_state('manual_inspection_completed'):
# show the ML results, and allow the user to upload the observation
st.markdown("""
### Inference Results (after manual validation)
:construction: for now we just show the button.
""")
if st.button("(nooop) Upload observation to THE INTERNET!"):
st.session_state.workflow_fsm.complete_current_state()
# -> data_uploaded
cetacean_show_results()
elif st.session_state.workflow_fsm.is_in_state('data_uploaded'):
            # the data has been sent. Let's show the observations again,
# but no buttons to upload (or greyed out ok)
st.markdown("""
### Observation(s) uploaded
:construction: for now we just show the observations.
""")
df = pd.DataFrame(st.session_state.observations, index=[0])
st.table(df)
# didn't decide what the next state is here - I think we are in the terminal state.
#st.session_state.workflow_fsm.complete_current_state()
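    # note: the cetacean classifier is re-fetched via from_pretrained on every
    # button press above. A minimal sketch of avoiding the reload with
    # Streamlit's resource cache (assuming the default cache policy suits this model):
    #
    #   @st.cache_resource
    #   def load_cetacean_classifier(revision: str):
    #       return AutoModelForImageClassification.from_pretrained(
    #           "Saving-Willy/cetacean-classifier",
    #           revision=revision, trust_remote_code=True)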
# inside the inference tab, on button press we call the model (on huggingface hub)
# which will be run locally.
# - the model predicts the top 3 most likely species from the input image
# - these species are shown
# - the user can override the species prediction using the dropdown
# - an observation is uploaded if the user chooses.
    # inside the hotdog tab, on button press we call a 2nd model (totally unrelated
    # at present, just for demo purposes: a hotdog image classifier) which will be run locally.
    # - this model predicts if the image is a hotdog or not, and returns probabilities
    # - the input image is the same as for the cetacean classifier - defined in the sidebar
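    # the image-classification pipeline returns, per image, a list of
    # {'label': str, 'score': float} dicts, e.g.
    #   [{'label': 'hot dog', 'score': 0.93}, {'label': 'not hot dog', 'score': 0.07}]
    # (the exact label strings are assumptions); hotdog_classify consumes that structure.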
tab_hotdogs.title("Hot Dog? Or Not?")
tab_hotdogs.write("""
*Run alternative classifer on input images. Here we are using
a binary classifier - hotdog or not - from
huggingface.co/julien-c/hotdog-not-hotdog.*""")
if tab_hotdogs.button("Get Hotdog Prediction"):
pipeline_hot_dog = pipeline(task="image-classification", model="julien-c/hotdog-not-hotdog")
        if not st.session_state.images:  # the images dict is empty until files are uploaded
st.info("Please upload an image first.")
#st.info(str(observations.to_dict()))
else:
hotdog_classify(pipeline_hot_dog, tab_hotdogs)
# after all other processing, we can show the stage/state
refresh_progress()
if __name__ == "__main__":
main()