# NOTE: page-banner residue from the hosting site ("Spaces: Sleeping"); not part of the program.
import logging | |
import os | |
import pandas as pd | |
import streamlit as st | |
import folium | |
from streamlit_folium import st_folium | |
from transformers import pipeline | |
from transformers import AutoModelForImageClassification | |
from maps.obs_map import add_obs_map_header | |
from classifier.classifier_image import add_classifier_header | |
from datasets import disable_caching | |
disable_caching() | |
import whale_gallery as gallery | |
import whale_viewer as viewer | |
from input.input_handling import setup_input, check_inputs_are_set | |
from input.input_handling import init_input_container_states, add_input_UI_elements, init_input_data_session_states | |
from input.input_handling import dbg_show_observation_hashes | |
from maps.alps_map import present_alps_map | |
from maps.obs_map import present_obs_map | |
from utils.st_logs import parse_log_buffer, init_logging_session_states | |
from utils.workflow_ui import refresh_progress_display, init_workflow_viz, init_workflow_session_states | |
from hf_push_observations import push_all_observations | |
from classifier.classifier_image import cetacean_just_classify, cetacean_show_results_and_review, cetacean_show_results, init_classifier_session_states | |
from classifier.classifier_hotdog import hotdog_classify | |
# --- Module-level configuration and one-time session-state setup -----------
# Everything below runs at import time, i.e. before main() builds the UI.

# Setup for the ML model on Hugging Face (our wrapper).
# NOTE(review): presumably forces the pure-Python protobuf backend, which the
# model wrapper needs -- confirm the exact reason before removing.
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
# Pin to a specific model commit by swapping in the hash below.
#classifier_revision = '0f9c15e2db4d64e7f622ade518854b488d8d35e6'
classifier_revision = 'main' # default/latest version
# The dataset of observations (an HF dataset in our space); both values are
# passed to present_obs_map() in main().
dataset_id = "Saving-Willy/temp_dataset"
data_files = "data/train-00000-of-00001.parquet"

# NOTE(review): neither flag is referenced in the visible part of this file.
USE_BASIC_MAP = False
DEV_SIDEBAR_LIB = True

# One toggle for all the extra debug text; only set when absent so an
# existing value in the session state is preserved.
if "MODE_DEV_STATEFUL" not in st.session_state:
    st.session_state.MODE_DEV_STATEFUL = False

# Module-level logger accessor, set to emit everything from DEBUG upwards.
LOG_LEVEL = logging.DEBUG
g_logger = logging.getLogger(__name__)
g_logger.setLevel(LOG_LEVEL)

st.set_page_config(layout="wide")

# Initialise the various session state variables. Order matters: logging
# goes first so the later init calls can already be captured in the log.
init_logging_session_states() # logging init should be early
init_workflow_session_states()
init_input_data_session_states()
init_input_container_states()
init_workflow_viz()
init_classifier_session_states()
def _observations_dataframe() -> pd.DataFrame:
    """Return one table row per observation currently held in the session state."""
    return pd.DataFrame(
        [obs.to_dict() for obs in st.session_state.observations.values()]
    )


def main() -> None:
    """
    Main entry point to set up the streamlit UI and run the application.

    The organisation is as follows:

    1. observation input (a new observation) is handled in the sidebar
    2. the rest of the interface is organised in tabs:

        - cetacean classifier
        - hotdog classifier
        - map to present the observations
        - table of recent log entries
        - gallery of whale images

    The majority of the tabs are instantiated from modules. Currently the two
    classifiers are still in-line here.

    The function reads and advances ``st.session_state.workflow_fsm`` and
    returns nothing; all output is rendered through streamlit.
    """
    # Local import: `os.sys` only works by accident of `os` importing `sys`.
    import sys

    g_logger.info("App started.")
    g_logger.warning(
        "[D] Streamlit version: %s. Python version: %s",
        st.__version__, sys.version)

    # Streamlit app
    tab_inference, tab_hotdogs, tab_map, tab_coords, tab_log, tab_gallery = \
        st.tabs(["Cetecean classifier", "Hotdog classifier", "Map", "*:gray[Dev:coordinates]*", "Log", "Beautiful cetaceans"])

    # put this early so the progress indicator is at the top (also refreshed at end)
    refresh_progress_display()

    # create a sidebar, and parse all the input
    with st.sidebar:
        # layout handling
        add_input_UI_elements()
        # input elements (file upload, text input, etc)
        setup_input()

    with tab_map:
        # visual structure: a couple of toggles at the top, then the map including a
        # dropdown for tileset selection.
        add_obs_map_header()
        tab_map_ui_cols = st.columns(2)
        with tab_map_ui_cols[0]:
            show_db_points = st.toggle("Show Points from DB", True)
        with tab_map_ui_cols[1]:
            dbg_show_extra = st.toggle("Show Extra points (test)", False)

        if show_db_points:
            # show a nicer map, observations marked, tileset selectable.
            present_obs_map(
                dataset_id=dataset_id, data_files=data_files,
                dbg_show_extra=dbg_show_extra)
        else:
            # development map.
            present_alps_map()

    with tab_log:
        # .get() so that a missing key reaches the error branch instead of raising.
        handler = st.session_state.get('handler')
        if handler is not None:
            records = parse_log_buffer(handler.buffer)
            # newest entries first
            st.dataframe(records[::-1], use_container_width=True,)
            st.info(f"Length of records: {len(records)}")
        else:
            st.error("⚠️ No log handler found!")

    with tab_coords:
        # the goal of this tab is to allow selection of the new observation's
        # location by map click/adjust.
        st.markdown("Coming later! :construction:")
        st.markdown(
            """*The goal is to allow interactive definition for the coordinates of a new
            observation, by click/drag points on the map.*""")

        st.write("Click on the map to capture a location.")
        # placeholder map with a fixed demo marker
        mm = folium.Map(location=[39.949610, -75.150282], zoom_start=16)
        folium.Marker(
            [39.949610, -75.150282], popup="Liberty Bell", tooltip="Liberty Bell"
        ).add_to(mm)

        st_data2 = st_folium(mm, width=725)
        st.write("below the map...")
        if st_data2['last_clicked'] is not None:
            # goes to the app log rather than stdout
            g_logger.debug("Map click data: %s", st_data2)
            st.info(st_data2['last_clicked'])

    with tab_gallery:
        # here we make a container to allow filtering css properties
        # specific to the gallery (otherwise we get side effects)
        tg_cont = st.container(key="swgallery")
        with tg_cont:
            gallery.render_whale_gallery(n_cols=4)

    # state handling re data_entry phases
    # 0. no data entered yet -> display the file uploader thing
    # 1. we have some images, but not all the metadata fields are done -> validate button shown, disabled
    # 2. all data entered -> validate button enabled
    # 3. validation button pressed, validation done -> enable the inference button.
    #    - at this point do we also want to disable changes to the metadata selectors?
    #      anyway, simple first.
    if st.session_state.workflow_fsm.is_in_state('doing_data_entry'):
        # can we advance state? - only when all inputs are set for all uploaded files
        all_inputs_set = check_inputs_are_set(debug=True, empty_ok=False)
        if all_inputs_set:
            st.session_state.workflow_fsm.complete_current_state()
            # -> data_entry_complete
        else:
            # button, disabled; no state change yet.
            st.sidebar.button(":gray[*Validate*]", disabled=True, help="Please fill in all fields.")

    # Deliberately a second `if`, not `elif`: the state may just have advanced above.
    if st.session_state.workflow_fsm.is_in_state('data_entry_complete'):
        # can we advance state? - only when the validate button is pressed
        if st.sidebar.button(":white_check_mark:[**Validate**]"):
            # record and display the submitted observations
            tab_log.info(f"{st.session_state.observations}")
            df = _observations_dataframe()
            with tab_coords:
                st.table(df)
            # TODO: there is no whole-observation validation here (each element is
            # validated by its input box); maybe this is really just
            # "I'm done with data entry".
            st.session_state.workflow_fsm.complete_current_state()
            # -> data_entry_validated

    # state handling re inference phases (tab_inference)
    # 3. validation button pressed, validation done -> enable the inference button.
    # 4. inference button pressed -> ML started. | let's cut this one out, since it would only
    #    make sense if we did it as an async action
    # 5. ML done -> show results, and manual validation options
    # 6. manual validation done -> enable the upload buttons
    with tab_inference:
        # inside the inference tab, on button press we call the model (on huggingface hub)
        # which will be run locally.
        # - the model predicts the top 3 most likely species from the input image
        # - these species are shown
        # - the user can override the species prediction using the dropdown
        # - an observation is uploaded if the user chooses.
        if st.session_state.MODE_DEV_STATEFUL:
            dbg_show_observation_hashes()

        add_classifier_header()

        # if we are before data_entry_validated, show the button, disabled.
        # (mutually exclusive with the enabled button below, so the shared key is safe)
        if not st.session_state.workflow_fsm.is_in_state_or_beyond('data_entry_validated'):
            tab_inference.button(":gray[*Identify with cetacean classifier*]", disabled=True,
                                 help="Please validate inputs before proceeding",
                                 key="button_infer_ceteans")

        if st.session_state.workflow_fsm.is_in_state('data_entry_validated'):
            # show the button, enabled. If pressed, we start the ML model (and advance state)
            if tab_inference.button("Identify with cetacean classifier",
                                    key="button_infer_ceteans"):
                cetacean_classifier = AutoModelForImageClassification.from_pretrained(
                    "Saving-Willy/cetacean-classifier",
                    revision=classifier_revision,
                    trust_remote_code=True)

                cetacean_just_classify(cetacean_classifier)
                st.session_state.workflow_fsm.complete_current_state()
                # refreshing the progress indicator alone does not rerun the script,
                # so force a rerun to enter the next state (results / review).
                refresh_progress_display()
                # TODO: validate this doesn't harm performance adversely.
                st.rerun()

        elif st.session_state.workflow_fsm.is_in_state('ml_classification_completed'):
            # show the results, and allow manual validation
            st.markdown("""### Inference results and manual validation/adjustment """)
            if st.session_state.MODE_DEV_STATEFUL:
                # one markdown bullet per image prediction
                s = "".join(
                    f"* Image {k}: {v}\n"
                    for k, v in st.session_state.whale_prediction1.items())
                st.markdown(s)

            # add a button to advance the state
            if st.button("Confirm species predictions", help="Confirm that all species are selected correctly"):
                st.session_state.workflow_fsm.complete_current_state()
                # -> manual_inspection_completed
                st.rerun()

            cetacean_show_results_and_review()

        elif st.session_state.workflow_fsm.is_in_state('manual_inspection_completed'):
            # show the ML results, and allow the user to upload the observation
            st.markdown("""### Inference Results (after manual validation) """)

            if st.button("Upload all observations to THE INTERNET!"):
                # let this go through to the push_all func, since it just reports to log for now.
                push_all_observations(enable_push=False)
                st.session_state.workflow_fsm.complete_current_state()
                # -> data_uploaded
                st.rerun()

            cetacean_show_results()

        elif st.session_state.workflow_fsm.is_in_state('data_uploaded'):
            # the data has been sent. Let's show the observations again
            # but no buttons to upload (or greyed out ok)
            st.markdown("""### Observation(s) uploaded - thank you!""")
            cetacean_show_results()

            st.divider()
            st.table(_observations_dataframe())

            # no next state decided yet - this is treated as the terminal state.
            #st.session_state.workflow_fsm.complete_current_state()

    # inside the hotdog tab, on button press we call a 2nd model (totally unrelated at present,
    # just for demo purposes, a hotdog image classifier) which will be run locally.
    # - this model predicts if the image is a hotdog or not, and returns probabilities
    # - the input image is the same as for the cetacean classifier - defined in the sidebar
    tab_hotdogs.title("Hot Dog? Or Not?")
    tab_hotdogs.write("""
        *Run alternative classifer on input images. Here we are using
        a binary classifier - hotdog or not - from
        huggingface.co/julien-c/hotdog-not-hotdog.*""")

    if tab_hotdogs.button("Get Hotdog Prediction"):
        # Check for an image first, so the model is not loaded when there is
        # nothing to classify; a missing key counts as "no image".
        if st.session_state.get("image") is None:
            # render inside the hotdog tab, next to the button that triggered it
            tab_hotdogs.info("Please upload an image first.")
        else:
            pipeline_hot_dog = pipeline(task="image-classification", model="julien-c/hotdog-not-hotdog")
            hotdog_classify(pipeline_hot_dog, tab_hotdogs)

    # after all other processing, we can show the stage/state
    refresh_progress_display()
# Build the UI only when this file is executed as the main script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()