Spaces:

Saving-Willy
/

saving-willy-dev

Sleeping

rmm committed on Jan 25

Commit

5a21040

1 Parent(s): 80b4be6

Revert "feat: first implementation of an FSM to keep track of phase"

This reverts commit 80b4be61b16c215c79aba50f7e39fde2bfc81755.

- I learned what I needed to, but I don't like the FSM implementation,
and I created plenty of mess in main that doesn't need to remain.

--> reverting.

Files changed (4) hide show

requirements.txt +0 -4
src/classifier/classifier_image.py +1 -105
src/main.py +16 -102
src/utils/workflow_state.py +0 -121

requirements.txt CHANGED Viewed

@@ -10,10 +10,6 @@ streamlit_folium==0.23.1
 # backend
 datasets==3.0.2
-# - FSM
-transitions==0.9.2
-# optional, dev for the FSM (diagrams)
-# pyperclip==1.9.0
 # running ML models

 # backend
 datasets==3.0.2
 # running ML models

src/classifier/classifier_image.py CHANGED Viewed

@@ -11,111 +11,7 @@ from hf_push_observations import push_observations
 from utils.grid_maker import gridder
 from utils.metadata_handler import metadata2md
-# need to divide this into two functions, one for the classification and one for the display
-# it is currently somewhat interleaved, not totally clear how to separate them.
-# perhaps we have more stages than I realised.
-# ML started, ML completed, manual review completed, data uploaded
-# for now, let's implement the division between ML classification, and display+manual review.
-def cetacean_classify_list(cetacean_classifier):
-    success = False
-    files = st.session_state.files
-    images = st.session_state.images
-    observations = st.session_state.observations
-    #batch_size, row_size, page = gridder(files)
-    #grid = st.columns(row_size)
-    #col = 0
-    for file in files:
-        key = file.name
-        image = images[key]
-        observation = observations[key].to_dict()
-        # run classifier model on `image`, and persistently store the output
-        out = cetacean_classifier(image) # get top 3 matches
-        st.session_state.whale_prediction1[key] = out['predictions'][0]
-        st.session_state.classify_whale_done[key] = True # TODO 25.01 unclear what this is for;
-        msg = f"[D]2 classify_whale_done: {st.session_state.classify_whale_done[key]}, whale_prediction1: {st.session_state.whale_prediction1[key]}"
-        g_logger.info(msg)
-        observations[key].set_top_predictions(out['predictions'][:])
-        st.session_state.public_observation[key] = observation #
-        msg = f"[D] full observation after inference: {observation}"
-        g_logger.debug(msg)
-        print(msg)
-    # TODO: add some mech to test if it was successful.
-    success = True
-    st.balloons()
-    return success
-def cetacean_show_classifications():
-    st.write("TOP TEXT")
-    st.write("Reviewing the classifications :construction:")
-    files = st.session_state.files
-    images = st.session_state.images
-    observations = st.session_state.observations
-    batch_size, row_size, page = gridder(files)
-    grid = st.columns(row_size)
-    col = 0
-    for file in files:
-        key = file.name
-        image = images[key]
-        with grid[col]:
-            st.image(image, use_column_width=True)
-            observation = observations[key].to_dict()
-            # fetch the classification results
-            # run classifier model on `image`, and persistently store the output
-            msg = f"[D]2b classify_whale_done ({file}): {st.session_state.classify_whale_done[key]}, whale_prediction1: {st.session_state.whale_prediction1[key]}"
-            g_logger.info(msg)
-            # dropdown for selecting/overriding the species prediction
-            # TODO: the "it's done" flag seems to get reset when we re-load the tab. Not quite right.
-            if not st.session_state.classify_whale_done[key]:
-                #selected_class = st.sidebar.selectbox("Species", viewer.WHALE_CLASSES,
-                # TODO: ask LV why it is in the sidebar, and not in the grid
-                selected_class = st.selectbox("Species", viewer.WHALE_CLASSES,
-                                            index=None, placeholder="Species not yet identified...",
-                                            disabled=True, key=f"cldd_{key}")
-            else:
-                pred1 = st.session_state.whale_prediction1[key]
-                # get index of pred1 from WHALE_CLASSES, none if not present
-                print(f"[D] pred1: {pred1}")
-                ix = viewer.WHALE_CLASSES.index(pred1) if pred1 in viewer.WHALE_CLASSES else None
-                selected_class = st.selectbox(f"Species for {file.name}", viewer.WHALE_CLASSES, index=ix)
-            observation['predicted_class'] = selected_class
-            if selected_class != st.session_state.whale_prediction1[key]:
-                observation['class_overriden'] = selected_class
-            st.session_state.public_observation = observation
-            st.button(f"Upload observation for {file.name} to THE INTERNET!", on_click=push_observations)
-            # TODO: the metadata only fills properly if `validate` was clicked.
-            st.markdown(metadata2md())
-            msg = f"[D] full observation after inference: {observation}"
-            g_logger.debug(msg)
-            print(msg)
-            # TODO: add a link to more info on the model, next to the button.
-            whale_classes = observations[key].top_predictions
-            # render images for the top 3 (that is what the model api returns)
-            n = len(whale_classes)
-            st.markdown(f"Top {n} Predictions for {file.name}")
-            for i in range(n):
-                viewer.display_whale(whale_classes, i)
-        col = (col + 1) % row_size
-    return True
-def cetacean_classify_and_review(cetacean_classifier):
     files = st.session_state.files
     images = st.session_state.images
     observations = st.session_state.observations

 from utils.grid_maker import gridder
 from utils.metadata_handler import metadata2md
+def cetacean_classify(cetacean_classifier):
     files = st.session_state.files
     images = st.session_state.images
     observations = st.session_state.observations

src/main.py CHANGED Viewed

@@ -17,11 +17,8 @@ import whale_viewer as viewer
 from input.input_handling import setup_input
 from maps.alps_map import present_alps_map
 from maps.obs_map import present_obs_map
-from maps.obs_map import add_header_text as add_header_text_obs_map
 from utils.st_logs import setup_logging, parse_log_buffer
-from utils.workflow_state import WorkflowFSM, WorkflowState
-from classifier.classifier_image import cetacean_classify_and_review, cetacean_classify_list, cetacean_show_classifications
 from classifier.classifier_hotdog import hotdog_classify
@@ -60,31 +57,14 @@ if "public_observation" not in st.session_state:
     st.session_state.public_observation = {}
 if "classify_whale_done" not in st.session_state:
-    st.session_state.classify_whale_done = {}
 if "whale_prediction1" not in st.session_state:
-    st.session_state.whale_prediction1 = {}
 if "tab_log" not in st.session_state:
     st.session_state.tab_log = None
-if "workflow_fsm" not in st.session_state:
-    # create and init the state machine
-    st.session_state.workflow_fsm = WorkflowFSM()
-# add progress indicator to session_state
-if "progress" not in st.session_state:
-    with st.sidebar:
-        st.session_state.disp_progress = [st.empty(), st.empty()]
-def refresh_progress():
-    with st.sidebar:
-        tot = st.session_state.workflow_fsm.num_states
-        cur_i = st.session_state.workflow_fsm.state_number
-        cur_t = st.session_state.workflow_fsm.state_name
-        st.session_state.disp_progress[0].markdown(f"*Progress: {cur_i}/{tot}. Current: {cur_t}.*")
-        st.session_state.disp_progress[1].progress(cur_i/tot)
 def main() -> None:
     """
@@ -118,15 +98,11 @@ def main() -> None:
         st.tabs(["Cetecean classifier", "Hotdog classifier", "Map", "*:gray[Dev:coordinates]*", "Log", "Beautiful cetaceans"])
     st.session_state.tab_log = tab_log
-    refresh_progress()
-    # add button to sidebar, with the callback to refesh_progress
-    st.sidebar.button("Refresh Progress", on_click=refresh_progress)
     # create a sidebar, and parse all the input (returned as `observations` object)
     observations = setup_input(viewcontainer=st.sidebar)
     if 0:## WIP
         # goal of this code is to allow the user to override the ML prediction, before transmitting an observations
         predicted_class = st.sidebar.selectbox("Predicted Class", viewer.WHALE_CLASSES)
@@ -142,7 +118,7 @@ def main() -> None:
     with tab_map:
         # visual structure: a couple of toggles at the top, then the map inlcuding a
         # dropdown for tileset selection.
-        add_header_text_obs_map()
         tab_map_ui_cols = st.columns(2)
         with tab_map_ui_cols[0]:
             show_db_points = st.toggle("Show Points from DB", True)
@@ -202,14 +178,9 @@ def main() -> None:
     # Display submitted observation
     if st.sidebar.button("Validate"):
-        # TODO 25.01 - it seems unclear what validation is actually happening *after* the button click.
         # create a dictionary with the submitted observation
         submitted_data = observations
         st.session_state.observations = observations
-        # advance two steps, since the code for enabling the validate button is in a different branch right now
-        st.session_state.workflow_fsm.advance() # init => data_entry_complete
-        st.session_state.workflow_fsm.advance() # data_entry_complete => data_entry_validated
         tab_log.info(f"{st.session_state.observations}")
@@ -231,74 +202,20 @@ def main() -> None:
                 Once inference is complete, the top three predictions are shown.
                 You can override the prediction by selecting a species from the dropdown.*""")
-    with tab_inference:
-        # test if the fsm is already at a point where results should be presented
-        cur_state_i = st.session_state.workflow_fsm.state_number
-        # here, if past manual inspection, we show the results
-        # elif past ml_completed, we show the results and the choice to manually validate
-        # else, we run the classifier (and show the results)
-        plan = "?"
-        if cur_state_i >= WorkflowState.MANUAL_REVIEW_COMPLETE.value:
-            plan = "show results"
-        elif cur_state_i >= WorkflowState.ML_COMPLETED.value:
-            plan = "present manual validation (with results shown)"
-        elif cur_state_i >= WorkflowState.DATA_VALIDATED.value:
-            plan = "run classifier"
-        st.info(f"Current state: {cur_state_i} [{WorkflowState.ML_COMPLETED.value}]. Plan: {plan}")
-        if plan == 'run classifier':
-            if tab_inference.button("Identify with cetacean classifier"):
-                #pipe = pipeline("image-classification", model="Saving-Willy/cetacean-classifier", trust_remote_code=True)
-                cetacean_classifier = AutoModelForImageClassification.from_pretrained("Saving-Willy/cetacean-classifier",
-                                                                                    revision=classifier_revision,
-                                                                                    trust_remote_code=True)
-                r = cetacean_classify_list(cetacean_classifier)
-                if r:
-                    st.session_state.workflow_fsm.advance() # data_entry_validated => ml_classification_started
-                    refresh_progress()
-                #cetacean_classify_and_review(cetacean_classifier)
-                # now, we can trigger the next state, which is the manual review of the classifications
-                st.write(f"megatextc {cur_state_i}")
-                r = cetacean_show_classifications()
-                if r:
-                    st.session_state.workflow_fsm.advance() # ml_classification_completed => manual_inspection_completed
-                    refresh_progress()
-        elif plan == 'present manual validation (with results shown)':
-            # show the results and the choice to manually validate
-                st.write(f"megatexta {cur_state_i}")
-                r = cetacean_show_classifications()
-                if r:
-                    st.session_state.workflow_fsm.advance() # ml_classification_completed => manual_inspection_completed
-        elif plan == 'show results':
-            r = cetacean_show_classifications()
-            # just showing it, no advance.
-        if 0:
-            if cur_state_i >= WorkflowState.ML_COMPLETED.value:
-                # ML DONE, let's show it
-                with tab_inference:
-                    st.write(f"megatexta {cur_state_i}")
-                    r = cetacean_show_classifications()
-                    if r:
-                        st.session_state.workflow_fsm.advance() # ml_classification_completed => manual_inspection_completed
-            else:
-                with tab_inference:
-                    st.write(f"megatextb {cur_state_i}")
-                # st.session_state.workflow_fsm.advance() # init => data_entry_complete
-                if st.session_state.images is None: # TODO: with FSM we check a state, not just images.
-                    # TODO: cleaner design to disable the button until data input done?
-                    st.info("Please upload an image first.")
-                else:
-                    pass
     # inside the hotdog tab, on button press we call a 2nd model (totally unrelated at present, just for demo
@@ -323,9 +240,6 @@ def main() -> None:
             hotdog_classify(pipeline_hot_dog, tab_hotdogs)
-    # after all other processing, we can show the stage/state
-    refresh_progress()
 if __name__ == "__main__":
     main()

 from input.input_handling import setup_input
 from maps.alps_map import present_alps_map
 from maps.obs_map import present_obs_map
 from utils.st_logs import setup_logging, parse_log_buffer
+from classifier.classifier_image import cetacean_classify
 from classifier.classifier_hotdog import hotdog_classify
     st.session_state.public_observation = {}
 if "classify_whale_done" not in st.session_state:
+    st.session_state.classify_whale_done = False
 if "whale_prediction1" not in st.session_state:
+    st.session_state.whale_prediction1 = None
 if "tab_log" not in st.session_state:
     st.session_state.tab_log = None
 def main() -> None:
     """
         st.tabs(["Cetecean classifier", "Hotdog classifier", "Map", "*:gray[Dev:coordinates]*", "Log", "Beautiful cetaceans"])
     st.session_state.tab_log = tab_log
     # create a sidebar, and parse all the input (returned as `observations` object)
     observations = setup_input(viewcontainer=st.sidebar)
     if 0:## WIP
         # goal of this code is to allow the user to override the ML prediction, before transmitting an observations
         predicted_class = st.sidebar.selectbox("Predicted Class", viewer.WHALE_CLASSES)
     with tab_map:
         # visual structure: a couple of toggles at the top, then the map inlcuding a
         # dropdown for tileset selection.
+        sw_map.add_header_text()
         tab_map_ui_cols = st.columns(2)
         with tab_map_ui_cols[0]:
             show_db_points = st.toggle("Show Points from DB", True)
     # Display submitted observation
     if st.sidebar.button("Validate"):
         # create a dictionary with the submitted observation
         submitted_data = observations
         st.session_state.observations = observations
         tab_log.info(f"{st.session_state.observations}")
                 Once inference is complete, the top three predictions are shown.
                 You can override the prediction by selecting a species from the dropdown.*""")
+    if tab_inference.button("Identify with cetacean classifier"):
+        #pipe = pipeline("image-classification", model="Saving-Willy/cetacean-classifier", trust_remote_code=True)
+        cetacean_classifier = AutoModelForImageClassification.from_pretrained("Saving-Willy/cetacean-classifier",
+                                                                            revision=classifier_revision,
+                                                                            trust_remote_code=True)
+        if st.session_state.images is None:
+            # TODO: cleaner design to disable the button until data input done?
+            st.info("Please upload an image first.")
+        else:
+            cetacean_classify(cetacean_classifier)
     # inside the hotdog tab, on button press we call a 2nd model (totally unrelated at present, just for demo
             hotdog_classify(pipeline_hot_dog, tab_hotdogs)
 if __name__ == "__main__":
     main()

src/utils/workflow_state.py DELETED Viewed

@@ -1,121 +0,0 @@
-from transitions import Machine
-from enum import Enum
-OKBLUE = '\033[94m'
-OKGREEN = '\033[92m'
-OKCYAN = '\033[96m'
-FAIL = '\033[91m'
-ENDC = '\033[0m'
-# define the states
-# 0. init
-# 1. data entry complete
-# 2. data entry validated
-# 3. ML classification started (can be long running on batch)
-# 4. ML classification completed
-# 5. manual inspection / adjustment of classification completed
-# 6. data uploaded
-states = ['init', 'data_entry_complete', 'data_entry_validated', 'ml_classification_started', 'ml_classification_completed', 'manual_inspection_completed', 'data_uploaded']
-# define an enum for the states, automatically giving integers according to the position in the list
-# - this is useful for the transitions
-# maybe this needs to use setattr or similar
-workflow_phases = Enum('StateEnum', {state: i for i, state in enumerate(states)})
-class WorkflowState(Enum):
-    INIT = 0
-    DATA_ENTRY_COMPLETE = 1
-    DATA_VALIDATED = 2
-    #ML_STARTED = 3
-    ML_COMPLETED = 3
-    MANUAL_REVIEW_COMPLETE = 4
-    UPLOADED = 5
-# TODO: refactor the FSM to have explicit named states, and write a helper function to determine the next state and advance to it.
-# this allows either triggering by name, or being a bit lazy and saying "advance" and it will go to the next state..
-# maybe a cleaner way is to say completed('X') and then whatever the next state from X is can be taken. Instead of knowing
-# what the next state is (becausee that was supposed to be defined her in the specification, and not in each phase)
-#
-# also add a "did we pass stage X" function, by name. This will make it easy to choose what to present, what actions to do next etc.
-class WorkflowFSM:
-    def __init__(self):
-        # Define states as strings (transitions requirement)
-        self.states = [state.name for state in WorkflowState]
-        # TODO: what is the point of the enum? I can just take the list and do an enumerate on it.??
-        # Create state machine
-        self.machine = Machine(
-            model=self,
-            states=self.states,
-            initial=WorkflowState.INIT.name,
-        )
-        # Add transitions for each state to the next state
-        for i in range(len(self.states) - 1):
-            self.machine.add_transition(
-                trigger='advance',
-                source=self.states[i],
-                dest=self.states[i + 1]
-            )
-        # Add reset transition
-        self.machine.add_transition(
-            trigger='reset',
-            source='*',
-            dest=WorkflowState.INIT.name
-        )
-        # Add callbacks for logging
-        self.machine.before_state_change = self._log_transition
-        self.machine.after_state_change = self._post_transition
-    def _cprint(self, msg:str, color:str=OKCYAN):
-        """Print colored message"""
-        print(f"{color}{msg}{ENDC}")
-    def _advance_state(self):
-        """Determine the next state based on current state"""
-        current_idx = self.states.index(self.state)
-        if current_idx < len(self.states) - 1:
-            return self.states[current_idx + 1]
-        return self.state  # Stay in final state if already there
-    def _log_transition(self):
-        # TODO: use logger, not printing.
-        self._cprint(f"[FSM] -> Transitioning from {self.state}")
-    def _post_transition(self):
-        # TODO: use logger, not printing.
-        self._cprint(f"[FSM] -| Transitioned to {self.state}")
-    def advance(self):
-        if self.state_number < len(self.states) - 1:
-            self.trigger('advance')
-        else:
-            # maybe too aggressive to exception here?
-            raise RuntimeError("Already at final state")
-    @property
-    def state_number(self) -> int:
-        """Get the numerical value of current state"""
-        return self.states.index(self.state)
-    @property
-    def state_name(self) -> str:
-        """Get the name of current state"""
-        return self.state
-    # add a property for the number of states
-    @property
-    def num_states(self) -> int:
-        return len(self.states)