rmm committed
Commit b384db4 · 2 parents: 5a21040 41bbd4a

Merge remote-tracking branch 'origin/dev' into feat/stateful-workflow

docs/classifier_cetacean.md ADDED
@@ -0,0 +1,3 @@
+ This module documents the cetacean fluke and fin classifier.
+
+ ::: src.classifier.cetacean_image
docs/fix_tabrender.md CHANGED
@@ -2,4 +2,4 @@ A js fix for certain UI elements, including maps, getting rendered into a
  zero-sized frame by default. Here we resize it so it is visible once the tab is
  clicked and no further interaction is required to see it.

- ::: src.fix_tabrender
+ ::: src.utils.fix_tabrender
docs/grid_maker.md ADDED
@@ -0,0 +1,3 @@
+ This module creates a grid composed of batches, rows, and pages based on an incoming number of items.
+
+ ::: src.utils.grid_maker
docs/hf_push_observations.md ADDED
@@ -0,0 +1,3 @@
+ This module writes an observation to a temporary JSON file so that it can be added to the Saving-Willy dataset in the Saving-Willy Hugging Face community.
+
+ ::: src.hf_push_observations
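For orientation, a minimal sketch of what this flow can look like. The repo id, file path, and helper name are assumptions for illustration, not the module's actual values; only the `image_md5` field is taken from this commit.

```python
# Sketch only: serialise an observation to a temporary JSON file and push it
# to a Hugging Face dataset repo. repo_id and path_in_repo are hypothetical.
import json
import tempfile

from huggingface_hub import HfApi

def push_observation(observation: dict, repo_id: str = "Saving-Willy/temp_dataset"):
    # write the observation to a temporary JSON file
    with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as f:
        json.dump(observation, f)
        tmp_path = f.name

    # upload into the dataset repo; authentication comes from the cached
    # HF credentials (pass token=... explicitly otherwise)
    HfApi().upload_file(
        path_or_fileobj=tmp_path,
        path_in_repo=f"data/{observation['image_md5']}.json",
        repo_id=repo_id,
        repo_type="dataset",
    )
```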
docs/hotdog.md ADDED
@@ -0,0 +1,3 @@
+ This module documents the "hotdog" classifier. It is purely a placeholder, showing how multiple image (or other data) classifiers can coexist in the same interface.
+
+ ::: src.classifier.classifier_hotdog
docs/input_handling.md CHANGED
@@ -5,4 +5,4 @@ This module focuses on image and metadata entry:
  - a container class for an observation


- ::: src.input_handling
+ ::: src.input.input_handling
docs/input_observation.md ADDED
@@ -0,0 +1,3 @@
+ This module provides the object class representing an observation once all data has been extracted automatically or entered by the user.
+
+ ::: src.input.input_observation
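Elsewhere in this commit, observations are keyed by an `image_md5` field exposed through `to_dict()`. A minimal sketch of that assumed interface follows; field names other than `image_md5` are illustrative, and the real class carries more data.

```python
# Minimal sketch of the assumed InputObservation interface; illustrative only.
import hashlib
from dataclasses import dataclass

@dataclass
class InputObservation:
    image: bytes          # raw image bytes (the app passes an UploadedFile)
    latitude: float
    longitude: float
    author_email: str

    def to_dict(self) -> dict:
        return {
            "latitude": self.latitude,
            "longitude": self.longitude,
            "author_email": self.author_email,
            # md5 of the image bytes, used as a stable key for the observation
            "image_md5": hashlib.md5(self.image).hexdigest(),
        }
```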
docs/input_validator.md ADDED
@@ -0,0 +1,3 @@
+ This module focuses on the extraction and validation of data after data input.
+
+ ::: src.input.input_validator
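The `decimal_coords(coords, ref)` helper touched later in this commit follows the standard EXIF degrees/minutes/seconds conversion. A sketch of that arithmetic: the committed signature annotates `Fraction` (EXIF stores rationals), while this sketch uses plain numbers.

```python
# Standard DMS -> decimal degrees conversion; a sketch, not the committed code.
def decimal_coords(coords: tuple, ref: str) -> float:
    degrees, minutes, seconds = coords
    decimal_degrees = degrees + minutes / 60 + seconds / 3600
    # south latitudes and west longitudes are negative
    if ref in ("S", "W"):
        decimal_degrees = -decimal_degrees
    return decimal_degrees

assert decimal_coords((46, 30, 0), "N") == 46.5
```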
docs/metadata_handler.md ADDED
@@ -0,0 +1,3 @@
+ This module formats selected metadata fields so they are human-readable for the user.
+
+ ::: src.utils.metadata_handler
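The committed `metadata2md()` takes no arguments and reads from session state; as a standalone illustration of the idea, here is a hypothetical formatter over an explicit dict (all field names are assumptions).

```python
# Hypothetical sketch of a metadata2md-style formatter; field names are
# illustrative, and the committed function reads session state instead.
def metadata2md(metadata: dict) -> str:
    display_fields = ["species", "latitude", "longitude", "date"]
    lines = [
        f"**{key.replace('_', ' ').title()}**: {metadata[key]}"
        for key in display_fields
        if key in metadata
    ]
    return "\n\n".join(lines)
```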
docs/obs_map.md CHANGED
@@ -4,4 +4,4 @@ Note: OSM, ESRI, and CartoDB map tiles are served without authentication/tokens,
  and so render correctly on the huggingface deployment. The Stamen tiles render
  on localhost but require a token to present on a 3rd-party site.

- ::: src.obs_map
+ ::: src.maps.obs_map
mkdocs.yaml CHANGED
@@ -28,12 +28,23 @@ nav:
    - API:
      - Main app: main.md
      - Modules:
-       - Data entry handling: input_handling.md
+       - Data entry handling:
+         - Data input: input_handling.md
+         - Data extraction and validation: input_validator.md
+         - Data Object Class: input_observation.md
+       - Classifiers:
+         - Cetacean Fluke & Fin Recognition: classifier_cetacean.md
+         - (temporary) Hotdog Classifier: hotdog.md
+       - Hugging Face Integration:
+         - Push Observations to Dataset: hf_push_observations.md
        - Map of observations: obs_map.md
        - Whale gallery: whale_gallery.md
        - Whale viewer: whale_viewer.md
        - Logging: st_logs.md
-       - Tab-rendering fix (js): fix_tabrender.md
+       - Utils:
+         - Tab-rendering fix (js): fix_tabrender.md
+         - Metadata handling: metadata_handler.md
+         - Grid maker: grid_maker.md

    - Development clutter:
      - Demo app: app.md
src/apptest/demo_whale_viewer.py CHANGED
@@ -17,14 +17,14 @@ src_dir = path.dirname( path.dirname( path.abspath(__file__) ) )
  sys.path.append(src_dir)


- import whale_viewer as sw_wv
+ import whale_viewer as whale_viewer

  # a menu to pick one of the images
  title = st.title("Whale Viewer testing")
- species = st.selectbox("Species", sw_wv.WHALE_CLASSES)
+ species = st.selectbox("Species", whale_viewer.WHALE_CLASSES)

  if species is not None:
      # and display the image + reference
      st.write(f"Selected species: {species}")
-     sw_wv.display_whale([species], 0, st)
+     whale_viewer.display_whale([species], 0, st)

src/classifier/classifier_image.py CHANGED
@@ -12,21 +12,25 @@ from utils.grid_maker import gridder
  from utils.metadata_handler import metadata2md

  def cetacean_classify(cetacean_classifier):
-     files = st.session_state.files
+     """Cetacean classifier using the saving-willy model from Saving Willy Hugging Face space.
+     For each image in the session state, classify the image and display the top 3 predictions.
+     Args:
+         cetacean_classifier ([type]): saving-willy model from Saving Willy Hugging Face space
+     """
      images = st.session_state.images
      observations = st.session_state.observations
-
-     batch_size, row_size, page = gridder(files)
+     hashes = st.session_state.image_hashes
+     batch_size, row_size, page = gridder(hashes)

      grid = st.columns(row_size)
      col = 0
-
-     for file in files:
-         image = images[file.name]
+     o = 1
+     for hash in hashes:
+         image = images[hash]

          with grid[col]:
              st.image(image, use_column_width=True)
-             observation = observations[file.name].to_dict()
+             observation = observations[hash].to_dict()
              # run classifier model on `image`, and persistently store the output
              out = cetacean_classifier(image) # get top 3 matches
              st.session_state.whale_prediction1 = out['predictions'][0]
@@ -44,14 +48,14 @@ def cetacean_classify(cetacean_classifier):
              # get index of pred1 from WHALE_CLASSES, none if not present
              print(f"[D] pred1: {pred1}")
              ix = viewer.WHALE_CLASSES.index(pred1) if pred1 in viewer.WHALE_CLASSES else None
-             selected_class = st.selectbox(f"Species for {file.name}", viewer.WHALE_CLASSES, index=ix)
+             selected_class = st.selectbox(f"Species for observation {str(o)}", viewer.WHALE_CLASSES, index=ix)

              observation['predicted_class'] = selected_class
              if selected_class != st.session_state.whale_prediction1:
                  observation['class_overriden'] = selected_class

              st.session_state.public_observation = observation
-             st.button(f"Upload observation for {file.name} to THE INTERNET!", on_click=push_observations)
+             st.button(f"Upload observation {str(o)} to THE INTERNET!", on_click=push_observations)
              # TODO: the metadata only fills properly if `validate` was clicked.
              st.markdown(metadata2md())

@@ -62,7 +66,8 @@ def cetacean_classify(cetacean_classifier):

              whale_classes = out['predictions'][:]
              # render images for the top 3 (that is what the model api returns)
-             st.markdown(f"Top 3 Predictions for {file.name}")
+             st.markdown(f"Top 3 Predictions for observation {str(o)}")
              for i in range(len(whale_classes)):
                  viewer.display_whale(whale_classes, i)
+         o += 1
          col = (col + 1) % row_size
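The function above treats the classifier as a callable returning a dict whose `'predictions'` list holds the top matches; that much is visible in the diff. A stub satisfying this contract, useful for exercising the UI without the real saving-willy model (class names here are placeholders):

```python
# Stand-in for the real model: any callable with this return shape will do.
def fake_cetacean_classifier(image):
    # the real model returns its top matches under 'predictions'
    return {"predictions": ["humpback_whale", "blue_whale", "beluga"]}

out = fake_cetacean_classifier(None)
assert out["predictions"][0] == "humpback_whale"   # top-1 match
```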
src/input/input_handling.py CHANGED
@@ -66,6 +66,7 @@ def setup_input(
      uploaded_files = viewcontainer.file_uploader("Upload an image", type=allowed_image_types, accept_multiple_files=True)
      observations = {}
      images = {}
+     image_hashes = []
      if uploaded_files is not None:
          for file in uploaded_files:

@@ -108,11 +109,13 @@
              observation = InputObservation(image=file, latitude=latitude, longitude=longitude,
                                             author_email=author_email, date=image_datetime, time=None,
                                             date_option=date_option, time_option=time_option)
-             observations[file.name] = observation
-             images[file.name] = image
+             image_hash = observation.to_dict()["image_md5"]
+             observations[image_hash] = observation
+             images[image_hash] = image
+             image_hashes.append(image_hash)

      st.session_state.images = images
      st.session_state.files = uploaded_files
-
-     return observations
+     st.session_state.observations = observations
+     st.session_state.image_hashes = image_hashes

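The switch from `file.name` to an MD5 key means duplicate uploads of the same image collapse to one observation, and renamed copies no longer count as distinct. For reference, the same hash can be derived directly from a Streamlit `UploadedFile`; this is a sketch, whereas the commit routes it through `InputObservation.to_dict()`:

```python
# Sketch: derive the md5 key straight from an UploadedFile.
import hashlib

def image_md5(uploaded_file) -> str:
    # getvalue() returns the full contents as bytes without disturbing
    # the read position used by later consumers of the file
    return hashlib.md5(uploaded_file.getvalue()).hexdigest()
```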
src/input/input_validator.py CHANGED
@@ -96,7 +96,8 @@ def decimal_coords(coords:tuple, ref:str) -> Fraction:
      return decimal_degrees


- def get_image_latlon(image_file: UploadedFile) -> tuple[float, float] | None:
+ #def get_image_latlon(image_file: UploadedFile) -> tuple[float, float] | None:
+ def get_image_latlon(image_file: UploadedFile):
      """
      Extracts the latitude and longitude from the EXIF metadata of an uploaded image file.

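The commented-out annotation points at the likely motivation here: the `tuple[float, float] | None` union syntax (PEP 604) is evaluated at definition time and raises a `TypeError` on Python older than 3.10. A version-portable alternative, should the annotation be wanted back:

```python
# Optional[...] works on Python 3.7+, unlike `tuple[float, float] | None`,
# which needs 3.10+ (or `from __future__ import annotations`).
from typing import Optional, Tuple

def get_image_latlon(image_file) -> Optional[Tuple[float, float]]:
    ...
```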
src/main.py CHANGED
@@ -9,6 +9,7 @@ from streamlit_folium import st_folium
  from transformers import pipeline
  from transformers import AutoModelForImageClassification

+ from maps.obs_map import add_header_text
  from datasets import disable_caching
  disable_caching()

@@ -44,6 +45,9 @@ st.set_page_config(layout="wide")
  if "handler" not in st.session_state:
      st.session_state['handler'] = setup_logging()

+ if "image_hashes" not in st.session_state:
+     st.session_state.image_hashes = []
+
  if "observations" not in st.session_state:
      st.session_state.observations = {}

@@ -100,7 +104,7 @@ def main() -> None:


      # create a sidebar, and parse all the input (returned as `observations` object)
-     observations = setup_input(viewcontainer=st.sidebar)
+     setup_input(viewcontainer=st.sidebar)


      if 0:## WIP
@@ -118,7 +122,7 @@ def main() -> None:
      with tab_map:
          # visual structure: a couple of toggles at the top, then the map inlcuding a
          # dropdown for tileset selection.
-         sw_map.add_header_text()
+         add_header_text()
          tab_map_ui_cols = st.columns(2)
          with tab_map_ui_cols[0]:
              show_db_points = st.toggle("Show Points from DB", True)
@@ -179,12 +183,8 @@ def main() -> None:
      # Display submitted observation
      if st.sidebar.button("Validate"):
          # create a dictionary with the submitted observation
-         submitted_data = observations
-         st.session_state.observations = observations
-
          tab_log.info(f"{st.session_state.observations}")
-
-         df = pd.DataFrame(submitted_data, index=[0])
+         df = pd.DataFrame(st.session_state.observations, index=[0])
          with tab_coords:
              st.table(df)

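Taken together, these hunks are the core of the stateful workflow: `setup_input` now publishes into `st.session_state` rather than returning observations, and downstream tabs read from there. A minimal sketch of the pattern (names here are illustrative):

```python
# Minimal sketch of the session-state pattern this merge adopts: producers
# write into st.session_state, consumers read it; nothing passes by return.
import streamlit as st

if "observations" not in st.session_state:
    st.session_state.observations = {}   # survives Streamlit reruns

def setup_input():
    # ...gather user input, then publish to shared state
    st.session_state.observations["demo"] = {"species": "beluga"}

setup_input()
st.write(st.session_state.observations)  # any later code can read it
```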
src/utils/grid_maker.py CHANGED
@@ -1,13 +1,21 @@
  import streamlit as st
  import math

- def gridder(files):
+ def gridder(items):
+     """Creates a grid for displaying items in a batched manner.
+     Args:
+         items (list): The items to be displayed.
+     Returns:
+         batch_size (int): The number of items to display in each batch.
+         row_size (int): The number of items to display in each row.
+         page (int): The currently selected page number.
+     """
      cols = st.columns(3)
      with cols[0]:
          batch_size = st.select_slider("Batch size:", range(10,110,10), value=10)
      with cols[1]:
          row_size = st.select_slider("Row size:", range(1,6), value=5)
-     num_batches = math.ceil(len(files)/batch_size)
+     num_batches = math.ceil(len(items)/batch_size)
      with cols[2]:
          page = st.selectbox("Page", range(1,num_batches+1))
      return batch_size, row_size, page
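`gridder` only reports the chosen geometry; the caller slices the current page out of its items. A usage sketch, with hard-coded values standing in for the widget selections:

```python
# Usage sketch: page through items using the values gridder would return.
hashes = [f"img{i:03d}" for i in range(42)]

batch_size, row_size, page = 10, 5, 2      # as if chosen via gridder(hashes)
start = (page - 1) * batch_size
page_items = hashes[start : start + batch_size]
assert page_items[0] == "img010" and len(page_items) == 10
```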
tests/test_demo_whale_viewer.py CHANGED
@@ -1,7 +1,7 @@
  from streamlit.testing.v1 import AppTest
  import pytest # for the exception testing

- import whale_viewer as sw_wv # for data
+ import whale_viewer # for data


  def test_selectbox_ok():
@@ -27,7 +27,7 @@ def test_selectbox_ok():
      assert at.markdown[0].value == "Selected species: beluga"
      # the second markdown should be "### :whale: #1: Beluga"
      print("markdown 1: ", at.markdown[1].value)
-     assert at.markdown[1].value == "### :whale: #1: Beluga"
+     assert at.markdown[1].value == ":whale: #1: Beluga"

      # now let's select a different element. index 4 is commersons_dolphin
      v4 = "commersons_dolphin"
@@ -39,16 +39,16 @@ def test_selectbox_ok():
      # the first markdown should be "Selected species: commersons_dolphin"
      assert at.markdown[0].value == f"Selected species: {v4}"
      # the second markdown should be "### :whale: #1: Commersons Dolphin"
-     assert at.markdown[1].value == f"### :whale: #1: {v4_str}"
+     assert at.markdown[1].value == f":whale: #1: {v4_str}"

      # test there are the right number of options
      print("PROPS=> ", dir(at.selectbox[0])) # no length unfortunately,
      # test it dynamically intead.
      # should be fine
-     at.selectbox[0].select_index(len(sw_wv.WHALE_CLASSES)-1).run()
+     at.selectbox[0].select_index(len(whale_viewer.WHALE_CLASSES)-1).run()
      # should fail
      with pytest.raises(Exception):
-         at.selectbox[0].select_index(len(sw_wv.WHALE_CLASSES)).run()
+         at.selectbox[0].select_index(len(whale_viewer.WHALE_CLASSES)).run()

  def test_img_props():
      '''
@@ -95,15 +95,15 @@ def test_img_props():
      # we're expecting the caption to be WHALE_REFERENCES[ix]
      print(parsed_proto)
      assert "caption" in parsed_proto
-     assert parsed_proto["caption"] == sw_wv.WHALE_REFERENCES[ix]
+     assert parsed_proto["caption"] == whale_viewer.WHALE_REFERENCES[ix]
      assert "url" in parsed_proto
      assert parsed_proto["url"].startswith("/mock/media")

-     print(sw_wv.WHALE_REFERENCES[ix])
+     print(whale_viewer.WHALE_REFERENCES[ix])

      # now let's switch to another index
      ix = 15
-     v15 = sw_wv.WHALE_CLASSES[ix]
+     v15 = whale_viewer.WHALE_CLASSES[ix]
      v15_str = v15.replace("_", " ").title()
      at.selectbox[0].set_value(v15).run()

@@ -118,7 +118,7 @@ def test_img_props():
      # we're expecting the caption to be WHALE_REFERENCES[ix]
      print(parsed_proto)
      assert "caption" in parsed_proto
-     assert parsed_proto["caption"] == sw_wv.WHALE_REFERENCES[ix]
+     assert parsed_proto["caption"] == whale_viewer.WHALE_REFERENCES[ix]
      assert "url" in parsed_proto
      assert parsed_proto["url"].startswith("/mock/media")

tests/test_input_handling.py CHANGED
@@ -1,7 +1,7 @@
  import pytest
  from pathlib import Path

- from input_handling import is_valid_email, is_valid_number
+ from input.input_validator import is_valid_email, is_valid_number
  from input.input_validator import get_image_latlon, decimal_coords, get_image_datetime

  # generate tests for is_valid_email