rmm committed
Commit b384db4 · 2 parents: 5a21040 41bbd4a

Merge remote-tracking branch 'origin/dev' into feat/stateful-workflow

docs/classifier_cetacean.md ADDED
@@ -0,0 +1,3 @@
+ This module documents the cetacean fluke and fin classifier.
+
+ ::: src.classifier.cetacean_image
docs/fix_tabrender.md CHANGED
@@ -2,4 +2,4 @@ A js fix for certain UI elements, including maps, getting rendered into a
  zero-sized frame by default. Here we resize it so it is visible once the tab is
  clicked and no further interaction is required to see it.

- ::: src.fix_tabrender
+ ::: src.utils.fix_tabrender
docs/grid_maker.md ADDED
@@ -0,0 +1,3 @@
+ This module creates a grid composed of batches, rows, and pages based on an incoming number of items.
+
+ ::: src.utils.grid_maker
docs/hf_push_observations.md ADDED
@@ -0,0 +1,3 @@
+ This module writes an observation to a temporary JSON file so that it can be added to the Saving-Willy dataset in the Saving-Willy Hugging Face community.
+
+ ::: src.hf_push_observations
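For orientation, a minimal sketch of what this flow can look like. The repo id, file path, and helper name are assumptions for illustration, not the module's actual values; only the `image_md5` field is taken from this commit.

```python
# Sketch only: serialise an observation to a temporary JSON file and push it
# to a Hugging Face dataset repo. repo_id and path_in_repo are hypothetical.
import json
import tempfile

from huggingface_hub import HfApi

def push_observation(observation: dict, repo_id: str = "Saving-Willy/temp_dataset"):
    # write the observation to a temporary JSON file
    with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as f:
        json.dump(observation, f)
        tmp_path = f.name

    # upload into the dataset repo; authentication comes from the cached
    # HF credentials (pass token=... explicitly otherwise)
    HfApi().upload_file(
        path_or_fileobj=tmp_path,
        path_in_repo=f"data/{observation['image_md5']}.json",
        repo_id=repo_id,
        repo_type="dataset",
    )
```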
docs/hotdog.md ADDED
@@ -0,0 +1,3 @@
+ This module documents the "hotdog" classifier. It is purely a placeholder, showing how multiple image (or other data) classifiers can coexist in the same interface.
+
+ ::: src.classifier.classifier_hotdog
docs/input_handling.md CHANGED
@@ -5,4 +5,4 @@ This module focuses on image and metadata entry:
  - a container class for an observation


- ::: src.input_handling
+ ::: src.input.input_handling
docs/input_observation.md ADDED
@@ -0,0 +1,3 @@
+ This module provides the object class representing an observation once all data has been extracted automatically or entered by the user.
+
+ ::: src.input.input_observation
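Elsewhere in this commit, observations are keyed by an `image_md5` field exposed through `to_dict()`. A minimal sketch of that assumed interface follows; field names other than `image_md5` are illustrative, and the real class carries more data.

```python
# Minimal sketch of the assumed InputObservation interface; illustrative only.
import hashlib
from dataclasses import dataclass

@dataclass
class InputObservation:
    image: bytes          # raw image bytes (the app passes an UploadedFile)
    latitude: float
    longitude: float
    author_email: str

    def to_dict(self) -> dict:
        return {
            "latitude": self.latitude,
            "longitude": self.longitude,
            "author_email": self.author_email,
            # md5 of the image bytes, used as a stable key for the observation
            "image_md5": hashlib.md5(self.image).hexdigest(),
        }
```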
docs/input_validator.md ADDED
@@ -0,0 +1,3 @@
+ This module focuses on the extraction and validation of data after data input.
+
+ ::: src.input.input_validator
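The `decimal_coords(coords, ref)` helper touched later in this commit follows the standard EXIF degrees/minutes/seconds conversion. A sketch of that arithmetic: the committed signature annotates `Fraction` (EXIF stores rationals), while this sketch uses plain numbers.

```python
# Standard DMS -> decimal degrees conversion; a sketch, not the committed code.
def decimal_coords(coords: tuple, ref: str) -> float:
    degrees, minutes, seconds = coords
    decimal_degrees = degrees + minutes / 60 + seconds / 3600
    # south latitudes and west longitudes are negative
    if ref in ("S", "W"):
        decimal_degrees = -decimal_degrees
    return decimal_degrees

assert decimal_coords((46, 30, 0), "N") == 46.5
```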
docs/metadata_handler.md ADDED
@@ -0,0 +1,3 @@
+ This module formats selected metadata fields so they are human-readable for the user.
+
+ ::: src.utils.metadata_handler
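The committed `metadata2md()` takes no arguments and reads from session state; as a standalone illustration of the idea, here is a hypothetical formatter over an explicit dict (all field names are assumptions).

```python
# Hypothetical sketch of a metadata2md-style formatter; field names are
# illustrative, and the committed function reads session state instead.
def metadata2md(metadata: dict) -> str:
    display_fields = ["species", "latitude", "longitude", "date"]
    lines = [
        f"**{key.replace('_', ' ').title()}**: {metadata[key]}"
        for key in display_fields
        if key in metadata
    ]
    return "\n\n".join(lines)
```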
docs/obs_map.md CHANGED
@@ -4,4 +4,4 @@ Note: OSM, ESRI, and CartoDB map tiles are served without authentication/tokens,
  and so render correctly on the huggingface deployment. The Stamen tiles render
  on localhost but require a token to present on a 3rd-party site.

- ::: src.obs_map
+ ::: src.maps.obs_map
mkdocs.yaml CHANGED
@@ -28,12 +28,23 @@ nav:
    - API:
      - Main app: main.md
      - Modules:
-       - Data entry handling: input_handling.md
+       - Data entry handling:
+         - Data input: input_handling.md
+         - Data extraction and validation: input_validator.md
+         - Data Object Class: input_observation.md
+       - Classifiers:
+         - Cetacean Fluke & Fin Recognition: classifier_cetacean.md
+         - (temporary) Hotdog Classifier: hotdog.md
+       - Hugging Face Integration:
+         - Push Observations to Dataset: hf_push_observations.md
        - Map of observations: obs_map.md
        - Whale gallery: whale_gallery.md
        - Whale viewer: whale_viewer.md
        - Logging: st_logs.md
-       - Tab-rendering fix (js): fix_tabrender.md
+       - Utils:
+         - Tab-rendering fix (js): fix_tabrender.md
+         - Metadata handling: metadata_handler.md
+         - Grid maker: grid_maker.md

    - Development clutter:
      - Demo app: app.md
src/apptest/demo_whale_viewer.py CHANGED
@@ -17,14 +17,14 @@ src_dir = path.dirname( path.dirname( path.abspath(__file__) ) )
  sys.path.append(src_dir)


- import whale_viewer as sw_wv
+ import whale_viewer as whale_viewer

  # a menu to pick one of the images
  title = st.title("Whale Viewer testing")
- species = st.selectbox("Species", sw_wv.WHALE_CLASSES)
+ species = st.selectbox("Species", whale_viewer.WHALE_CLASSES)

  if species is not None:
      # and display the image + reference
      st.write(f"Selected species: {species}")
-     sw_wv.display_whale([species], 0, st)
+     whale_viewer.display_whale([species], 0, st)

src/classifier/classifier_image.py CHANGED
@@ -12,21 +12,25 @@ from utils.grid_maker import gridder
  from utils.metadata_handler import metadata2md

  def cetacean_classify(cetacean_classifier):
-     files = st.session_state.files
+     """Cetacean classifier using the saving-willy model from Saving Willy Hugging Face space.
+     For each image in the session state, classify the image and display the top 3 predictions.
+     Args:
+         cetacean_classifier ([type]): saving-willy model from Saving Willy Hugging Face space
+     """
      images = st.session_state.images
      observations = st.session_state.observations
-
-     batch_size, row_size, page = gridder(files)
+     hashes = st.session_state.image_hashes
+     batch_size, row_size, page = gridder(hashes)

      grid = st.columns(row_size)
      col = 0
-
-     for file in files:
-         image = images[file.name]
+     o = 1
+     for hash in hashes:
+         image = images[hash]

          with grid[col]:
              st.image(image, use_column_width=True)
-             observation = observations[file.name].to_dict()
+             observation = observations[hash].to_dict()
              # run classifier model on `image`, and persistently store the output
              out = cetacean_classifier(image) # get top 3 matches
              st.session_state.whale_prediction1 = out['predictions'][0]
@@ -44,14 +48,14 @@ def cetacean_classify(cetacean_classifier):
              # get index of pred1 from WHALE_CLASSES, none if not present
              print(f"[D] pred1: {pred1}")
              ix = viewer.WHALE_CLASSES.index(pred1) if pred1 in viewer.WHALE_CLASSES else None
-             selected_class = st.selectbox(f"Species for {file.name}", viewer.WHALE_CLASSES, index=ix)
+             selected_class = st.selectbox(f"Species for observation {str(o)}", viewer.WHALE_CLASSES, index=ix)

              observation['predicted_class'] = selected_class
              if selected_class != st.session_state.whale_prediction1:
                  observation['class_overriden'] = selected_class

              st.session_state.public_observation = observation
-             st.button(f"Upload observation for {file.name} to THE INTERNET!", on_click=push_observations)
+             st.button(f"Upload observation {str(o)} to THE INTERNET!", on_click=push_observations)
              # TODO: the metadata only fills properly if `validate` was clicked.
              st.markdown(metadata2md())

@@ -62,7 +66,8 @@ def cetacean_classify(cetacean_classifier):

              whale_classes = out['predictions'][:]
              # render images for the top 3 (that is what the model api returns)
-             st.markdown(f"Top 3 Predictions for {file.name}")
+             st.markdown(f"Top 3 Predictions for observation {str(o)}")
              for i in range(len(whale_classes)):
                  viewer.display_whale(whale_classes, i)
+         o += 1
          col = (col + 1) % row_size
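The function above treats the classifier as a callable returning a dict whose `'predictions'` list holds the top matches; that much is visible in the diff. A stub satisfying this contract, useful for exercising the UI without the real saving-willy model (class names here are placeholders):

```python
# Stand-in for the real model: any callable with this return shape will do.
def fake_cetacean_classifier(image):
    # the real model returns its top matches under 'predictions'
    return {"predictions": ["humpback_whale", "blue_whale", "beluga"]}

out = fake_cetacean_classifier(None)
assert out["predictions"][0] == "humpback_whale"   # top-1 match
```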
src/input/input_handling.py CHANGED
@@ -66,6 +66,7 @@ def setup_input(
      uploaded_files = viewcontainer.file_uploader("Upload an image", type=allowed_image_types, accept_multiple_files=True)
      observations = {}
      images = {}
+     image_hashes = []
      if uploaded_files is not None:
          for file in uploaded_files:

@@ -108,11 +109,13 @@
              observation = InputObservation(image=file, latitude=latitude, longitude=longitude,
                                             author_email=author_email, date=image_datetime, time=None,
                                             date_option=date_option, time_option=time_option)
-             observations[file.name] = observation
-             images[file.name] = image
+             image_hash = observation.to_dict()["image_md5"]
+             observations[image_hash] = observation
+             images[image_hash] = image
+             image_hashes.append(image_hash)

      st.session_state.images = images
      st.session_state.files = uploaded_files
-
-     return observations
+     st.session_state.observations = observations
+     st.session_state.image_hashes = image_hashes

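The switch from `file.name` to an MD5 key means duplicate uploads of the same image collapse to one observation, and renamed copies no longer count as distinct. For reference, the same hash can be derived directly from a Streamlit `UploadedFile`; this is a sketch, whereas the commit routes it through `InputObservation.to_dict()`:

```python
# Sketch: derive the md5 key straight from an UploadedFile.
import hashlib

def image_md5(uploaded_file) -> str:
    # getvalue() returns the full contents as bytes without disturbing
    # the read position used by later consumers of the file
    return hashlib.md5(uploaded_file.getvalue()).hexdigest()
```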
src/input/input_validator.py CHANGED
@@ -96,7 +96,8 @@ def decimal_coords(coords:tuple, ref:str) -> Fraction:
      return decimal_degrees


- def get_image_latlon(image_file: UploadedFile) -> tuple[float, float] | None:
+ #def get_image_latlon(image_file: UploadedFile) -> tuple[float, float] | None:
+ def get_image_latlon(image_file: UploadedFile):
      """
      Extracts the latitude and longitude from the EXIF metadata of an uploaded image file.

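The commented-out annotation points at the likely motivation here: the `tuple[float, float] | None` union syntax (PEP 604) is evaluated at definition time and raises a `TypeError` on Python older than 3.10. A version-portable alternative, should the annotation be wanted back:

```python
# Optional[...] works on Python 3.7+, unlike `tuple[float, float] | None`,
# which needs 3.10+ (or `from __future__ import annotations`).
from typing import Optional, Tuple

def get_image_latlon(image_file) -> Optional[Tuple[float, float]]:
    ...
```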
src/main.py CHANGED
@@ -9,6 +9,7 @@ from streamlit_folium import st_folium
  from transformers import pipeline
  from transformers import AutoModelForImageClassification

+ from maps.obs_map import add_header_text
  from datasets import disable_caching
  disable_caching()

@@ -44,6 +45,9 @@ st.set_page_config(layout="wide")
  if "handler" not in st.session_state:
      st.session_state['handler'] = setup_logging()

+ if "image_hashes" not in st.session_state:
+     st.session_state.image_hashes = []
+
  if "observations" not in st.session_state:
      st.session_state.observations = {}

@@ -100,7 +104,7 @@ def main() -> None:


      # create a sidebar, and parse all the input (returned as `observations` object)
-     observations = setup_input(viewcontainer=st.sidebar)
+     setup_input(viewcontainer=st.sidebar)


      if 0:## WIP
@@ -118,7 +122,7 @@ def main() -> None:
      with tab_map:
          # visual structure: a couple of toggles at the top, then the map inlcuding a
          # dropdown for tileset selection.
-         sw_map.add_header_text()
+         add_header_text()
          tab_map_ui_cols = st.columns(2)
          with tab_map_ui_cols[0]:
              show_db_points = st.toggle("Show Points from DB", True)
@@ -179,12 +183,8 @@ def main() -> None:
      # Display submitted observation
      if st.sidebar.button("Validate"):
          # create a dictionary with the submitted observation
-         submitted_data = observations
-         st.session_state.observations = observations
-
          tab_log.info(f"{st.session_state.observations}")
-
-         df = pd.DataFrame(submitted_data, index=[0])
+         df = pd.DataFrame(st.session_state.observations, index=[0])
          with tab_coords:
              st.table(df)

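Taken together, these hunks are the core of the stateful workflow: `setup_input` now publishes into `st.session_state` rather than returning observations, and downstream tabs read from there. A minimal sketch of the pattern (names here are illustrative):

```python
# Minimal sketch of the session-state pattern this merge adopts: producers
# write into st.session_state, consumers read it; nothing passes by return.
import streamlit as st

if "observations" not in st.session_state:
    st.session_state.observations = {}   # survives Streamlit reruns

def setup_input():
    # ...gather user input, then publish to shared state
    st.session_state.observations["demo"] = {"species": "beluga"}

setup_input()
st.write(st.session_state.observations)  # any later code can read it
```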
src/utils/grid_maker.py CHANGED
@@ -1,13 +1,21 @@
  import streamlit as st
  import math

- def gridder(files):
+ def gridder(items):
+     """Creates a grid for displaying items in a batched manner.
+     Args:
+         items (list): The items to be displayed.
+     Returns:
+         batch_size (int): The number of items to display in each batch.
+         row_size (int): The number of items to display in each row.
+         page (int): The currently selected page number.
+     """
      cols = st.columns(3)
      with cols[0]:
          batch_size = st.select_slider("Batch size:", range(10,110,10), value=10)
      with cols[1]:
          row_size = st.select_slider("Row size:", range(1,6), value=5)
-     num_batches = math.ceil(len(files)/batch_size)
+     num_batches = math.ceil(len(items)/batch_size)
      with cols[2]:
          page = st.selectbox("Page", range(1,num_batches+1))
      return batch_size, row_size, page
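`gridder` only reports the chosen geometry; the caller slices the current page out of its items. A usage sketch, with hard-coded values standing in for the widget selections:

```python
# Usage sketch: page through items using the values gridder would return.
hashes = [f"img{i:03d}" for i in range(42)]

batch_size, row_size, page = 10, 5, 2      # as if chosen via gridder(hashes)
start = (page - 1) * batch_size
page_items = hashes[start : start + batch_size]
assert page_items[0] == "img010" and len(page_items) == 10
```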
tests/test_demo_whale_viewer.py CHANGED
@@ -1,7 +1,7 @@
  from streamlit.testing.v1 import AppTest
  import pytest # for the exception testing

- import whale_viewer as sw_wv # for data
+ import whale_viewer # for data


  def test_selectbox_ok():
@@ -27,7 +27,7 @@ def test_selectbox_ok():
      assert at.markdown[0].value == "Selected species: beluga"
      # the second markdown should be "### :whale: #1: Beluga"
      print("markdown 1: ", at.markdown[1].value)
-     assert at.markdown[1].value == "### :whale: #1: Beluga"
+     assert at.markdown[1].value == ":whale: #1: Beluga"

      # now let's select a different element. index 4 is commersons_dolphin
      v4 = "commersons_dolphin"
@@ -39,16 +39,16 @@ def test_selectbox_ok():
      # the first markdown should be "Selected species: commersons_dolphin"
      assert at.markdown[0].value == f"Selected species: {v4}"
      # the second markdown should be "### :whale: #1: Commersons Dolphin"
-     assert at.markdown[1].value == f"### :whale: #1: {v4_str}"
+     assert at.markdown[1].value == f":whale: #1: {v4_str}"

      # test there are the right number of options
      print("PROPS=> ", dir(at.selectbox[0])) # no length unfortunately,
      # test it dynamically intead.
      # should be fine
-     at.selectbox[0].select_index(len(sw_wv.WHALE_CLASSES)-1).run()
+     at.selectbox[0].select_index(len(whale_viewer.WHALE_CLASSES)-1).run()
      # should fail
      with pytest.raises(Exception):
-         at.selectbox[0].select_index(len(sw_wv.WHALE_CLASSES)).run()
+         at.selectbox[0].select_index(len(whale_viewer.WHALE_CLASSES)).run()

  def test_img_props():
      '''
@@ -95,15 +95,15 @@ def test_img_props():
      # we're expecting the caption to be WHALE_REFERENCES[ix]
      print(parsed_proto)
      assert "caption" in parsed_proto
-     assert parsed_proto["caption"] == sw_wv.WHALE_REFERENCES[ix]
+     assert parsed_proto["caption"] == whale_viewer.WHALE_REFERENCES[ix]
      assert "url" in parsed_proto
      assert parsed_proto["url"].startswith("/mock/media")

-     print(sw_wv.WHALE_REFERENCES[ix])
+     print(whale_viewer.WHALE_REFERENCES[ix])

      # now let's switch to another index
      ix = 15
-     v15 = sw_wv.WHALE_CLASSES[ix]
+     v15 = whale_viewer.WHALE_CLASSES[ix]
      v15_str = v15.replace("_", " ").title()
      at.selectbox[0].set_value(v15).run()

@@ -118,7 +118,7 @@ def test_img_props():
      # we're expecting the caption to be WHALE_REFERENCES[ix]
      print(parsed_proto)
      assert "caption" in parsed_proto
-     assert parsed_proto["caption"] == sw_wv.WHALE_REFERENCES[ix]
+     assert parsed_proto["caption"] == whale_viewer.WHALE_REFERENCES[ix]
      assert "url" in parsed_proto
      assert parsed_proto["url"].startswith("/mock/media")

tests/test_input_handling.py CHANGED
@@ -1,7 +1,7 @@
  import pytest
  from pathlib import Path

- from input_handling import is_valid_email, is_valid_number
+ from input.input_validator import is_valid_email, is_valid_number
  from input.input_validator import get_image_latlon, decimal_coords, get_image_datetime

  # generate tests for is_valid_email