vancauwe commited on
Commit
43cae2c
Β·
unverified Β·
2 Parent(s): 6efbf9b 8cbe888

Merge pull request #24 from sdsc-ordes/feat/image-batch

Browse files
Dockerfile ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM ubuntu:latest
2
+
3
+ RUN apt-get update
4
+ RUN apt-get install python3 python3-pip -y
5
+
6
+ # https://stackoverflow.com/questions/75608323/how-do-i-solve-error-externally-managed-environment-every-time-i-use-pip-3
7
+ # https://veronneau.org/python-311-pip-and-breaking-system-packages.html
8
+ ENV PIP_BREAK_SYSTEM_PACKAGES=1
9
+
10
+
11
+ ##################################################
12
+ # Ubuntu setup
13
+ ##################################################
14
+
15
+ RUN apt-get update \
16
+ && apt-get install -y wget \
17
+ && rm -rf /var/lib/apt/lists/*
18
+
19
+ RUN apt-get update && apt-get -y upgrade \
20
+ && apt-get install -y --no-install-recommends \
21
+ unzip \
22
+ nano \
23
+ git \
24
+ g++ \
25
+ gcc \
26
+ htop \
27
+ zip \
28
+ ca-certificates \
29
+ && rm -rf /var/lib/apt/lists/*
30
+
31
+ ##################################################
32
+ # ODTP setup
33
+ ##################################################
34
+
35
+ RUN mkdir /app
36
+ COPY . /saving-willy
37
+ RUN pip3 install --upgrade setuptools
38
+ RUN pip3 install -r /saving-willy/requirements.txt
39
+
40
+ WORKDIR /saving-willy
41
+
42
+ ENTRYPOINT bash
README.md CHANGED
@@ -28,7 +28,7 @@ pip install -r requirements.txt
28
  ```
29
 
30
  ```
31
- streamlit run app.py
32
  ```
33
 
34
 
 
28
  ```
29
 
30
  ```
31
+ streamlit run src/main.py
32
  ```
33
 
34
 
basic_map/app.py DELETED
@@ -1,21 +0,0 @@
1
- import pandas as pd
2
- import streamlit as st
3
- import folium
4
-
5
- from streamlit_folium import st_folium
6
- from streamlit_folium import folium_static
7
-
8
-
9
- visp_loc = 46.295833, 7.883333
10
- #m = folium.Map(location=visp_loc, zoom_start=9)
11
-
12
-
13
- st.markdown("# :whale: :whale: Cetaceans :red[& friends] :balloon:")
14
-
15
- m = folium.Map(location=visp_loc, zoom_start=9,
16
- tiles='https://tile.opentopomap.org/{z}/{x}/{y}.png',
17
- attr='<a href="https://opentopomap.org/">Open Topo Map</a>')
18
-
19
- folium_static(m)
20
-
21
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
basic_map/app1.py DELETED
@@ -1,42 +0,0 @@
1
- # lets try using map stuff without folium, maybe stlite doesnt support that.
2
-
3
- import streamlit as st
4
- import pandas as pd
5
-
6
- # Load data
7
- f = 'mountains_clr.csv'
8
- df = pd.read_csv(f).dropna()
9
-
10
- print(df)
11
-
12
- st.markdown("# :whale: :whale: Cetaceans :red[& friends] :balloon:")
13
-
14
- st.markdown("## :mountain: Mountains")
15
- st.markdown(f"library version: **{st.__version__}**")
16
- # not sure where my versions are getting pegged from, but we have a 1y spread :(
17
- # https://github.com/streamlit/streamlit/blob/1.24.1/lib/streamlit/elements/map.py
18
- # rather hard to find the docs for old versions, no selector unlike many libraries.
19
-
20
- visp_loc = 46.295833, 7.883333
21
- tile_xyz = 'https://tile.opentopomap.org/{z}/{x}/{y}.png'
22
- tile_attr = '<a href="https://opentopomap.org/">Open Topo Map</a>'
23
- st.map(df, latitude='lat', longitude='lon', color='color', size='size', zoom=7)
24
- #, tiles=tile_xyz, attr=tile_attr)
25
-
26
- #st.map(df)
27
-
28
- #st.map(df, latitude="col1", longitude="col2", size="col3", color="col4")
29
-
30
- import numpy as np
31
-
32
- df2 = pd.DataFrame(
33
- {
34
- "col1": np.random.randn(1000) / 50 + 37.76,
35
- "col2": np.random.randn(1000) / 50 + -122.4,
36
- "col3": np.random.randn(1000) * 100,
37
- "col4": np.random.rand(1000, 4).tolist(),
38
- }
39
- )
40
- #st.map(df, latitude="col1", longitude="col2", size="col3", color="col4")
41
-
42
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
basic_map/requirements.txt DELETED
@@ -1,4 +0,0 @@
1
- streamlit
2
- folium
3
- streamlit-folium
4
-
 
 
 
 
 
docs/app.md CHANGED
@@ -1,5 +0,0 @@
1
- Here is the documentation for the app code generating the streamlit front-end.
2
-
3
- # Streamlit App
4
-
5
- ::: basic_map.app
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
- numpy==1.24
2
  pandas==2.2.3
3
 
4
 
 
1
+ numpy==1.26.4
2
  pandas==2.2.3
3
 
4
 
src/classifier/classifier_hotdog.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import json
3
+ from PIL import Image
4
+
5
+
6
+ def hotdog_classify(pipeline_hot_dog, tab_hotdogs):
7
+ col1, col2 = tab_hotdogs.columns(2)
8
+ for file in st.session_state.files:
9
+ image = st.session_state.images[file.name]
10
+ observation = st.session_state.observations[file.name].to_dict()
11
+ # display the image (use cached version, no need to reread)
12
+ col1.image(image, use_column_width=True)
13
+ # and then run inference on the image
14
+ hotdog_image = Image.fromarray(image)
15
+ predictions = pipeline_hot_dog(hotdog_image)
16
+
17
+ col2.header("Probabilities")
18
+ first = True
19
+ for p in predictions:
20
+ col2.subheader(f"{ p['label'] }: { round(p['score'] * 100, 1)}%")
21
+ if first:
22
+ observation['predicted_class'] = p['label']
23
+ observation['predicted_score'] = round(p['score'] * 100, 1)
24
+ first = False
25
+
26
+ tab_hotdogs.write(f"Session observation: {json.dumps(observation)}")
src/classifier/classifier_image.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import logging
3
+
4
+ # get a global var for logger accessor in this module
5
+ LOG_LEVEL = logging.DEBUG
6
+ g_logger = logging.getLogger(__name__)
7
+ g_logger.setLevel(LOG_LEVEL)
8
+
9
+ import whale_viewer as viewer
10
+ from hf_push_observations import push_observations
11
+ from utils.grid_maker import gridder
12
+ from utils.metadata_handler import metadata2md
13
+
14
+ def cetacean_classify(cetacean_classifier):
15
+ files = st.session_state.files
16
+ images = st.session_state.images
17
+ observations = st.session_state.observations
18
+
19
+ batch_size, row_size, page = gridder(files)
20
+
21
+ grid = st.columns(row_size)
22
+ col = 0
23
+
24
+ for file in files:
25
+ image = images[file.name]
26
+
27
+ with grid[col]:
28
+ st.image(image, use_column_width=True)
29
+ observation = observations[file.name].to_dict()
30
+ # run classifier model on `image`, and persistently store the output
31
+ out = cetacean_classifier(image) # get top 3 matches
32
+ st.session_state.whale_prediction1 = out['predictions'][0]
33
+ st.session_state.classify_whale_done = True
34
+ msg = f"[D]2 classify_whale_done: {st.session_state.classify_whale_done}, whale_prediction1: {st.session_state.whale_prediction1}"
35
+ g_logger.info(msg)
36
+
37
+ # dropdown for selecting/overriding the species prediction
38
+ if not st.session_state.classify_whale_done:
39
+ selected_class = st.sidebar.selectbox("Species", viewer.WHALE_CLASSES,
40
+ index=None, placeholder="Species not yet identified...",
41
+ disabled=True)
42
+ else:
43
+ pred1 = st.session_state.whale_prediction1
44
+ # get index of pred1 from WHALE_CLASSES, none if not present
45
+ print(f"[D] pred1: {pred1}")
46
+ ix = viewer.WHALE_CLASSES.index(pred1) if pred1 in viewer.WHALE_CLASSES else None
47
+ selected_class = st.selectbox(f"Species for {file.name}", viewer.WHALE_CLASSES, index=ix)
48
+
49
+ observation['predicted_class'] = selected_class
50
+ if selected_class != st.session_state.whale_prediction1:
51
+ observation['class_overriden'] = selected_class
52
+
53
+ st.session_state.public_observation = observation
54
+ st.button(f"Upload observation for {file.name} to THE INTERNET!", on_click=push_observations)
55
+ # TODO: the metadata only fills properly if `validate` was clicked.
56
+ st.markdown(metadata2md())
57
+
58
+ msg = f"[D] full observation after inference: {observation}"
59
+ g_logger.debug(msg)
60
+ print(msg)
61
+ # TODO: add a link to more info on the model, next to the button.
62
+
63
+ whale_classes = out['predictions'][:]
64
+ # render images for the top 3 (that is what the model api returns)
65
+ st.markdown(f"Top 3 Predictions for {file.name}")
66
+ for i in range(len(whale_classes)):
67
+ viewer.display_whale(whale_classes, i)
68
+ col = (col + 1) % row_size
src/classifier_image.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import logging
3
+ import os
4
+
5
+ # get a global var for logger accessor in this module
6
+ LOG_LEVEL = logging.DEBUG
7
+ g_logger = logging.getLogger(__name__)
8
+ g_logger.setLevel(LOG_LEVEL)
9
+
10
+ from grid_maker import gridder
11
+ import hf_push_observations as sw_push_obs
12
+ import utils.metadata_handler as meta_handler
13
+ import whale_viewer as sw_wv
14
+
15
+ def cetacean_classify(cetacean_classifier, tab_inference):
16
+ files = st.session_state.files
17
+ images = st.session_state.images
18
+ observations = st.session_state.observations
19
+
20
+ batch_size, row_size, page = gridder(files)
21
+
22
+ grid = st.columns(row_size)
23
+ col = 0
24
+
25
+ for file in files:
26
+ image = images[file.name]
27
+
28
+ with grid[col]:
29
+ st.image(image, use_column_width=True)
30
+ observation = observations[file.name].to_dict()
31
+ # run classifier model on `image`, and persistently store the output
32
+ out = cetacean_classifier(image) # get top 3 matches
33
+ st.session_state.whale_prediction1 = out['predictions'][0]
34
+ st.session_state.classify_whale_done = True
35
+ msg = f"[D]2 classify_whale_done: {st.session_state.classify_whale_done}, whale_prediction1: {st.session_state.whale_prediction1}"
36
+ g_logger.info(msg)
37
+
38
+ # dropdown for selecting/overriding the species prediction
39
+ if not st.session_state.classify_whale_done:
40
+ selected_class = st.sidebar.selectbox("Species", sw_wv.WHALE_CLASSES,
41
+ index=None, placeholder="Species not yet identified...",
42
+ disabled=True)
43
+ else:
44
+ pred1 = st.session_state.whale_prediction1
45
+ # get index of pred1 from WHALE_CLASSES, none if not present
46
+ print(f"[D] pred1: {pred1}")
47
+ ix = sw_wv.WHALE_CLASSES.index(pred1) if pred1 in sw_wv.WHALE_CLASSES else None
48
+ selected_class = tab_inference.selectbox("Species", sw_wv.WHALE_CLASSES, index=ix)
49
+
50
+ observation['predicted_class'] = selected_class
51
+ if selected_class != st.session_state.whale_prediction1:
52
+ observation['class_overriden'] = selected_class
53
+
54
+ st.session_state.public_observation = observation
55
+ st.button(f"Upload observation for {file.name} to THE INTERNET!", on_click=sw_push_obs.push_observations)
56
+ # TODO: the metadata only fills properly if `validate` was clicked.
57
+ st.markdown(meta_handler.metadata2md())
58
+
59
+ msg = f"[D] full observation after inference: {observation}"
60
+ g_logger.debug(msg)
61
+ print(msg)
62
+ # TODO: add a link to more info on the model, next to the button.
63
+
64
+ whale_classes = out['predictions'][:]
65
+ # render images for the top 3 (that is what the model api returns)
66
+ #with tab_inference:
67
+ st.title(f"Species detected for {file.name}")
68
+ for i in range(len(whale_classes)):
69
+ sw_wv.display_whale(whale_classes, i)
70
+ col = (col + 1) % row_size
src/hf_push_observations.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from streamlit.delta_generator import DeltaGenerator
2
+ import streamlit as st
3
+ from huggingface_hub import HfApi
4
+ import json
5
+ import tempfile
6
+ import logging
7
+
8
+ # get a global var for logger accessor in this module
9
+ LOG_LEVEL = logging.DEBUG
10
+ g_logger = logging.getLogger(__name__)
11
+ g_logger.setLevel(LOG_LEVEL)
12
+
13
+ def push_observations(tab_log:DeltaGenerator=None):
14
+ """
15
+ Push the observations to the Hugging Face dataset
16
+
17
+ Args:
18
+ tab_log (streamlit.container): The container to log messages to. If not provided,
19
+ log messages are in any case written to the global logger (TODO: test - didn't
20
+ push any observation since generating the logger)
21
+
22
+ """
23
+ # we get the observation from session state: 1 is the dict 2 is the image.
24
+ # first, lets do an info display (popup)
25
+ metadata_str = json.dumps(st.session_state.public_observation)
26
+
27
+ st.toast(f"Uploading observations: {metadata_str}", icon="🦭")
28
+ tab_log = st.session_state.tab_log
29
+ if tab_log is not None:
30
+ tab_log.info(f"Uploading observations: {metadata_str}")
31
+
32
+ # get huggingface api
33
+ import os
34
+ token = os.environ.get("HF_TOKEN", None)
35
+ api = HfApi(token=token)
36
+
37
+ f = tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False)
38
+ f.write(metadata_str)
39
+ f.close()
40
+ st.info(f"temp file: {f.name} with metadata written...")
41
+
42
+ path_in_repo= f"metadata/{st.session_state.public_observation['author_email']}/{st.session_state.public_observation['image_md5']}.json"
43
+ msg = f"fname: {f.name} | path: {path_in_repo}"
44
+ print(msg)
45
+ st.warning(msg)
46
+ # rv = api.upload_file(
47
+ # path_or_fileobj=f.name,
48
+ # path_in_repo=path_in_repo,
49
+ # repo_id="Saving-Willy/temp_dataset",
50
+ # repo_type="dataset",
51
+ # )
52
+ # print(rv)
53
+ # msg = f"observation attempted tx to repo happy walrus: {rv}"
54
+ g_logger.info(msg)
55
+ st.info(msg)
56
+
src/input/input_handling.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import datetime
2
+ import logging
3
+
4
+ import streamlit as st
5
+ from streamlit.delta_generator import DeltaGenerator
6
+
7
+ import cv2
8
+ import numpy as np
9
+
10
+ from input.input_observation import InputObservation
11
+ from input.input_validator import get_image_datetime, is_valid_email, is_valid_number
12
+
13
+ m_logger = logging.getLogger(__name__)
14
+ m_logger.setLevel(logging.INFO)
15
+
16
+ '''
17
+ A module to setup the input handling for the whale observation guidance tool
18
+
19
+ both the UI elements (setup_input_UI) and the validation functions.
20
+ '''
21
+ allowed_image_types = ['jpg', 'jpeg', 'png', 'webp']
22
+
23
+ # an arbitrary set of defaults so testing is less painful...
24
+ # ideally we add in some randomization to the defaults
25
+ spoof_metadata = {
26
+ "latitude": 23.5,
27
+ "longitude": 44,
28
+ "author_email": "[email protected]",
29
+ "date": None,
30
+ "time": None,
31
+ }
32
+
33
+ def setup_input(
34
+ viewcontainer: DeltaGenerator=None,
35
+ _allowed_image_types: list=None, ) -> InputObservation:
36
+ """
37
+ Sets up the input interface for uploading an image and entering metadata.
38
+
39
+ It provides input fields for an image upload, lat/lon, author email, and date-time.
40
+ In the ideal case, the image metadata will be used to populate location and datetime.
41
+
42
+ Parameters:
43
+ viewcontainer (DeltaGenerator, optional): The Streamlit container to use for the input interface. Defaults to st.sidebar.
44
+ _allowed_image_types (list, optional): List of allowed image file types for upload. Defaults to allowed_image_types.
45
+
46
+ Returns:
47
+ InputObservation: An object containing the uploaded image and entered metadata.
48
+
49
+ """
50
+
51
+ if viewcontainer is None:
52
+ viewcontainer = st.sidebar
53
+
54
+ if _allowed_image_types is None:
55
+ _allowed_image_types = allowed_image_types
56
+
57
+
58
+ viewcontainer.title("Input image and data")
59
+
60
+ # 1. Input the author email
61
+ author_email = viewcontainer.text_input("Author Email", spoof_metadata.get('author_email', ""))
62
+ if author_email and not is_valid_email(author_email):
63
+ viewcontainer.error("Please enter a valid email address.")
64
+
65
+ # 2. Image Selector
66
+ uploaded_files = viewcontainer.file_uploader("Upload an image", type=allowed_image_types, accept_multiple_files=True)
67
+ observations = {}
68
+ images = {}
69
+ if uploaded_files is not None:
70
+ for file in uploaded_files:
71
+
72
+ viewcontainer.title(f"Metadata for {file.name}")
73
+
74
+ # Display the uploaded image
75
+ # load image using cv2 format, so it is compatible with the ML models
76
+ file_bytes = np.asarray(bytearray(file.read()), dtype=np.uint8)
77
+ filename = file.name
78
+ image = cv2.imdecode(file_bytes, 1)
79
+ # Extract and display image date-time
80
+ image_datetime = None # For storing date-time from image
81
+ image_datetime = get_image_datetime(file)
82
+ m_logger.debug(f"image date extracted as {image_datetime} (from {uploaded_files})")
83
+
84
+
85
+ # 3. Latitude Entry Box
86
+ latitude = viewcontainer.text_input("Latitude for "+filename, spoof_metadata.get('latitude', ""))
87
+ if latitude and not is_valid_number(latitude):
88
+ viewcontainer.error("Please enter a valid latitude (numerical only).")
89
+ m_logger.error(f"Invalid latitude entered: {latitude}.")
90
+ # 4. Longitude Entry Box
91
+ longitude = viewcontainer.text_input("Longitude for "+filename, spoof_metadata.get('longitude', ""))
92
+ if longitude and not is_valid_number(longitude):
93
+ viewcontainer.error("Please enter a valid longitude (numerical only).")
94
+ m_logger.error(f"Invalid longitude entered: {longitude}.")
95
+ # 5. Date/time
96
+ ## first from image metadata
97
+ if image_datetime is not None:
98
+ time_value = datetime.datetime.strptime(image_datetime, '%Y:%m:%d %H:%M:%S').time()
99
+ date_value = datetime.datetime.strptime(image_datetime, '%Y:%m:%d %H:%M:%S').date()
100
+ else:
101
+ time_value = datetime.datetime.now().time() # Default to current time
102
+ date_value = datetime.datetime.now().date()
103
+
104
+ ## if not, give user the option to enter manually
105
+ date_option = st.sidebar.date_input("Date for "+filename, value=date_value)
106
+ time_option = st.sidebar.time_input("Time for "+filename, time_value)
107
+
108
+ observation = InputObservation(image=file, latitude=latitude, longitude=longitude,
109
+ author_email=author_email, date=image_datetime, time=None,
110
+ date_option=date_option, time_option=time_option)
111
+ observations[file.name] = observation
112
+ images[file.name] = image
113
+
114
+ st.session_state.images = images
115
+ st.session_state.files = uploaded_files
116
+
117
+ return observations
118
+
src/{input_handling.py β†’ input/input_observation.py} RENAMED
@@ -1,48 +1,5 @@
1
- from fractions import Fraction
2
- from PIL import Image
3
- from PIL import ExifTags
4
- import re
5
- import datetime
6
  import hashlib
7
- import logging
8
-
9
- import streamlit as st
10
- from streamlit.runtime.uploaded_file_manager import UploadedFile # for type hinting
11
- from streamlit.delta_generator import DeltaGenerator
12
-
13
- import cv2
14
- import numpy as np
15
-
16
- m_logger = logging.getLogger(__name__)
17
- # we can set the log level locally for funcs in this module
18
- #g_m_logger.setLevel(logging.DEBUG)
19
- m_logger.setLevel(logging.INFO)
20
-
21
- '''
22
- A module to setup the input handling for the whale observation guidance tool
23
-
24
- both the UI elements (setup_input_UI) and the validation functions.
25
- '''
26
- #allowed_image_types = ['webp']
27
- allowed_image_types = ['jpg', 'jpeg', 'png', 'webp']
28
-
29
- import random
30
- import string
31
- def generate_random_md5() -> str:
32
- """
33
- Generates a random MD5 hash.
34
-
35
- This function creates a random string of 16 alphanumeric characters,
36
- encodes it, and then computes its MD5 hash.
37
-
38
- Returns:
39
- str: The MD5 hash of the generated random string.
40
- """
41
- # Generate a random string
42
- random_string = ''.join(random.choices(string.ascii_letters + string.digits, k=16))
43
- # Encode the string and compute its MD5 hash
44
- md5_hash = hashlib.md5(random_string.encode()).hexdigest()
45
- return md5_hash
46
 
47
  # autogenerated class to hold the input data
48
  class InputObservation:
@@ -87,7 +44,9 @@ class InputObservation:
87
  from_input(input):
88
  Creates an observation from another input observation.
89
  """
90
- def __init__(self, image=None, latitude=None, longitude=None, author_email=None, date=None, time=None, date_option=None, time_option=None, uploaded_filename=None):
 
 
91
  self.image = image
92
  self.latitude = latitude
93
  self.longitude = longitude
@@ -125,8 +84,6 @@ class InputObservation:
125
  "author_email": self.author_email,
126
  "date": self.date,
127
  "time": self.time,
128
- # "date_option": self.date_option,
129
- # "time_option": self.time_option,
130
  "date_option": str(self.date_option),
131
  "time_option": str(self.time_option),
132
  "uploaded_filename": self.uploaded_filename
@@ -149,208 +106,5 @@ class InputObservation:
149
  return InputObservation(data["image"], data["latitude"], data["longitude"], data["author_email"], data["date"], data["time"], data["date_option"], data["time_option"], data["uploaded_filename"])
150
 
151
 
152
- def is_valid_number(number:str) -> bool:
153
- """
154
- Check if the given string is a valid number (int or float, sign ok)
155
-
156
- Args:
157
- number (str): The string to be checked.
158
-
159
- Returns:
160
- bool: True if the string is a valid number, False otherwise.
161
- """
162
- pattern = r'^[-+]?[0-9]*\.?[0-9]+$'
163
- return re.match(pattern, number) is not None
164
-
165
-
166
- # Function to validate email address
167
- def is_valid_email(email:str) -> bool:
168
- """
169
- Validates if the provided email address is in a correct format.
170
-
171
- Args:
172
- email (str): The email address to validate.
173
-
174
- Returns:
175
- bool: True if the email address is valid, False otherwise.
176
- """
177
- pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
178
- return re.match(pattern, email) is not None
179
-
180
- # Function to extract date and time from image metadata
181
- def get_image_datetime(image_file: UploadedFile) -> str | None:
182
- """
183
- Extracts the original date and time from the EXIF metadata of an uploaded image file.
184
-
185
- Args:
186
- image_file (UploadedFile): The uploaded image file from which to extract the date and time.
187
-
188
- Returns:
189
- str: The original date and time as a string if available, otherwise None.
190
-
191
- Raises:
192
- Warning: If the date and time could not be extracted from the image metadata.
193
- """
194
- try:
195
- image = Image.open(image_file)
196
- exif_data = image._getexif()
197
- if exif_data is not None:
198
- if ExifTags.Base.DateTimeOriginal in exif_data:
199
- return exif_data.get(ExifTags.Base.DateTimeOriginal)
200
- except Exception as e: # FIXME: what types of exception?
201
- st.warning(f"Could not extract date from image metadata. (file: {image_file.name})")
202
- # TODO: add to logger
203
- return None
204
-
205
- def decimal_coords(coords:tuple, ref:str) -> Fraction:
206
- """
207
- Converts coordinates from degrees, minutes, and seconds to decimal degrees.
208
-
209
- Args:
210
- coords (tuple): A tuple containing three elements representing degrees, minutes, and seconds.
211
- ref (str): A string representing the reference direction ('N', 'S', 'E', 'W').
212
-
213
- Returns:
214
- Fraction: The coordinates in decimal degrees. Negative if the reference is 'S' or 'W'.
215
-
216
- Example:
217
- decimal_coords((40, 26, 46), 'N') -> 40.44611111111111
218
- decimal_coords((40, 26, 46), 'W') -> -40.44611111111111
219
- """
220
- # https://stackoverflow.com/a/73267185
221
- if ref not in ['N', 'S', 'E', 'W']:
222
- raise ValueError("Invalid reference direction. Must be 'N', 'S', 'E', or 'W'.")
223
- if len(coords) != 3:
224
- raise ValueError("Coordinates must be a tuple of three elements (degrees, minutes, seconds).")
225
-
226
- decimal_degrees = coords[0] + coords[1] / 60 + coords[2] / 3600
227
- if ref == "S" or ref =='W':
228
- decimal_degrees = -decimal_degrees
229
- return decimal_degrees
230
-
231
-
232
- def get_image_latlon(image_file: UploadedFile) -> tuple[float, float] | None:
233
- """
234
- Extracts the latitude and longitude from the EXIF metadata of an uploaded image file.
235
-
236
- Args:
237
- image_file (UploadedFile): The uploaded image file from which to extract the latitude and longitude.
238
-
239
- Returns:
240
- tuple[float, float]: The latitude and longitude as a tuple if available, otherwise None.
241
-
242
- Raises:
243
- Warning: If the latitude and longitude could not be extracted from the image metadata.
244
- """
245
- try:
246
- image = Image.open(image_file)
247
- exif_data = image._getexif()
248
- if exif_data is not None:
249
- if ExifTags.Base.GPSInfo in exif_data:
250
- gps_ifd = exif_data.get(ExifTags.Base.GPSInfo)
251
-
252
- lat = float(decimal_coords(gps_ifd[ExifTags.GPS.GPSLatitude], gps_ifd[ExifTags.GPS.GPSLatitudeRef]))
253
- lon = float(decimal_coords(gps_ifd[ExifTags.GPS.GPSLongitude], gps_ifd[ExifTags.GPS.GPSLongitudeRef]))
254
-
255
- return lat, lon
256
-
257
- except Exception as e: # FIXME: what types of exception?
258
- st.warning(f"Could not extract latitude and longitude from image metadata. (file: {str(image_file)}")
259
-
260
-
261
- # an arbitrary set of defaults so testing is less painful...
262
- # ideally we add in some randomization to the defaults
263
- spoof_metadata = {
264
- "latitude": 23.5,
265
- "longitude": 44,
266
- "author_email": "[email protected]",
267
- "date": None,
268
- "time": None,
269
- }
270
-
271
- #def display_whale(whale_classes:List[str], i:int, viewcontainer=None):
272
- def setup_input(
273
- viewcontainer: DeltaGenerator=None,
274
- _allowed_image_types: list=None, ) -> InputObservation:
275
- """
276
- Sets up the input interface for uploading an image and entering metadata.
277
-
278
- It provides input fields for an image upload, lat/lon, author email, and date-time.
279
- In the ideal case, the image metadata will be used to populate location and datetime.
280
-
281
- Parameters:
282
- viewcontainer (DeltaGenerator, optional): The Streamlit container to use for the input interface. Defaults to st.sidebar.
283
- _allowed_image_types (list, optional): List of allowed image file types for upload. Defaults to allowed_image_types.
284
-
285
- Returns:
286
- InputObservation: An object containing the uploaded image and entered metadata.
287
-
288
- """
289
-
290
- if viewcontainer is None:
291
- viewcontainer = st.sidebar
292
-
293
- if _allowed_image_types is None:
294
- _allowed_image_types = allowed_image_types
295
-
296
-
297
- viewcontainer.title("Input image and data")
298
-
299
- # 1. Image Selector
300
- uploaded_filename = viewcontainer.file_uploader("Upload an image", type=allowed_image_types)
301
- image_datetime = None # For storing date-time from image
302
-
303
- if uploaded_filename is not None:
304
- # Display the uploaded image
305
- #image = Image.open(uploaded_filename)
306
- # load image using cv2 format, so it is compatible with the ML models
307
- file_bytes = np.asarray(bytearray(uploaded_filename.read()), dtype=np.uint8)
308
- image = cv2.imdecode(file_bytes, 1)
309
-
310
-
311
- viewcontainer.image(image, caption='Uploaded Image.', use_column_width=True)
312
- # store the image in the session state
313
- st.session_state.image = image
314
-
315
-
316
- # Extract and display image date-time
317
- image_datetime = get_image_datetime(uploaded_filename)
318
- print(f"[D] image date extracted as {image_datetime}")
319
- m_logger.debug(f"image date extracted as {image_datetime} (from {uploaded_filename})")
320
-
321
-
322
- # 2. Latitude Entry Box
323
- latitude = viewcontainer.text_input("Latitude", spoof_metadata.get('latitude', ""))
324
- if latitude and not is_valid_number(latitude):
325
- viewcontainer.error("Please enter a valid latitude (numerical only).")
326
- m_logger.error(f"Invalid latitude entered: {latitude}.")
327
- # 3. Longitude Entry Box
328
- longitude = viewcontainer.text_input("Longitude", spoof_metadata.get('longitude', ""))
329
- if longitude and not is_valid_number(longitude):
330
- viewcontainer.error("Please enter a valid longitude (numerical only).")
331
- m_logger.error(f"Invalid latitude entered: {latitude}.")
332
-
333
- # 4. Author Box with Email Address Validator
334
- author_email = viewcontainer.text_input("Author Email", spoof_metadata.get('author_email', ""))
335
-
336
- if author_email and not is_valid_email(author_email):
337
- viewcontainer.error("Please enter a valid email address.")
338
-
339
- # 5. date/time
340
- ## first from image metadata
341
- if image_datetime is not None:
342
- time_value = datetime.datetime.strptime(image_datetime, '%Y:%m:%d %H:%M:%S').time()
343
- date_value = datetime.datetime.strptime(image_datetime, '%Y:%m:%d %H:%M:%S').date()
344
- else:
345
- time_value = datetime.datetime.now().time() # Default to current time
346
- date_value = datetime.datetime.now().date()
347
-
348
- ## if not, give user the option to enter manually
349
- date_option = st.sidebar.date_input("Date", value=date_value)
350
- time_option = st.sidebar.time_input("Time", time_value)
351
 
352
- observation = InputObservation(image=uploaded_filename, latitude=latitude, longitude=longitude,
353
- author_email=author_email, date=image_datetime, time=None,
354
- date_option=date_option, time_option=time_option)
355
- return observation
356
 
 
 
 
 
 
 
1
  import hashlib
2
+ from input.input_validator import generate_random_md5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
  # autogenerated class to hold the input data
5
  class InputObservation:
 
44
  from_input(input):
45
  Creates an observation from another input observation.
46
  """
47
+ def __init__(self, image=None, latitude=None, longitude=None,
48
+ author_email=None, date=None, time=None, date_option=None, time_option=None,
49
+ uploaded_filename=None):
50
  self.image = image
51
  self.latitude = latitude
52
  self.longitude = longitude
 
84
  "author_email": self.author_email,
85
  "date": self.date,
86
  "time": self.time,
 
 
87
  "date_option": str(self.date_option),
88
  "time_option": str(self.time_option),
89
  "uploaded_filename": self.uploaded_filename
 
106
  return InputObservation(data["image"], data["latitude"], data["longitude"], data["author_email"], data["date"], data["time"], data["date_option"], data["time_option"], data["uploaded_filename"])
107
 
108
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
 
 
 
 
110
 
src/input/input_validator.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ import string
3
+ import hashlib
4
+ import re
5
+ import streamlit as st
6
+ from fractions import Fraction
7
+
8
+ from PIL import Image
9
+ from PIL import ExifTags
10
+
11
+ from streamlit.runtime.uploaded_file_manager import UploadedFile
12
+
13
+ def generate_random_md5():
14
+ # Generate a random string
15
+ random_string = ''.join(random.choices(string.ascii_letters + string.digits, k=16))
16
+ # Encode the string and compute its MD5 hash
17
+ md5_hash = hashlib.md5(random_string.encode()).hexdigest()
18
+ return md5_hash
19
+
20
+ def is_valid_number(number:str) -> bool:
21
+ """
22
+ Check if the given string is a valid number (int or float, sign ok)
23
+
24
+ Args:
25
+ number (str): The string to be checked.
26
+
27
+ Returns:
28
+ bool: True if the string is a valid number, False otherwise.
29
+ """
30
+ pattern = r'^[-+]?[0-9]*\.?[0-9]+$'
31
+ return re.match(pattern, number) is not None
32
+
33
+ # Function to validate email address
34
+ def is_valid_email(email:str) -> bool:
35
+ """
36
+ Validates if the provided email address is in a correct format.
37
+
38
+ Args:
39
+ email (str): The email address to validate.
40
+
41
+ Returns:
42
+ bool: True if the email address is valid, False otherwise.
43
+ """
44
+ pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
45
+ return re.match(pattern, email) is not None
46
+
47
+ # Function to extract date and time from image metadata
48
+ def get_image_datetime(image_file):
49
+ """
50
+ Extracts the original date and time from the EXIF metadata of an uploaded image file.
51
+
52
+ Args:
53
+ image_file (UploadedFile): The uploaded image file from which to extract the date and time.
54
+
55
+ Returns:
56
+ str: The original date and time as a string if available, otherwise None.
57
+
58
+ Raises:
59
+ Warning: If the date and time could not be extracted from the image metadata.
60
+ """
61
+ try:
62
+ image = Image.open(image_file)
63
+ exif_data = image._getexif()
64
+ if exif_data is not None:
65
+ if ExifTags.Base.DateTimeOriginal in exif_data:
66
+ return exif_data.get(ExifTags.Base.DateTimeOriginal)
67
+ except Exception as e: # FIXME: what types of exception?
68
+ st.warning(f"Could not extract date from image metadata. (file: {image_file.name})")
69
+ # TODO: add to logger
70
+ return None
71
+
72
+ def decimal_coords(coords:tuple, ref:str) -> Fraction:
73
+ """
74
+ Converts coordinates from degrees, minutes, and seconds to decimal degrees.
75
+
76
+ Args:
77
+ coords (tuple): A tuple containing three elements representing degrees, minutes, and seconds.
78
+ ref (str): A string representing the reference direction ('N', 'S', 'E', 'W').
79
+
80
+ Returns:
81
+ Fraction: The coordinates in decimal degrees. Negative if the reference is 'S' or 'W'.
82
+
83
+ Example:
84
+ decimal_coords((40, 26, 46), 'N') -> 40.44611111111111
85
+ decimal_coords((40, 26, 46), 'W') -> -40.44611111111111
86
+ """
87
+ # https://stackoverflow.com/a/73267185
88
+ if ref not in ['N', 'S', 'E', 'W']:
89
+ raise ValueError("Invalid reference direction. Must be 'N', 'S', 'E', or 'W'.")
90
+ if len(coords) != 3:
91
+ raise ValueError("Coordinates must be a tuple of three elements (degrees, minutes, seconds).")
92
+
93
+ decimal_degrees = coords[0] + coords[1] / 60 + coords[2] / 3600
94
+ if ref == "S" or ref =='W':
95
+ decimal_degrees = -decimal_degrees
96
+ return decimal_degrees
97
+
98
+
99
+ def get_image_latlon(image_file: UploadedFile) -> tuple[float, float] | None:
100
+ """
101
+ Extracts the latitude and longitude from the EXIF metadata of an uploaded image file.
102
+
103
+ Args:
104
+ image_file (UploadedFile): The uploaded image file from which to extract the latitude and longitude.
105
+
106
+ Returns:
107
+ tuple[float, float]: The latitude and longitude as a tuple if available, otherwise None.
108
+
109
+ Raises:
110
+ Warning: If the latitude and longitude could not be extracted from the image metadata.
111
+ """
112
+ try:
113
+ image = Image.open(image_file)
114
+ exif_data = image._getexif()
115
+ if exif_data is not None:
116
+ if ExifTags.Base.GPSInfo in exif_data:
117
+ gps_ifd = exif_data.get(ExifTags.Base.GPSInfo)
118
+
119
+ lat = float(decimal_coords(gps_ifd[ExifTags.GPS.GPSLatitude], gps_ifd[ExifTags.GPS.GPSLatitudeRef]))
120
+ lon = float(decimal_coords(gps_ifd[ExifTags.GPS.GPSLongitude], gps_ifd[ExifTags.GPS.GPSLongitudeRef]))
121
+
122
+ return lat, lon
123
+
124
+ except Exception as e: # FIXME: what types of exception?
125
+ st.warning(f"Could not extract latitude and longitude from image metadata. (file: {str(image_file)}")
src/main.py CHANGED
@@ -1,31 +1,25 @@
1
- #import datetime
2
- from PIL import Image
3
-
4
- import json
5
  import logging
6
  import os
7
- import tempfile
8
 
9
  import pandas as pd
10
  import streamlit as st
11
- from streamlit.delta_generator import DeltaGenerator # for type hinting
12
  import folium
13
  from streamlit_folium import st_folium
14
- from huggingface_hub import HfApi
15
  from transformers import pipeline
16
  from transformers import AutoModelForImageClassification
17
 
18
  from datasets import disable_caching
19
  disable_caching()
20
 
21
- import alps_map as sw_am
22
- import input_handling as sw_inp
23
- import obs_map as sw_map
24
- import st_logs as sw_logs
25
- import whale_gallery as sw_wg
26
- import whale_viewer as sw_wv
27
-
28
-
29
 
30
 
31
  # setup for the ML model on huggingface (our wrapper)
@@ -45,96 +39,40 @@ g_logger = logging.getLogger(__name__)
45
  g_logger.setLevel(LOG_LEVEL)
46
 
47
  st.set_page_config(layout="wide")
48
- #sw_logs.setup_logging(level=LOG_LEVEL, buffer_len=40)
49
-
50
-
51
 
52
  # initialise various session state variables
53
  if "handler" not in st.session_state:
54
- st.session_state['handler'] = sw_logs.setup_logging()
 
 
 
 
 
 
55
 
56
- if "full_data" not in st.session_state:
57
- st.session_state.full_data = {}
 
 
 
58
 
59
  if "classify_whale_done" not in st.session_state:
60
  st.session_state.classify_whale_done = False
61
 
62
  if "whale_prediction1" not in st.session_state:
63
  st.session_state.whale_prediction1 = None
64
-
65
- if "image" not in st.session_state:
66
- st.session_state.image = None
67
 
68
  if "tab_log" not in st.session_state:
69
  st.session_state.tab_log = None
70
 
71
 
72
- def metadata2md() -> str:
73
- """Get metadata from cache and return as markdown-formatted key-value list
74
-
75
- Returns:
76
- str: Markdown-formatted key-value list of metadata
77
-
78
- """
79
- markdown_str = "\n"
80
- for key, value in st.session_state.full_data.items():
81
- markdown_str += f"- **{key}**: {value}\n"
82
- return markdown_str
83
-
84
-
85
- def push_observation(tab_log:DeltaGenerator=None):
86
- """
87
- Push the observation to the Hugging Face dataset
88
-
89
- Args:
90
- tab_log (streamlit.container): The container to log messages to. If not provided,
91
- log messages are in any case written to the global logger (TODO: test - didn't
92
- push any data since generating the logger)
93
-
94
- """
95
- # we get the data from session state: 1 is the dict 2 is the image.
96
- # first, lets do an info display (popup)
97
- metadata_str = json.dumps(st.session_state.full_data)
98
-
99
- st.toast(f"Uploading observation: {metadata_str}", icon="🦭")
100
- tab_log = st.session_state.tab_log
101
- if tab_log is not None:
102
- tab_log.info(f"Uploading observation: {metadata_str}")
103
-
104
- # get huggingface api
105
- import os
106
- token = os.environ.get("HF_TOKEN", None)
107
- api = HfApi(token=token)
108
-
109
- f = tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False)
110
- f.write(metadata_str)
111
- f.close()
112
- st.info(f"temp file: {f.name} with metadata written...")
113
-
114
- path_in_repo= f"metadata/{st.session_state.full_data['author_email']}/{st.session_state.full_data['image_md5']}.json"
115
- msg = f"fname: {f.name} | path: {path_in_repo}"
116
- print(msg)
117
- st.warning(msg)
118
- rv = api.upload_file(
119
- path_or_fileobj=f.name,
120
- path_in_repo=path_in_repo,
121
- repo_id="Saving-Willy/temp_dataset",
122
- repo_type="dataset",
123
- )
124
- print(rv)
125
- msg = f"data attempted tx to repo happy walrus: {rv}"
126
- g_logger.info(msg)
127
- st.info(msg)
128
-
129
-
130
-
131
  def main() -> None:
132
  """
133
  Main entry point to set up the streamlit UI and run the application.
134
 
135
  The organisation is as follows:
136
 
137
- 1. data input (a new observation) is handled in the sidebar
138
  2. the rest of the interface is organised in tabs:
139
 
140
  - cetean classifier
@@ -156,26 +94,25 @@ def main() -> None:
156
  #g_logger.warning("warning message")
157
 
158
  # Streamlit app
159
- #tab_gallery, tab_inference, tab_hotdogs, tab_map, tab_coords, tab_log = st.tabs(["Cetecean classifier", "Hotdog classifier", "Map", "Data", "Log", "Beautiful cetaceans"])
160
  tab_inference, tab_hotdogs, tab_map, tab_coords, tab_log, tab_gallery = \
161
  st.tabs(["Cetecean classifier", "Hotdog classifier", "Map", "*:gray[Dev:coordinates]*", "Log", "Beautiful cetaceans"])
162
  st.session_state.tab_log = tab_log
163
 
164
 
165
- # create a sidebar, and parse all the input (returned as `observation` object)
166
- observation = sw_inp.setup_input(viewcontainer=st.sidebar)
167
 
168
 
169
  if 0:## WIP
170
- # goal of this code is to allow the user to override the ML prediction, before transmitting an observation
171
- predicted_class = st.sidebar.selectbox("Predicted Class", sw_wv.WHALE_CLASSES)
172
  override_prediction = st.sidebar.checkbox("Override Prediction")
173
 
174
  if override_prediction:
175
- overridden_class = st.sidebar.selectbox("Override Class", sw_wv.WHALE_CLASSES)
176
- st.session_state.full_data['class_overriden'] = overridden_class
177
  else:
178
- st.session_state.full_data['class_overriden'] = None
179
 
180
 
181
  with tab_map:
@@ -190,19 +127,19 @@ def main() -> None:
190
 
191
  if show_db_points:
192
  # show a nicer map, observations marked, tileset selectable.
193
- st_data = sw_map.present_obs_map(
194
  dataset_id=dataset_id, data_files=data_files,
195
  dbg_show_extra=dbg_show_extra)
196
 
197
  else:
198
  # development map.
199
- st_data = sw_am.present_alps_map()
200
 
201
 
202
  with tab_log:
203
  handler = st.session_state['handler']
204
  if handler is not None:
205
- records = sw_logs.parse_log_buffer(handler.buffer)
206
  st.dataframe(records[::-1], use_container_width=True,)
207
  st.info(f"Length of records: {len(records)}")
208
  else:
@@ -236,22 +173,16 @@ def main() -> None:
236
  # specific to the gallery (otherwise we get side effects)
237
  tg_cont = st.container(key="swgallery")
238
  with tg_cont:
239
- sw_wg.render_whale_gallery(n_cols=4)
240
 
241
 
242
- # Display submitted data
243
  if st.sidebar.button("Validate"):
244
- # create a dictionary with the submitted data
245
- submitted_data = observation.to_dict()
246
- #print(submitted_data)
247
-
248
- #full_data.update(**submitted_data)
249
- for k, v in submitted_data.items():
250
- st.session_state.full_data[k] = v
251
 
252
- #st.write(f"full dict of data: {json.dumps(submitted_data)}")
253
- #tab_inference.info(f"{st.session_state.full_data}")
254
- tab_log.info(f"{st.session_state.full_data}")
255
 
256
  df = pd.DataFrame(submitted_data, index=[0])
257
  with tab_coords:
@@ -278,49 +209,12 @@ def main() -> None:
278
  trust_remote_code=True)
279
 
280
 
281
- if st.session_state.image is None:
282
  # TODO: cleaner design to disable the button until data input done?
283
  st.info("Please upload an image first.")
284
  else:
285
- # run classifier model on `image`, and persistently store the output
286
- out = cetacean_classifier(st.session_state.image) # get top 3 matches
287
- st.session_state.whale_prediction1 = out['predictions'][0]
288
- st.session_state.classify_whale_done = True
289
- msg = f"[D]2 classify_whale_done: {st.session_state.classify_whale_done}, whale_prediction1: {st.session_state.whale_prediction1}"
290
- st.info(msg)
291
- g_logger.info(msg)
292
-
293
- # dropdown for selecting/overriding the species prediction
294
- #st.info(f"[D] classify_whale_done: {st.session_state.classify_whale_done}, whale_prediction1: {st.session_state.whale_prediction1}")
295
- if not st.session_state.classify_whale_done:
296
- selected_class = tab_inference.sidebar.selectbox("Species", sw_wv.WHALE_CLASSES, index=None, placeholder="Species not yet identified...", disabled=True)
297
- else:
298
- pred1 = st.session_state.whale_prediction1
299
- # get index of pred1 from WHALE_CLASSES, none if not present
300
- print(f"[D] pred1: {pred1}")
301
- ix = sw_wv.WHALE_CLASSES.index(pred1) if pred1 in sw_wv.WHALE_CLASSES else None
302
- selected_class = tab_inference.selectbox("Species", sw_wv.WHALE_CLASSES, index=ix)
303
-
304
- st.session_state.full_data['predicted_class'] = selected_class
305
- if selected_class != st.session_state.whale_prediction1:
306
- st.session_state.full_data['class_overriden'] = selected_class
307
 
308
- btn = st.button("Upload observation to THE INTERNET!", on_click=push_observation)
309
- # TODO: the metadata only fills properly if `validate` was clicked.
310
- tab_inference.markdown(metadata2md())
311
-
312
- msg = f"[D] full data after inference: {st.session_state.full_data}"
313
- g_logger.debug(msg)
314
- print(msg)
315
- # TODO: add a link to more info on the model, next to the button.
316
-
317
- whale_classes = out['predictions'][:]
318
- # render images for the top 3 (that is what the model api returns)
319
- with tab_inference:
320
- st.markdown("## Species detected")
321
- for i in range(len(whale_classes)):
322
- sw_wv.display_whale(whale_classes, i)
323
-
324
 
325
 
326
 
@@ -340,27 +234,10 @@ def main() -> None:
340
 
341
  if st.session_state.image is None:
342
  st.info("Please upload an image first.")
343
- st.info(str(observation.to_dict()))
344
 
345
  else:
346
- col1, col2 = tab_hotdogs.columns(2)
347
-
348
- # display the image (use cached version, no need to reread)
349
- col1.image(st.session_state.image, use_column_width=True)
350
- # and then run inference on the image
351
- hotdog_image = Image.fromarray(st.session_state.image)
352
- predictions = pipeline_hot_dog(hotdog_image)
353
-
354
- col2.header("Probabilities")
355
- first = True
356
- for p in predictions:
357
- col2.subheader(f"{ p['label'] }: { round(p['score'] * 100, 1)}%")
358
- if first:
359
- st.session_state.full_data['predicted_class'] = p['label']
360
- st.session_state.full_data['predicted_score'] = round(p['score'] * 100, 1)
361
- first = False
362
-
363
- tab_hotdogs.write(f"Session Data: {json.dumps(st.session_state.full_data)}")
364
 
365
 
366
 
 
 
 
 
 
1
  import logging
2
  import os
 
3
 
4
  import pandas as pd
5
  import streamlit as st
 
6
  import folium
7
  from streamlit_folium import st_folium
8
+
9
  from transformers import pipeline
10
  from transformers import AutoModelForImageClassification
11
 
12
  from datasets import disable_caching
13
  disable_caching()
14
 
15
+ import whale_gallery as gallery
16
+ import whale_viewer as viewer
17
+ from input.input_handling import setup_input
18
+ from maps.alps_map import present_alps_map
19
+ from maps.obs_map import present_obs_map
20
+ from utils.st_logs import setup_logging, parse_log_buffer
21
+ from classifier.classifier_image import cetacean_classify
22
+ from classifier.classifier_hotdog import hotdog_classify
23
 
24
 
25
  # setup for the ML model on huggingface (our wrapper)
 
39
  g_logger.setLevel(LOG_LEVEL)
40
 
41
  st.set_page_config(layout="wide")
 
 
 
42
 
43
  # initialise various session state variables
44
  if "handler" not in st.session_state:
45
+ st.session_state['handler'] = setup_logging()
46
+
47
+ if "observations" not in st.session_state:
48
+ st.session_state.observations = {}
49
+
50
+ if "images" not in st.session_state:
51
+ st.session_state.images = {}
52
 
53
+ if "files" not in st.session_state:
54
+ st.session_state.files = {}
55
+
56
+ if "public_observation" not in st.session_state:
57
+ st.session_state.public_observation = {}
58
 
59
  if "classify_whale_done" not in st.session_state:
60
  st.session_state.classify_whale_done = False
61
 
62
  if "whale_prediction1" not in st.session_state:
63
  st.session_state.whale_prediction1 = None
 
 
 
64
 
65
  if "tab_log" not in st.session_state:
66
  st.session_state.tab_log = None
67
 
68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  def main() -> None:
70
  """
71
  Main entry point to set up the streamlit UI and run the application.
72
 
73
  The organisation is as follows:
74
 
75
+ 1. observation input (a new observations) is handled in the sidebar
76
  2. the rest of the interface is organised in tabs:
77
 
78
  - cetean classifier
 
94
  #g_logger.warning("warning message")
95
 
96
  # Streamlit app
 
97
  tab_inference, tab_hotdogs, tab_map, tab_coords, tab_log, tab_gallery = \
98
  st.tabs(["Cetecean classifier", "Hotdog classifier", "Map", "*:gray[Dev:coordinates]*", "Log", "Beautiful cetaceans"])
99
  st.session_state.tab_log = tab_log
100
 
101
 
102
+ # create a sidebar, and parse all the input (returned as `observations` object)
103
+ observations = setup_input(viewcontainer=st.sidebar)
104
 
105
 
106
  if 0:## WIP
107
+ # goal of this code is to allow the user to override the ML prediction, before transmitting an observations
108
+ predicted_class = st.sidebar.selectbox("Predicted Class", viewer.WHALE_CLASSES)
109
  override_prediction = st.sidebar.checkbox("Override Prediction")
110
 
111
  if override_prediction:
112
+ overridden_class = st.sidebar.selectbox("Override Class", viewer.WHALE_CLASSES)
113
+ st.session_state.observations['class_overriden'] = overridden_class
114
  else:
115
+ st.session_state.observations['class_overriden'] = None
116
 
117
 
118
  with tab_map:
 
127
 
128
  if show_db_points:
129
  # show a nicer map, observations marked, tileset selectable.
130
+ st_observation = present_obs_map(
131
  dataset_id=dataset_id, data_files=data_files,
132
  dbg_show_extra=dbg_show_extra)
133
 
134
  else:
135
  # development map.
136
+ st_observation = present_alps_map()
137
 
138
 
139
  with tab_log:
140
  handler = st.session_state['handler']
141
  if handler is not None:
142
+ records = parse_log_buffer(handler.buffer)
143
  st.dataframe(records[::-1], use_container_width=True,)
144
  st.info(f"Length of records: {len(records)}")
145
  else:
 
173
  # specific to the gallery (otherwise we get side effects)
174
  tg_cont = st.container(key="swgallery")
175
  with tg_cont:
176
+ gallery.render_whale_gallery(n_cols=4)
177
 
178
 
179
+ # Display submitted observation
180
  if st.sidebar.button("Validate"):
181
+ # create a dictionary with the submitted observation
182
+ submitted_data = observations
183
+ st.session_state.observations = observations
 
 
 
 
184
 
185
+ tab_log.info(f"{st.session_state.observations}")
 
 
186
 
187
  df = pd.DataFrame(submitted_data, index=[0])
188
  with tab_coords:
 
209
  trust_remote_code=True)
210
 
211
 
212
+ if st.session_state.images is None:
213
  # TODO: cleaner design to disable the button until data input done?
214
  st.info("Please upload an image first.")
215
  else:
216
+ cetacean_classify(cetacean_classifier)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
 
219
 
220
 
 
234
 
235
  if st.session_state.image is None:
236
  st.info("Please upload an image first.")
237
+ #st.info(str(observations.to_dict()))
238
 
239
  else:
240
+ hotdog_classify(pipeline_hot_dog, tab_hotdogs)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
 
242
 
243
 
src/{alps_map.py β†’ maps/alps_map.py} RENAMED
File without changes
src/{obs_map.py β†’ maps/obs_map.py} RENAMED
@@ -7,8 +7,8 @@ import streamlit as st
7
  import folium
8
  from streamlit_folium import st_folium
9
 
10
- import whale_viewer as sw_wv
11
- from fix_tabrender import js_show_zeroheight_iframe
12
 
13
  m_logger = logging.getLogger(__name__)
14
  # we can set the log level locally for funcs in this module
@@ -60,7 +60,7 @@ _colors = [
60
  "#778899" # Light Slate Gray
61
  ]
62
 
63
- whale2color = {k: v for k, v in zip(sw_wv.WHALE_CLASSES, _colors)}
64
 
65
  def create_map(tile_name:str, location:Tuple[float], zoom_start: int = 7) -> folium.Map:
66
  """
 
7
  import folium
8
  from streamlit_folium import st_folium
9
 
10
+ import whale_viewer as viewer
11
+ from utils.fix_tabrender import js_show_zeroheight_iframe
12
 
13
  m_logger = logging.getLogger(__name__)
14
  # we can set the log level locally for funcs in this module
 
60
  "#778899" # Light Slate Gray
61
  ]
62
 
63
+ whale2color = {k: v for k, v in zip(viewer.WHALE_CLASSES, _colors)}
64
 
65
  def create_map(tile_name:str, location:Tuple[float], zoom_start: int = 7) -> folium.Map:
66
  """
src/{fix_tabrender.py β†’ utils/fix_tabrender.py} RENAMED
File without changes
src/utils/grid_maker.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import math
3
+
4
+ def gridder(files):
5
+ cols = st.columns(3)
6
+ with cols[0]:
7
+ batch_size = st.select_slider("Batch size:",range(10,110,10), value=10)
8
+ with cols[1]:
9
+ row_size = st.select_slider("Row size:", range(1,6), value = 5)
10
+ num_batches = math.ceil(len(files)/batch_size)
11
+ with cols[2]:
12
+ page = st.selectbox("Page", range(1,num_batches+1))
13
+ return batch_size, row_size, page
src/utils/metadata_handler.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+ def metadata2md() -> str:
4
+ """Get metadata from cache and return as markdown-formatted key-value list
5
+
6
+ Returns:
7
+ str: Markdown-formatted key-value list of metadata
8
+
9
+ """
10
+ markdown_str = "\n"
11
+ keys_to_print = ["latitude","longitude","author_email","date","time"]
12
+ for key, value in st.session_state.public_observation.items():
13
+ if key in keys_to_print:
14
+ markdown_str += f"- **{key}**: {value}\n"
15
+ return markdown_str
16
+
src/{st_logs.py β†’ utils/st_logs.py} RENAMED
File without changes
src/whale_viewer.py CHANGED
@@ -1,4 +1,5 @@
1
  from typing import List
 
2
  from streamlit.delta_generator import DeltaGenerator
3
 
4
  from PIL import Image
@@ -133,7 +134,7 @@ def display_whale(whale_classes:List[str], i:int, viewcontainer:DeltaGenerator=N
133
  None
134
 
135
  """
136
- import streamlit as st
137
  if viewcontainer is None:
138
  viewcontainer = st
139
 
@@ -147,11 +148,10 @@ def display_whale(whale_classes:List[str], i:int, viewcontainer:DeltaGenerator=N
147
 
148
 
149
  viewcontainer.markdown(
150
- "### :whale: #" + str(i + 1) + ": " + format_whale_name(whale_classes[i])
151
  )
152
  current_dir = os.getcwd()
153
  image_path = os.path.join(current_dir, "src/images/references/")
154
  image = Image.open(image_path + df_whale_img_ref.loc[whale_classes[i], "WHALE_IMAGES"])
155
 
156
- viewcontainer.image(image, caption=df_whale_img_ref.loc[whale_classes[i], "WHALE_REFERENCES"])
157
- # link st.markdown(f"[{df.loc[whale_classes[i], 'WHALE_REFERENCES']}]({df.loc[whale_classes[i], 'WHALE_REFERENCES']})")
 
1
  from typing import List
2
+ import streamlit as st
3
  from streamlit.delta_generator import DeltaGenerator
4
 
5
  from PIL import Image
 
134
  None
135
 
136
  """
137
+
138
  if viewcontainer is None:
139
  viewcontainer = st
140
 
 
148
 
149
 
150
  viewcontainer.markdown(
151
+ ":whale: #" + str(i + 1) + ": " + format_whale_name(whale_classes[i])
152
  )
153
  current_dir = os.getcwd()
154
  image_path = os.path.join(current_dir, "src/images/references/")
155
  image = Image.open(image_path + df_whale_img_ref.loc[whale_classes[i], "WHALE_IMAGES"])
156
 
157
+ viewcontainer.image(image, caption=df_whale_img_ref.loc[whale_classes[i], "WHALE_REFERENCES"], use_column_width=True)
 
tests/test_input_handling.py CHANGED
@@ -2,7 +2,7 @@ import pytest
2
  from pathlib import Path
3
 
4
  from input_handling import is_valid_email, is_valid_number
5
- from input_handling import get_image_datetime, get_image_latlon, decimal_coords
6
 
7
  # generate tests for is_valid_email
8
  # - test with valid email
 
2
  from pathlib import Path
3
 
4
  from input_handling import is_valid_email, is_valid_number
5
+ from input.input_validator import get_image_latlon, decimal_coords, get_image_datetime
6
 
7
  # generate tests for is_valid_email
8
  # - test with valid email