Spaces:

Saving-Willy
/

saving-willy-dev

Sleeping

App Files Files Community

vancauwe committed on Jan 17

Commit

0e8c927

1 Parent(s): 54319e9

feat: refactor and multi image classification

Browse files

Files changed (15) hide show

src/classifier/classifier_hotdog.py +26 -0
src/classifier/classifier_image.py +69 -0
src/classifier_image.py +70 -0
src/hf_push_observations.py +56 -0
src/{input_handling.py → input/input_handling.py} +3 -174
src/input/input_observation.py +110 -0
src/input/input_validator.py +68 -0
src/main.py +43 -171
src/{alps_map.py → maps/alps_map.py} +0 -0
src/{obs_map.py → maps/obs_map.py} +3 -3
src/{fix_tabrender.py → utils/fix_tabrender.py} +0 -0
src/utils/grid_maker.py +13 -0
src/utils/metadata_handler.py +16 -0
src/{st_logs.py → utils/st_logs.py} +0 -0
src/whale_viewer.py +4 -5

src/classifier/classifier_hotdog.py ADDED Viewed

	@@ -0,0 +1,26 @@

+import streamlit as st
+import json
+from PIL import Image
+def hotdog_classify(pipeline_hot_dog, tab_hotdogs):
+    """
+    Run the hotdog classifier on every uploaded image and show the results
+    Each image from the session state is displayed in the left column of
+    `tab_hotdogs`; the labels and scores of its predictions are listed in the
+    right column, and the observation dict (extended with the first
+    prediction) is written underneath.
+    Args:
+        pipeline_hot_dog: Callable applied to a PIL image; its result is
+            iterated as dicts with 'label' and 'score' entries.
+        tab_hotdogs: The streamlit container the output is rendered into.
+    """
+    image_col, result_col = tab_hotdogs.columns(2)
+    for uploaded in st.session_state.files:
+        cached_image = st.session_state.images[uploaded.name]
+        observation = st.session_state.observations[uploaded.name].to_dict()
+        # the cached image is shown as-is, no need to read the file again
+        image_col.image(cached_image, use_column_width=True)
+        # inference runs on a PIL image built from the cached one
+        predictions = pipeline_hot_dog(Image.fromarray(cached_image))
+        result_col.header("Probabilities")
+        for rank, prediction in enumerate(predictions):
+            label = prediction['label']
+            percent = round(prediction['score'] * 100, 1)
+            result_col.subheader(f"{ label }: { percent}%")
+            # only the first prediction is recorded on the observation
+            if rank == 0:
+                observation['predicted_class'] = label
+                observation['predicted_score'] = percent
+        tab_hotdogs.write(f"Session observation: {json.dumps(observation)}")

src/classifier/classifier_image.py ADDED Viewed

	@@ -0,0 +1,69 @@

+import streamlit as st
+import logging
+# get a global var for logger accessor in this module
+LOG_LEVEL = logging.DEBUG
+g_logger = logging.getLogger(__name__)
+g_logger.setLevel(LOG_LEVEL)
+import whale_viewer as viewer
+from hf_push_observations import push_observations
+from utils.grid_maker import gridder
+from utils.metadata_handler import metadata2md
+def _push_single_observation(observation):
+    """
+    Upload the observation that belongs to the clicked button
+    `push_observations` is called without arguments and the observation is
+    handed over through `st.session_state.public_observation`, so that entry
+    is set to the clicked image's observation first.
+    Args:
+        observation (dict): The observation of the image whose upload button
+            was clicked.
+    """
+    st.session_state.public_observation = observation
+    push_observations()
+def cetacean_classify(cetacean_classifier, tab_inference):
+    """
+    Classify every uploaded image and render the results in a grid
+    For each file in the session state the image is shown, the classifier is
+    run, a species dropdown is preselected with the first prediction, an
+    upload button is offered and all returned predictions are displayed.
+    Args:
+        cetacean_classifier: Callable applied to an image; its result is
+            indexed as `out['predictions']`, whose first entry is looked up in
+            `viewer.WHALE_CLASSES`.
+        tab_inference: Not used by this function at the moment; kept so the
+            signature stays as callers expect it.
+    """
+    files = st.session_state.files
+    images = st.session_state.images
+    observations = st.session_state.observations
+    _batch_size, row_size, _page = gridder(files)
+    grid = st.columns(row_size)
+    for position, file in enumerate(files):
+        image = images[file.name]
+        # cells are filled left to right, wrapping around after `row_size`
+        with grid[position % row_size]:
+            st.image(image, use_column_width=True)
+            observation = observations[file.name].to_dict()
+            # run classifier model on `image`, and persistently store the output
+            out = cetacean_classifier(image) # get top 3 matches
+            pred1 = out['predictions'][0]
+            st.session_state.whale_prediction1 = pred1
+            st.session_state.classify_whale_done = True
+            msg = f"[D]2 classify_whale_done: {st.session_state.classify_whale_done}, whale_prediction1: {st.session_state.whale_prediction1}"
+            g_logger.info(msg)
+            # dropdown for selecting/overriding the species prediction; it is
+            # preselected with pred1, or left empty if pred1 is not a known class.
+            # (classification has just run, so no disabled placeholder is needed)
+            print(f"[D] pred1: {pred1}")
+            ix = viewer.WHALE_CLASSES.index(pred1) if pred1 in viewer.WHALE_CLASSES else None
+            selected_class = st.selectbox(f"Species for {file.name}", viewer.WHALE_CLASSES, index=ix)
+            observation['predicted_class'] = selected_class
+            if selected_class != pred1:
+                observation['class_overriden'] = selected_class
+            # NOTE(review): presumably read by metadata2md() below - confirm
+            st.session_state.public_observation = observation
+            # each button carries its own observation; relying on the session
+            # entry alone would upload the last image's observation for every button
+            st.button(f"Upload observation for {file.name} to THE INTERNET!",
+                      on_click=_push_single_observation, args=(observation,))
+            # TODO: the metadata only fills properly if `validate` was clicked.
+            st.markdown(metadata2md())
+            msg = f"[D] full observation after inference: {observation}"
+            g_logger.debug(msg)
+            print(msg)
+            # TODO: add a link to more info on the model, next to the button.
+            whale_classes = out['predictions'][:]
+            # render images for the top 3 (that is what the model api returns)
+            st.markdown(f"Top 3 Predictions for {file.name}")
+            for i in range(len(whale_classes)):
+                viewer.display_whale(whale_classes, i)

src/classifier_image.py ADDED Viewed

	@@ -0,0 +1,70 @@

+import streamlit as st
+import logging
+import os
+# get a global var for logger accessor in this module
+LOG_LEVEL = logging.DEBUG
+g_logger = logging.getLogger(__name__)
+g_logger.setLevel(LOG_LEVEL)
+from grid_maker import gridder
+import hf_push_observations as sw_push_obs
+import utils.metadata_handler as meta_handler
+import whale_viewer as sw_wv
+def _push_single_observation(observation):
+    """
+    Upload the observation that belongs to the clicked button
+    `sw_push_obs.push_observations` is called without arguments and the
+    observation is handed over through `st.session_state.public_observation`,
+    so that entry is set to the clicked image's observation first.
+    Args:
+        observation (dict): The observation of the image whose upload button
+            was clicked.
+    """
+    st.session_state.public_observation = observation
+    sw_push_obs.push_observations()
+def cetacean_classify(cetacean_classifier, tab_inference):
+    """
+    Classify every uploaded image and render the results in a grid
+    For each file in the session state the image is shown, the classifier is
+    run, a species dropdown is preselected with the first prediction, an
+    upload button is offered and all returned predictions are displayed.
+    Args:
+        cetacean_classifier: Callable applied to an image; its result is
+            indexed as `out['predictions']`, whose first entry is looked up in
+            `sw_wv.WHALE_CLASSES`.
+        tab_inference: The streamlit container the species dropdowns are
+            created on.
+    """
+    files = st.session_state.files
+    images = st.session_state.images
+    observations = st.session_state.observations
+    _batch_size, row_size, _page = gridder(files)
+    grid = st.columns(row_size)
+    for position, file in enumerate(files):
+        image = images[file.name]
+        # cells are filled left to right, wrapping around after `row_size`
+        with grid[position % row_size]:
+            st.image(image, use_column_width=True)
+            observation = observations[file.name].to_dict()
+            # run classifier model on `image`, and persistently store the output
+            out = cetacean_classifier(image) # get top 3 matches
+            pred1 = out['predictions'][0]
+            st.session_state.whale_prediction1 = pred1
+            st.session_state.classify_whale_done = True
+            msg = f"[D]2 classify_whale_done: {st.session_state.classify_whale_done}, whale_prediction1: {st.session_state.whale_prediction1}"
+            g_logger.info(msg)
+            # dropdown for selecting/overriding the species prediction; it is
+            # preselected with pred1, or left empty if pred1 is not a known class.
+            # (classification has just run, so no disabled placeholder is needed)
+            print(f"[D] pred1: {pred1}")
+            ix = sw_wv.WHALE_CLASSES.index(pred1) if pred1 in sw_wv.WHALE_CLASSES else None
+            # the per-file key keeps the dropdowns distinct: one is created per
+            # image with the same label and options
+            selected_class = tab_inference.selectbox("Species", sw_wv.WHALE_CLASSES, index=ix,
+                                                     key=f"species_{file.name}")
+            observation['predicted_class'] = selected_class
+            if selected_class != pred1:
+                observation['class_overriden'] = selected_class
+            # NOTE(review): presumably read by metadata2md() below - confirm
+            st.session_state.public_observation = observation
+            # each button carries its own observation; relying on the session
+            # entry alone would upload the last image's observation for every button
+            st.button(f"Upload observation for {file.name} to THE INTERNET!",
+                      on_click=_push_single_observation, args=(observation,))
+            # TODO: the metadata only fills properly if `validate` was clicked.
+            st.markdown(meta_handler.metadata2md())
+            msg = f"[D] full observation after inference: {observation}"
+            g_logger.debug(msg)
+            print(msg)
+            # TODO: add a link to more info on the model, next to the button.
+            whale_classes = out['predictions'][:]
+            # render images for the top 3 (that is what the model api returns)
+            st.title(f"Species detected for {file.name}")
+            for i in range(len(whale_classes)):
+                sw_wv.display_whale(whale_classes, i)

src/hf_push_observations.py ADDED Viewed

	@@ -0,0 +1,56 @@

+from streamlit.delta_generator import DeltaGenerator
+import streamlit as st
+from huggingface_hub import HfApi
+import json
+import tempfile
+import logging
+# get a global var for logger accessor in this module
+LOG_LEVEL = logging.DEBUG
+g_logger = logging.getLogger(__name__)
+g_logger.setLevel(LOG_LEVEL)
+def push_observations(tab_log:DeltaGenerator=None):
+    """
+    Push the observations to the Hugging Face dataset
+    The observation is taken from `st.session_state.public_observation`,
+    serialised to JSON and written to a temporary file. The upload call itself
+    is currently commented out, so only the temp file and the target path are
+    reported.
+    Args:
+        tab_log (streamlit.container): The container to log messages to. If not provided,
+            the container stored in `st.session_state.tab_log` is used (if any);
+            log messages are in any case written to the global logger (TODO: test - didn't
+            push any observation since generating the logger)
+    """
+    # we get the observation from session state: 1 is the dict 2 is the image.
+    # first, lets do an info display (popup)
+    observation = st.session_state.public_observation
+    metadata_str = json.dumps(observation)
+    st.toast(f"Uploading observations: {metadata_str}", icon="🦭")
+    # an explicitly passed container wins; the session one is only a fallback
+    # (this is the case when the function runs as a button callback)
+    if tab_log is None:
+        tab_log = st.session_state.get("tab_log")
+    if tab_log is not None:
+        tab_log.info(f"Uploading observations: {metadata_str}")
+    # get huggingface api
+    import os
+    token = os.environ.get("HF_TOKEN", None)
+    api = HfApi(token=token)
+    # delete=False: the file has to outlive this block so it can be uploaded by name
+    with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
+        f.write(metadata_str)
+    st.info(f"temp file: {f.name} with metadata written...")
+    path_in_repo= f"metadata/{observation['author_email']}/{observation['image_md5']}.json"
+    msg = f"fname: {f.name} | path: {path_in_repo}"
+    print(msg)
+    st.warning(msg)
+    # rv = api.upload_file(
+    #     path_or_fileobj=f.name,
+    #     path_in_repo=path_in_repo,
+    #     repo_id="Saving-Willy/temp_dataset",
+    #     repo_type="dataset",
+    # )
+    # print(rv)
+    # msg = f"observation attempted tx to repo happy walrus: {rv}"
+    g_logger.info(msg)
+    st.info(msg)

src/{input_handling.py → input/input_handling.py} RENAMED Viewed

@@ -1,19 +1,14 @@
-from PIL import Image
-from PIL import ExifTags
-import re
 import datetime
-import hashlib
 import logging
 import streamlit as st
-from streamlit.runtime.uploaded_file_manager import UploadedFile # for type hinting
 from streamlit.delta_generator import DeltaGenerator
 import cv2
 import numpy as np
-import random
-import string
 m_logger = logging.getLogger(__name__)
 m_logger.setLevel(logging.INFO)
@@ -25,172 +20,6 @@ both the UI elements (setup_input_UI) and the validation functions.
 '''
 allowed_image_types = ['jpg', 'jpeg', 'png', 'webp']
-def generate_random_md5():
-    # Generate a random string
-    random_string = ''.join(random.choices(string.ascii_letters + string.digits, k=16))
-    # Encode the string and compute its MD5 hash
-    md5_hash = hashlib.md5(random_string.encode()).hexdigest()
-    return md5_hash
-# autogenerated class to hold the input data
-class InputObservation:
-    """
-    A class to hold an input observation and associated metadata
-    Attributes:
-        image (Any):
-            The image associated with the observation.
-        latitude (float):
-            The latitude where the observation was made.
-        longitude (float):
-            The longitude where the observation was made.
-        author_email (str):
-            The email of the author of the observation.
-        date (str):
-            The date when the observation was made.
-        time (str):
-            The time when the observation was made.
-        date_option (str):
-            Additional date option for the observation.
-        time_option (str):
-            Additional time option for the observation.
-        uploaded_filename (Any):
-            The uploaded filename associated with the observation.
-    Methods:
-        __str__():
-            Returns a string representation of the observation.
-        __repr__():
-            Returns a string representation of the observation.
-        __eq__(other):
-            Checks if two observations are equal.
-        __ne__(other):
-            Checks if two observations are not equal.
-        __hash__():
-            Returns the hash of the observation.
-        to_dict():
-            Converts the observation to a dictionary.
-        from_dict(data):
-            Creates an observation from a dictionary.
-        from_input(input):
-            Creates an observation from another input observation.
-    """
-    def __init__(self, image=None, latitude=None, longitude=None, author_email=None, date=None, time=None, date_option=None, time_option=None, uploaded_filename=None):
-        self.image = image
-        self.latitude = latitude
-        self.longitude = longitude
-        self.author_email = author_email
-        self.date = date
-        self.time = time
-        self.date_option = date_option
-        self.time_option = time_option
-        self.uploaded_filename = uploaded_filename
-    def __str__(self):
-        return f"Observation: {self.image}, {self.latitude}, {self.longitude}, {self.author_email}, {self.date}, {self.time}, {self.date_option}, {self.time_option}, {self.uploaded_filename}"
-    def __repr__(self):
-        return f"Observation: {self.image}, {self.latitude}, {self.longitude}, {self.author_email}, {self.date}, {self.time}, {self.date_option}, {self.time_option}, {self.uploaded_filename}"
-    def __eq__(self, other):
-        return (self.image == other.image and self.latitude == other.latitude and self.longitude == other.longitude and
-                self.author_email == other.author_email and self.date == other.date and self.time == other.time and
-                self.date_option == other.date_option and self.time_option == other.time_option and self.uploaded_filename == other.uploaded_filename)
-    def __ne__(self, other):
-        return not self.__eq__(other)
-    def __hash__(self):
-        return hash((self.image, self.latitude, self.longitude, self.author_email, self.date, self.time, self.date_option, self.time_option, self.uploaded_filename))
-    def to_dict(self):
-        return {
-            #"image": self.image,
-            "image_filename": self.uploaded_filename.name if self.uploaded_filename else None,
-            "image_md5": hashlib.md5(self.uploaded_filename.read()).hexdigest() if self.uploaded_filename else generate_random_md5(),
-            "latitude": self.latitude,
-            "longitude": self.longitude,
-            "author_email": self.author_email,
-            "date": self.date,
-            "time": self.time,
-            "date_option": str(self.date_option),
-            "time_option": str(self.time_option),
-            "uploaded_filename": self.uploaded_filename
-        }
-    @classmethod
-    def from_dict(cls, data):
-        return cls(data["image"], data["latitude"], data["longitude"], data["author_email"], data["date"], data["time"], data["date_option"], data["time_option"], data["uploaded_filename"])
-    @classmethod
-    def from_input(cls, input):
-        return cls(input.image, input.latitude, input.longitude, input.author_email, input.date, input.time, input.date_option, input.time_option, input.uploaded_filename)
-    @staticmethod
-    def from_input(input):
-        return InputObservation(input.image, input.latitude, input.longitude, input.author_email, input.date, input.time, input.date_option, input.time_option, input.uploaded_filename)
-    @staticmethod
-    def from_dict(data):
-        return InputObservation(data["image"], data["latitude"], data["longitude"], data["author_email"], data["date"], data["time"], data["date_option"], data["time_option"], data["uploaded_filename"])
-def is_valid_number(number:str) -> bool:
-    """
-    Check if the given string is a valid number (int or float, sign ok)
-    Args:
-        number (str): The string to be checked.
-    Returns:
-        bool: True if the string is a valid number, False otherwise.
-    """
-    pattern = r'^[-+]?[0-9]*\.?[0-9]+$'
-    return re.match(pattern, number) is not None
-# Function to validate email address
-def is_valid_email(email:str) -> bool:
-    """
-    Validates if the provided email address is in a correct format.
-    Args:
-        email (str): The email address to validate.
-    Returns:
-        bool: True if the email address is valid, False otherwise.
-    """
-    pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
-    return re.match(pattern, email) is not None
-# Function to extract date and time from image metadata
-# def get_image_datetime(image_file: UploadedFile) -> str | None:
-def get_image_datetime(image_file):
-    """
-    Extracts the original date and time from the EXIF metadata of an uploaded image file.
-    Args:
-        image_file (UploadedFile): The uploaded image file from which to extract the date and time.
-    Returns:
-        str: The original date and time as a string if available, otherwise None.
-    Raises:
-        Warning: If the date and time could not be extracted from the image metadata.
-    """
-    try:
-        image = Image.open(image_file)
-        exif_data = image._getexif()
-        if exif_data is not None:
-            for tag, value in exif_data.items():
-                if ExifTags.TAGS.get(tag) == 'DateTimeOriginal':
-                    return value
-    except Exception as e: # FIXME: what types of exception?
-         st.warning(f"Could not extract date from image metadata. (file: {image_file.name})")
-         # TODO: add to logger
-    return None
 # an arbitrary set of defaults so testing is less painful...
 # ideally we add in some randomization to the defaults
 spoof_metadata = {
@@ -282,7 +111,7 @@ def setup_input(
             observations[file.name] = observation
             images[file.name] = image
-    st.session_state.image = images
     st.session_state.files = uploaded_files
     return observations

 import datetime
 import logging
 import streamlit as st
 from streamlit.delta_generator import DeltaGenerator
 import cv2
 import numpy as np
+from input.input_observation import InputObservation
+from input.input_validator import get_image_datetime, is_valid_email, is_valid_number
 m_logger = logging.getLogger(__name__)
 m_logger.setLevel(logging.INFO)
 '''
 allowed_image_types = ['jpg', 'jpeg', 'png', 'webp']
 # an arbitrary set of defaults so testing is less painful...
 # ideally we add in some randomization to the defaults
 spoof_metadata = {
             observations[file.name] = observation
             images[file.name] = image
+    st.session_state.images = images
     st.session_state.files = uploaded_files
     return observations

src/input/input_observation.py ADDED Viewed

	@@ -0,0 +1,110 @@

+import hashlib
+from input.input_validator import generate_random_md5
+# autogenerated class to hold the input data
+class InputObservation:
+    """
+    A class to hold an input observation and associated metadata
+    Attributes:
+        image (Any):
+            The image associated with the observation.
+        latitude (float):
+            The latitude where the observation was made.
+        longitude (float):
+            The longitude where the observation was made.
+        author_email (str):
+            The email of the author of the observation.
+        date (str):
+            The date when the observation was made.
+        time (str):
+            The time when the observation was made.
+        date_option (str):
+            Additional date option for the observation.
+        time_option (str):
+            Additional time option for the observation.
+        uploaded_filename (Any):
+            The uploaded file of the observation. `to_dict` reads its `.name`
+            and its content, so a file-like object is expected.
+    Methods:
+        __str__():
+            Returns a string representation of the observation.
+        __repr__():
+            Returns a string representation of the observation.
+        __eq__(other):
+            Checks if two observations are equal.
+        __ne__(other):
+            Checks if two observations are not equal.
+        __hash__():
+            Returns the hash of the observation.
+        to_dict():
+            Converts the observation to a dictionary.
+        from_dict(data):
+            Creates an observation from a dictionary.
+        from_input(input):
+            Creates an observation from another input observation.
+    """
+    def __init__(self, image=None, latitude=None, longitude=None,
+                 author_email=None, date=None, time=None, date_option=None, time_option=None,
+                 uploaded_filename=None):
+        self.image = image
+        self.latitude = latitude
+        self.longitude = longitude
+        self.author_email = author_email
+        self.date = date
+        self.time = time
+        self.date_option = date_option
+        self.time_option = time_option
+        self.uploaded_filename = uploaded_filename
+    def _fields(self):
+        """Return all attribute values as a tuple, in constructor order."""
+        return (self.image, self.latitude, self.longitude, self.author_email, self.date,
+                self.time, self.date_option, self.time_option, self.uploaded_filename)
+    def _uploaded_md5(self):
+        """
+        Return the MD5 hex digest of the whole uploaded file
+        The file is read from its start and the previous read position is
+        restored afterwards, so the digest does not depend on earlier reads and
+        repeated calls give the same value.
+        """
+        uploaded = self.uploaded_filename
+        if not (hasattr(uploaded, "seek") and hasattr(uploaded, "tell")):
+            # not seekable: hash whatever read() still returns
+            return hashlib.md5(uploaded.read()).hexdigest()
+        position = uploaded.tell()
+        uploaded.seek(0)
+        try:
+            return hashlib.md5(uploaded.read()).hexdigest()
+        finally:
+            uploaded.seek(position)
+    def __str__(self):
+        return f"Observation: {self.image}, {self.latitude}, {self.longitude}, {self.author_email}, {self.date}, {self.time}, {self.date_option}, {self.time_option}, {self.uploaded_filename}"
+    def __repr__(self):
+        # same text as __str__
+        return self.__str__()
+    def __eq__(self, other):
+        # let Python handle comparisons with unrelated types instead of
+        # failing on a missing attribute
+        if not isinstance(other, InputObservation):
+            return NotImplemented
+        return all(mine == theirs for mine, theirs in zip(self._fields(), other._fields()))
+    def __ne__(self, other):
+        result = self.__eq__(other)
+        return result if result is NotImplemented else not result
+    def __hash__(self):
+        return hash(self._fields())
+    def to_dict(self):
+        """
+        Convert the observation to a dictionary
+        The image itself is not included. `image_md5` is the digest of the
+        uploaded file, or a random digest when no file is attached.
+        Returns:
+            dict: The observation metadata.
+        """
+        has_file = bool(self.uploaded_filename)
+        return {
+            #"image": self.image,
+            "image_filename": self.uploaded_filename.name if has_file else None,
+            "image_md5": self._uploaded_md5() if has_file else generate_random_md5(),
+            "latitude": self.latitude,
+            "longitude": self.longitude,
+            "author_email": self.author_email,
+            "date": self.date,
+            "time": self.time,
+            "date_option": str(self.date_option),
+            "time_option": str(self.time_option),
+            # NOTE(review): this is the file object itself, not a string -
+            # confirm that consumers serialising this dict can handle it
+            "uploaded_filename": self.uploaded_filename
+        }
+    @classmethod
+    def from_dict(cls, data):
+        """
+        Create an observation from a dictionary
+        Args:
+            data (dict): Mapping with the attribute names as keys. "image" is
+                optional, because `to_dict` does not emit it.
+        """
+        return cls(data.get("image"), data["latitude"], data["longitude"], data["author_email"], data["date"], data["time"], data["date_option"], data["time_option"], data["uploaded_filename"])
+    @classmethod
+    def from_input(cls, input):
+        """Create an observation with the same attribute values as `input`."""
+        return cls(input.image, input.latitude, input.longitude, input.author_email, input.date, input.time, input.date_option, input.time_option, input.uploaded_filename)

src/input/input_validator.py ADDED Viewed

	@@ -0,0 +1,68 @@

+import random
+import string
+import hashlib
+import re
+import streamlit as st
+from PIL import Image
+from PIL import ExifTags
+def generate_random_md5():
+    """
+    Build an MD5 hex digest from 16 randomly chosen alphanumeric characters
+    Returns:
+        str: The hexadecimal digest of the random string.
+    """
+    alphabet = string.ascii_letters + string.digits
+    seed_text = ''.join(random.choices(alphabet, k=16))
+    return hashlib.md5(seed_text.encode()).hexdigest()
+def is_valid_number(number:str) -> bool:
+    """
+    Check if the given string is a valid number (int or float, sign ok)
+    The whole string has to be the number: surrounding whitespace, including a
+    trailing newline, makes it invalid.
+    Args:
+        number (str): The string to be checked.
+    Returns:
+        bool: True if the string is a valid number, False otherwise.
+    """
+    pattern = r'[-+]?[0-9]*\.?[0-9]+'
+    # fullmatch instead of match + '$': '$' would also accept a trailing newline
+    return re.fullmatch(pattern, number) is not None
+# Function to validate email address
+def is_valid_email(email:str) -> bool:
+    """
+    Validates if the provided email address is in a correct format.
+    The whole string has to be the address: a trailing newline or other
+    surrounding characters make it invalid.
+    Args:
+        email (str): The email address to validate.
+    Returns:
+        bool: True if the email address is valid, False otherwise.
+    """
+    pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
+    # fullmatch instead of match + '$': '$' would also accept a trailing newline
+    return re.fullmatch(pattern, email) is not None
+# Function to extract date and time from image metadata
+def get_image_datetime(image_file):
+    """
+    Look up the original date and time in the EXIF metadata of an image file.
+    Any exception while opening the image or reading its EXIF data is caught;
+    a streamlit warning naming the file is shown instead and None is returned.
+    Args:
+        image_file (UploadedFile): The uploaded image file to inspect; its
+            `.name` is used in the warning message.
+    Returns:
+        str: The value of the 'DateTimeOriginal' EXIF tag if present, otherwise None.
+    """
+    try:
+        exif_data = Image.open(image_file)._getexif()
+        if exif_data is None:
+            return None
+        # EXIF keys are numeric ids; ExifTags.TAGS maps them to tag names
+        for tag_id in exif_data:
+            if ExifTags.TAGS.get(tag_id) == 'DateTimeOriginal':
+                return exif_data[tag_id]
+    except Exception: # FIXME: what types of exception?
+         st.warning(f"Could not extract date from image metadata. (file: {image_file.name})")
+         # TODO: add to logger
+    return None

src/main.py CHANGED Viewed

@@ -1,31 +1,25 @@
-#import datetime
-from PIL import Image
-import json
 import logging
 import os
-import tempfile
 import pandas as pd
 import streamlit as st
-from streamlit.delta_generator import DeltaGenerator # for type hinting
 import folium
 from streamlit_folium import st_folium
-from huggingface_hub import HfApi
 from transformers import pipeline
 from transformers import AutoModelForImageClassification
 from datasets import disable_caching
 disable_caching()
-import alps_map as sw_am
-import input_handling as sw_inp
-import obs_map as sw_map
-import st_logs as sw_logs
-import whale_gallery as sw_wg
-import whale_viewer as sw_wv
 # setup for the ML model on huggingface (our wrapper)
@@ -45,96 +39,40 @@ g_logger = logging.getLogger(__name__)
 g_logger.setLevel(LOG_LEVEL)
 st.set_page_config(layout="wide")
-#sw_logs.setup_logging(level=LOG_LEVEL, buffer_len=40)
 # initialise various session state variables
 if "handler" not in st.session_state:
-    st.session_state['handler'] = sw_logs.setup_logging()
-if "full_data" not in st.session_state:
-    st.session_state.full_data = {}
 if "classify_whale_done" not in st.session_state:
     st.session_state.classify_whale_done = False
 if "whale_prediction1" not in st.session_state:
     st.session_state.whale_prediction1 = None
-if "image" not in st.session_state:
-    st.session_state.image = None
 if "tab_log" not in st.session_state:
     st.session_state.tab_log = None
-def metadata2md() -> str:
-    """Get metadata from cache and return as markdown-formatted key-value list
-    Returns:
-        str: Markdown-formatted key-value list of metadata
-    """
-    markdown_str = "\n"
-    for key, value in st.session_state.public_observation.items():
-            markdown_str += f"- **{key}**: {value}\n"
-    return markdown_str
-def push_observations(tab_log:DeltaGenerator=None):
-    """
-    Push the observations to the Hugging Face dataset
-    Args:
-        tab_log (streamlit.container): The container to log messages to. If not provided,
-            log messages are in any case written to the global logger (TODO: test - didn't
-            push any data since generating the logger)
-    """
-    # we get the data from session state: 1 is the dict 2 is the image.
-    # first, lets do an info display (popup)
-    metadata_str = json.dumps(st.session_state.public_observation)
-    st.toast(f"Uploading observations: {metadata_str}", icon="🦭")
-    tab_log = st.session_state.tab_log
-    if tab_log is not None:
-        tab_log.info(f"Uploading observations: {metadata_str}")
-    # get huggingface api
-    import os
-    token = os.environ.get("HF_TOKEN", None)
-    api = HfApi(token=token)
-    f = tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False)
-    f.write(metadata_str)
-    f.close()
-    st.info(f"temp file: {f.name} with metadata written...")
-    path_in_repo= f"metadata/{st.session_state.public_observation['author_email']}/{st.session_state.public_observation['image_md5']}.json"
-    msg = f"fname: {f.name} | path: {path_in_repo}"
-    print(msg)
-    st.warning(msg)
-    rv = api.upload_file(
-        path_or_fileobj=f.name,
-        path_in_repo=path_in_repo,
-        repo_id="Saving-Willy/temp_dataset",
-        repo_type="dataset",
-    )
-    print(rv)
-    msg = f"data attempted tx to repo happy walrus: {rv}"
-    g_logger.info(msg)
-    st.info(msg)
 def main() -> None:
     """
     Main entry point to set up the streamlit UI and run the application.
     The organisation is as follows:
-    1. data input (a new observations) is handled in the sidebar
     2. the rest of the interface is organised in tabs:
         - cetean classifier
@@ -156,25 +94,25 @@ def main() -> None:
     #g_logger.warning("warning message")
     # Streamlit app
-    #tab_gallery, tab_inference, tab_hotdogs, tab_map, tab_data, tab_log = st.tabs(["Cetecean classifier", "Hotdog classifier", "Map", "Data", "Log", "Beautiful cetaceans"])
-    tab_inference, tab_hotdogs, tab_map, tab_data, tab_log, tab_gallery = st.tabs(["Cetecean classifier", "Hotdog classifier", "Map", "Data", "Log", "Beautiful cetaceans"])
     st.session_state.tab_log = tab_log
     # create a sidebar, and parse all the input (returned as `observations` object)
-    observations = sw_inp.setup_input(viewcontainer=st.sidebar)
     if 0:## WIP
         # goal of this code is to allow the user to override the ML prediction, before transmitting an observations
-        predicted_class = st.sidebar.selectbox("Predicted Class", sw_wv.WHALE_CLASSES)
         override_prediction = st.sidebar.checkbox("Override Prediction")
         if override_prediction:
-            overridden_class = st.sidebar.selectbox("Override Class", sw_wv.WHALE_CLASSES)
-            st.session_state.full_data['class_overriden'] = overridden_class
         else:
-            st.session_state.full_data['class_overriden'] = None
     with tab_map:
@@ -188,19 +126,19 @@ def main() -> None:
         if show_db_points:
             # show a nicer map, observations marked, tileset selectable.
-            st_data = sw_map.present_obs_map(
                 dataset_id=dataset_id, data_files=data_files,
                 dbg_show_extra=dbg_show_extra)
         else:
             # development map.
-            st_data = sw_am.present_alps_map()
     with tab_log:
         handler = st.session_state['handler']
         if handler is not None:
-            records = sw_logs.parse_log_buffer(handler.buffer)
             st.dataframe(records[::-1], use_container_width=True,)
             st.info(f"Length of records: {len(records)}")
         else:
@@ -230,19 +168,18 @@ def main() -> None:
         # specific to the gallery (otherwise we get side effects)
         tg_cont = st.container(key="swgallery")
         with tg_cont:
-            sw_wg.render_whale_gallery(n_cols=4)
-    # Display submitted data
     if st.sidebar.button("Validate"):
-        # create a dictionary with the submitted data
         submitted_data = observations
-        st.session_state.full_data = observations
-        tab_log.info(f"{st.session_state.full_data}")
-        df = pd.DataFrame(submitted_data)
-        print("Dataframe Shape: ", df.shape)
         with tab_data:
             st.table(df)
@@ -254,7 +191,7 @@ def main() -> None:
     # - the model predicts the top 3 most likely species from the input image
     # - these species are shown
     # - the user can override the species prediction using the dropdown
-    # - an observations is uploaded if the user chooses.
     if tab_inference.button("Identify with cetacean classifier"):
         #pipe = pipeline("image-classification", model="Saving-Willy/cetacean-classifier", trust_remote_code=True)
@@ -262,58 +199,12 @@ def main() -> None:
                                                                             revision=classifier_revision,
                                                                             trust_remote_code=True)
-        if st.session_state.image is None:
             # TODO: cleaner design to disable the button until data input done?
             st.info("Please upload an image first.")
         else:
-            files = st.session_state.files
-            images = st.session_state.images
-            full_data = st.session_state.full_data
-            for file in files:
-                image = images[file]
-                data = full_data[file]
-                # run classifier model on `image`, and persistently store the output
-                out = cetacean_classifier(image) # get top 3 matches
-                st.session_state.whale_prediction1 = out['predictions'][0]
-                st.session_state.classify_whale_done = True
-                msg = f"[D]2 classify_whale_done: {st.session_state.classify_whale_done}, whale_prediction1: {st.session_state.whale_prediction1}"
-                # st.info(msg)
-                g_logger.info(msg)
-                # dropdown for selecting/overriding the species prediction
-                #st.info(f"[D] classify_whale_done: {st.session_state.classify_whale_done}, whale_prediction1: {st.session_state.whale_prediction1}")
-                if not st.session_state.classify_whale_done:
-                    selected_class = tab_inference.sidebar.selectbox("Species", sw_wv.WHALE_CLASSES,
-                                                                     index=None, placeholder="Species not yet identified...",
-                                                                     disabled=True)
-                else:
-                    pred1 = st.session_state.whale_prediction1
-                    # get index of pred1 from WHALE_CLASSES, none if not present
-                    print(f"[D] pred1: {pred1}")
-                    ix = sw_wv.WHALE_CLASSES.index(pred1) if pred1 in sw_wv.WHALE_CLASSES else None
-                    selected_class = tab_inference.selectbox("Species", sw_wv.WHALE_CLASSES, index=ix)
-                data['predicted_class'] = selected_class
-                if selected_class != st.session_state.whale_prediction1:
-                    data['class_overriden'] = selected_class
-                st.session_state.public_observation = data
-                st.button("Upload observations to THE INTERNET!", on_click=push_observations)
-                # TODO: the metadata only fills properly if `validate` was clicked.
-                tab_inference.markdown(metadata2md())
-                msg = f"[D] full data after inference: {data}"
-                g_logger.debug(msg)
-                print(msg)
-                # TODO: add a link to more info on the model, next to the button.
-                whale_classes = out['predictions'][:]
-                # render images for the top 3 (that is what the model api returns)
-                with tab_inference:
-                    st.markdown("## Species detected")
-                    for i in range(len(whale_classes)):
-                        sw_wv.display_whale(whale_classes, i)
@@ -329,29 +220,10 @@ def main() -> None:
         if st.session_state.image is None:
             st.info("Please upload an image first.")
-            st.info(str(observations.to_dict()))
         else:
-            col1, col2 = tab_hotdogs.columns(2)
-            for file in st.session_state.files:
-                image = st.session_state.images[file]
-                data = st.session_state.full_data[file]
-                # display the image (use cached version, no need to reread)
-                col1.image(image, use_column_width=True)
-                # and then run inference on the image
-                hotdog_image = Image.fromarray(image)
-                predictions = pipeline_hot_dog(hotdog_image)
-                col2.header("Probabilities")
-                first = True
-                for p in predictions:
-                    col2.subheader(f"{ p['label'] }: { round(p['score'] * 100, 1)}%")
-                    if first:
-                        data['predicted_class'] = p['label']
-                        data['predicted_score'] = round(p['score'] * 100, 1)
-                        first = False
-                tab_hotdogs.write(f"Session Data: {json.dumps(data)}")

 import logging
 import os
 import pandas as pd
 import streamlit as st
 import folium
 from streamlit_folium import st_folium
 from transformers import pipeline
 from transformers import AutoModelForImageClassification
 from datasets import disable_caching
 disable_caching()
+import whale_gallery as gallery
+import whale_viewer as viewer
+from input.input_handling import setup_input
+from maps.alps_map import present_alps_map
+from maps.obs_map import present_obs_map
+from utils.st_logs import setup_logging, parse_log_buffer
+from classifier.classifier_image import cetacean_classify
+from classifier.classifier_hotdog import hotdog_classify
 # setup for the ML model on huggingface (our wrapper)
 g_logger.setLevel(LOG_LEVEL)
 st.set_page_config(layout="wide")
 # initialise various session state variables
 if "handler" not in st.session_state:
+    st.session_state['handler'] = setup_logging()
+if "observations" not in st.session_state:
+    st.session_state.observations = {}
+if "images" not in st.session_state:
+    st.session_state.images = {}
+if "files" not in st.session_state:
+    st.session_state.files = {}
+if "public_observation" not in st.session_state:
+    st.session_state.public_observation = {}
 if "classify_whale_done" not in st.session_state:
     st.session_state.classify_whale_done = False
 if "whale_prediction1" not in st.session_state:
     st.session_state.whale_prediction1 = None
 if "tab_log" not in st.session_state:
     st.session_state.tab_log = None
 def main() -> None:
     """
     Main entry point to set up the streamlit UI and run the application.
     The organisation is as follows:
+    1. observation input (a new observation) is handled in the sidebar
     2. the rest of the interface is organised in tabs:
         - cetacean classifier
     #g_logger.warning("warning message")
     # Streamlit app
+    #tab_gallery, tab_inference, tab_hotdogs, tab_map, tab_data, tab_log = st.tabs(["Cetecean classifier", "Hotdog classifier", "Map", "observation", "Log", "Beautiful cetaceans"])
+    tab_inference, tab_hotdogs, tab_map, tab_data, tab_log, tab_gallery = st.tabs(["Cetecean classifier", "Hotdog classifier", "Map", "observation", "Log", "Beautiful cetaceans"])
     st.session_state.tab_log = tab_log
     # create a sidebar, and parse all the input (returned as `observations` object)
+    observations = setup_input(viewcontainer=st.sidebar)
     if 0:## WIP
         # goal of this code is to allow the user to override the ML prediction, before transmitting an observation
+        predicted_class = st.sidebar.selectbox("Predicted Class", viewer.WHALE_CLASSES)
         override_prediction = st.sidebar.checkbox("Override Prediction")
         if override_prediction:
+            overridden_class = st.sidebar.selectbox("Override Class", viewer.WHALE_CLASSES)
+            st.session_state.observations['class_overriden'] = overridden_class
         else:
+            st.session_state.observations['class_overriden'] = None
     with tab_map:
         if show_db_points:
             # show a nicer map, observations marked, tileset selectable.
+            st_observation = present_obs_map(
                 dataset_id=dataset_id, data_files=data_files,
                 dbg_show_extra=dbg_show_extra)
         else:
             # development map.
+            st_observation = present_alps_map()
     with tab_log:
         handler = st.session_state['handler']
         if handler is not None:
+            records = parse_log_buffer(handler.buffer)
             st.dataframe(records[::-1], use_container_width=True,)
             st.info(f"Length of records: {len(records)}")
         else:
         # specific to the gallery (otherwise we get side effects)
         tg_cont = st.container(key="swgallery")
         with tg_cont:
+            gallery.render_whale_gallery(n_cols=4)
+    # Display submitted observation
     if st.sidebar.button("Validate"):
+        # create a dictionary with the submitted observation
         submitted_data = observations
+        st.session_state.observations = observations
+        tab_log.info(f"{st.session_state.observations}")
+        df = pd.DataFrame(submitted_data, index=[0])
         with tab_data:
             st.table(df)
     # - the model predicts the top 3 most likely species from the input image
     # - these species are shown
     # - the user can override the species prediction using the dropdown
+    # - an observation is uploaded if the user chooses.
     if tab_inference.button("Identify with cetacean classifier"):
         #pipe = pipeline("image-classification", model="Saving-Willy/cetacean-classifier", trust_remote_code=True)
                                                                             revision=classifier_revision,
                                                                             trust_remote_code=True)
+        if st.session_state.images is None:
             # TODO: cleaner design to disable the button until data input done?
             st.info("Please upload an image first.")
         else:
+            cetacean_classify(cetacean_classifier, tab_inference)
         if st.session_state.image is None:
             st.info("Please upload an image first.")
+            #st.info(str(observations.to_dict()))
         else:
+            hotdog_classify(pipeline_hot_dog, tab_hotdogs)

src/{alps_map.py → maps/alps_map.py} RENAMED Viewed

File without changes

src/{obs_map.py → maps/obs_map.py} RENAMED Viewed

@@ -7,8 +7,8 @@ import streamlit as st
 import folium
 from streamlit_folium import st_folium
-import whale_viewer as sw_wv
-from fix_tabrender import js_show_zeroheight_iframe
 m_logger = logging.getLogger(__name__)
 # we can set the log level locally for funcs in this module
@@ -60,7 +60,7 @@ _colors = [
     "#778899" # Light Slate Gray
 ]
-whale2color = {k: v for k, v in zip(sw_wv.WHALE_CLASSES, _colors)}
 def create_map(tile_name:str, location:Tuple[float], zoom_start: int = 7) -> folium.Map:
     """

 import folium
 from streamlit_folium import st_folium
+import whale_viewer as viewer
+from utils.fix_tabrender import js_show_zeroheight_iframe
 m_logger = logging.getLogger(__name__)
 # we can set the log level locally for funcs in this module
     "#778899" # Light Slate Gray
 ]
+whale2color = {k: v for k, v in zip(viewer.WHALE_CLASSES, _colors)}
 def create_map(tile_name:str, location:Tuple[float], zoom_start: int = 7) -> folium.Map:
     """

src/{fix_tabrender.py → utils/fix_tabrender.py} RENAMED Viewed

File without changes

src/utils/grid_maker.py ADDED Viewed

	@@ -0,0 +1,13 @@

+import streamlit as st
+import math
+def gridder(files):
+    """Draw the grid paging controls and return the values the user picked.
+
+    Three widgets are laid out side by side in three Streamlit columns: a
+    batch-size slider (10 to 100 in steps of 10), a row-size slider (1 to 5)
+    and a page selector whose options run from 1 to the number of batches
+    needed to hold ``files`` at the chosen batch size.
+
+    Args:
+        files: Sized collection of uploaded files; only ``len(files)`` is used.
+
+    Returns:
+        tuple: ``(batch_size, row_size, page)`` as selected in the widgets.
+    """
+    batch_col, row_col, page_col = st.columns(3)
+    with batch_col:
+        batch_size = st.select_slider("Batch size:", range(10, 110, 10), value=10)
+    with row_col:
+        row_size = st.select_slider("Row size:", range(1, 6), value=5)
+    # Always offer at least page 1: with no files the option list would be
+    # empty and the selectbox would hand back None instead of a page number.
+    num_batches = max(1, math.ceil(len(files) / batch_size))
+    with page_col:
+        page = st.selectbox("Page", range(1, num_batches + 1))
+    return batch_size, row_size, page

src/utils/metadata_handler.py ADDED Viewed

	@@ -0,0 +1,16 @@

+import streamlit as st
+def metadata2md() -> str:
+    """Format selected fields of the cached public observation as markdown.
+
+    Reads ``st.session_state.public_observation`` and emits one bullet per
+    entry whose key is one of latitude, longitude, author_email, date or
+    time, in the order the entries appear in that mapping.
+
+    Returns:
+        str: A newline followed by one ``- **key**: value`` line per
+        selected entry (just the newline when nothing matches).
+    """
+    wanted = ("latitude", "longitude", "author_email", "date", "time")
+    observation = st.session_state.public_observation
+    bullets = (
+        f"- **{name}**: {content}\n"
+        for name, content in observation.items()
+        if name in wanted
+    )
+    return "\n" + "".join(bullets)

src/{st_logs.py → utils/st_logs.py} RENAMED Viewed

File without changes

src/whale_viewer.py CHANGED Viewed

@@ -1,5 +1,5 @@
 from typing import List
 from PIL import Image
 import pandas as pd
 import os
@@ -134,7 +134,7 @@ def display_whale(whale_classes:List[str], i:int, viewcontainer=None):
     TODO: how to find the object type of viewcontainer.? they are just "deltagenerators" but
     we want the result of the generator.. In any case, it works ok with either call signature.
     """
-    import streamlit as st
     if viewcontainer is None:
         viewcontainer = st
@@ -148,11 +148,10 @@ def display_whale(whale_classes:List[str], i:int, viewcontainer=None):
     viewcontainer.markdown(
-        "### :whale:  #" + str(i + 1) + ": " + format_whale_name(whale_classes[i])
     )
     current_dir = os.getcwd()
     image_path = os.path.join(current_dir, "src/images/references/")
     image = Image.open(image_path + df_whale_img_ref.loc[whale_classes[i], "WHALE_IMAGES"])
-    viewcontainer.image(image, caption=df_whale_img_ref.loc[whale_classes[i], "WHALE_REFERENCES"])
-    # link st.markdown(f"[{df.loc[whale_classes[i], 'WHALE_REFERENCES']}]({df.loc[whale_classes[i], 'WHALE_REFERENCES']})")

 from typing import List
+import streamlit as st
 from PIL import Image
 import pandas as pd
 import os
     TODO: how to find the object type of viewcontainer.? they are just "deltagenerators" but
     we want the result of the generator.. In any case, it works ok with either call signature.
     """
     if viewcontainer is None:
         viewcontainer = st
     viewcontainer.markdown(
+        ":whale:  #" + str(i + 1) + ": " + format_whale_name(whale_classes[i])
     )
     current_dir = os.getcwd()
     image_path = os.path.join(current_dir, "src/images/references/")
     image = Image.open(image_path + df_whale_img_ref.loc[whale_classes[i], "WHALE_IMAGES"])
+    viewcontainer.image(image, caption=df_whale_img_ref.loc[whale_classes[i], "WHALE_REFERENCES"], use_column_width=True)