vancauwe committed
Commit f8bf7d4 · 1 Parent(s): f5800be

feat: sync github with huggingface

This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50)
  1. .github/workflows/push_to_hf.yml +20 -0
  2. .gitignore +3 -0
  3. README.md +39 -2
  4. app.py +1 -0
  5. basic_map/app.py +21 -0
  6. basic_map/app1.py +42 -0
  7. basic_map/requirements.txt +4 -0
  8. call_models/alps_map.py +171 -0
  9. call_models/click_map.py +18 -0
  10. call_models/d_entry.py +108 -0
  11. call_models/entry_and_hotdog.py +304 -0
  12. call_models/fix_tabrender.py +69 -0
  13. call_models/hotdogs.py +24 -0
  14. call_models/images/references/640x427-atlantic-white-sided-dolphin.jpg +0 -0
  15. call_models/images/references/640x427-long-finned-pilot-whale.webp +0 -0
  16. call_models/images/references/640x427-southern-right-whale.jpg +0 -0
  17. call_models/images/references/Humpback.webp +0 -0
  18. call_models/images/references/Whale_Short-Finned_Pilot-markedDW.png +0 -0
  19. call_models/images/references/beluga.webp +0 -0
  20. call_models/images/references/blue-whale.webp +0 -0
  21. call_models/images/references/bottlenose_dolphin.webp +0 -0
  22. call_models/images/references/brydes.webp +0 -0
  23. call_models/images/references/common_dolphin.webp +0 -0
  24. call_models/images/references/cuviers_beaked_whale.webp +0 -0
  25. call_models/images/references/false-killer-whale.webp +0 -0
  26. call_models/images/references/fin-whale.webp +0 -0
  27. call_models/images/references/gray-whale.webp +0 -0
  28. call_models/images/references/killer_whale.webp +0 -0
  29. call_models/images/references/melon.webp +0 -0
  30. call_models/images/references/minke-whale.webp +0 -0
  31. call_models/images/references/pantropical-spotted-dolphin.webp +0 -0
  32. call_models/images/references/pygmy-killer-whale.webp +0 -0
  33. call_models/images/references/rough-toothed-dolphin.webp +0 -0
  34. call_models/images/references/sei.webp +0 -0
  35. call_models/images/references/spinner.webp +0 -0
  36. call_models/imgs/cakes.jpg +0 -0
  37. call_models/input_handling.py +184 -0
  38. call_models/obs_map.py +163 -0
  39. call_models/requirements.txt +17 -0
  40. call_models/st_logs.py +128 -0
  41. call_models/test_upload.py +49 -0
  42. call_models/whale_gallery.py +89 -0
  43. call_models/whale_viewer.py +145 -0
  44. git +0 -0
  45. images/references/640x427-atlantic-white-sided-dolphin.jpg +0 -0
  46. images/references/640x427-long-finned-pilot-whale.webp +0 -0
  47. images/references/640x427-southern-right-whale.jpg +0 -0
  48. images/references/Humpback.webp +0 -0
  49. images/references/Whale_Short-Finned_Pilot-markedDW.png +0 -0
  50. images/references/beluga.webp +0 -0
.github/workflows/push_to_hf.yml ADDED
@@ -0,0 +1,20 @@
+name: Sync to Hugging Face hub
+on:
+  push:
+    branches: [main]
+  # to run this workflow manually from the Actions tab
+  workflow_dispatch:
+
+jobs:
+  sync-to-hub:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+          lfs: true
+      - name: Push to hub
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+          HF_USERNAME: ${{ secrets.HF_USERNAME }}
+        run: git push --force https://$HF_USERNAME:$HF_TOKEN@huggingface.co/spaces/Saving-Willy/saving-willy-space main
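For local testing outside Actions, roughly the same sync can be done with the huggingface_hub client; the snippet below is an illustrative sketch only (the token env var and local folder path are assumptions, not part of this commit).

```python
# Sketch (not part of this commit): mirror a local checkout to the Space with
# the huggingface_hub client instead of the git push used in the workflow above.
import os
from huggingface_hub import HfApi

api = HfApi(token=os.environ["HF_TOKEN"])          # same secret the workflow uses
api.upload_folder(
    folder_path=".",                               # local clone of the GitHub repo (assumption)
    repo_id="Saving-Willy/saving-willy-space",     # the Space targeted by the workflow
    repo_type="space",
    commit_message="sync: mirror GitHub main",
)
```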
.gitignore CHANGED
@@ -1,3 +1,6 @@
+# OS Related
+.DS_Store
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
README.md CHANGED
@@ -1,2 +1,39 @@
-# saving-willy
-Research Data Infrastructure for cetacean identification
+---
+title: Saving Willy
+emoji: 👀
+colorFrom: yellow
+colorTo: red
+sdk: streamlit
+sdk_version: 1.39.0
+app_file: call_models/entry_and_hotdog.py
+pinned: false
+license: apache-2.0
+short_description: 'SDSC Hackathon - Project 10. '
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+app.py is the one and only app
+
+
+## Install
+
+```
+git clone git@hf.co:spaces/Saving-Willy/saving-willy-space
+
+pip install -r requirements.txt
+```
+
+```
+streamlit run app.py
+```
+
+
+## Test data
+
+https://www.kaggle.com/competitions/happy-whale-and-dolphin/data
+
+
+
+
+Have a lot of fun!
app.py ADDED
@@ -0,0 +1 @@
+call_models/entry_and_hotdog.py
basic_map/app.py ADDED
@@ -0,0 +1,21 @@
+import pandas as pd
+import streamlit as st
+import folium
+
+from streamlit_folium import st_folium
+from streamlit_folium import folium_static
+
+
+visp_loc = 46.295833, 7.883333
+#m = folium.Map(location=visp_loc, zoom_start=9)
+
+
+st.markdown("# :whale: :whale: Cetaceans :red[& friends] :balloon:")
+
+m = folium.Map(location=visp_loc, zoom_start=9,
+               tiles='https://tile.opentopomap.org/{z}/{x}/{y}.png',
+               attr='<a href="https://opentopomap.org/">Open Topo Map</a>')
+
+folium_static(m)
+
+
basic_map/app1.py ADDED
@@ -0,0 +1,42 @@
+# let's try using map stuff without folium; maybe stlite doesn't support that.
+
+import streamlit as st
+import pandas as pd
+
+# Load data
+f = 'mountains_clr.csv'
+df = pd.read_csv(f).dropna()
+
+print(df)
+
+st.markdown("# :whale: :whale: Cetaceans :red[& friends] :balloon:")
+
+st.markdown("## :mountain: Mountains")
+st.markdown(f"library version: **{st.__version__}**")
+# not sure where my versions are getting pegged from, but we have a 1y spread :(
+# https://github.com/streamlit/streamlit/blob/1.24.1/lib/streamlit/elements/map.py
+# rather hard to find the docs for old versions, no selector unlike many libraries.
+
+visp_loc = 46.295833, 7.883333
+tile_xyz = 'https://tile.opentopomap.org/{z}/{x}/{y}.png'
+tile_attr = '<a href="https://opentopomap.org/">Open Topo Map</a>'
+st.map(df, latitude='lat', longitude='lon', color='color', size='size', zoom=7)
+#, tiles=tile_xyz, attr=tile_attr)
+
+#st.map(df)
+
+#st.map(df, latitude="col1", longitude="col2", size="col3", color="col4")
+
+import numpy as np
+
+df2 = pd.DataFrame(
+    {
+        "col1": np.random.randn(1000) / 50 + 37.76,
+        "col2": np.random.randn(1000) / 50 + -122.4,
+        "col3": np.random.randn(1000) * 100,
+        "col4": np.random.rand(1000, 4).tolist(),
+    }
+)
+#st.map(df, latitude="col1", longitude="col2", size="col3", color="col4")
+
+
basic_map/requirements.txt ADDED
@@ -0,0 +1,4 @@
+streamlit
+folium
+streamlit-folium
+
call_models/alps_map.py ADDED
@@ -0,0 +1,171 @@
1
+ import pandas as pd
2
+ import streamlit as st
3
+ import folium
4
+ from streamlit_folium import st_folium
5
+
6
+ _map_data = {
7
+ 'name': {
8
+ 0: 'matterhorn',
9
+ 1: 'zinalrothorn',
10
+ 2: 'alphubel',
11
+ 3: 'allalinhorn',
12
+ 4: 'weissmies',
13
+ 5: 'lagginhorn',
14
+ 6: 'lenzspitze',
15
+ 10: 'strahlhorn',
16
+ 11: 'parrotspitze'},
17
+ 'lat': {
18
+ 0: 45.9764263,
19
+ 1: 46.0648271,
20
+ 2: 46.0628767,
21
+ 3: 46.0460858,
22
+ 4: 46.127633,
23
+ 5: 46.1570635,
24
+ 6: 46.1045505,
25
+ 10: 46.0131498,
26
+ 11: 45.9197881},
27
+ 'lon': {
28
+ 0: 7.6586024,
29
+ 1: 7.6901238,
30
+ 2: 7.8638549,
31
+ 3: 7.8945842,
32
+ 4: 8.0120569,
33
+ 5: 8.0031044,
34
+ 6: 7.8686568,
35
+ 10: 7.9021703,
36
+ 11: 7.8710552},
37
+ 'height': {
38
+ 0: 4181.0,
39
+ 1: 3944.0,
40
+ 2: 4174.0,
41
+ 3: 3940.0,
42
+ 4: 3983.0,
43
+ 5: 3916.0,
44
+ 6: 4255.0,
45
+ 10: 4072.0,
46
+ 11: 4419.0},
47
+ 'color': {
48
+ 0: '#aa0000',
49
+ 1: '#aa0000',
50
+ 2: '#aa0000',
51
+ 3: '#aa0000',
52
+ 4: '#aa0000',
53
+ 5: '#aa0000',
54
+ 6: '#aa0000',
55
+ 10: '#00aa00',
56
+ 11: '#aa0000'},
57
+ 'size': {0: 30, 1: 30, 2: 30, 3: 30, 4: 30, 5: 30, 6: 30, 10: 500, 11: 30}
58
+ }
59
+
60
+ tile_sets = [
61
+ 'Open Street Map',
62
+ #'Stamen Terrain',
63
+ #'Stamen Toner',
64
+ 'Esri Ocean',
65
+ 'Esri Images',
66
+ 'Stamen Watercolor',
67
+ 'CartoDB Positron',
68
+ #'CartoDB Dark_Matter'
69
+ ]
70
+
71
+ def create_map(tile_name, location, zoom_start: int = 7):
72
+ # https://xyzservices.readthedocs.io/en/stable/gallery.html
73
+ # get the attributions from here once we pick the 2-3-4 options
74
+ # make esri ocean the default
75
+ m = folium.Map(location=location, zoom_start=zoom_start,
76
+ tiles='Esri.OceanBasemap', attr="Esri")
77
+ #m = folium.Map(location=location, zoom_start=zoom_start)
78
+
79
+ attr = ""
80
+ if tile_name == 'Open Street Map':
81
+ folium.TileLayer('openstreetmap').add_to(m)
82
+ pass
83
+
84
+ #Esri.OceanBasemap
85
+ elif tile_name == 'Esri Ocean':
86
+ pass # made this one default ()
87
+ #attr = "Esri"
88
+ #folium.TileLayer('Esri.OceanBasemap', attr=attr).add_to(m)
89
+
90
+ elif tile_name == 'Esri Images':
91
+ attr = "Esri &mdash; Source: Esri, i-cubed, USDA"
92
+ #folium.TileLayer('stamenterrain', attr=attr).add_to(m)
93
+ folium.TileLayer('Esri.WorldImagery', attr=attr).add_to(m)
94
+ elif tile_name == 'Stamen Toner':
95
+ attr = "Stamen"
96
+ folium.TileLayer('stamentoner', attr=attr).add_to(m)
97
+ elif tile_name == 'Stamen Watercolor':
98
+ attr = "Stamen"
99
+ folium.TileLayer('Stadia.StamenWatercolor', attr=attr).add_to(m)
100
+ elif tile_name == 'CartoDB Positron':
101
+ folium.TileLayer('cartodb positron').add_to(m)
102
+ elif tile_name == 'CartoDB Dark_Matter':
103
+ folium.TileLayer('cartodb dark_matter').add_to(m)
104
+
105
+ #folium.LayerControl().add_to(m)
106
+ return m
107
+
108
+
109
+ def present_alps_map():
110
+ '''show a map of the alps with peaks (from the event's teamnames) marked
111
+
112
+ there are two rendering modes:
113
+ a) basic - this uses a streamlit map, which doesn't offer much flexibility on
114
+ the tiles, but if you supply a dataframe then you just tell it the columns to
115
+ use for lat, lon, color, size of points
116
+
117
+ b) advanced - this uses folium, which allows for more control over the tiles,
118
+ but sadly it seems much less flexible for the point markers.
119
+
120
+ '''
121
+
122
+ st.markdown("# :whale: :whale: Cetaceans :red[& friends] :balloon:")
123
+ show_points = st.toggle("Show Points", False)
124
+ basic_map = st.toggle("Use Basic Map", False)
125
+
126
+ visp_loc = 46.295833, 7.883333 # position of town nearby to the peaks
127
+ # (maybe zermatt or Taesch better? all the mountains seem on valais gauche)
128
+ _df = pd.DataFrame(_map_data)
129
+ if basic_map:
130
+ # render using streamlit map element
131
+ st.map(_df, latitude='lat', longitude='lon', color='color', size='size', zoom=7)
132
+ else:
133
+ # setup a dropdown to pick tiles, and render with folium
134
+ selected_tile = st.selectbox("Choose a tile set", tile_sets)
135
+ #st.info(f"Selected tile: {selected_tile}")
136
+ # don't get why the default selection doesn't get rendered.
137
+ # generate a layer
138
+ map_ = create_map(selected_tile, visp_loc)
139
+ # and render it
140
+ #tile_xyz = 'https://tile.opentopomap.org/{z}/{x}/{y}.png'
141
+ #tile_attr = '<a href="https://opentopomap.org/">Open Topo Map</a>'
142
+
143
+ if show_points:
144
+ folium.Marker(
145
+ location=visp_loc,
146
+ popup="Visp",
147
+ tooltip="Visp",
148
+ icon=folium.Icon(color='blue', icon='info-sign')
149
+ ).add_to(map_)
150
+
151
+ for i, row in _df.iterrows():
152
+ c = 'red'
153
+ if row['name'] == 'strahlhorn':
154
+ c = 'green'
155
+ kw = {"prefix": "fa", "color": c, "icon": "mountain-sun"}
156
+ folium.Marker(
157
+ location=[row['lat'], row['lon']],
158
+ popup=f"{row['name']} ({row['height']} m)",
159
+ tooltip=row['name'],
160
+ icon=folium.Icon(**kw)
161
+ ).add_to(map_)
162
+ #st.info(f"Added marker for {row['name']} {row['lat']} {row['lon']}")
163
+
164
+
165
+ #folium_static(map_)
166
+ st_data = st_folium(map_, width=725)
167
+
168
+ # maybe solution for click => new marker
169
+ # https://discuss.streamlit.io/t/add-marker-after-clicking-on-map/69472
170
+ return st_data
171
+
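The closing comment above points at a click => new marker flow; a minimal sketch of that idea is below (assumed to run inside the same Streamlit app; the `clicked_points` session key is hypothetical and not in the repo).

```python
# Hypothetical sketch of the "click => new marker" idea referenced above.
import folium
import streamlit as st
from streamlit_folium import st_folium

if "clicked_points" not in st.session_state:
    st.session_state.clicked_points = []   # hypothetical key, not in the repo

m = folium.Map(location=[46.295833, 7.883333], zoom_start=9)
for lat, lon in st.session_state.clicked_points:
    folium.Marker([lat, lon], tooltip="observation").add_to(m)

st_data = st_folium(m, width=725)
if st_data and st_data.get("last_clicked"):
    pt = st_data["last_clicked"]
    st.session_state.clicked_points.append((pt["lat"], pt["lng"]))
    # the new marker is drawn on the next rerun of the script
```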
call_models/click_map.py ADDED
@@ -0,0 +1,18 @@
+import folium
+import streamlit as st
+
+from streamlit_folium import st_folium
+
+# center on Liberty Bell, add marker
+m = folium.Map(location=[39.949610, -75.150282], zoom_start=16)
+folium.Marker(
+    [39.949610, -75.150282], popup="Liberty Bell", tooltip="Liberty Bell"
+).add_to(m)
+
+# call to render Folium map in Streamlit
+st_data = st_folium(m, width=725)
+
+if st_data['last_clicked'] is not None:
+    print(st_data)
+    st.info(st_data['last_clicked'])
+
call_models/d_entry.py ADDED
@@ -0,0 +1,108 @@
1
+ import streamlit as st
2
+ from PIL import Image
3
+ import datetime
4
+ import re
5
+ #import os
6
+ import json
7
+
8
+ import hashlib
9
+
10
+
11
+ allowed_image_types = ['webp']
12
+ #allowed_image_types = ['jpg', 'jpeg', 'png', 'webp']
13
+
14
+
15
+ # Function to validate email address
16
+ def is_valid_email(email):
17
+ pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
18
+ return re.match(pattern, email) is not None
19
+
20
+ # Function to extract date and time from image metadata
21
+ def get_image_datetime(image_file):
22
+ try:
23
+ from PIL import ExifTags
24
+ image = Image.open(image_file)
25
+ exif_data = image._getexif()
26
+ if exif_data is not None:
27
+ for tag, value in exif_data.items():
28
+ if ExifTags.TAGS.get(tag) == 'DateTimeOriginal':
29
+ return value
30
+ except Exception as e:
31
+ st.warning("Could not extract date from image metadata.")
32
+ return None
33
+
34
+ # Streamlit app
35
+ st.sidebar.title("Input Form")
36
+
37
+ # 1. Image Selector
38
+ uploaded_filename = st.sidebar.file_uploader("Upload an image", type=allowed_image_types)
39
+ image_datetime = None # For storing date-time from image
40
+
41
+ if uploaded_filename is not None:
42
+ # Display the uploaded image
43
+ image = Image.open(uploaded_filename)
44
+ st.sidebar.image(image, caption='Uploaded Image.', use_column_width=True)
45
+
46
+ # Extract and display image date-time
47
+ image_datetime = get_image_datetime(uploaded_filename)
48
+ print(f"[D] image date extracted as {image_datetime}")
49
+
50
+ metadata = {
51
+ "latitude": 23.5,
52
+ "longitude": 44,
53
+ "author_email": "[email protected]",
54
+ "date": None,
55
+ "time": None,
56
+ }
57
+
58
+ # 2. Latitude Entry Box
59
+ latitude = st.sidebar.text_input("Latitude", metadata.get('latitude', ""))
60
+ # 3. Longitude Entry Box
61
+ longitude = st.sidebar.text_input("Longitude", metadata.get('longitude', ""))
62
+ # 4. Author Box with Email Address Validator
63
+ author_email = st.sidebar.text_input("Author Email", metadata.get('author_email', ""))
64
+
65
+ if author_email and not is_valid_email(author_email):
66
+ st.sidebar.error("Please enter a valid email address.")
67
+
68
+
69
+
70
+
71
+ # 5. date/time
72
+ ## first from image metadata
73
+ if image_datetime is not None:
74
+ time_value = datetime.datetime.strptime(image_datetime, '%Y:%m:%d %H:%M:%S').time()
75
+ date_value = datetime.datetime.strptime(image_datetime, '%Y:%m:%d %H:%M:%S').date()
76
+ else:
77
+ time_value = datetime.datetime.now().time() # Default to current time
78
+ date_value = datetime.datetime.now().date()
79
+
80
+ ## if not, give user the option to enter manually
81
+ date_option = st.sidebar.date_input("Date", value=date_value)
82
+ time_option = st.sidebar.time_input("Time", time_value)
83
+
84
+
85
+
86
+ # Display submitted data
87
+ if st.sidebar.button("Upload"):
88
+ # create a dictionary with the submitted data
89
+ submitted_data = {
90
+ "latitude": latitude,
91
+ "longitude": longitude,
92
+ "author_email": author_email,
93
+ "date": str(date_option),
94
+ "time": str(time_option),
95
+ "predicted_class": None,
96
+ "image_filename": uploaded_filename.name if uploaded_filename else None,
97
+ "image_md5": hashlib.md5(uploaded_filename.read()).hexdigest() if uploaded_filename else None,
98
+
99
+ }
100
+
101
+ st.write("Submitted Data:")
102
+ st.write(f"Latitude: {submitted_data['latitude']}")
103
+ st.write(f"Longitude: {submitted_data['longitude']}")
104
+ st.write(f"Author Email: {submitted_data['author_email']}")
105
+ st.write(f"Date: {submitted_data['date']}")
106
+ st.write(f"Time: {submitted_data['time']}")
107
+
108
+ st.write(f"full dict of data: {json.dumps(submitted_data)}")
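The EXIF handling above can be exercised on its own, outside Streamlit; a small sketch follows, with the image path as an assumption.

```python
# Standalone sketch (assumed file path) of the EXIF date extraction used above.
import datetime
from PIL import Image, ExifTags

def exif_datetime(path):
    exif = Image.open(path)._getexif()
    if not exif:
        return None
    for tag, value in exif.items():
        if ExifTags.TAGS.get(tag) == 'DateTimeOriginal':
            # same format the sidebar code parses with strptime
            return datetime.datetime.strptime(value, '%Y:%m:%d %H:%M:%S')
    return None

print(exif_datetime("some_whale_photo.jpg"))  # hypothetical file
```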
call_models/entry_and_hotdog.py ADDED
@@ -0,0 +1,304 @@
1
+ import datetime
2
+ import os
3
+ import json
4
+ import logging
5
+ import tempfile
6
+ import pandas as pd
7
+ import streamlit as st
8
+ import folium
9
+ from streamlit_folium import st_folium
10
+ from huggingface_hub import HfApi
11
+ #from datasets import load_dataset
12
+ #from fix_tabrender import js_show_zeroheight_iframe
13
+
14
+ import whale_viewer as sw_wv
15
+ import input_handling as sw_inp
16
+ import alps_map as sw_am
17
+ import whale_gallery as sw_wg
18
+ import obs_map as sw_map
19
+ import st_logs as sw_logs
20
+
21
+
22
+
23
+ from transformers import pipeline
24
+ from transformers import AutoModelForImageClassification
25
+
26
+ # setup for the ML model on huggingface (our wrapper)
27
+ os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
28
+ # and the dataset of observations (hf dataset in our space)
29
+ dataset_id = "Saving-Willy/Happywhale-kaggle"
30
+ data_files = "data/train-00000-of-00001.parquet"
31
+
32
+ USE_BASIC_MAP = False
33
+ DEV_SIDEBAR_LIB = True
34
+
35
+ # get a global var for logger accessor in this module
36
+ LOG_LEVEL = logging.DEBUG
37
+ g_logger = logging.getLogger(__name__)
38
+ g_logger.setLevel(LOG_LEVEL)
39
+
40
+ st.set_page_config(layout="wide")
41
+ #sw_logs.setup_logging(level=LOG_LEVEL, buffer_len=40)
42
+
43
+
44
+
45
+ # initialise various session state variables
46
+ if "handler" not in st.session_state:
47
+ st.session_state['handler'] = sw_logs.setup_logging()
48
+
49
+ if "full_data" not in st.session_state:
50
+ st.session_state.full_data = {}
51
+
52
+ if "classify_whale_done" not in st.session_state:
53
+ st.session_state.classify_whale_done = False
54
+
55
+ if "whale_prediction1" not in st.session_state:
56
+ st.session_state.whale_prediction1 = None
57
+
58
+ if "image" not in st.session_state:
59
+ st.session_state.image = None
60
+
61
+ if "tab_log" not in st.session_state:
62
+ st.session_state.tab_log = None
63
+
64
+
65
+ def metadata2md():
66
+ markdown_str = "\n"
67
+ for key, value in st.session_state.full_data.items():
68
+ markdown_str += f"- **{key}**: {value}\n"
69
+ return markdown_str
70
+
71
+
72
+ def push_observation(tab_log=None):
73
+ # we get the data from session state: 1 is the dict 2 is the image.
74
+ # first, lets do an info display (popup)
75
+ metadata_str = json.dumps(st.session_state.full_data)
76
+
77
+ st.toast(f"Uploading observation: {metadata_str}", icon="🦭")
78
+ tab_log = st.session_state.tab_log
79
+ if tab_log is not None:
80
+ tab_log.info(f"Uploading observation: {metadata_str}")
81
+
82
+ # get huggingface api
83
+ api = HfApi()
84
+
85
+ f = tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False)
86
+ f.write(metadata_str)
87
+ f.close()
88
+ st.info(f"temp file: {f.name} with metadata written...")
89
+
90
+ path_in_repo= f"metadata/{st.session_state.full_data['author_email']}/{st.session_state.full_data['image_md5']}.json"
91
+ msg = f"fname: {f.name} | path: {path_in_repo}"
92
+ print(msg)
93
+ st.warning(msg)
94
+ rv = api.upload_file(
95
+ path_or_fileobj=f.name,
96
+ path_in_repo=path_in_repo,
97
+ repo_id="Saving-Willy/Happywhale-kaggle",
98
+ repo_type="dataset",
99
+ )
100
+ print(rv)
101
+ msg = f"data attempted tx to repo happy walrus: {rv}"
102
+ g_logger.info(msg)
103
+ st.info(msg)
104
+
105
+
106
+ if __name__ == "__main__":
107
+
108
+ g_logger.info("App started.")
109
+
110
+ #g_logger.debug("debug message")
111
+ #g_logger.info("info message")
112
+ #g_logger.warning("warning message")
113
+
114
+ # Streamlit app
115
+ #tab_gallery, tab_inference, tab_hotdogs, tab_map, tab_data, tab_log = st.tabs(["Cetecean classifier", "Hotdog classifier", "Map", "Data", "Log", "Beautiful cetaceans"])
116
+ tab_inference, tab_hotdogs, tab_map, tab_data, tab_log, tab_gallery = st.tabs(["Cetacean classifier", "Hotdog classifier", "Map", "Data", "Log", "Beautiful cetaceans"])
117
+ st.session_state.tab_log = tab_log
118
+
119
+
120
+ # create a sidebar, and parse all the input (returned as `observation` object)
121
+ observation = sw_inp.setup_input(viewcontainer=st.sidebar)
122
+
123
+
124
+ if 0:## WIP
125
+ # goal of this code is to allow the user to override the ML prediction, before transmitting an observation
126
+ predicted_class = st.sidebar.selectbox("Predicted Class", sw_wv.WHALE_CLASSES)
127
+ override_prediction = st.sidebar.checkbox("Override Prediction")
128
+
129
+ if override_prediction:
130
+ overridden_class = st.sidebar.selectbox("Override Class", sw_wv.WHALE_CLASSES)
131
+ st.session_state.full_data['class_overriden'] = overridden_class
132
+ else:
133
+ st.session_state.full_data['class_overriden'] = None
134
+
135
+
136
+ with tab_map:
137
+ # visual structure: a couple of toggles at the top, then the map including a
138
+ # dropdown for tileset selection.
139
+ tab_map_ui_cols = st.columns(2)
140
+ with tab_map_ui_cols[0]:
141
+ show_db_points = st.toggle("Show Points from DB", True)
142
+ with tab_map_ui_cols[1]:
143
+ dbg_show_extra = st.toggle("Show Extra points (test)", False)
144
+
145
+ if show_db_points:
146
+ # show a nicer map, observations marked, tileset selectable.
147
+ st_data = sw_map.present_obs_map(
148
+ dataset_id=dataset_id, data_files=data_files,
149
+ dbg_show_extra=dbg_show_extra)
150
+
151
+ else:
152
+ # development map.
153
+ st_data = sw_am.present_alps_map()
154
+
155
+
156
+ with tab_log:
157
+ handler = st.session_state['handler']
158
+ if handler is not None:
159
+ records = sw_logs.parse_log_buffer(handler.buffer)
160
+ st.dataframe(records[::-1], use_container_width=True,)
161
+ st.info(f"Length of records: {len(records)}")
162
+ else:
163
+ st.error("⚠️ No log handler found!")
164
+
165
+
166
+
167
+ with tab_data:
168
+ # the goal of this tab is to allow selection of the new observation's location by map click/adjust.
169
+ st.markdown("Coming later hope! :construction:")
170
+
171
+ st.write("Click on the map to capture a location.")
172
+ #m = folium.Map(location=visp_loc, zoom_start=7)
173
+ mm = folium.Map(location=[39.949610, -75.150282], zoom_start=16)
174
+ folium.Marker( [39.949610, -75.150282], popup="Liberty Bell", tooltip="Liberty Bell"
175
+ ).add_to(mm)
176
+
177
+ st_data2 = st_folium(mm, width=725)
178
+ st.write("below the map...")
179
+ if st_data2['last_clicked'] is not None:
180
+ print(st_data2)
181
+ st.info(st_data2['last_clicked'])
182
+
183
+
184
+ with tab_gallery:
185
+ # here we make a container to allow filtering css properties
186
+ # specific to the gallery (otherwise we get side effects)
187
+ tg_cont = st.container(key="swgallery")
188
+ with tg_cont:
189
+ sw_wg.render_whale_gallery(n_cols=4)
190
+
191
+
192
+ # Display submitted data
193
+ if st.sidebar.button("Validate"):
194
+ # create a dictionary with the submitted data
195
+ submitted_data = observation.to_dict()
196
+ #print(submitted_data)
197
+
198
+ #full_data.update(**submitted_data)
199
+ for k, v in submitted_data.items():
200
+ st.session_state.full_data[k] = v
201
+
202
+ #st.write(f"full dict of data: {json.dumps(submitted_data)}")
203
+ #tab_inference.info(f"{st.session_state.full_data}")
204
+ tab_log.info(f"{st.session_state.full_data}")
205
+
206
+ df = pd.DataFrame(submitted_data, index=[0])
207
+ with tab_data:
208
+ st.table(df)
209
+
210
+
211
+
212
+
213
+ # inside the inference tab, on button press we call the model (on huggingface hub)
214
+ # which will be run locally.
215
+ # - the model predicts the top 3 most likely species from the input image
216
+ # - these species are shown
217
+ # - the user can override the species prediction using the dropdown
218
+ # - an observation is uploaded if the user chooses.
219
+
220
+ if tab_inference.button("Identify with cetacean classifier"):
221
+ #pipe = pipeline("image-classification", model="Saving-Willy/cetacean-classifier", trust_remote_code=True)
222
+ cetacean_classifier = AutoModelForImageClassification.from_pretrained("Saving-Willy/cetacean-classifier",
223
+ revision='0f9c15e2db4d64e7f622ade518854b488d8d35e6', trust_remote_code=True)
224
+
225
+ if st.session_state.image is None:
226
+ # TODO: cleaner design to disable the button until data input done?
227
+ st.info("Please upload an image first.")
228
+ else:
229
+ # run classifier model on `image`, and persistently store the output
230
+ out = cetacean_classifier(st.session_state.image) # get top 3 matches
231
+ st.session_state.whale_prediction1 = out['predictions'][0]
232
+ st.session_state.classify_whale_done = True
233
+ msg = f"[D]2 classify_whale_done: {st.session_state.classify_whale_done}, whale_prediction1: {st.session_state.whale_prediction1}"
234
+ st.info(msg)
235
+ g_logger.info(msg)
236
+
237
+ # dropdown for selecting/overriding the species prediction
238
+ #st.info(f"[D] classify_whale_done: {st.session_state.classify_whale_done}, whale_prediction1: {st.session_state.whale_prediction1}")
239
+ if not st.session_state.classify_whale_done:
240
+ selected_class = tab_inference.sidebar.selectbox("Species", sw_wv.WHALE_CLASSES, index=None, placeholder="Species not yet identified...", disabled=True)
241
+ else:
242
+ pred1 = st.session_state.whale_prediction1
243
+ # get index of pred1 from WHALE_CLASSES, none if not present
244
+ print(f"[D] pred1: {pred1}")
245
+ ix = sw_wv.WHALE_CLASSES.index(pred1) if pred1 in sw_wv.WHALE_CLASSES else None
246
+ selected_class = tab_inference.selectbox("Species", sw_wv.WHALE_CLASSES, index=ix)
247
+
248
+ st.session_state.full_data['predicted_class'] = selected_class
249
+ if selected_class != st.session_state.whale_prediction1:
250
+ st.session_state.full_data['class_overriden'] = selected_class
251
+
252
+ btn = st.button("Upload observation to THE INTERNET!", on_click=push_observation)
253
+ # TODO: the metadata only fills properly if `validate` was clicked.
254
+ tab_inference.markdown(metadata2md())
255
+
256
+ msg = f"[D] full data after inference: {st.session_state.full_data}"
257
+ g_logger.debug(msg)
258
+ print(msg)
259
+ # TODO: add a link to more info on the model, next to the button.
260
+
261
+ whale_classes = out['predictions'][:]
262
+ # render images for the top 3 (that is what the model api returns)
263
+ with tab_inference:
264
+ st.markdown("## Species detected")
265
+ for i in range(len(whale_classes)):
266
+ sw_wv.display_whale(whale_classes, i)
267
+
268
+
269
+
270
+
271
+ # inside the hotdog tab, on button press we call a 2nd model (totally unrelated at present, just for demo
272
+ # purposes, a hotdog image classifier) which will be run locally.
273
+ # - this model predicts if the image is a hotdog or not, and returns probabilities
274
+ # - the input image is the same as for the cetacean classifier - defined in the sidebar
275
+
276
+ if tab_hotdogs.button("Get Hotdog Prediction"):
277
+
278
+ pipeline = pipeline(task="image-classification", model="julien-c/hotdog-not-hotdog")
279
+ tab_hotdogs.title("Hot Dog? Or Not?")
280
+
281
+ if st.session_state.image is None:
282
+ st.info("Please upload an image first.")
283
+ st.info(str(observation.to_dict()))
284
+
285
+ else:
286
+ col1, col2 = tab_hotdogs.columns(2)
287
+
288
+ # display the image (use cached version, no need to reread)
289
+ col1.image(st.session_state.image, use_column_width=True)
290
+ # and then run inference on the image
291
+ predictions = pipeline(st.session_state.image)
292
+
293
+ col2.header("Probabilities")
294
+ first = True
295
+ for p in predictions:
296
+ col2.subheader(f"{ p['label'] }: { round(p['score'] * 100, 1)}%")
297
+ if first:
298
+ st.session_state.full_data['predicted_class'] = p['label']
299
+ st.session_state.full_data['predicted_score'] = round(p['score'] * 100, 1)
300
+ first = False
301
+
302
+ tab_hotdogs.write(f"Session Data: {json.dumps(st.session_state.full_data)}")
303
+
304
+
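The inference path above (load the cetacean classifier from the Hub, read `out['predictions']`, let the user override) can also be run without the UI; a hedged sketch, with the input image path as an assumption:

```python
# Sketch (not part of this commit): call the cetacean classifier outside Streamlit,
# mirroring the tab_inference code path above.
from PIL import Image
from transformers import AutoModelForImageClassification

cetacean_classifier = AutoModelForImageClassification.from_pretrained(
    "Saving-Willy/cetacean-classifier",
    revision='0f9c15e2db4d64e7f622ade518854b488d8d35e6',
    trust_remote_code=True)

image = Image.open("some_whale_photo.jpg")   # hypothetical input image
out = cetacean_classifier(image)             # wrapper returns a dict with 'predictions'
print(out['predictions'][:3])                # top matches, as rendered in the tab above
```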
call_models/fix_tabrender.py ADDED
@@ -0,0 +1,69 @@
1
+ import streamlit as st
2
+
3
+ # code for fixing the issue with streamlit tabs rendering height 0 when not active
4
+ # https://github.com/streamlit/streamlit/issues/7376
5
+ #
6
+ # see also https://github.com/randyzwitch/streamlit-folium/issues/128, got
7
+ # closed because it is apparently a st.tabs problem
8
+
9
+
10
+ import uuid, html
11
+ # workaround for streamlit making tabs height 0 when not active, breaks map
12
+ def inject_iframe_js_code(source: str) -> None:
13
+ div_id = uuid.uuid4()
14
+
15
+ st.markdown(
16
+ f"""
17
+ <div style="height: 0; width: 0; overflow: hidden;" id="{div_id}">
18
+ <iframe src="javascript: \
19
+ var script = document.createElement('script'); \
20
+ script.type = 'text/javascript'; \
21
+ script.text = {html.escape(repr(source))}; \
22
+ var div = window.parent.document.getElementById('{div_id}'); \
23
+ div.appendChild(script); \
24
+ setTimeout(function() {{ }}, 0); \
25
+ "></iframe>
26
+ </div>
27
+ """,
28
+ unsafe_allow_html=True,
29
+ )
30
+
31
+ def js_show_zeroheight_iframe(component_iframe_title: str, height: str = "auto"):
32
+ source = f"""
33
+ (function() {{
34
+ var attempts = 0;
35
+ const maxAttempts = 20; // Max attempts to find the iframe
36
+ const intervalMs = 250; // Interval between attempts in milliseconds
37
+
38
+ function setIframeHeight() {{
39
+ const intervalId = setInterval(function() {{
40
+ var iframes = document.querySelectorAll('iframe[title="{component_iframe_title}"]');
41
+ if (iframes.length > 0 || attempts > maxAttempts) {{
42
+ if (iframes.length > 0) {{
43
+ iframes.forEach(iframe => {{
44
+ if (iframe || iframe.height === "0" || iframe.style.height === "0px") {{
45
+ iframe.style.height = "{height}";
46
+ iframe.setAttribute("height", "{height}");
47
+ console.log('Height of iframe with title "{component_iframe_title}" set to {height}.');
48
+ }}
49
+ }});
50
+ }} else {{
51
+ console.log('Iframes with title "{component_iframe_title}" not found after ' + maxAttempts + ' attempts.');
52
+ }}
53
+ clearInterval(intervalId); // Stop checking
54
+ }}
55
+ attempts++;
56
+ }}, intervalMs);
57
+ }}
58
+
59
+
60
+ function trackInteraction(event) {{
61
+ console.log('User interaction detected:', event.type);
62
+ setIframeHeight();
63
+ }}
64
+
65
+ setIframeHeight();
66
+ document.addEventListener('click', trackInteraction);
67
+ }})();
68
+ """
69
+ inject_iframe_js_code(source)
call_models/hotdogs.py ADDED
@@ -0,0 +1,24 @@
+import streamlit as st
+from transformers import pipeline
+from PIL import Image
+import time
+
+
+pipeline = pipeline(task="image-classification", model="julien-c/hotdog-not-hotdog")
+
+
+
+st.title("Hot Dog? Or Not?")
+
+file_name = st.file_uploader("Upload a hot dog candidate image")
+
+if file_name is not None:
+    col1, col2 = st.columns(2)
+
+    image = Image.open(file_name)
+    col1.image(image, use_column_width=True)
+    predictions = pipeline(image)
+
+    col2.header("Probabilities")
+    for p in predictions:
+        col2.subheader(f"{ p['label'] }: { round(p['score'] * 100, 1)}%")
call_models/images/references/640x427-atlantic-white-sided-dolphin.jpg ADDED
call_models/images/references/640x427-long-finned-pilot-whale.webp ADDED
call_models/images/references/640x427-southern-right-whale.jpg ADDED
call_models/images/references/Humpback.webp ADDED
call_models/images/references/Whale_Short-Finned_Pilot-markedDW.png ADDED
call_models/images/references/beluga.webp ADDED
call_models/images/references/blue-whale.webp ADDED
call_models/images/references/bottlenose_dolphin.webp ADDED
call_models/images/references/brydes.webp ADDED
call_models/images/references/common_dolphin.webp ADDED
call_models/images/references/cuviers_beaked_whale.webp ADDED
call_models/images/references/false-killer-whale.webp ADDED
call_models/images/references/fin-whale.webp ADDED
call_models/images/references/gray-whale.webp ADDED
call_models/images/references/killer_whale.webp ADDED
call_models/images/references/melon.webp ADDED
call_models/images/references/minke-whale.webp ADDED
call_models/images/references/pantropical-spotted-dolphin.webp ADDED
call_models/images/references/pygmy-killer-whale.webp ADDED
call_models/images/references/rough-toothed-dolphin.webp ADDED
call_models/images/references/sei.webp ADDED
call_models/images/references/spinner.webp ADDED
call_models/imgs/cakes.jpg ADDED
call_models/input_handling.py ADDED
@@ -0,0 +1,184 @@
1
+ from PIL import Image
2
+ from PIL import ExifTags
3
+ import re
4
+ import datetime
5
+ import hashlib
6
+ import logging
7
+
8
+ import streamlit as st
9
+
10
+ m_logger = logging.getLogger(__name__)
11
+ # we can set the log level locally for funcs in this module
12
+ #g_m_logger.setLevel(logging.DEBUG)
13
+ m_logger.setLevel(logging.INFO)
14
+
15
+ '''
16
+ A module to setup the input handling for the whale observation guidance tool
17
+
18
+ both the UI elements (setup_input_UI) and the validation functions.
19
+ '''
20
+ #allowed_image_types = ['webp']
21
+ allowed_image_types = ['jpg', 'jpeg', 'png', 'webp']
22
+
23
+
24
+ # autogenerated class to hold the input data
25
+ class InputObservation:
26
+ def __init__(self, image=None, latitude=None, longitude=None, author_email=None, date=None, time=None, date_option=None, time_option=None, uploaded_filename=None):
27
+ self.image = image
28
+ self.latitude = latitude
29
+ self.longitude = longitude
30
+ self.author_email = author_email
31
+ self.date = date
32
+ self.time = time
33
+ self.date_option = date_option
34
+ self.time_option = time_option
35
+ self.uploaded_filename = uploaded_filename
36
+
37
+ def __str__(self):
38
+ return f"Observation: {self.image}, {self.latitude}, {self.longitude}, {self.author_email}, {self.date}, {self.time}, {self.date_option}, {self.time_option}, {self.uploaded_filename}"
39
+
40
+ def __repr__(self):
41
+ return f"Observation: {self.image}, {self.latitude}, {self.longitude}, {self.author_email}, {self.date}, {self.time}, {self.date_option}, {self.time_option}, {self.uploaded_filename}"
42
+
43
+ def __eq__(self, other):
44
+ return (self.image == other.image and self.latitude == other.latitude and self.longitude == other.longitude and
45
+ self.author_email == other.author_email and self.date == other.date and self.time == other.time and
46
+ self.date_option == other.date_option and self.time_option == other.time_option and self.uploaded_filename == other.uploaded_filename)
47
+
48
+ def __ne__(self, other):
49
+ return not self.__eq__(other)
50
+
51
+ def __hash__(self):
52
+ return hash((self.image, self.latitude, self.longitude, self.author_email, self.date, self.time, self.date_option, self.time_option, self.uploaded_filename))
53
+
54
+ def to_dict(self):
55
+ return {
56
+ #"image": self.image,
57
+ "image_filename": self.uploaded_filename.name if self.uploaded_filename else None,
58
+ "image_md5": hashlib.md5(self.uploaded_filename.read()).hexdigest() if self.uploaded_filename else None,
59
+ "latitude": self.latitude,
60
+ "longitude": self.longitude,
61
+ "author_email": self.author_email,
62
+ "date": self.date,
63
+ "time": self.time,
64
+ "date_option": self.date_option,
65
+ "time_option": self.time_option,
66
+ "uploaded_filename": self.uploaded_filename
67
+ }
68
+
69
+ @classmethod
70
+ def from_dict(cls, data):
71
+ return cls(data["image"], data["latitude"], data["longitude"], data["author_email"], data["date"], data["time"], data["date_option"], data["time_option"], data["uploaded_filename"])
72
+
73
+ @classmethod
74
+ def from_input(cls, input):
75
+ return cls(input.image, input.latitude, input.longitude, input.author_email, input.date, input.time, input.date_option, input.time_option, input.uploaded_filename)
76
+
77
+ @staticmethod
78
+ def from_input(input):
79
+ return InputObservation(input.image, input.latitude, input.longitude, input.author_email, input.date, input.time, input.date_option, input.time_option, input.uploaded_filename)
80
+
81
+ @staticmethod
82
+ def from_dict(data):
83
+ return InputObservation(data["image"], data["latitude"], data["longitude"], data["author_email"], data["date"], data["time"], data["date_option"], data["time_option"], data["uploaded_filename"])
84
+
85
+ # define function to validate number, allowing signed float
86
+ def is_valid_number(number:str) -> bool:
87
+ pattern = r'^[-+]?[0-9]*\.?[0-9]+$'
88
+ return re.match(pattern, number) is not None
89
+
90
+
91
+ # Function to validate email address
92
+ def is_valid_email(email):
93
+ pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
94
+ return re.match(pattern, email) is not None
95
+
96
+ # Function to extract date and time from image metadata
97
+ def get_image_datetime(image_file):
98
+ try:
99
+ image = Image.open(image_file)
100
+ exif_data = image._getexif()
101
+ if exif_data is not None:
102
+ for tag, value in exif_data.items():
103
+ if ExifTags.TAGS.get(tag) == 'DateTimeOriginal':
104
+ return value
105
+ except Exception as e:
106
+ st.warning("Could not extract date from image metadata.")
107
+ return None
108
+
109
+
110
+ # an arbitrary set of defaults so testing is less painful...
111
+ # ideally we add in some randomization to the defaults
112
+ spoof_metadata = {
113
+ "latitude": 23.5,
114
+ "longitude": 44,
115
+ "author_email": "[email protected]",
116
+ "date": None,
117
+ "time": None,
118
+ }
119
+
120
+ #def display_whale(whale_classes:List[str], i:int, viewcontainer=None):
121
+ def setup_input(viewcontainer: st.delta_generator.DeltaGenerator=None, _allowed_image_types: list=None, ):
122
+
123
+ if viewcontainer is None:
124
+ viewcontainer = st.sidebar
125
+
126
+ if _allowed_image_types is None:
127
+ _allowed_image_types = allowed_image_types
128
+
129
+
130
+ viewcontainer.title("Input image and data")
131
+
132
+ # 1. Image Selector
133
+ uploaded_filename = viewcontainer.file_uploader("Upload an image", type=allowed_image_types)
134
+ image_datetime = None # For storing date-time from image
135
+
136
+ if uploaded_filename is not None:
137
+ # Display the uploaded image
138
+ image = Image.open(uploaded_filename)
139
+ viewcontainer.image(image, caption='Uploaded Image.', use_column_width=True)
140
+ # store the image in the session state
141
+ st.session_state.image = image
142
+
143
+
144
+ # Extract and display image date-time
145
+ image_datetime = get_image_datetime(uploaded_filename)
146
+ print(f"[D] image date extracted as {image_datetime}")
147
+ m_logger.debug(f"image date extracted as {image_datetime} (from {uploaded_filename})")
148
+
149
+
150
+ # 2. Latitude Entry Box
151
+ latitude = viewcontainer.text_input("Latitude", spoof_metadata.get('latitude', ""))
152
+ if latitude and not is_valid_number(latitude):
153
+ viewcontainer.error("Please enter a valid latitude (numerical only).")
154
+ m_logger.error(f"Invalid latitude entered: {latitude}.")
155
+ # 3. Longitude Entry Box
156
+ longitude = viewcontainer.text_input("Longitude", spoof_metadata.get('longitude', ""))
157
+ if longitude and not is_valid_number(longitude):
158
+ viewcontainer.error("Please enter a valid longitude (numerical only).")
159
+ m_logger.error(f"Invalid longitude entered: {longitude}.")
160
+
161
+ # 4. Author Box with Email Address Validator
162
+ author_email = viewcontainer.text_input("Author Email", spoof_metadata.get('author_email', ""))
163
+
164
+ if author_email and not is_valid_email(author_email):
165
+ viewcontainer.error("Please enter a valid email address.")
166
+
167
+ # 5. date/time
168
+ ## first from image metadata
169
+ if image_datetime is not None:
170
+ time_value = datetime.datetime.strptime(image_datetime, '%Y:%m:%d %H:%M:%S').time()
171
+ date_value = datetime.datetime.strptime(image_datetime, '%Y:%m:%d %H:%M:%S').date()
172
+ else:
173
+ time_value = datetime.datetime.now().time() # Default to current time
174
+ date_value = datetime.datetime.now().date()
175
+
176
+ ## if not, give user the option to enter manually
177
+ date_option = st.sidebar.date_input("Date", value=date_value)
178
+ time_option = st.sidebar.time_input("Time", time_value)
179
+
180
+ observation = InputObservation(image=uploaded_filename, latitude=latitude, longitude=longitude,
181
+ author_email=author_email, date=image_datetime, time=None,
182
+ date_option=date_option, time_option=time_option)
183
+ return observation
184
+
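A minimal sketch of how the returned observation is consumed, mirroring the Validate handler in entry_and_hotdog.py (illustrative only, not part of this commit):

```python
# Illustrative sketch: consume the InputObservation produced by setup_input(),
# the same way entry_and_hotdog.py merges it into session state.
import streamlit as st
import input_handling as sw_inp

if "full_data" not in st.session_state:
    st.session_state.full_data = {}

observation = sw_inp.setup_input(viewcontainer=st.sidebar)

if st.sidebar.button("Validate"):
    for k, v in observation.to_dict().items():
        st.session_state.full_data[k] = v
    st.write(st.session_state.full_data)
```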
call_models/obs_map.py ADDED
@@ -0,0 +1,163 @@
1
+ from typing import Tuple
2
+ import logging
3
+
4
+ import pandas as pd
5
+ from datasets import load_dataset
6
+ import streamlit as st
7
+ import folium
8
+ from streamlit_folium import st_folium
9
+
10
+ import whale_viewer as sw_wv
11
+ from fix_tabrender import js_show_zeroheight_iframe
12
+
13
+ m_logger = logging.getLogger(__name__)
14
+ # we can set the log level locally for funcs in this module
15
+ #g_m_logger.setLevel(logging.DEBUG)
16
+ m_logger.setLevel(logging.INFO)
17
+
18
+ # TODO: refactor so we have richer data: a tuple or dict combining
19
+ # the dropdown label, the tileset name, the attribution - everything
20
+ # needed to make the map logic simplified
21
+ tile_sets = [
22
+ 'Open Street Map',
23
+ #'Stamen Terrain',
24
+ #'Stamen Toner',
25
+ 'Esri Ocean',
26
+ 'Esri Images',
27
+ 'Stamen Watercolor',
28
+ 'CartoDB Positron',
29
+ #'CartoDB Dark_Matter'
30
+ ]
31
+
32
+ # a list of unique colours for each whale class (for the map)
33
+ _colors = [
34
+ "#FFD700", # Gold
35
+ "#FF5733", # Red
36
+ "#33FF57", # Green
37
+ "#3357FF", # Blue
38
+ "#FFFF33", # Yellow
39
+ "#FF33FF", # Magenta
40
+ "#33FFFF", # Cyan
41
+ "#FF8C00", # Dark Orange
42
+ "#8A2BE2", # Blue Violet
43
+ "#DEB887", # Burlywood
44
+ "#5F9EA0", # Cadet Blue
45
+ "#D2691E", # Chocolate
46
+ "#FF4500", # Orange Red
47
+ "#2E8B57", # Sea Green
48
+ "#DA70D6", # Orchid
49
+ "#FF6347", # Tomato
50
+ "#7FFF00", # Chartreuse
51
+ "#DDA0DD", # Plum
52
+ "#A0522D", # Sienna
53
+ "#4682B4", # Steel Blue
54
+ "#7B68EE", # Medium Slate Blue
55
+ "#F0E68C", # Khaki
56
+ "#B22222", # Firebrick
57
+ "#FF1493", # Deep Pink
58
+ "#FFFACD", # Lemon Chiffon
59
+ "#20B2AA", # Light Sea Green
60
+ "#778899" # Light Slate Gray
61
+ ]
62
+
63
+ whale2color = {k: v for k, v in zip(sw_wv.WHALE_CLASSES, _colors)}
64
+
65
+ def create_map(tile_name:str, location:Tuple, zoom_start: int = 7):
66
+ # https://xyzservices.readthedocs.io/en/stable/gallery.html
67
+ # get the attributions from here once we pick the 2-3-4 options
68
+ # make esri ocean the default
69
+ m = folium.Map(location=location, zoom_start=zoom_start,
70
+ tiles='Esri.OceanBasemap', attr="Esri")
71
+ #m = folium.Map(location=location, zoom_start=zoom_start)
72
+
73
+ attr = ""
74
+ if tile_name == 'Open Street Map':
75
+ folium.TileLayer('openstreetmap').add_to(m)
76
+ pass
77
+
78
+ #Esri.OceanBasemap
79
+ elif tile_name == 'Esri Ocean':
80
+ pass # made this one default ()
81
+ #attr = "Esri"
82
+ #folium.TileLayer('Esri.OceanBasemap', attr=attr).add_to(m)
83
+
84
+ elif tile_name == 'Esri Images':
85
+ attr = "Esri &mdash; Source: Esri, i-cubed, USDA"
86
+ #folium.TileLayer('stamenterrain', attr=attr).add_to(m)
87
+ folium.TileLayer('Esri.WorldImagery', attr=attr).add_to(m)
88
+ elif tile_name == 'Stamen Toner':
89
+ attr = "Stamen"
90
+ folium.TileLayer('stamentoner', attr=attr).add_to(m)
91
+ elif tile_name == 'Stamen Watercolor':
92
+ attr = "Stamen"
93
+ folium.TileLayer('Stadia.StamenWatercolor', attr=attr).add_to(m)
94
+ elif tile_name == 'CartoDB Positron':
95
+ folium.TileLayer('cartodb positron').add_to(m)
96
+ elif tile_name == 'CartoDB Dark_Matter':
97
+ folium.TileLayer('cartodb dark_matter').add_to(m)
98
+
99
+ #folium.LayerControl().add_to(m)
100
+ return m
101
+
102
+
103
+
104
+ def present_obs_map(dataset_id:str = "Saving-Willy/Happywhale-kaggle",
105
+ data_files:str = "data/train-00000-of-00001.parquet",
106
+ dbg_show_extra:bool = False):
107
+ '''
108
+ render a map, with a selectable tileset, and show markers for each of the whale
109
+ observations
110
+
111
+ '''
112
+ # load/download data from huggingface dataset
113
+ metadata = load_dataset(dataset_id, data_files=data_files)
114
+
115
+ # make a pandas df that is compliant with folium/streamlit maps
116
+ _df = pd.DataFrame({
117
+ 'lat': metadata["train"]["latitude"],
118
+ 'lon': metadata["train"]["longitude"],
119
+ 'species': metadata["train"]["predicted_class"],}
120
+ )
121
+ if dbg_show_extra:
122
+ # add a few samples to visualise colours
123
+ _df.loc[len(_df)] = {'lat': 0, 'lon': 0, 'species': 'rough_toothed_dolphin'}
124
+ _df.loc[len(_df)] = {'lat': -3, 'lon': 0, 'species': 'pygmy_killer_whale'}
125
+ _df.loc[len(_df)] = {'lat': 45.7, 'lon': -2.6, 'species': 'humpback_whale'}
126
+
127
+ ocean_loc = 0, 10
128
+ selected_tile = st.selectbox("Choose a tile set", tile_sets, index=None, placeholder="Choose a tile set...", disabled=False)
129
+ map_ = create_map(selected_tile, ocean_loc, zoom_start=2)
130
+
131
+ folium.Marker(
132
+ location=ocean_loc,
133
+ popup="Atlantis",
134
+ tooltip="Atlantis",
135
+ icon=folium.Icon(color='blue', icon='info-sign')
136
+ ).add_to(map_)
137
+
138
+ for _, row in _df.iterrows():
139
+ c = whale2color.get(row['species'], 'red')
140
+ msg = f"[D] color for {row['species']} is {c}"
141
+ m_logger.debug(msg) # depends on m_logger logging level (*not* the main st app's logger)
142
+ #m_logger.info(msg)
143
+
144
+ kw = {"prefix": "fa", "color": 'gray', "icon_color": c, "icon": "binoculars" }
145
+ folium.Marker(
146
+ location=[row['lat'], row['lon']],
147
+ popup=f"{row['species']} ",
148
+ tooltip=row['species'],
149
+ icon=folium.Icon(**kw)
150
+ ).add_to(map_)
151
+ #st.info(f"Added marker for {row['name']} {row['lat']} {row['lon']}")
152
+
153
+ st_data = st_folium(map_, width=725)
154
+
155
+ # workaround for correctly showing js components in tabs
156
+ js_show_zeroheight_iframe(
157
+ component_iframe_title="streamlit_folium.st_folium",
158
+ height=800,
159
+ )
160
+ # this is just debug info --
161
+ #st.info("[D]" + str(metadata.column_names))
162
+
163
+ return st_data
call_models/requirements.txt ADDED
@@ -0,0 +1,17 @@
+transformers
+streamlit
+huggingface_hub
+torch
+
+pandas
+numpy
+
+datasets
+
+# for nice map tiles
+folium
+streamlit_folium
+
+# for cetacean
+pytorch_lightning
+timm
call_models/st_logs.py ADDED
@@ -0,0 +1,128 @@
1
+ import logging
2
+ from datetime import datetime
3
+ import re
4
+ from collections import deque
5
+
6
+ import streamlit as st
7
+
8
+ # some discussions with code snippets from:
9
+ # https://discuss.streamlit.io/t/capture-and-display-logger-in-ui/69136
10
+
11
+ # configure log parsing (seems to need some tweaking)
12
+ _log_n_re = r'\[(\d+)\]'
13
+ _log_date_re = r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3})'
14
+ _log_mod_re = r'(\w+(?:\.\w+)*|__\w+__|<\w+>)'
15
+ _log_func_re = r'(\w+|<\w+>)'
16
+ _log_level_re = r'(\w+)'
17
+ _log_msg_re = '(.*)'
18
+ _sep = r' - '
19
+
20
+ log_pattern = re.compile(_log_n_re + _log_date_re + _sep + _log_mod_re + _sep +
21
+ _log_func_re + _sep + _log_level_re + _sep + _log_msg_re)
22
+
23
+
24
+ class StreamlitLogHandler(logging.Handler):
25
+ # Initializes a custom log handler with a Streamlit container for displaying logs
26
+ def __init__(self, container, maxlen:int=15, debug:bool=False):
27
+ super().__init__()
28
+ # Store the Streamlit container for log output
29
+ self.container = container
30
+ self.debug = debug
31
+ self.ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])') # Regex to remove ANSI codes
32
+ self.log_area = self.container.empty() # Prepare an empty container for log output
33
+
34
+ self.buffer = deque(maxlen=maxlen)
35
+ self._n = 0
36
+
37
+ def n_elems(self, verb:bool=False):
38
+ ''' return a string with num elements seen and num elements in buffer '''
39
+ if verb:
40
+ return f"total: {self._n}|| in buffer:{len(self.buffer)}"
41
+
42
+ return f"{self._n}||{len(self.buffer)}"
43
+
44
+ def emit(self, record):
45
+ self._n += 1
46
+ msg = f"[{self._n}]" + self.format(record)
47
+ self.buffer.append(msg)
48
+ clean_msg = self.ansi_escape.sub('', msg) # Strip ANSI codes
49
+ if self.debug:
50
+ self.log_area.markdown(clean_msg)
51
+
52
+ def clear_logs(self):
53
+ self.log_area.empty() # Clear previous logs
54
+ self.buffer.clear()
55
+
56
+ # Set up logging to capture all info level logs from the root logger
57
+ @st.cache_resource
58
+ def setup_logging(level: int=logging.INFO, buffer_len:int=15):
59
+ root_logger = logging.getLogger() # Get the root logger
60
+ log_container = st.container() # Create a container within which we display logs
61
+ handler = StreamlitLogHandler(log_container, maxlen=buffer_len)
62
+ handler.setLevel(level)
63
+
64
+ formatter = logging.Formatter('%(asctime)s - %(name)s - %(funcName)s - %(levelname)s - %(message)s')
65
+ handler.setFormatter(formatter)
66
+ root_logger.addHandler(handler)
67
+
68
+ #if 'handler' not in st.session_state:
69
+ # st.session_state['handler'] = handler
70
+ return handler
71
+
72
+ def parse_log_buffer(log_contents: deque) -> list:
73
+ ''' convert log buffer to a list of dictionaries '''
74
+ j = 0
75
+ records = []
76
+ for line in log_contents:
77
+ if line: # Skip empty lines
78
+ j+=1
79
+ try:
80
+ # regex to parsse log lines, with an example line:
81
+ # '[1]2024-11-09 11:19:06,688 - task - run - INFO - 🏃 Running task '
82
+ match = log_pattern.match(line)
83
+ if match:
84
+ n, timestamp_str, name, func_name, level, message = match.groups()
85
+
86
+ # Convert timestamp string to datetime
87
+ timestamp = datetime.strptime(timestamp_str, '%Y-%m-%d %H:%M:%S,%f')
88
+
89
+ records.append({
90
+ 'timestamp': timestamp,
91
+ 'n': n,
92
+ 'level': level,
93
+ 'module': name,
94
+ 'func': func_name,
95
+ 'message': message
96
+ })
97
+ except Exception as e:
98
+ print(f"Failed to parse line: {line}")
99
+ print(f"Error: {e}")
100
+ continue
101
+ return records
102
+
103
+ def something():
104
+ '''function to demo adding log entries'''
105
+ logger = logging.getLogger(__name__)
106
+ logger.setLevel(logging.DEBUG)
107
+ logger.debug("debug message")
108
+ logger.info("info message")
109
+ logger.warning("warning message")
110
+ logger.error("error message")
111
+ logger.critical("critical message")
112
+
113
+
114
+ if __name__ == "__main__":
115
+
116
+ # create a logging handler for streamlit + regular python logging module
117
+ handler = setup_logging()
118
+
119
+ # get buffered log data and parse, ready for display as dataframe
120
+ records = parse_log_buffer(handler.buffer)
121
+
122
+ c1, c2 = st.columns([1, 3])
123
+ with c1:
124
+ button = st.button("do something", on_click=something)
125
+ with c2:
126
+ st.info(f"Length of records: {len(records)}")
127
+ #tab = st.table(records)
128
+ tab = st.dataframe(records[::-1], use_container_width=True) # scrollable, selectable.
call_models/test_upload.py ADDED
@@ -0,0 +1,49 @@
+from huggingface_hub import HfApi
+import json
+import tempfile
+import os
+
+#huggingface_hub
+
+submission = {'latitude': '3.5', 'longitude': '44', 'author_email':
+    '[email protected]', 'date': '2024-10-25', 'time': '12:07:04.487612',
+    'predicted_class': 'bottlenose_dolphin', 'class_overriden': None,
+    'image_filename': '000a8f2d5c316a.webp', 'image_md5':
+    'd41d8cd98f00b204e9800998ecf8427e'}
+
+imgname = submission['image_filename']
+
+api = HfApi()
+
+
+# generate a tempdirectory to store the image
+#tempdir = tempfile.TemporaryDirectory()
+# write a tempfile
+
+# write submission to a tempfile in json format with the name of the image, giving the filename and path as a string
+
+f = tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False)
+jstr = json.dumps(submission)
+f.write(jstr)
+f.close()
+
+
+
+#with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
+#    jstr = json.dumps(submission)
+#    f.write(jstr)
+#    #print(f.path)
+
+path_in_repo= f"metadata/{submission['author_email']}/{submission['image_md5']}.json"
+print(f"fname: {f.name} | path: {path_in_repo}")
+rv = api.upload_file(
+    path_or_fileobj=f.name,
+    path_in_repo=path_in_repo,
+    repo_id="Saving-Willy/Happywhale-kaggle",
+    repo_type="dataset",
+)
+print(rv)
+
+
+
+
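For completeness, the uploaded metadata can be fetched back from the dataset repo; a sketch continuing from the script above (read access to the dataset is assumed):

```python
# Sketch (assumption: read access to the dataset repo): fetch the uploaded
# metadata file back and inspect it, reusing `submission` from the script above.
import json
from huggingface_hub import hf_hub_download

local_path = hf_hub_download(
    repo_id="Saving-Willy/Happywhale-kaggle",
    repo_type="dataset",
    filename=f"metadata/{submission['author_email']}/{submission['image_md5']}.json",
)
with open(local_path) as fh:
    print(json.load(fh))
```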
call_models/whale_gallery.py ADDED
@@ -0,0 +1,89 @@
+ from itertools import cycle
+ import streamlit as st
+
+ import whale_viewer as sw_wv
+
+ def render_whale_gallery(n_cols:int = 4):
+     """
+     A function to display a gallery of whale images in a grid
+     """
+     def format_whale_name(name):
+         return name.replace("_", " ").capitalize()
+
+     # make a grid of images, use some css to get more uniform
+     # https://discuss.streamlit.io/t/grid-of-images-with-the-same-height/10668/12
+     # nb: I think there are some community components, need to investigate their usage
+     st.markdown(
+         """
+         <style>
+
+         .st-key-swgallery div[data-testid="stVerticalBlock"] {
+             justify-content: center;
+         }
+         .st-key-swgallery div[data-testid="stVerticalBlockBorderWrapper"] {
+             display: flex !important;
+             min-height: 185px !important; /* 185 for image+caption or 255 with link */
+             align-items: center;
+             /* background-color: darkgreen; */
+         }
+
+
+         /*
+         .st-key-swheader div[data-testid="stVerticalBlockBorderWrapper"] {
+             background-color: lightgreen;
+             min-height: 16px !important;
+             border: 1px solid #ccc;
+         }
+         */
+         .st-key-swgallery div[data-testid="stColumn"] {
+             flex: 1 !important; /* additionally, equal width */
+             padding: 1em !important;
+             align-items: center;
+             border: solid !important;
+             border-radius: 0px !important;
+             max-width: 220px !important;
+             border-color: #0000 !important;
+         }
+         </style>
+         """,
+         unsafe_allow_html=True,
+     )
+
+     cols = cycle(st.columns(n_cols))
+     for ix in range(len(sw_wv.df_whale_img_ref)):
+         img_name = sw_wv.df_whale_img_ref.iloc[ix].loc["WHALE_IMAGES"]
+         whale_name = format_whale_name(str(sw_wv.df_whale_img_ref.iloc[ix].name))
+         url = sw_wv.df_whale_img_ref.iloc[ix].loc['WHALE_REFERENCES']
+         image_path = f"images/references/{img_name}"
+         #next(cols).image(image_path, width=150, caption=f"{whale_name}")
+         thing = next(cols)
+         with thing:
+             with st.container(border=True):
+                 # using the caption for name is most compact but no link.
+                 #st.image(image_path, width=150, caption=f"{whale_name}")
+                 st.image(image_path, width=150)
+                 #st.markdown(f"[{whale_name}]({url})" ) # doesn't seem to allow styling, just do in raw html
+                 html = f"<div style='text-align: center; font-size: 14px'><a href='{url}'>{whale_name}</a></div>"
+                 st.markdown(html, unsafe_allow_html=True)
+
+
+     #next(cols).image(image_path, width=150, caption=f"{whale_name}")
+
+
+ if __name__ == "__main__":
+     ''' example usage, with some other elements to help illustrate how
+     streamlit keys can be used to target specific css properties
+     '''
+     # define a container just to hold a couple of elements
+     header_cont = st.container(key='swheader')
+     with header_cont:
+         c1, c2 = st.columns([2, 3])
+         c1.markdown('left')
+         c2.button("Refresh Gallery (noop)")
+     # here we make a container to allow filtering css properties
+     # specific to the gallery (otherwise we get side effects)
+     tg_cont = st.container(key="swgallery")
+     with tg_cont:
+         render_whale_gallery(n_cols=4)
+
+     pass
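A hedged sketch of how another page in the app might embed this gallery (only `render_whale_gallery` comes from the file above; wrapping it in a container keyed "swgallery" is assumed so the `.st-key-swgallery` CSS selectors apply, mirroring the `__main__` demo):

import streamlit as st
import whale_gallery as gallery

# wrap the gallery in a keyed container so the scoped CSS takes effect
with st.container(key="swgallery"):
    gallery.render_whale_gallery(n_cols=4)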
call_models/whale_viewer.py ADDED
@@ -0,0 +1,145 @@
+ from typing import List
+
+ from PIL import Image
+ import pandas as pd
+
+ WHALE_CLASSES = [
+     "beluga",
+     "blue_whale",
+     "bottlenose_dolphin",
+     "brydes_whale",
+     "commersons_dolphin",
+     "common_dolphin",
+     "cuviers_beaked_whale",
+     "dusky_dolphin",
+     "false_killer_whale",
+     "fin_whale",
+     "frasiers_dolphin",
+     "gray_whale",
+     "humpback_whale",
+     "killer_whale",
+     "long_finned_pilot_whale",
+     "melon_headed_whale",
+     "minke_whale",
+     "pantropic_spotted_dolphin",
+     "pygmy_killer_whale",
+     "rough_toothed_dolphin",
+     "sei_whale",
+     "short_finned_pilot_whale",
+     "southern_right_whale",
+     "spinner_dolphin",
+     "spotted_dolphin",
+     "white_sided_dolphin",
+ ]
+
+ WHALE_IMAGES = [
+     "beluga.webp",
+     "blue-whale.webp",
+     "bottlenose_dolphin.webp",
+     "brydes.webp",
+     "common_dolphin.webp",
+     "common_dolphin.webp",
+     "cuviers_beaked_whale.webp",
+     "common_dolphin.webp",
+     "false-killer-whale.webp",
+     "fin-whale.webp",
+     "fin-whale.webp",
+     "gray-whale.webp",
+     "Humpback.webp",
+     "killer_whale.webp",
+     "640x427-long-finned-pilot-whale.webp",
+     "melon.webp",
+     "minke-whale.webp",
+     "pantropical-spotted-dolphin.webp",
+     "pygmy-killer-whale.webp",
+     "rough-toothed-dolphin.webp",
+     "sei.webp",
+     "Whale_Short-Finned_Pilot-markedDW.png", ## Background
+     "640x427-southern-right-whale.jpg", ## background
+     "spinner.webp",
+     "pantropical-spotted-dolphin.webp", ## duplicate also used for
+     "640x427-atlantic-white-sided-dolphin.jpg", ## background
+ ]
+
+ WHALE_REFERENCES = [
+     "https://www.fisheries.noaa.gov/species/beluga-whale",
+     "https://www.fisheries.noaa.gov/species/blue-whale",
+     "https://www.fisheries.noaa.gov/species/common-bottlenose-dolphin",
+     "https://www.fisheries.noaa.gov/species/brydes-whale",
+     "https://en.wikipedia.org/wiki/Commerson's_dolphin",
+     #"commersons_dolphin - reference missing - classification to be verified", ## class matching to be verified
+     "https://www.fisheries.noaa.gov/species/short-beaked-common-dolphin",
+     "https://www.fisheries.noaa.gov/species/cuviers-beaked-whale",
+     "https://en.wikipedia.org/wiki/Dusky_dolphin",
+     #"dusky_dolphin - reference missing - classification to be verified", ## class matching to be verified
+     "https://www.fisheries.noaa.gov/species/false-killer-whale",
+     "https://www.fisheries.noaa.gov/species/fin-whale",
+     "https://www.fisheries.noaa.gov/species/frasers-dolphin",
+     #"frasiers_dolphin - reference missing - classification to be verified", ## class matching to be verified
+     "https://www.fisheries.noaa.gov/species/gray-whale",
+     "https://www.fisheries.noaa.gov/species/humpback-whale",
+     "https://www.fisheries.noaa.gov/species/killer-whale",
+     "https://www.fisheries.noaa.gov/species/long-finned-pilot-whale",
+     "https://www.fisheries.noaa.gov/species/melon-headed-whale",
+     "https://www.fisheries.noaa.gov/species/minke-whale",
+     "https://www.fisheries.noaa.gov/species/pantropical-spotted-dolphin",
+     "https://www.fisheries.noaa.gov/species/pygmy-killer-whale",
+     "https://www.fisheries.noaa.gov/species/rough-toothed-dolphin",
+     "https://www.fisheries.noaa.gov/species/sei-whale",
+     "https://www.fisheries.noaa.gov/species/short-finned-pilot-whale",
+     "https://www.fisheries.noaa.gov/species/southern-right-whale",
+     "https://www.fisheries.noaa.gov/species/spinner-dolphin",
+     "https://www.fisheries.noaa.gov/species/pantropical-spotted-dolphin",
+     "https://www.fisheries.noaa.gov/species/atlantic-white-sided-dolphin",
+ ]
+
+ # Create a dataframe
+ df_whale_img_ref = pd.DataFrame(
+     {
+         "WHALE_CLASSES": WHALE_CLASSES,
+         "WHALE_IMAGES": WHALE_IMAGES,
+         "WHALE_REFERENCES": WHALE_REFERENCES,
+     }
+ ).set_index("WHALE_CLASSES")
+
+ def format_whale_name(whale_class:str):
+     whale_name = whale_class.replace("_", " ").title()
+     return whale_name
+
+
+ def display_whale(whale_classes:List[str], i:int, viewcontainer=None):
+     """
+     Display whale image and reference to the provided viewcontainer.
+
+     Args:
+         whale_classes (List[str]): A list of whale class names.
+         i (int): The index of the whale class to display.
+         viewcontainer: The container to display the whale information. If
+             not provided, use the current streamlit context (works via
+             'with <container>' syntax)
+     Returns:
+         None
+
+     TODO: how to find the object type of viewcontainer? they are just "deltagenerators" but
+     we want the result of the generator. In any case, it works ok with either call signature.
+     """
+     import streamlit as st
+     if viewcontainer is None:
+         viewcontainer = st
+
+     # validate the input i should be within the range of the whale_classes
+     if i >= len(whale_classes):
+         raise ValueError(f"Index {i} is out of range. The whale_classes list has only {len(whale_classes)} elements.")
+
+     # validate the existence of the whale class in the dataframe as a row key
+     if whale_classes[i] not in df_whale_img_ref.index:
+         raise ValueError(f"Whale class {whale_classes[i]} not found in the dataframe.")
+
+
+     viewcontainer.markdown(
+         "### :whale: #" + str(i + 1) + ": " + format_whale_name(whale_classes[i])
+     )
+     image = Image.open("images/references/" + df_whale_img_ref.loc[whale_classes[i], "WHALE_IMAGES"])
+
+     viewcontainer.image(image, caption=df_whale_img_ref.loc[whale_classes[i], "WHALE_REFERENCES"])
+     # link st.markdown(f"[{df.loc[whale_classes[i], 'WHALE_REFERENCES']}]({df.loc[whale_classes[i], 'WHALE_REFERENCES']})")
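A minimal sketch of calling `display_whale` from a Streamlit page, for example to show a classifier's top predictions in tabs. Only `display_whale` and its signature come from the file above; the `predictions` list is a made-up placeholder:

import streamlit as st
import whale_viewer as sw_wv

# hypothetical classifier output, ordered best first
predictions = ["bottlenose_dolphin", "common_dolphin", "spinner_dolphin"]

tabs = st.tabs([f"prediction #{n + 1}" for n in range(len(predictions))])
for n, tab in enumerate(tabs):
    # each tab acts as the viewcontainer argument
    sw_wv.display_whale(predictions, n, tab)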
git ADDED
File without changes
images/references/640x427-atlantic-white-sided-dolphin.jpg ADDED
images/references/640x427-long-finned-pilot-whale.webp ADDED
images/references/640x427-southern-right-whale.jpg ADDED
images/references/Humpback.webp ADDED
images/references/Whale_Short-Finned_Pilot-markedDW.png ADDED
images/references/beluga.webp ADDED