diff --git a/README.md b/README.md index 1f590a37ea99ebe0abac11fd9901dd0cac520980..c2295b7b24cd2da31e56bd5abf4a2abdc32159ec 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ colorTo: red sdk: streamlit sdk_version: 1.39.0 python_version: "3.10" -app_file: call_models/entry_and_hotdog.py +app_file: src/main.py pinned: false license: apache-2.0 short_description: 'SDSC Hackathon - Project 10. ' diff --git a/app.py b/app.py deleted file mode 120000 index da3a856d4d62e4ab8a59611895876522d77a2431..0000000000000000000000000000000000000000 --- a/app.py +++ /dev/null @@ -1 +0,0 @@ -call_models/entry_and_hotdog.py \ No newline at end of file diff --git a/dev/call_hf_batch.py b/dev/call_hf_batch.py new file mode 100644 index 0000000000000000000000000000000000000000..cff687710f1c8c302dc2a94d3bf822f86954314b --- /dev/null +++ b/dev/call_hf_batch.py @@ -0,0 +1,94 @@ +import os +from huggingface_hub import HfApi +import cv2 +from pathlib import Path +import pandas as pd + +from transformers import pipeline +from transformers import AutoModelForImageClassification +import time + +''' +how to use this script: +1. get data from the kaggle competition, including images and the train.csv file +edit the "base" variable, assuming the following layout + +ceteans/ +├── images +│   ├── 00021adfb725ed.jpg +│   ├── 000562241d384d.jpg +│   ├── ... +└── train.csv + +2. 
inspect the df_results dataframe to see how the model is performing + + +''' +# setup for the ML model on huggingface (our wrapper) +os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python" +rev = 'main' + +# load the model +cetacean_classifier = AutoModelForImageClassification.from_pretrained( + "Saving-Willy/cetacean-classifier", + revision=rev, + trust_remote_code=True) + +# get ready to load images +base = Path('~/Documents/ceteans/').expanduser() +df = pd.read_csv(base / 'train.csv') + +i_max = 100 # put a limit on the number of images to classify in this test (or None) + +# for each file in the folder base/images, 1/ load image, 2/ classify, 3/ compare against the relevant row in df +# also keep track of the time it takes to classify each image + + +classifications = [] + +img_pth = base / 'images' +img_files = list(img_pth.glob('*.jpg')) + + +for i, img_file in enumerate(img_files): + # lets check we can get the right target. + img_id = img_file.name # includes .jpg + target = df.loc[df['image'] == img_id, 'species'].item() + #print(img_id, target) + + start_time = time.time() + image = cv2.imread(str(img_file)) + load_time = time.time() - start_time + + start_time = time.time() + out = cetacean_classifier(image) # get top 3 matches + classify_time = time.time() - start_time + + whale_prediction1 = out['predictions'][0] + + # comparison + ok = whale_prediction1 == target + any = target in [x for x in out['predictions']] + row = [img_id, target, ok, any, load_time, classify_time] + list(out['predictions']) + + print(i, row) + + classifications.append(row) + + if i_max is not None and i >= i_max: + break + + +df_results = pd.DataFrame(classifications, columns=['img_id', 'target', 'ok', 'any', 'load_time', 'classify_time'] + [f'pred_{i}' for i in range(3)]) + +# print out a few summary stats +# mean time to load and classify (formatted 3dp), +- std dev (formatted to 2dp), +print(f"Mean load time: {df_results['load_time'].mean():.3f} +- 
{df_results['load_time'].std():.2f} s") +print(f"Mean classify time: {df_results['classify_time'].mean():.3f} +- {df_results['classify_time'].std():.2f} s") + +# accuracy: count of ok / count of any +print(f"Accuracy: correct with top prediction: {df_results['ok'].sum()} | any of top 3 correct: {df_results['any'].sum():.3f} (of total {df_results.shape[0]})") + +# diversity: is the model just predicting one class for everything it sees? +print("Which classes are predicted?") +print(df_results.pred_0.value_counts()) diff --git a/docs/dev_notes.md b/docs/dev_notes.md new file mode 100644 index 0000000000000000000000000000000000000000..7c57bd1e10c5cda7434b87ac00428333f10239a3 --- /dev/null +++ b/docs/dev_notes.md @@ -0,0 +1,40 @@ +# How to run the UI + +We set this up so it is hosted as a huggingface space. Each commit to `main` triggers a push and a rebuild on their servers. + +For local testing, assuming you have all the required packages installed in a +conda env or virtualenv, and that env is activated: + +``` +cd src +streamlit run main.py +``` +Then use a web browser to view the site indiciated, by default: http://localhost:8501 + +# How to build and view docs locally + +We have a CI action to presesnt the docs on github.io. +To validate locally, you need the deps listed in `requirements.txt` installed. + +Run +``` +mkdocs serve +``` +And navigate to the wish server running locally, by default: http://127.0.0.1:8888/ + +This automatically watches for changes in the markdown files, but if you edit the +something else like the docstrings in py files, triggering a rebuild in another terminal +refreshes the site, without having to quit and restart the server. 
+``` +mkdocs build -c +``` + + + +# Set up a venv + +(standard stuff) + +# Set up a conda env + +(Standard stuff) \ No newline at end of file diff --git a/docs/fix_tabrender.md b/docs/fix_tabrender.md new file mode 100644 index 0000000000000000000000000000000000000000..5fc31820051b44e3f9acc819551c995a0f388cf2 --- /dev/null +++ b/docs/fix_tabrender.md @@ -0,0 +1,5 @@ +A js fix for certain UI elements, including maps, getting rendered into a +zero-sized frame by default. Here we resize it so it is visible once the tab is +clicked and no further interaction is required to see it. + +::: src.fix_tabrender diff --git a/docs/input_handling.md b/docs/input_handling.md new file mode 100644 index 0000000000000000000000000000000000000000..79f96e94e9ac4e81fce0300315883ecf74fceb85 --- /dev/null +++ b/docs/input_handling.md @@ -0,0 +1,8 @@ +This module focuses on image and metadata entry: + +- UI elements to upload an image and populate the metadata (or edit the + auto-discovered metadata) +- a container class for an observation + + +::: src.input_handling \ No newline at end of file diff --git a/docs/main.md b/docs/main.md new file mode 100644 index 0000000000000000000000000000000000000000..67d3c4286e6980ce933d3db324413d2d92b291b2 --- /dev/null +++ b/docs/main.md @@ -0,0 +1,10 @@ +# Main entry point + +This module sets up the streamlit UI frontend, +as well as logger and session state elements in the backend. + +The session state is used to retain values from one interaction to the next, since the streamlit execution model is to re-run the entire script top-to-bottom upon each user interaction (e.g. click). +See streamlit [docs](https://docs.streamlit.io/develop/api-reference/caching-and-state/st.session_state). 
+ + +::: src.entry_and_hotdog \ No newline at end of file diff --git a/docs/obs_map.md b/docs/obs_map.md new file mode 100644 index 0000000000000000000000000000000000000000..77557a275f458029fa9a1a29edb72d6f6a10d2c7 --- /dev/null +++ b/docs/obs_map.md @@ -0,0 +1,7 @@ +This module provides rendering of observations on an interactive map, with a variety of tilesets available. + +Note: OSM, ESRI, and CartoDB map tiles are served without authentication/tokens, +and so render correctly on the huggingface deployment. The Stamen tiles render +on localhost but require a token to present on a 3rd-party site. + +::: src.obs_map \ No newline at end of file diff --git a/docs/st_logs.md b/docs/st_logs.md new file mode 100644 index 0000000000000000000000000000000000000000..b3f07935aa8c8561933b3d9a52af6666d7f9e555 --- /dev/null +++ b/docs/st_logs.md @@ -0,0 +1,7 @@ +This module provides utilities to incorporate a standard python logger within streamlit. + + +# Streamlit log handler + +::: src.st_logs + diff --git a/docs/whale_gallery.md b/docs/whale_gallery.md new file mode 100644 index 0000000000000000000000000000000000000000..9570c4abff94def7429ee10c9d0a3b9b58d4cb31 --- /dev/null +++ b/docs/whale_gallery.md @@ -0,0 +1,4 @@ +This module provides a gallery of the whales and dolphins that the classifier +is trained on. It diplays the images and links to further info on the species. + +::: src.whale_gallery diff --git a/docs/whale_viewer.md b/docs/whale_viewer.md new file mode 100644 index 0000000000000000000000000000000000000000..cade0b0c349ae9437c0d606339e3667460cf5166 --- /dev/null +++ b/docs/whale_viewer.md @@ -0,0 +1,4 @@ +This module provides a streamlit rendering for the whales and dolphins that the classifier is aware of, and also holds the +metadata for them (images, class names that the classifier uses, and URLS for further information about each species). 
+ +::: src.whale_viewer diff --git a/images/references/640x427-atlantic-white-sided-dolphin.jpg b/images/references/640x427-atlantic-white-sided-dolphin.jpg deleted file mode 100644 index 490a63534b6d9323494bc67db7b68ff1cdc16bcc..0000000000000000000000000000000000000000 --- a/images/references/640x427-atlantic-white-sided-dolphin.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ba6a9d014030f57a16f8c9a2a1fd757367ce384d6c3e00d23fa78d34ae29ea4b -size 20879 diff --git a/images/references/640x427-long-finned-pilot-whale.webp b/images/references/640x427-long-finned-pilot-whale.webp deleted file mode 100644 index 69861e8e9527b0f64f57cb26a57872b23ec6c5df..0000000000000000000000000000000000000000 --- a/images/references/640x427-long-finned-pilot-whale.webp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c4aa414e4412adc13101518a5d6d2b5829d37d2881e52813081c40dd664b2525 -size 107070 diff --git a/images/references/640x427-southern-right-whale.jpg b/images/references/640x427-southern-right-whale.jpg deleted file mode 100644 index 79e52bc7601ac4426453a85756aff5cf354d8d24..0000000000000000000000000000000000000000 --- a/images/references/640x427-southern-right-whale.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f49f60d729b2dd810a3ee69171b6beee8c6baaa8df26926c6d9df82bd620844e -size 20890 diff --git a/images/references/Humpback.webp b/images/references/Humpback.webp deleted file mode 100644 index c6f4110b08ab024c6525e338c71669fee4bbc92d..0000000000000000000000000000000000000000 --- a/images/references/Humpback.webp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5660b04a32b5f154e9e1e7d74fb85a498f42b54e3503266738687ad7128731ee -size 168673 diff --git a/images/references/Whale_Short-Finned_Pilot-markedDW.png b/images/references/Whale_Short-Finned_Pilot-markedDW.png deleted file mode 100644 index 
c53b1d3a98f910df6d68895d840606be50d293fa..0000000000000000000000000000000000000000 --- a/images/references/Whale_Short-Finned_Pilot-markedDW.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:afb8e1b9c39761f1a4e4252fe47c6362c155fdec846c070af7770bc037f870a8 -size 131790 diff --git a/images/references/beluga.webp b/images/references/beluga.webp deleted file mode 100644 index 044078c8023db4d4f60447752cee6cc7678bf3c9..0000000000000000000000000000000000000000 --- a/images/references/beluga.webp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a806dbc284f56e9821ea5d92b9b2f29757def579d03eb46abebaa568d3634748 -size 106173 diff --git a/images/references/blue-whale.webp b/images/references/blue-whale.webp deleted file mode 100644 index 665219cfd2bd76b4490c517b4caedfa3f050e852..0000000000000000000000000000000000000000 --- a/images/references/blue-whale.webp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7c3c8fad25250868f01a96421cf2aa10d9a06a297f52bf72d2489d084465c633 -size 59922 diff --git a/images/references/bottlenose_dolphin.webp b/images/references/bottlenose_dolphin.webp deleted file mode 100644 index 18c5b01f75abd49b7caeb85f0ed672d5464e2c33..0000000000000000000000000000000000000000 --- a/images/references/bottlenose_dolphin.webp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d01bdc2317ea829d9aca7e947dd0c17548288ef4111780a508b0f6c4e1640278 -size 134786 diff --git a/images/references/brydes.webp b/images/references/brydes.webp deleted file mode 100644 index bea63ad753c887e9fd7ad55fd191005eeb9e6428..0000000000000000000000000000000000000000 --- a/images/references/brydes.webp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e0fd3cc26bec1ac00ccf5b35232844f0e88584f8ff45db109a60d45c776273ea -size 125904 diff --git a/images/references/common_dolphin.webp 
b/images/references/common_dolphin.webp deleted file mode 100644 index 2bfc43ccf286ad5b6b74a044d5c25940db66dbcc..0000000000000000000000000000000000000000 --- a/images/references/common_dolphin.webp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:86972fe463ac13428cfc45ef6a0c62a5ea6000ee5acb8c68424db1f26a3faad0 -size 121328 diff --git a/images/references/cuviers_beaked_whale.webp b/images/references/cuviers_beaked_whale.webp deleted file mode 100644 index 2ea1bc5dc54778471b47e2c6dda4a25bb2622db0..0000000000000000000000000000000000000000 --- a/images/references/cuviers_beaked_whale.webp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9315fb8c1907a425dde8793268ef743c660b13b5cc53c77c70a7953df74698f6 -size 128783 diff --git a/images/references/false-killer-whale.webp b/images/references/false-killer-whale.webp deleted file mode 100644 index 5027ad93dd9b76fc0a44909fc1d45c339702c77f..0000000000000000000000000000000000000000 --- a/images/references/false-killer-whale.webp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:65dfc2aefd7f4a16ed30ba0bc93b06324cb67efad4dd21fc5b82b67db27af443 -size 113622 diff --git a/images/references/fin-whale.webp b/images/references/fin-whale.webp deleted file mode 100644 index 8d6f75493745ca2bf6fe31e9ad3bf96d23222899..0000000000000000000000000000000000000000 --- a/images/references/fin-whale.webp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bbc78f05705020c12db3063eb63dc2a0ca9f217088ddf8ab434aebf2b4796e49 -size 17664 diff --git a/images/references/gray-whale.webp b/images/references/gray-whale.webp deleted file mode 100644 index 45c41781120d9627a0b6437781249f3a964064e0..0000000000000000000000000000000000000000 --- a/images/references/gray-whale.webp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid 
sha256:19e42e897fa9fec312d968b209818253c75f6b3b3130c44225dbc95dc724c048 -size 136507 diff --git a/images/references/killer_whale.webp b/images/references/killer_whale.webp deleted file mode 100644 index c1a6b916f5138f54db5cd3b002b357aa7d9acc2a..0000000000000000000000000000000000000000 --- a/images/references/killer_whale.webp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9df433f88111f0fd967e937f8c03d98a97aefe15eb9bc319ed5a7580380ff88e -size 84980 diff --git a/images/references/melon.webp b/images/references/melon.webp deleted file mode 100644 index 55c9d22381e12edb8f147eff9607819be78874b1..0000000000000000000000000000000000000000 --- a/images/references/melon.webp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9468f1a324feb02faf1709d733a9353aadf27a4a609c3e8d025125836fae3c42 -size 105644 diff --git a/images/references/minke-whale.webp b/images/references/minke-whale.webp deleted file mode 100644 index aadf51110605ccc41380fbaebcb8eeceed0e6fb1..0000000000000000000000000000000000000000 --- a/images/references/minke-whale.webp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e429d1835e9cb370a8ba9791be16bfbcc5706dcc0e5f4e0c75c792b5e7a88095 -size 119527 diff --git a/images/references/pantropical-spotted-dolphin.webp b/images/references/pantropical-spotted-dolphin.webp deleted file mode 100644 index 4360c5da1a18fb63d42174f1f5bbd76776d1b3ac..0000000000000000000000000000000000000000 --- a/images/references/pantropical-spotted-dolphin.webp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2539bf6b2cd45a7d09527c9c6d50f1eb63e8c2296b6b467d5058433a2f405c7a -size 136974 diff --git a/images/references/pygmy-killer-whale.webp b/images/references/pygmy-killer-whale.webp deleted file mode 100644 index 9269df2b2952d7996ecb8b981403979ac642cf2f..0000000000000000000000000000000000000000 --- a/images/references/pygmy-killer-whale.webp +++ 
/dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:84c7cd4b7aa1e943b0281061208062807297b538badb93ffa86bb7b59b650357 -size 19544 diff --git a/images/references/rough-toothed-dolphin.webp b/images/references/rough-toothed-dolphin.webp deleted file mode 100644 index b6069d3b0cd55b80d68b4eb7fb3efe5bc8c6703b..0000000000000000000000000000000000000000 --- a/images/references/rough-toothed-dolphin.webp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e26ec510c284ec27c25e8ff23128244d4ef952c07ef8b816e4d79455c61e7098 -size 159632 diff --git a/images/references/sei.webp b/images/references/sei.webp deleted file mode 100644 index 1b05ea99500dafd6894fa42eca044b49d2ab2b25..0000000000000000000000000000000000000000 --- a/images/references/sei.webp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:13099859ac1ac3fa45c58ecb3ea19a841d4b3b654592e631202d24271ae40d43 -size 105313 diff --git a/images/references/spinner.webp b/images/references/spinner.webp deleted file mode 100644 index 7666ff4f9877e1b48344bbed4594153e4f0b2a04..0000000000000000000000000000000000000000 --- a/images/references/spinner.webp +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:dc4ef6d4401f7342ef69eeaeaf4e62c098805196b00b3e814545befdd01e1b17 -size 114030 diff --git a/mkdocs.yaml b/mkdocs.yaml index 6bfffde3044ff3acbf4f1502645dd3eee2c041db..48549cfd7694f419913e0bdddb24b5e96cf98831 100644 --- a/mkdocs.yaml +++ b/mkdocs.yaml @@ -16,10 +16,27 @@ plugins: - mkdocstrings: default_handler: python handlers: - python: - paths: [src] + python: + paths: [.] nav: - README: index.md - - App: app.md \ No newline at end of file + #- Quickstart: + #- Installation: installation.md + #- Usage: usage.md + - API: + - Main app: main.md + - Modules: + - Data entry handling: input_handling.md + - Map of observations: obs_map.md + - Whale gallery: whale_gallery.md + - Whale 
viewer: whale_viewer.md + - Logging: st_logs.md + - Tab-rendering fix (js): fix_tabrender.md + + - Development clutter: + - Demo app: app.md + + - How to contribute: + - Dev Notes: dev_notes.md diff --git a/call_models/click_map.py b/snippets/click_map.py similarity index 100% rename from call_models/click_map.py rename to snippets/click_map.py diff --git a/call_models/d_entry.py b/snippets/d_entry.py similarity index 100% rename from call_models/d_entry.py rename to snippets/d_entry.py diff --git a/snippets/extract_meta.py b/snippets/extract_meta.py index 3eca8e806152ec96c209b9d0136655c7a8c8dd93..36084641d2173088c0bcb8afd224b0ac4b32ab26 100755 --- a/snippets/extract_meta.py +++ b/snippets/extract_meta.py @@ -38,7 +38,7 @@ def extract_gps(image_path): return (lat, lon) # Example usage -image_path = '../call_models/imgs/cakes.jpg' +image_path = 'imgs/cakes.jpg' # this file has good exif data, inc GPS, timestamps etc. datetime_info = extract_datetime(image_path) gps_info = extract_gps(image_path) print(f'Date and Time: {datetime_info}') diff --git a/call_models/hotdogs.py b/snippets/hotdogs.py similarity index 100% rename from call_models/hotdogs.py rename to snippets/hotdogs.py diff --git a/call_models/imgs/cakes.jpg b/snippets/imgs/cakes.jpg similarity index 100% rename from call_models/imgs/cakes.jpg rename to snippets/imgs/cakes.jpg diff --git a/call_models/test_upload.py b/snippets/test_upload.py similarity index 100% rename from call_models/test_upload.py rename to snippets/test_upload.py diff --git a/call_models/alps_map.py b/src/alps_map.py similarity index 100% rename from call_models/alps_map.py rename to src/alps_map.py diff --git a/call_models/entry_and_hotdog.py b/src/entry_and_hotdog.py similarity index 90% rename from call_models/entry_and_hotdog.py rename to src/entry_and_hotdog.py index 6535f7b28399def17488e5667eb9bb232f3e2e75..e4e47a1c6e8e7605527383b26f954f849099f294 100644 --- a/call_models/entry_and_hotdog.py +++ b/src/entry_and_hotdog.py @@ -1,27 
+1,27 @@ -import datetime -import os +#import datetime import json import logging +import os import tempfile + import pandas as pd import streamlit as st +from streamlit.delta_generator import DeltaGenerator # for type hinting import folium from streamlit_folium import st_folium from huggingface_hub import HfApi -#from datasets import load_dataset -#from fix_tabrender import js_show_zeroheight_iframe +from transformers import pipeline +from transformers import AutoModelForImageClassification -import whale_viewer as sw_wv -import input_handling as sw_inp import alps_map as sw_am -import whale_gallery as sw_wg +import input_handling as sw_inp import obs_map as sw_map import st_logs as sw_logs +import whale_gallery as sw_wg +import whale_viewer as sw_wv -from transformers import pipeline -from transformers import AutoModelForImageClassification # setup for the ML model on huggingface (our wrapper) os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python" @@ -64,14 +64,29 @@ if "tab_log" not in st.session_state: st.session_state.tab_log = None -def metadata2md(): +def metadata2md() -> str: + """Get metadata from cache and return as markdown-formatted key-value list + + Returns: + str: Markdown-formatted key-value list of metadata + + """ markdown_str = "\n" for key, value in st.session_state.full_data.items(): markdown_str += f"- **{key}**: {value}\n" return markdown_str -def push_observation(tab_log=None): +def push_observation(tab_log:DeltaGenerator=None): + """ + Push the observation to the Hugging Face dataset + + Args: + tab_log (streamlit.container): The container to log messages to. If not provided, + log messages are in any case written to the global logger (TODO: test - didn't + push any data since generating the logger) + + """ # we get the data from session state: 1 is the dict 2 is the image. 
# first, lets do an info display (popup) metadata_str = json.dumps(st.session_state.full_data) @@ -105,7 +120,26 @@ def push_observation(tab_log=None): st.info(msg) -if __name__ == "__main__": + +def main() -> None: + """ + Main entry point to set up the streamlit UI and run the application. + + The organisation is as follows: + + 1. data input (a new observation) is handled in the sidebar + 2. the rest of the interface is organised in tabs: + + - cetean classifier + - hotdog classifier + - map to present the obersvations + - table of recent log entries + - gallery of whale images + + The majority of the tabs are instantiated from modules. Currently the two + classifiers are still in-line here. + + """ g_logger.info("App started.") g_logger.warning(f"[D] Streamlit version: {st.__version__}. Python version: {os.sys.version}") @@ -306,3 +340,6 @@ if __name__ == "__main__": tab_hotdogs.write(f"Session Data: {json.dumps(st.session_state.full_data)}") + +if __name__ == "__main__": + main() diff --git a/call_models/fix_tabrender.py b/src/fix_tabrender.py similarity index 62% rename from call_models/fix_tabrender.py rename to src/fix_tabrender.py index b865231672d5696c580505b2b64c942ae44d5d06..e4b1f7f4948cdfc1b8b79d879e3eb7a26b6c79a2 100644 --- a/call_models/fix_tabrender.py +++ b/src/fix_tabrender.py @@ -10,6 +10,21 @@ import streamlit as st import uuid, html # workaround for streamlit making tabs height 0 when not active, breaks map def inject_iframe_js_code(source: str) -> None: + """ + Injects JavaScript code into a Streamlit app using an iframe. + + This function creates a hidden div with a unique ID and injects the provided + JavaScript code into the parent document using an iframe. The iframe's source + is a JavaScript URL that creates a script element, sets its type to 'text/javascript', + and assigns the provided JavaScript code to its text content. The script element + is then appended to the hidden div in the parent document. 
+ + Args: + source (str): The JavaScript code to be injected. + + Returns: + None + """ div_id = uuid.uuid4() st.markdown( @@ -28,7 +43,28 @@ def inject_iframe_js_code(source: str) -> None: unsafe_allow_html=True, ) -def js_show_zeroheight_iframe(component_iframe_title: str, height: str = "auto"): +def js_show_zeroheight_iframe(component_iframe_title: str, height: str = "auto") -> None: + """ + Injects JavaScript code to dynamically set iframe height (located by title) + + This function generates and injects JavaScript code that searches for + iframes with the given title and sets their height to the specified value. + The script attempts to find the iframes up to a maximum number of attempts, + and also listens for user interactions to reattempt setting the height. + + See https://github.com/streamlit/streamlit/issues/7376 + + + Args: + component_iframe_title (str): The title attribute of the iframes to target. + height (str, optional): The height to set for the iframes. Defaults to "auto". + + Notes: + - The JavaScript code will attempt to find the iframes every 250 + milliseconds, up to a maximum of 20 attempts. + - If the iframes are found, their height will be set to the specified value. + - User interactions (e.g., click events) triggers a reattempt to set the height. 
+ """ source = f""" (function() {{ var attempts = 0; diff --git a/call_models/images/references/640x427-atlantic-white-sided-dolphin.jpg b/src/images/references/640x427-atlantic-white-sided-dolphin.jpg similarity index 100% rename from call_models/images/references/640x427-atlantic-white-sided-dolphin.jpg rename to src/images/references/640x427-atlantic-white-sided-dolphin.jpg diff --git a/call_models/images/references/640x427-long-finned-pilot-whale.webp b/src/images/references/640x427-long-finned-pilot-whale.webp similarity index 100% rename from call_models/images/references/640x427-long-finned-pilot-whale.webp rename to src/images/references/640x427-long-finned-pilot-whale.webp diff --git a/call_models/images/references/640x427-southern-right-whale.jpg b/src/images/references/640x427-southern-right-whale.jpg similarity index 100% rename from call_models/images/references/640x427-southern-right-whale.jpg rename to src/images/references/640x427-southern-right-whale.jpg diff --git a/call_models/images/references/Humpback.webp b/src/images/references/Humpback.webp similarity index 100% rename from call_models/images/references/Humpback.webp rename to src/images/references/Humpback.webp diff --git a/call_models/images/references/Whale_Short-Finned_Pilot-markedDW.png b/src/images/references/Whale_Short-Finned_Pilot-markedDW.png similarity index 100% rename from call_models/images/references/Whale_Short-Finned_Pilot-markedDW.png rename to src/images/references/Whale_Short-Finned_Pilot-markedDW.png diff --git a/call_models/images/references/beluga.webp b/src/images/references/beluga.webp similarity index 100% rename from call_models/images/references/beluga.webp rename to src/images/references/beluga.webp diff --git a/call_models/images/references/blue-whale.webp b/src/images/references/blue-whale.webp similarity index 100% rename from call_models/images/references/blue-whale.webp rename to src/images/references/blue-whale.webp diff --git 
a/call_models/images/references/bottlenose_dolphin.webp b/src/images/references/bottlenose_dolphin.webp similarity index 100% rename from call_models/images/references/bottlenose_dolphin.webp rename to src/images/references/bottlenose_dolphin.webp diff --git a/call_models/images/references/brydes.webp b/src/images/references/brydes.webp similarity index 100% rename from call_models/images/references/brydes.webp rename to src/images/references/brydes.webp diff --git a/call_models/images/references/common_dolphin.webp b/src/images/references/common_dolphin.webp similarity index 100% rename from call_models/images/references/common_dolphin.webp rename to src/images/references/common_dolphin.webp diff --git a/call_models/images/references/cuviers_beaked_whale.webp b/src/images/references/cuviers_beaked_whale.webp similarity index 100% rename from call_models/images/references/cuviers_beaked_whale.webp rename to src/images/references/cuviers_beaked_whale.webp diff --git a/call_models/images/references/false-killer-whale.webp b/src/images/references/false-killer-whale.webp similarity index 100% rename from call_models/images/references/false-killer-whale.webp rename to src/images/references/false-killer-whale.webp diff --git a/call_models/images/references/fin-whale.webp b/src/images/references/fin-whale.webp similarity index 100% rename from call_models/images/references/fin-whale.webp rename to src/images/references/fin-whale.webp diff --git a/call_models/images/references/gray-whale.webp b/src/images/references/gray-whale.webp similarity index 100% rename from call_models/images/references/gray-whale.webp rename to src/images/references/gray-whale.webp diff --git a/call_models/images/references/killer_whale.webp b/src/images/references/killer_whale.webp similarity index 100% rename from call_models/images/references/killer_whale.webp rename to src/images/references/killer_whale.webp diff --git a/call_models/images/references/melon.webp 
b/src/images/references/melon.webp similarity index 100% rename from call_models/images/references/melon.webp rename to src/images/references/melon.webp diff --git a/call_models/images/references/minke-whale.webp b/src/images/references/minke-whale.webp similarity index 100% rename from call_models/images/references/minke-whale.webp rename to src/images/references/minke-whale.webp diff --git a/call_models/images/references/pantropical-spotted-dolphin.webp b/src/images/references/pantropical-spotted-dolphin.webp similarity index 100% rename from call_models/images/references/pantropical-spotted-dolphin.webp rename to src/images/references/pantropical-spotted-dolphin.webp diff --git a/call_models/images/references/pygmy-killer-whale.webp b/src/images/references/pygmy-killer-whale.webp similarity index 100% rename from call_models/images/references/pygmy-killer-whale.webp rename to src/images/references/pygmy-killer-whale.webp diff --git a/call_models/images/references/rough-toothed-dolphin.webp b/src/images/references/rough-toothed-dolphin.webp similarity index 100% rename from call_models/images/references/rough-toothed-dolphin.webp rename to src/images/references/rough-toothed-dolphin.webp diff --git a/call_models/images/references/sei.webp b/src/images/references/sei.webp similarity index 100% rename from call_models/images/references/sei.webp rename to src/images/references/sei.webp diff --git a/call_models/images/references/spinner.webp b/src/images/references/spinner.webp similarity index 100% rename from call_models/images/references/spinner.webp rename to src/images/references/spinner.webp diff --git a/call_models/input_handling.py b/src/input_handling.py similarity index 68% rename from call_models/input_handling.py rename to src/input_handling.py index c52e23f6c635d5b7c456b52be0367707157a464b..497ee8f53700aa4dbe232835d9662766da5f18a1 100644 --- a/call_models/input_handling.py +++ b/src/input_handling.py @@ -6,6 +6,9 @@ import hashlib import logging import 
streamlit as st +from streamlit.runtime.uploaded_file_manager import UploadedFile # for type hinting +from streamlit.delta_generator import DeltaGenerator + import cv2 import numpy as np @@ -25,6 +28,47 @@ allowed_image_types = ['jpg', 'jpeg', 'png', 'webp'] # autogenerated class to hold the input data class InputObservation: + """ + A class to hold an input observation and associated metadata + + Attributes: + image (Any): + The image associated with the observation. + latitude (float): + The latitude where the observation was made. + longitude (float): + The longitude where the observation was made. + author_email (str): + The email of the author of the observation. + date (str): + The date when the observation was made. + time (str): + The time when the observation was made. + date_option (str): + Additional date option for the observation. + time_option (str): + Additional time option for the observation. + uploaded_filename (Any): + The uploaded filename associated with the observation. + + Methods: + __str__(): + Returns a string representation of the observation. + __repr__(): + Returns a string representation of the observation. + __eq__(other): + Checks if two observations are equal. + __ne__(other): + Checks if two observations are not equal. + __hash__(): + Returns the hash of the observation. + to_dict(): + Converts the observation to a dictionary. + from_dict(data): + Creates an observation from a dictionary. + from_input(input): + Creates an observation from another input observation. 
+ """ def __init__(self, image=None, latitude=None, longitude=None, author_email=None, date=None, time=None, date_option=None, time_option=None, uploaded_filename=None): self.image = image self.latitude = latitude @@ -84,19 +128,49 @@ class InputObservation: def from_dict(data): return InputObservation(data["image"], data["latitude"], data["longitude"], data["author_email"], data["date"], data["time"], data["date_option"], data["time_option"], data["uploaded_filename"]) -# define function to validate number, allowing signed float + def is_valid_number(number:str) -> bool: + """ + Check if the given string is a valid number (int or float, sign ok) + + Args: + number (str): The string to be checked. + + Returns: + bool: True if the string is a valid number, False otherwise. + """ pattern = r'^[-+]?[0-9]*\.?[0-9]+$' return re.match(pattern, number) is not None # Function to validate email address -def is_valid_email(email): +def is_valid_email(email:str) -> bool: + """ + Validates if the provided email address is in a correct format. + + Args: + email (str): The email address to validate. + + Returns: + bool: True if the email address is valid, False otherwise. + """ pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$' return re.match(pattern, email) is not None # Function to extract date and time from image metadata -def get_image_datetime(image_file): +def get_image_datetime(image_file: UploadedFile) -> str | None: + """ + Extracts the original date and time from the EXIF metadata of an uploaded image file. + + Args: + image_file (UploadedFile): The uploaded image file from which to extract the date and time. + + Returns: + str: The original date and time as a string if available, otherwise None. + + Raises: + Warning: If the date and time could not be extracted from the image metadata. 
+ """ try: image = Image.open(image_file) exif_data = image._getexif() @@ -104,8 +178,9 @@ def get_image_datetime(image_file): for tag, value in exif_data.items(): if ExifTags.TAGS.get(tag) == 'DateTimeOriginal': return value - except Exception as e: - st.warning("Could not extract date from image metadata.") + except Exception as e: # FIXME: what types of exception? + st.warning(f"Could not extract date from image metadata. (file: {image_file.name})") + # TODO: add to logger return None @@ -120,7 +195,23 @@ spoof_metadata = { } #def display_whale(whale_classes:List[str], i:int, viewcontainer=None): -def setup_input(viewcontainer: st.delta_generator.DeltaGenerator=None, _allowed_image_types: list=None, ): +def setup_input( + viewcontainer: DeltaGenerator=None, + _allowed_image_types: list=None, ) -> InputObservation: + """ + Sets up the input interface for uploading an image and entering metadata. + + It provides input fields for an image upload, lat/lon, author email, and date-time. + In the ideal case, the image metadata will be used to populate location and datetime. + + Parameters: + viewcontainer (DeltaGenerator, optional): The Streamlit container to use for the input interface. Defaults to st.sidebar. + _allowed_image_types (list, optional): List of allowed image file types for upload. Defaults to allowed_image_types. + + Returns: + InputObservation: An object containing the uploaded image and entered metadata. 
+ + """ if viewcontainer is None: viewcontainer = st.sidebar diff --git a/call_models/obs_map.py b/src/obs_map.py similarity index 76% rename from call_models/obs_map.py rename to src/obs_map.py index 426ca68477f5987f6515fd2b25800922323c650f..d46c7a1ddb3e56dea5eea024c6c4b15bebca1011 100644 --- a/call_models/obs_map.py +++ b/src/obs_map.py @@ -62,7 +62,21 @@ _colors = [ whale2color = {k: v for k, v in zip(sw_wv.WHALE_CLASSES, _colors)} -def create_map(tile_name:str, location:Tuple, zoom_start: int = 7): +def create_map(tile_name:str, location:Tuple[float], zoom_start: int = 7) -> folium.Map: + """ + Create a folium map with the specified tile layer + + Parameters: + tile_name (str): The name of the tile layer to use. Options include: + 'Open Street Map', 'Esri Ocean', 'Esri Images', + 'Stamen Toner', 'Stamen Watercolor', + 'CartoDB Positron', 'CartoDB Dark_Matter'. + location (Tuple): Coordinates (lat, lon) of the map center, as floats. + zoom_start (int, optional): The initial zoom level for the map. Default is 7. + + Returns: + folium.Map: A folium Map object with the specified settings. + """ # https://xyzservices.readthedocs.io/en/stable/gallery.html # get teh attribtuions from here once we pick the 2-3-4 options # make esri ocean the default @@ -103,12 +117,27 @@ def create_map(tile_name:str, location:Tuple, zoom_start: int = 7): def present_obs_map(dataset_id:str = "Saving-Willy/Happywhale-kaggle", data_files:str = "data/train-00000-of-00001.parquet", - dbg_show_extra:bool = False): - ''' - render a map, with a selectable tileset, and show markers for each of the whale - observations + dbg_show_extra:bool = False) -> dict: + """ + Render map plus tile selector, with markers for whale observations - ''' + + This function loads whale observation data from a specified dataset and + file, creates a pandas DataFrame compliant with Folium/Streamlit maps, and + renders an interactive map with markers for each observation. 
The map + allows users to select a tileset, and displays markers with species-specific + colors. + + Args: + dataset_id (str): The ID of the dataset to load from Hugging Face. Default is "Saving-Willy/Happywhale-kaggle". + data_files (str): The path to the data file to load. Default is "data/train-00000-of-00001.parquet". + dbg_show_extra (bool): If True, add a few extra sample markers for visualization. Default is False. + + Returns: + dict: Selected data from the Folium/leaflet.js interactions in the browser. + + """ + # load/download data from huggingface dataset metadata = load_dataset(dataset_id, data_files=data_files) diff --git a/call_models/st_logs.py b/src/st_logs.py similarity index 51% rename from call_models/st_logs.py rename to src/st_logs.py index ae4d9401133aed5c005475c37b9bff0fb409c995..1df892f8e0e320028fbe830e86dd9788e3d200ec 100644 --- a/call_models/st_logs.py +++ b/src/st_logs.py @@ -1,3 +1,4 @@ +from typing import List import logging from datetime import datetime import re @@ -22,8 +23,35 @@ log_pattern = re.compile(_log_n_re + _log_date_re + _sep + _log_mod_re + _sep + class StreamlitLogHandler(logging.Handler): - # Initializes a custom log handler with a Streamlit container for displaying logs + """ + Custom Streamlit log handler to display logs in a Streamlit container + + A custom logging handler for Streamlit applications that displays log + messages in a Streamlit container. + + Attributes: + container (streamlit.DeltaGenerator): The Streamlit container where log messages will be displayed. + debug (bool): A flag to indicate whether to display debug messages. + ansi_escape (re.Pattern): A compiled regular expression to remove ANSI escape sequences from log messages. + log_area (streamlit.DeltaGenerator): An empty Streamlit container for log output. + buffer (collections.deque): A deque buffer to store log messages with a maximum length. + _n (int): A counter to keep track of the number of log messages seen. 
+ + Methods: + __init__(container, maxlen=15, debug=False): + Initializes the StreamlitLogHandler with a Streamlit container, buffer length, and debug flag. + n_elems(verb=False): + Returns a string with the total number of elements seen and the number of elements in the buffer. + If verb is True, returns a verbose string; otherwise, returns a concise string. + emit(record): + Processes a log record, formats it, appends it to the buffer, and displays it in the Streamlit container. + Strips ANSI escape sequences from the log message if present. + clear_logs(): + Clears the log messages from the Streamlit container and the buffer. + """ + # Initialize a custom log handler with a Streamlit container for displaying logs def __init__(self, container, maxlen:int=15, debug:bool=False): + #TODO: find the type for streamlit generic containers super().__init__() # Store the Streamlit container for log output self.container = container @@ -34,14 +62,28 @@ class StreamlitLogHandler(logging.Handler): self.buffer = deque(maxlen=maxlen) self._n = 0 - def n_elems(self, verb:bool=False): - ''' return a string with num elements seen and num elements in buffer ''' + def n_elems(self, verb:bool=False) -> str: + """ + Return a string with the number of elements seen and the number of elements in the buffer. + + Args: + verb (bool): If True, returns a verbose string. Defaults to False. + + Returns: + str: A string representing the total number of elements seen and the number of elements in the buffer. + """ if verb: return f"total: {self._n}|| in buffer:{len(self.buffer)}" return f"{self._n}||{len(self.buffer)}" - def emit(self, record): + def emit(self, record) -> None: + '''put the record into buffer so it gets displayed + + Args: + record (logging.LogRecord): The log record to process and display. 
+ + ''' self._n += 1 msg = f"[{self._n}]" + self.format(record) self.buffer.append(msg) @@ -49,13 +91,28 @@ class StreamlitLogHandler(logging.Handler): if self.debug: self.log_area.markdown(clean_msg) - def clear_logs(self): + def clear_logs(self) -> None: + """ + Clears the log area and buffer. + + This method empties the log area to remove any previous logs and clears the buffer to reset the log storage. + """ self.log_area.empty() # Clear previous logs self.buffer.clear() # Set up logging to capture all info level logs from the root logger @st.cache_resource -def setup_logging(level: int=logging.INFO, buffer_len:int=15): +def setup_logging(level:int=logging.INFO, buffer_len:int=15) -> StreamlitLogHandler: + """ + Set up logging for the application using Streamlit's container for log display. + + Args: + level (int): The logging level (e.g., logging.INFO, logging.DEBUG). Default is logging.INFO. + buffer_len (int): The maximum number of log messages to display in the Streamlit container. Default is 15. + + Returns: + StreamlitLogHandler: The handler that has been added to the root logger. + """ root_logger = logging.getLogger() # Get the root logger log_container = st.container() # Create a container within which we display logs handler = StreamlitLogHandler(log_container, maxlen=buffer_len) @@ -69,8 +126,23 @@ def setup_logging(level: int=logging.INFO, buffer_len:int=15): # st.session_state['handler'] = handler return handler -def parse_log_buffer(log_contents: deque) -> list: - ''' convert log buffer to a list of dictionaries ''' +def parse_log_buffer(log_contents: deque) -> List[dict]: + """ + Convert log buffer to a list of dictionaries for use with a streamlit datatable. + + Args: + log_contents (deque): A deque containing log lines as strings. + + Returns: + list: A list of dictionaries, each representing a parsed log entry with the following keys: + - 'timestamp' (datetime): The timestamp of the log entry. + - 'n' (str): The log entry number. 
+ - 'level' (str): The log level (e.g., INFO, ERROR). + - 'module' (str): The name of the module. + - 'func' (str): The name of the function. + - 'message' (str): The log message. + """ + j = 0 records = [] for line in log_contents: @@ -100,8 +172,9 @@ def parse_log_buffer(log_contents: deque) -> list: continue return records -def something(): - '''function to demo adding log entries''' +def demo_log_callback() -> None: + '''basic demo of adding log entries as a callback function''' + logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) logger.debug("debug message") @@ -121,7 +194,7 @@ if __name__ == "__main__": c1, c2 = st.columns([1, 3]) with c1: - button = st.button("do something", on_click=something) + button = st.button("do something", on_click=demo_log_callback) with c2: st.info(f"Length of records: {len(records)}") #tab = st.table(records) diff --git a/call_models/whale_gallery.py b/src/whale_gallery.py similarity index 83% rename from call_models/whale_gallery.py rename to src/whale_gallery.py index ef0ec8a80e4ab7d5e2d99522765227fed7faff9f..49e93f84abc6cec74bb10d5c50a91b49654dc4c7 100644 --- a/call_models/whale_gallery.py +++ b/src/whale_gallery.py @@ -3,11 +3,22 @@ import streamlit as st import whale_viewer as sw_wv -def render_whale_gallery(n_cols:int = 4): + +def render_whale_gallery(n_cols:int = 4) -> None: """ - A function to display a gallery of whale images in a grid + Renders a gallery of whale images + urls in a grid format using Streamlit. + + The function formats whale names, creates a grid layout for images, and + applies custom CSS styles Each image is displayed with a caption and a link + to a reference URL. + + + Parameters: + n_cols (int): Number of columns in the grid. Default is 4. 
+ """ - def format_whale_name(name): + def _format_whale_name(name:str) -> str: + '''clean up the whale name for display''' return name.replace("_", " ").capitalize() # make a grid of images, use some css to get more uniform @@ -52,7 +63,7 @@ def render_whale_gallery(n_cols:int = 4): cols = cycle(st.columns(n_cols)) for ix in range(len(sw_wv.df_whale_img_ref)): img_name = sw_wv.df_whale_img_ref.iloc[ix].loc["WHALE_IMAGES"] - whale_name = format_whale_name(str(sw_wv.df_whale_img_ref.iloc[ix].name)) + whale_name = _format_whale_name(str(sw_wv.df_whale_img_ref.iloc[ix].name)) url = sw_wv.df_whale_img_ref.iloc[ix].loc['WHALE_REFERENCES'] image_path = f"images/references/{img_name}" #next(cols).image(image_path, width=150, caption=f"{whale_name}") @@ -85,5 +96,4 @@ if __name__ == "__main__": tg_cont = st.container(key="swgallery") with tg_cont: render_whale_gallery(n_cols=4) - - pass \ No newline at end of file + \ No newline at end of file diff --git a/call_models/whale_viewer.py b/src/whale_viewer.py similarity index 93% rename from call_models/whale_viewer.py rename to src/whale_viewer.py index faf70ab8a83459af876b939033a8688e35be85eb..d5b80f02a8fa801b5e30284f87541e02cd0f87ff 100644 --- a/call_models/whale_viewer.py +++ b/src/whale_viewer.py @@ -22,7 +22,7 @@ WHALE_CLASSES = [ "melon_headed_whale", "minke_whale", "pantropic_spotted_dolphin", - "pygmy_killer_whale", + "pygmy_killer_whale", "rough_toothed_dolphin", "sei_whale", "short_finned_pilot_whale", @@ -102,7 +102,16 @@ df_whale_img_ref = pd.DataFrame( } ).set_index("WHALE_CLASSES") -def format_whale_name(whale_class:str): +def format_whale_name(whale_class:str) -> str: + """ + Formats a whale class name for display + + Args: + whale_class (str): The class name of the whale, with words separated by underscores. + + Returns: + str: The formatted whale name with spaces instead of underscores and each word capitalized. 
+ """ whale_name = whale_class.replace("_", " ").title() return whale_name @@ -116,7 +125,8 @@ def display_whale(whale_classes:List[str], i:int, viewcontainer=None): i (int): The index of the whale class to display. viewcontainer: The container to display the whale information. If not provided, use the current streamlit context (works via - 'with ' syntax) + 'with `container`' syntax) + Returns: None