diff --git a/.github/workflows/push_to_hf.yml b/.github/workflows/push_to_hf.yml new file mode 100644 index 0000000000000000000000000000000000000000..f9faa910a00f4f5475109d958c4298a1886120a7 --- /dev/null +++ b/.github/workflows/push_to_hf.yml @@ -0,0 +1,20 @@ +name: Sync to Hugging Face hub +on: + push: + branches: [main] + # to run this workflow manually from the Actions tab + workflow_dispatch: + +jobs: + sync-to-hub: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + lfs: true + - name: Push to hub + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + HF_USERNAME: ${{secrets.HF_USERNAME}} + run: git push --force https://$HF_USERNAME:$HF_TOKEN@huggingface.co/spaces/Saving-Willy/saving-willy-space main \ No newline at end of file diff --git a/.gitignore b/.gitignore index 82f927558a3dff0ea8c20858856e70779fd02c93..517e80568237fe43e1c0f8495baf3a641402124f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +# OS Related +.DS_Store + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/README.md b/README.md index 2bb1d86cb47126ea6262b8cf25f70d011529c3cf..82ae003d62bcbbe146a3bec794ddc31f4d31dc4f 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,39 @@ -# saving-willy -Research Data Infrastructure for cetacean identification +--- +title: Saving Willy +emoji: 👀 +colorFrom: yellow +colorTo: red +sdk: streamlit +sdk_version: 1.39.0 +app_file: call_models/entry_and_hotdog.py +pinned: false +license: apache-2.0 +short_description: 'SDSC Hackathon - Project 10. ' +--- + +Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference + +app.py is the one and only app + + +## Install + +``` +git clone git@hf.co:spaces/Saving-Willy/saving-willy-space + +pip install -r requirements.txt +``` + +``` +streamlit run app.py +``` + + +## Test data + +https://www.kaggle.com/competitions/happy-whale-and-dolphin/data + + + + +Have a lot of fun! diff --git a/app.py b/app.py new file mode 120000 index 0000000000000000000000000000000000000000..da3a856d4d62e4ab8a59611895876522d77a2431 --- /dev/null +++ b/app.py @@ -0,0 +1 @@ +call_models/entry_and_hotdog.py \ No newline at end of file diff --git a/basic_map/app.py b/basic_map/app.py new file mode 100644 index 0000000000000000000000000000000000000000..ed5695d488a27487f3a02fbd8c025cd4f75a9493 --- /dev/null +++ b/basic_map/app.py @@ -0,0 +1,21 @@ +import pandas as pd +import streamlit as st +import folium + +from streamlit_folium import st_folium +from streamlit_folium import folium_static + + +visp_loc = 46.295833, 7.883333 +#m = folium.Map(location=visp_loc, zoom_start=9) + + +st.markdown("# :whale: :whale: Cetaceans :red[& friends] :balloon:") + +m = folium.Map(location=visp_loc, zoom_start=9, + tiles='https://tile.opentopomap.org/{z}/{x}/{y}.png', + attr='Open Topo Map') + +folium_static(m) + + diff --git a/basic_map/app1.py b/basic_map/app1.py new file mode 100644 index 0000000000000000000000000000000000000000..3339a3fa4b658cd18cb60d3bfbfdfce450de2c13 --- /dev/null +++ b/basic_map/app1.py @@ -0,0 +1,42 @@ +# lets try using map stuff without folium, maybe stlite doesnt support that. 
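+# (this variant loads the demo peak data from mountains_clr.csv and renders it with st.map)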
+ +import streamlit as st +import pandas as pd + +# Load data +f = 'mountains_clr.csv' +df = pd.read_csv(f).dropna() + +print(df) + +st.markdown("# :whale: :whale: Cetaceans :red[& friends] :balloon:") + +st.markdown("## :mountain: Mountains") +st.markdown(f"library version: **{st.__version__}**") +# not sure where my versions are getting pegged from, but we have a 1y spread :( +# https://github.com/streamlit/streamlit/blob/1.24.1/lib/streamlit/elements/map.py +# rather hard to find the docs for old versions, no selector unlike many libraries. + +visp_loc = 46.295833, 7.883333 +tile_xyz = 'https://tile.opentopomap.org/{z}/{x}/{y}.png' +tile_attr = 'Open Topo Map' +st.map(df, latitude='lat', longitude='lon', color='color', size='size', zoom=7) +#, tiles=tile_xyz, attr=tile_attr) + +#st.map(df) + +#st.map(df, latitude="col1", longitude="col2", size="col3", color="col4") + +import numpy as np + +df2 = pd.DataFrame( + { + "col1": np.random.randn(1000) / 50 + 37.76, + "col2": np.random.randn(1000) / 50 + -122.4, + "col3": np.random.randn(1000) * 100, + "col4": np.random.rand(1000, 4).tolist(), + } +) +#st.map(df, latitude="col1", longitude="col2", size="col3", color="col4") + + diff --git a/basic_map/requirements.txt b/basic_map/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..87065fad5929731e0bc25d641daf9e6424578918 --- /dev/null +++ b/basic_map/requirements.txt @@ -0,0 +1,4 @@ +streamlit +folium +streamlit-folium + diff --git a/call_models/alps_map.py b/call_models/alps_map.py new file mode 100644 index 0000000000000000000000000000000000000000..64a5c73ed6193f0333e8a214c248e622e08e0411 --- /dev/null +++ b/call_models/alps_map.py @@ -0,0 +1,171 @@ +import pandas as pd +import streamlit as st +import folium +from streamlit_folium import st_folium + +_map_data = { + 'name': { + 0: 'matterhorn', + 1: 'zinalrothorn', + 2: 'alphubel', + 3: 'allalinhorn', + 4: 'weissmies', + 5: 'lagginhorn', + 6: 'lenzspitze', + 10: 'strahlhorn', + 11: 'parrotspitze'}, + 'lat': { + 0: 45.9764263, + 1: 46.0648271, + 2: 46.0628767, + 3: 46.0460858, + 4: 46.127633, + 5: 46.1570635, + 6: 46.1045505, + 10: 46.0131498, + 11: 45.9197881}, + 'lon': { + 0: 7.6586024, + 1: 7.6901238, + 2: 7.8638549, + 3: 7.8945842, + 4: 8.0120569, + 5: 8.0031044, + 6: 7.8686568, + 10: 7.9021703, + 11: 7.8710552}, + 'height': { + 0: 4181.0, + 1: 3944.0, + 2: 4174.0, + 3: 3940.0, + 4: 3983.0, + 5: 3916.0, + 6: 4255.0, + 10: 4072.0, + 11: 4419.0}, + 'color': { + 0: '#aa0000', + 1: '#aa0000', + 2: '#aa0000', + 3: '#aa0000', + 4: '#aa0000', + 5: '#aa0000', + 6: '#aa0000', + 10: '#00aa00', + 11: '#aa0000'}, + 'size': {0: 30, 1: 30, 2: 30, 3: 30, 4: 30, 5: 30, 6: 30, 10: 500, 11: 30} +} + +tile_sets = [ + 'Open Street Map', + #'Stamen Terrain', + #'Stamen Toner', + 'Esri Ocean', + 'Esri Images', + 'Stamen Watercolor', + 'CartoDB Positron', + #'CartoDB Dark_Matter' +] + +def create_map(tile_name, location, zoom_start: int = 7): + # https://xyzservices.readthedocs.io/en/stable/gallery.html + # get teh attribtuions from here once we pick the 2-3-4 options + # make esri ocean the default + m = folium.Map(location=location, zoom_start=zoom_start, + tiles='Esri.OceanBasemap', attr="Esri") + #m = folium.Map(location=location, zoom_start=zoom_start) + + attr = "" + if tile_name == 'Open Street Map': + folium.TileLayer('openstreetmap').add_to(m) + pass + + #Esri.OceanBasemap + elif tile_name == 'Esri Ocean': + pass # made this one default () + #attr = "Esri" + #folium.TileLayer('Esri.OceanBasemap', attr=attr).add_to(m) + + 
elif tile_name == 'Esri Images':
+        attr = "Esri — Source: Esri, i-cubed, USDA"
+        #folium.TileLayer('stamenterrain', attr=attr).add_to(m)
+        folium.TileLayer('Esri.WorldImagery', attr=attr).add_to(m)
+    elif tile_name == 'Stamen Toner':
+        attr = "Stamen"
+        folium.TileLayer('stamentoner', attr=attr).add_to(m)
+    elif tile_name == 'Stamen Watercolor':
+        attr = "Stamen"
+        folium.TileLayer('Stadia.StamenWatercolor', attr=attr).add_to(m)
+    elif tile_name == 'CartoDB Positron':
+        folium.TileLayer('cartodb positron').add_to(m)
+    elif tile_name == 'CartoDB Dark_Matter':
+        folium.TileLayer('cartodb dark_matter').add_to(m)
+
+    #folium.LayerControl().add_to(m)
+    return m
+
+
+def present_alps_map():
+    '''show a map of the alps with peaks (from the event's team names) marked
+
+    there are two rendering modes:
+    a) basic - this uses a streamlit map, which doesn't offer much flexibility on
+       the tiles, but if you supply a dataframe then you just tell it the columns to
+       use for lat, lon, color, and size of points
+
+    b) advanced - this uses folium, which allows for more control over the tiles,
+       but sadly it seems much less flexible for the point markers.
+
+    '''
+
+    st.markdown("# :whale: :whale: Cetaceans :red[& friends] :balloon:")
+    show_points = st.toggle("Show Points", False)
+    basic_map = st.toggle("Use Basic Map", False)
+
+    visp_loc = 46.295833, 7.883333 # position of a town near the peaks
+    # (maybe Zermatt or Taesch would be better? all the mountains seem to be on the same side of the Valais)
+    _df = pd.DataFrame(_map_data)
+    if basic_map:
+        # render using the streamlit map element
+        st.map(_df, latitude='lat', longitude='lon', color='color', size='size', zoom=7)
+    else:
+        # set up a dropdown to pick tiles, and render with folium
+        selected_tile = st.selectbox("Choose a tile set", tile_sets)
+        #st.info(f"Selected tile: {selected_tile}")
+        # not clear why the default selection doesn't get rendered.
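+        # (sketch) create_map's elif chain could instead be table-driven: a dict mapping the
+        #  dropdown label -> (folium tile name, attribution), e.g.
+        #  {'Esri Ocean': ('Esri.OceanBasemap', 'Esri'), 'Esri Images': ('Esri.WorldImagery', 'Esri, i-cubed, USDA')}
+        #  (entries illustrative, taken from the providers already used above).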
+ # generate a layer + map_ = create_map(selected_tile, visp_loc) + # and render it + #tile_xyz = 'https://tile.opentopomap.org/{z}/{x}/{y}.png' + #tile_attr = 'Open Topo Map' + + if show_points: + folium.Marker( + location=visp_loc, + popup="Visp", + tooltip="Visp", + icon=folium.Icon(color='blue', icon='info-sign') + ).add_to(map_) + + for i, row in _df.iterrows(): + c = 'red' + if row['name'] == 'strahlhorn': + c = 'green' + kw = {"prefix": "fa", "color": c, "icon": "mountain-sun"} + folium.Marker( + location=[row['lat'], row['lon']], + popup=f"{row['name']} ({row['height']} m)", + tooltip=row['name'], + icon=folium.Icon(**kw) + ).add_to(map_) + #st.info(f"Added marker for {row['name']} {row['lat']} {row['lon']}") + + + #folium_static(map_) + st_data = st_folium(map_, width=725) + + # maybe solution for click => new marker + # https://discuss.streamlit.io/t/add-marker-after-clicking-on-map/69472 + return st_data + diff --git a/call_models/click_map.py b/call_models/click_map.py new file mode 100644 index 0000000000000000000000000000000000000000..5386292679dfc305f29027751fca0b2e836d5858 --- /dev/null +++ b/call_models/click_map.py @@ -0,0 +1,18 @@ +import folium +import streamlit as st + +from streamlit_folium import st_folium + +# center on Liberty Bell, add marker +m = folium.Map(location=[39.949610, -75.150282], zoom_start=16) +folium.Marker( + [39.949610, -75.150282], popup="Liberty Bell", tooltip="Liberty Bell" +).add_to(m) + +# call to render Folium map in Streamlit +st_data = st_folium(m, width=725) + +if st_data['last_clicked'] is not None: + print(st_data) + st.info(st_data['last_clicked']) + \ No newline at end of file diff --git a/call_models/d_entry.py b/call_models/d_entry.py new file mode 100644 index 0000000000000000000000000000000000000000..183bcaebd07f7604ecffc054c81409113ab45ae8 --- /dev/null +++ b/call_models/d_entry.py @@ -0,0 +1,108 @@ +import streamlit as st +from PIL import Image +import datetime +import re +#import os +import json + +import hashlib + + +allowed_image_types = ['webp'] +#allowed_image_types = ['jpg', 'jpeg', 'png', 'webp'] + + +# Function to validate email address +def is_valid_email(email): + pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$' + return re.match(pattern, email) is not None + +# Function to extract date and time from image metadata +def get_image_datetime(image_file): + try: + from PIL import ExifTags + image = Image.open(image_file) + exif_data = image._getexif() + if exif_data is not None: + for tag, value in exif_data.items(): + if ExifTags.TAGS.get(tag) == 'DateTimeOriginal': + return value + except Exception as e: + st.warning("Could not extract date from image metadata.") + return None + +# Streamlit app +st.sidebar.title("Input Form") + +# 1. Image Selector +uploaded_filename = st.sidebar.file_uploader("Upload an image", type=allowed_image_types) +image_datetime = None # For storing date-time from image + +if uploaded_filename is not None: + # Display the uploaded image + image = Image.open(uploaded_filename) + st.sidebar.image(image, caption='Uploaded Image.', use_column_width=True) + + # Extract and display image date-time + image_datetime = get_image_datetime(uploaded_filename) + print(f"[D] image date extracted as {image_datetime}") + +metadata = { + "latitude": 23.5, + "longitude": 44, + "author_email": "super@whale.org", + "date": None, + "time": None, +} + +# 2. Latitude Entry Box +latitude = st.sidebar.text_input("Latitude", metadata.get('latitude', "")) +# 3. 
Longitude Entry Box +longitude = st.sidebar.text_input("Longitude", metadata.get('longitude', "")) +# 4. Author Box with Email Address Validator +author_email = st.sidebar.text_input("Author Email", metadata.get('author_email', "")) + +if author_email and not is_valid_email(author_email): + st.sidebar.error("Please enter a valid email address.") + + + + +# 5. date/time +## first from image metadata +if image_datetime is not None: + time_value = datetime.datetime.strptime(image_datetime, '%Y:%m:%d %H:%M:%S').time() + date_value = datetime.datetime.strptime(image_datetime, '%Y:%m:%d %H:%M:%S').date() +else: + time_value = datetime.datetime.now().time() # Default to current time + date_value = datetime.datetime.now().date() + +## if not, give user the option to enter manually +date_option = st.sidebar.date_input("Date", value=date_value) +time_option = st.sidebar.time_input("Time", time_value) + + + +# Display submitted data +if st.sidebar.button("Upload"): + # create a dictionary with the submitted data + submitted_data = { + "latitude": latitude, + "longitude": longitude, + "author_email": author_email, + "date": str(date_option), + "time": str(time_option), + "predicted_class": None, + "image_filename": uploaded_filename.name if uploaded_filename else None, + "image_md5": hashlib.md5(uploaded_filename.read()).hexdigest() if uploaded_filename else None, + + } + + st.write("Submitted Data:") + st.write(f"Latitude: {submitted_data['latitude']}") + st.write(f"Longitude: {submitted_data['longitude']}") + st.write(f"Author Email: {submitted_data['author_email']}") + st.write(f"Date: {submitted_data['date']}") + st.write(f"Time: {submitted_data['time']}") + + st.write(f"full dict of data: {json.dumps(submitted_data)}") \ No newline at end of file diff --git a/call_models/entry_and_hotdog.py b/call_models/entry_and_hotdog.py new file mode 100644 index 0000000000000000000000000000000000000000..63475ddab9038d527ad36e1580e5a32501142e19 --- /dev/null +++ b/call_models/entry_and_hotdog.py @@ -0,0 +1,304 @@ +import datetime +import os +import json +import logging +import tempfile +import pandas as pd +import streamlit as st +import folium +from streamlit_folium import st_folium +from huggingface_hub import HfApi +#from datasets import load_dataset +#from fix_tabrender import js_show_zeroheight_iframe + +import whale_viewer as sw_wv +import input_handling as sw_inp +import alps_map as sw_am +import whale_gallery as sw_wg +import obs_map as sw_map +import st_logs as sw_logs + + + +from transformers import pipeline +from transformers import AutoModelForImageClassification + +# setup for the ML model on huggingface (our wrapper) +os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python" +# and the dataset of observations (hf dataset in our space) +dataset_id = "Saving-Willy/Happywhale-kaggle" +data_files = "data/train-00000-of-00001.parquet" + +USE_BASIC_MAP = False +DEV_SIDEBAR_LIB = True + +# get a global var for logger accessor in this module +LOG_LEVEL = logging.DEBUG +g_logger = logging.getLogger(__name__) +g_logger.setLevel(LOG_LEVEL) + +st.set_page_config(layout="wide") +#sw_logs.setup_logging(level=LOG_LEVEL, buffer_len=40) + + + +# initialise various session state variables +if "handler" not in st.session_state: + st.session_state['handler'] = sw_logs.setup_logging() + +if "full_data" not in st.session_state: + st.session_state.full_data = {} + +if "classify_whale_done" not in st.session_state: + st.session_state.classify_whale_done = False + +if "whale_prediction1" not in st.session_state: + 
st.session_state.whale_prediction1 = None + +if "image" not in st.session_state: + st.session_state.image = None + +if "tab_log" not in st.session_state: + st.session_state.tab_log = None + + +def metadata2md(): + markdown_str = "\n" + for key, value in st.session_state.full_data.items(): + markdown_str += f"- **{key}**: {value}\n" + return markdown_str + + +def push_observation(tab_log=None): + # we get the data from session state: 1 is the dict 2 is the image. + # first, lets do an info display (popup) + metadata_str = json.dumps(st.session_state.full_data) + + st.toast(f"Uploading observation: {metadata_str}", icon="🦭") + tab_log = st.session_state.tab_log + if tab_log is not None: + tab_log.info(f"Uploading observation: {metadata_str}") + + # get huggingface api + api = HfApi() + + f = tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) + f.write(metadata_str) + f.close() + st.info(f"temp file: {f.name} with metadata written...") + + path_in_repo= f"metadata/{st.session_state.full_data['author_email']}/{st.session_state.full_data['image_md5']}.json" + msg = f"fname: {f.name} | path: {path_in_repo}" + print(msg) + st.warning(msg) + rv = api.upload_file( + path_or_fileobj=f.name, + path_in_repo=path_in_repo, + repo_id="Saving-Willy/Happywhale-kaggle", + repo_type="dataset", + ) + print(rv) + msg = f"data attempted tx to repo happy walrus: {rv}" + g_logger.info(msg) + st.info(msg) + + +if __name__ == "__main__": + + g_logger.info("App started.") + + #g_logger.debug("debug message") + #g_logger.info("info message") + #g_logger.warning("warning message") + + # Streamlit app + #tab_gallery, tab_inference, tab_hotdogs, tab_map, tab_data, tab_log = st.tabs(["Cetecean classifier", "Hotdog classifier", "Map", "Data", "Log", "Beautiful cetaceans"]) + tab_inference, tab_hotdogs, tab_map, tab_data, tab_log, tab_gallery = st.tabs(["Cetecean classifier", "Hotdog classifier", "Map", "Data", "Log", "Beautiful cetaceans"]) + st.session_state.tab_log = tab_log + + + # create a sidebar, and parse all the input (returned as `observation` object) + observation = sw_inp.setup_input(viewcontainer=st.sidebar) + + + if 0:## WIP + # goal of this code is to allow the user to override the ML prediction, before transmitting an observation + predicted_class = st.sidebar.selectbox("Predicted Class", sw_wv.WHALE_CLASSES) + override_prediction = st.sidebar.checkbox("Override Prediction") + + if override_prediction: + overridden_class = st.sidebar.selectbox("Override Class", sw_wv.WHALE_CLASSES) + st.session_state.full_data['class_overriden'] = overridden_class + else: + st.session_state.full_data['class_overriden'] = None + + + with tab_map: + # visual structure: a couple of toggles at the top, then the map inlcuding a + # dropdown for tileset selection. + tab_map_ui_cols = st.columns(2) + with tab_map_ui_cols[0]: + show_db_points = st.toggle("Show Points from DB", True) + with tab_map_ui_cols[1]: + dbg_show_extra = st.toggle("Show Extra points (test)", False) + + if show_db_points: + # show a nicer map, observations marked, tileset selectable. + st_data = sw_map.present_obs_map( + dataset_id=dataset_id, data_files=data_files, + dbg_show_extra=dbg_show_extra) + + else: + # development map. 
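+            # (marks demo peaks named after the hackathon teams; see alps_map.present_alps_map)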
+ st_data = sw_am.present_alps_map() + + + with tab_log: + handler = st.session_state['handler'] + if handler is not None: + records = sw_logs.parse_log_buffer(handler.buffer) + st.dataframe(records[::-1], use_container_width=True,) + st.info(f"Length of records: {len(records)}") + else: + st.error("⚠️ No log handler found!") + + + + with tab_data: + # the goal of this tab is to allow selection of the new obsvation's location by map click/adjust. + st.markdown("Coming later hope! :construction:") + + st.write("Click on the map to capture a location.") + #m = folium.Map(location=visp_loc, zoom_start=7) + mm = folium.Map(location=[39.949610, -75.150282], zoom_start=16) + folium.Marker( [39.949610, -75.150282], popup="Liberty Bell", tooltip="Liberty Bell" + ).add_to(mm) + + st_data2 = st_folium(mm, width=725) + st.write("below the map...") + if st_data2['last_clicked'] is not None: + print(st_data2) + st.info(st_data2['last_clicked']) + + + with tab_gallery: + # here we make a container to allow filtering css properties + # specific to the gallery (otherwise we get side effects) + tg_cont = st.container(key="swgallery") + with tg_cont: + sw_wg.render_whale_gallery(n_cols=4) + + + # Display submitted data + if st.sidebar.button("Validate"): + # create a dictionary with the submitted data + submitted_data = observation.to_dict() + #print(submitted_data) + + #full_data.update(**submitted_data) + for k, v in submitted_data.items(): + st.session_state.full_data[k] = v + + #st.write(f"full dict of data: {json.dumps(submitted_data)}") + #tab_inference.info(f"{st.session_state.full_data}") + tab_log.info(f"{st.session_state.full_data}") + + df = pd.DataFrame(submitted_data, index=[0]) + with tab_data: + st.table(df) + + + + + # inside the inference tab, on button press we call the model (on huggingface hub) + # which will be run locally. + # - the model predicts the top 3 most likely species from the input image + # - these species are shown + # - the user can override the species prediction using the dropdown + # - an observation is uploaded if the user chooses. + + if tab_inference.button("Identify with cetacean classifier"): + #pipe = pipeline("image-classification", model="Saving-Willy/cetacean-classifier", trust_remote_code=True) + cetacean_classifier = AutoModelForImageClassification.from_pretrained("Saving-Willy/cetacean-classifier", + revision='0f9c15e2db4d64e7f622ade518854b488d8d35e6', trust_remote_code=True) + + if st.session_state.image is None: + # TODO: cleaner design to disable the button until data input done? 
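+            #       a sketch (not wired in): pass `disabled=st.session_state.image is None` to the
+            #       tab_inference.button(...) call above, so the button only becomes clickable once
+            #       an image has been uploaded.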
+ st.info("Please upload an image first.") + else: + # run classifier model on `image`, and persistently store the output + out = cetacean_classifier(st.session_state.image) # get top 3 matches + st.session_state.whale_prediction1 = out['predictions'][0] + st.session_state.classify_whale_done = True + msg = f"[D]2 classify_whale_done: {st.session_state.classify_whale_done}, whale_prediction1: {st.session_state.whale_prediction1}" + st.info(msg) + g_logger.info(msg) + + # dropdown for selecting/overriding the species prediction + #st.info(f"[D] classify_whale_done: {st.session_state.classify_whale_done}, whale_prediction1: {st.session_state.whale_prediction1}") + if not st.session_state.classify_whale_done: + selected_class = tab_inference.sidebar.selectbox("Species", sw_wv.WHALE_CLASSES, index=None, placeholder="Species not yet identified...", disabled=True) + else: + pred1 = st.session_state.whale_prediction1 + # get index of pred1 from WHALE_CLASSES, none if not present + print(f"[D] pred1: {pred1}") + ix = sw_wv.WHALE_CLASSES.index(pred1) if pred1 in sw_wv.WHALE_CLASSES else None + selected_class = tab_inference.selectbox("Species", sw_wv.WHALE_CLASSES, index=ix) + + st.session_state.full_data['predicted_class'] = selected_class + if selected_class != st.session_state.whale_prediction1: + st.session_state.full_data['class_overriden'] = selected_class + + btn = st.button("Upload observation to THE INTERNET!", on_click=push_observation) + # TODO: the metadata only fills properly if `validate` was clicked. + tab_inference.markdown(metadata2md()) + + msg = f"[D] full data after inference: {st.session_state.full_data}" + g_logger.debug(msg) + print(msg) + # TODO: add a link to more info on the model, next to the button. + + whale_classes = out['predictions'][:] + # render images for the top 3 (that is what the model api returns) + with tab_inference: + st.markdown("## Species detected") + for i in range(len(whale_classes)): + sw_wv.display_whale(whale_classes, i) + + + + + # inside the hotdog tab, on button press we call a 2nd model (totally unrelated at present, just for demo + # purposes, an hotdog image classifier) which will be run locally. + # - this model predicts if the image is a hotdog or not, and returns probabilities + # - the input image is the same as for the ceteacean classifier - defined in the sidebar + + if tab_hotdogs.button("Get Hotdog Prediction"): + + pipeline = pipeline(task="image-classification", model="julien-c/hotdog-not-hotdog") + tab_hotdogs.title("Hot Dog? 
Or Not?") + + if st.session_state.image is None: + st.info("Please upload an image first.") + st.info(str(observation.to_dict())) + + else: + col1, col2 = tab_hotdogs.columns(2) + + # display the image (use cached version, no need to reread) + col1.image(st.session_state.image, use_column_width=True) + # and then run inference on the image + predictions = pipeline(st.session_state.image) + + col2.header("Probabilities") + first = True + for p in predictions: + col2.subheader(f"{ p['label'] }: { round(p['score'] * 100, 1)}%") + if first: + st.session_state.full_data['predicted_class'] = p['label'] + st.session_state.full_data['predicted_score'] = round(p['score'] * 100, 1) + first = False + + tab_hotdogs.write(f"Session Data: {json.dumps(st.session_state.full_data)}") + + diff --git a/call_models/fix_tabrender.py b/call_models/fix_tabrender.py new file mode 100644 index 0000000000000000000000000000000000000000..b865231672d5696c580505b2b64c942ae44d5d06 --- /dev/null +++ b/call_models/fix_tabrender.py @@ -0,0 +1,69 @@ +import streamlit as st + +# code for fixing the issue with streamlit tabs rendering height 0 when not active +# https://github.com/streamlit/streamlit/issues/7376 +# +# see also https://github.com/randyzwitch/streamlit-folium/issues/128, got +# closed becasue it is apparently a st.tabs problem + + +import uuid, html +# workaround for streamlit making tabs height 0 when not active, breaks map +def inject_iframe_js_code(source: str) -> None: + div_id = uuid.uuid4() + + st.markdown( + f""" +
+    <div style="display:none" id="{div_id}">
+        <iframe src="javascript: \
+            var script = document.createElement('script'); \
+            script.type = 'text/javascript'; \
+            script.text = {html.escape(repr(source))}; \
+            var div = window.parent.document.getElementById('{div_id}'); \
+            div.appendChild(script); \
+            div.parentElement.parentElement.parentElement.style.display = 'none'; \
+        "/>
+    </div>
+ """, + unsafe_allow_html=True, + ) + +def js_show_zeroheight_iframe(component_iframe_title: str, height: str = "auto"): + source = f""" + (function() {{ + var attempts = 0; + const maxAttempts = 20; // Max attempts to find the iframe + const intervalMs = 250; // Interval between attempts in milliseconds + + function setIframeHeight() {{ + const intervalId = setInterval(function() {{ + var iframes = document.querySelectorAll('iframe[title="{component_iframe_title}"]'); + if (iframes.length > 0 || attempts > maxAttempts) {{ + if (iframes.length > 0) {{ + iframes.forEach(iframe => {{ + if (iframe || iframe.height === "0" || iframe.style.height === "0px") {{ + iframe.style.height = "{height}"; + iframe.setAttribute("height", "{height}"); + console.log('Height of iframe with title "{component_iframe_title}" set to {height}.'); + }} + }}); + }} else {{ + console.log('Iframes with title "{component_iframe_title}" not found after ' + maxAttempts + ' attempts.'); + }} + clearInterval(intervalId); // Stop checking + }} + attempts++; + }}, intervalMs); + }} + + + function trackInteraction(event) {{ + console.log('User interaction detected:', event.type); + setIframeHeight(); + }} + + setIframeHeight(); + document.addEventListener('click', trackInteraction); +}})(); + """ + inject_iframe_js_code(source) diff --git a/call_models/hotdogs.py b/call_models/hotdogs.py new file mode 100644 index 0000000000000000000000000000000000000000..108d788b7fc0e88ab90479fbadbd14bb44a00b03 --- /dev/null +++ b/call_models/hotdogs.py @@ -0,0 +1,24 @@ +import streamlit as st +from transformers import pipeline +from PIL import Image +import time + + +pipeline = pipeline(task="image-classification", model="julien-c/hotdog-not-hotdog") + + + +st.title("Hot Dog? Or Not?") + +file_name = st.file_uploader("Upload a hot dog candidate image") + +if file_name is not None: + col1, col2 = st.columns(2) + + image = Image.open(file_name) + col1.image(image, use_column_width=True) + predictions = pipeline(image) + + col2.header("Probabilities") + for p in predictions: + col2.subheader(f"{ p['label'] }: { round(p['score'] * 100, 1)}%") \ No newline at end of file diff --git a/call_models/images/references/640x427-atlantic-white-sided-dolphin.jpg b/call_models/images/references/640x427-atlantic-white-sided-dolphin.jpg new file mode 100644 index 0000000000000000000000000000000000000000..ad5274992e534c1f717686fa3e1b65745f8be268 Binary files /dev/null and b/call_models/images/references/640x427-atlantic-white-sided-dolphin.jpg differ diff --git a/call_models/images/references/640x427-long-finned-pilot-whale.webp b/call_models/images/references/640x427-long-finned-pilot-whale.webp new file mode 100644 index 0000000000000000000000000000000000000000..49a5beef02fa80c88dfd1818573d4aebb1d11e81 Binary files /dev/null and b/call_models/images/references/640x427-long-finned-pilot-whale.webp differ diff --git a/call_models/images/references/640x427-southern-right-whale.jpg b/call_models/images/references/640x427-southern-right-whale.jpg new file mode 100644 index 0000000000000000000000000000000000000000..06f5a1654bed7b5d22bd225f99e1d7041f6e4bc2 Binary files /dev/null and b/call_models/images/references/640x427-southern-right-whale.jpg differ diff --git a/call_models/images/references/Humpback.webp b/call_models/images/references/Humpback.webp new file mode 100644 index 0000000000000000000000000000000000000000..084a129314f51750ccaf46c4f9a6c31e7c69ec11 Binary files /dev/null and b/call_models/images/references/Humpback.webp differ diff --git 
a/call_models/images/references/Whale_Short-Finned_Pilot-markedDW.png b/call_models/images/references/Whale_Short-Finned_Pilot-markedDW.png new file mode 100644 index 0000000000000000000000000000000000000000..595619303a14b5b98c5cbfd16d50d6a408081f62 Binary files /dev/null and b/call_models/images/references/Whale_Short-Finned_Pilot-markedDW.png differ diff --git a/call_models/images/references/beluga.webp b/call_models/images/references/beluga.webp new file mode 100644 index 0000000000000000000000000000000000000000..599acbbe6b0072ea5610a0707546f4d291955ea3 Binary files /dev/null and b/call_models/images/references/beluga.webp differ diff --git a/call_models/images/references/blue-whale.webp b/call_models/images/references/blue-whale.webp new file mode 100644 index 0000000000000000000000000000000000000000..acc4d5044f03464579cfe5afab52fa0d51d632db Binary files /dev/null and b/call_models/images/references/blue-whale.webp differ diff --git a/call_models/images/references/bottlenose_dolphin.webp b/call_models/images/references/bottlenose_dolphin.webp new file mode 100644 index 0000000000000000000000000000000000000000..3371d3959b7337d6c4af2099b44e899a4054cf37 Binary files /dev/null and b/call_models/images/references/bottlenose_dolphin.webp differ diff --git a/call_models/images/references/brydes.webp b/call_models/images/references/brydes.webp new file mode 100644 index 0000000000000000000000000000000000000000..94ebecfefc33caac7cee66ddd0fdbad5847d5992 Binary files /dev/null and b/call_models/images/references/brydes.webp differ diff --git a/call_models/images/references/common_dolphin.webp b/call_models/images/references/common_dolphin.webp new file mode 100644 index 0000000000000000000000000000000000000000..019f1c54f93acddc0fea65ddce99320815559f35 Binary files /dev/null and b/call_models/images/references/common_dolphin.webp differ diff --git a/call_models/images/references/cuviers_beaked_whale.webp b/call_models/images/references/cuviers_beaked_whale.webp new file mode 100644 index 0000000000000000000000000000000000000000..ee233ee986d478d42cfa3026d15502bd8e9b5970 Binary files /dev/null and b/call_models/images/references/cuviers_beaked_whale.webp differ diff --git a/call_models/images/references/false-killer-whale.webp b/call_models/images/references/false-killer-whale.webp new file mode 100644 index 0000000000000000000000000000000000000000..8c85cdab6d0d101341426bec1abc608b88791e0d Binary files /dev/null and b/call_models/images/references/false-killer-whale.webp differ diff --git a/call_models/images/references/fin-whale.webp b/call_models/images/references/fin-whale.webp new file mode 100644 index 0000000000000000000000000000000000000000..cf95af361659845f9de1cbb0f5aab29bf08340c5 Binary files /dev/null and b/call_models/images/references/fin-whale.webp differ diff --git a/call_models/images/references/gray-whale.webp b/call_models/images/references/gray-whale.webp new file mode 100644 index 0000000000000000000000000000000000000000..0177a6baca80797b833dab45e06a2aa3b24554a9 Binary files /dev/null and b/call_models/images/references/gray-whale.webp differ diff --git a/call_models/images/references/killer_whale.webp b/call_models/images/references/killer_whale.webp new file mode 100644 index 0000000000000000000000000000000000000000..cec8b567624f20354e5691696fd349a7282a3ca1 Binary files /dev/null and b/call_models/images/references/killer_whale.webp differ diff --git a/call_models/images/references/melon.webp b/call_models/images/references/melon.webp new file mode 100644 index 
0000000000000000000000000000000000000000..d0397349e1de5ae424669a82dffbc69518b4f6fb Binary files /dev/null and b/call_models/images/references/melon.webp differ diff --git a/call_models/images/references/minke-whale.webp b/call_models/images/references/minke-whale.webp new file mode 100644 index 0000000000000000000000000000000000000000..7562b0d0de0db09db781047fb7610f9fc2ad49d6 Binary files /dev/null and b/call_models/images/references/minke-whale.webp differ diff --git a/call_models/images/references/pantropical-spotted-dolphin.webp b/call_models/images/references/pantropical-spotted-dolphin.webp new file mode 100644 index 0000000000000000000000000000000000000000..a13e4b3f6212f844d89ace5cb563c216ad6c1a6a Binary files /dev/null and b/call_models/images/references/pantropical-spotted-dolphin.webp differ diff --git a/call_models/images/references/pygmy-killer-whale.webp b/call_models/images/references/pygmy-killer-whale.webp new file mode 100644 index 0000000000000000000000000000000000000000..47b397fa2b8854951c59ed57dfe273d240231ed2 Binary files /dev/null and b/call_models/images/references/pygmy-killer-whale.webp differ diff --git a/call_models/images/references/rough-toothed-dolphin.webp b/call_models/images/references/rough-toothed-dolphin.webp new file mode 100644 index 0000000000000000000000000000000000000000..5380bdd8e72004c132609c81da6d7b4434863c24 Binary files /dev/null and b/call_models/images/references/rough-toothed-dolphin.webp differ diff --git a/call_models/images/references/sei.webp b/call_models/images/references/sei.webp new file mode 100644 index 0000000000000000000000000000000000000000..7e993a394190a14c3eb4426c1749fb307e9d974c Binary files /dev/null and b/call_models/images/references/sei.webp differ diff --git a/call_models/images/references/spinner.webp b/call_models/images/references/spinner.webp new file mode 100644 index 0000000000000000000000000000000000000000..2c79857d45c1b01d060f9de4de90f66324053589 Binary files /dev/null and b/call_models/images/references/spinner.webp differ diff --git a/call_models/imgs/cakes.jpg b/call_models/imgs/cakes.jpg new file mode 100644 index 0000000000000000000000000000000000000000..ed5c25b7b450bd6c4b01b016c27e6f3dc9766a6b Binary files /dev/null and b/call_models/imgs/cakes.jpg differ diff --git a/call_models/input_handling.py b/call_models/input_handling.py new file mode 100644 index 0000000000000000000000000000000000000000..519440390e4efbc0118a538c1bfa9e8f17081a23 --- /dev/null +++ b/call_models/input_handling.py @@ -0,0 +1,184 @@ +from PIL import Image +from PIL import ExifTags +import re +import datetime +import hashlib +import logging + +import streamlit as st + +m_logger = logging.getLogger(__name__) +# we can set the log level locally for funcs in this module +#g_m_logger.setLevel(logging.DEBUG) +m_logger.setLevel(logging.INFO) + +''' +A module to setup the input handling for the whale observation guidance tool + +both the UI elements (setup_input_UI) and the validation functions. 
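+
+Example usage (illustrative; this is how the main app wires it up):
+
+    import streamlit as st
+    import input_handling as sw_inp
+
+    observation = sw_inp.setup_input(viewcontainer=st.sidebar)
+    data = observation.to_dict()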
+''' +#allowed_image_types = ['webp'] +allowed_image_types = ['jpg', 'jpeg', 'png', 'webp'] + + +# autogenerated class to hold the input data +class InputObservation: + def __init__(self, image=None, latitude=None, longitude=None, author_email=None, date=None, time=None, date_option=None, time_option=None, uploaded_filename=None): + self.image = image + self.latitude = latitude + self.longitude = longitude + self.author_email = author_email + self.date = date + self.time = time + self.date_option = date_option + self.time_option = time_option + self.uploaded_filename = uploaded_filename + + def __str__(self): + return f"Observation: {self.image}, {self.latitude}, {self.longitude}, {self.author_email}, {self.date}, {self.time}, {self.date_option}, {self.time_option}, {self.uploaded_filename}" + + def __repr__(self): + return f"Observation: {self.image}, {self.latitude}, {self.longitude}, {self.author_email}, {self.date}, {self.time}, {self.date_option}, {self.time_option}, {self.uploaded_filename}" + + def __eq__(self, other): + return (self.image == other.image and self.latitude == other.latitude and self.longitude == other.longitude and + self.author_email == other.author_email and self.date == other.date and self.time == other.time and + self.date_option == other.date_option and self.time_option == other.time_option and self.uploaded_filename == other.uploaded_filename) + + def __ne__(self, other): + return not self.__eq__(other) + + def __hash__(self): + return hash((self.image, self.latitude, self.longitude, self.author_email, self.date, self.time, self.date_option, self.time_option, self.uploaded_filename)) + + def to_dict(self): + return { + #"image": self.image, + "image_filename": self.uploaded_filename.name if self.uploaded_filename else None, + "image_md5": hashlib.md5(self.uploaded_filename.read()).hexdigest() if self.uploaded_filename else None, + "latitude": self.latitude, + "longitude": self.longitude, + "author_email": self.author_email, + "date": self.date, + "time": self.time, + "date_option": self.date_option, + "time_option": self.time_option, + "uploaded_filename": self.uploaded_filename + } + + @classmethod + def from_dict(cls, data): + return cls(data["image"], data["latitude"], data["longitude"], data["author_email"], data["date"], data["time"], data["date_option"], data["time_option"], data["uploaded_filename"]) + + @classmethod + def from_input(cls, input): + return cls(input.image, input.latitude, input.longitude, input.author_email, input.date, input.time, input.date_option, input.time_option, input.uploaded_filename) + + @staticmethod + def from_input(input): + return InputObservation(input.image, input.latitude, input.longitude, input.author_email, input.date, input.time, input.date_option, input.time_option, input.uploaded_filename) + + @staticmethod + def from_dict(data): + return InputObservation(data["image"], data["latitude"], data["longitude"], data["author_email"], data["date"], data["time"], data["date_option"], data["time_option"], data["uploaded_filename"]) + +# define function to validate number, allowing signed float +def is_valid_number(number:str) -> bool: + pattern = r'^[-+]?[0-9]*\.?[0-9]+$' + return re.match(pattern, number) is not None + + +# Function to validate email address +def is_valid_email(email): + pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$' + return re.match(pattern, email) is not None + +# Function to extract date and time from image metadata +def get_image_datetime(image_file): + try: + image = 
Image.open(image_file) + exif_data = image._getexif() + if exif_data is not None: + for tag, value in exif_data.items(): + if ExifTags.TAGS.get(tag) == 'DateTimeOriginal': + return value + except Exception as e: + st.warning("Could not extract date from image metadata.") + return None + + +# an arbitrary set of defaults so testing is less painful... +# ideally we add in some randomization to the defaults +spoof_metadata = { + "latitude": 23.5, + "longitude": 44, + "author_email": "super@whale.org", + "date": None, + "time": None, +} + +#def display_whale(whale_classes:List[str], i:int, viewcontainer=None): +def setup_input(viewcontainer: st.delta_generator.DeltaGenerator=None, _allowed_image_types: list=None, ): + + if viewcontainer is None: + viewcontainer = st.sidebar + + if _allowed_image_types is None: + _allowed_image_types = allowed_image_types + + + viewcontainer.title("Input image and data") + + # 1. Image Selector + uploaded_filename = viewcontainer.file_uploader("Upload an image", type=allowed_image_types) + image_datetime = None # For storing date-time from image + + if uploaded_filename is not None: + # Display the uploaded image + image = Image.open(uploaded_filename) + viewcontainer.image(image, caption='Uploaded Image.', use_column_width=True) + # store the image in the session state + st.session_state.image = image + + + # Extract and display image date-time + image_datetime = get_image_datetime(uploaded_filename) + print(f"[D] image date extracted as {image_datetime}") + m_logger.debug(f"image date extracted as {image_datetime} (from {uploaded_filename})") + + + # 2. Latitude Entry Box + latitude = viewcontainer.text_input("Latitude", spoof_metadata.get('latitude', "")) + if latitude and not is_valid_number(latitude): + viewcontainer.error("Please enter a valid latitude (numerical only).") + m_logger.error(f"Invalid latitude entered: {latitude}.") + # 3. Longitude Entry Box + longitude = viewcontainer.text_input("Longitude", spoof_metadata.get('longitude', "")) + if longitude and not is_valid_number(longitude): + viewcontainer.error("Please enter a valid longitude (numerical only).") + m_logger.error(f"Invalid latitude entered: {latitude}.") + + # 4. Author Box with Email Address Validator + author_email = viewcontainer.text_input("Author Email", spoof_metadata.get('author_email', "")) + + if author_email and not is_valid_email(author_email): + viewcontainer.error("Please enter a valid email address.") + + # 5. 
date/time + ## first from image metadata + if image_datetime is not None: + time_value = datetime.datetime.strptime(image_datetime, '%Y:%m:%d %H:%M:%S').time() + date_value = datetime.datetime.strptime(image_datetime, '%Y:%m:%d %H:%M:%S').date() + else: + time_value = datetime.datetime.now().time() # Default to current time + date_value = datetime.datetime.now().date() + + ## if not, give user the option to enter manually + date_option = st.sidebar.date_input("Date", value=date_value) + time_option = st.sidebar.time_input("Time", time_value) + + observation = InputObservation(image=uploaded_filename, latitude=latitude, longitude=longitude, + author_email=author_email, date=image_datetime, time=None, + date_option=date_option, time_option=time_option) + return observation + diff --git a/call_models/obs_map.py b/call_models/obs_map.py new file mode 100644 index 0000000000000000000000000000000000000000..426ca68477f5987f6515fd2b25800922323c650f --- /dev/null +++ b/call_models/obs_map.py @@ -0,0 +1,163 @@ +from typing import Tuple +import logging + +import pandas as pd +from datasets import load_dataset +import streamlit as st +import folium +from streamlit_folium import st_folium + +import whale_viewer as sw_wv +from fix_tabrender import js_show_zeroheight_iframe + +m_logger = logging.getLogger(__name__) +# we can set the log level locally for funcs in this module +#g_m_logger.setLevel(logging.DEBUG) +m_logger.setLevel(logging.INFO) + +# TODO: refactor so we have richer data: a tuple or dict combining +# the dropdown label, the tileset name, the attribution - everything +# needed to make the map logic simplified +tile_sets = [ + 'Open Street Map', + #'Stamen Terrain', + #'Stamen Toner', + 'Esri Ocean', + 'Esri Images', + 'Stamen Watercolor', + 'CartoDB Positron', + #'CartoDB Dark_Matter' +] + +# a list of unique colours for each whale class (for the map) +_colors = [ + "#FFD700", # Gold + "#FF5733", # Red + "#33FF57", # Green + "#3357FF", # Blue + "#FFFF33", # Yellow + "#FF33FF", # Magenta + "#33FFFF", # Cyan + "#FF8C00", # Dark Orange + "#8A2BE2", # Blue Violet + "#DEB887", # Burlywood + "#5F9EA0", # Cadet Blue + "#D2691E", # Chocolate + "#FF4500", # Orange Red + "#2E8B57", # Sea Green + "#DA70D6", # Orchid + "#FF6347", # Tomato + "#7FFF00", # Chartreuse + "#DDA0DD", # Plum + "#A0522D", # Sienna + "#4682B4", # Steel Blue + "#7B68EE", # Medium Slate Blue + "#F0E68C", # Khaki + "#B22222", # Firebrick + "#FF1493", # Deep Pink + "#FFFACD", # Lemon Chiffon + "#20B2AA", # Light Sea Green + "#778899" # Light Slate Gray +] + +whale2color = {k: v for k, v in zip(sw_wv.WHALE_CLASSES, _colors)} + +def create_map(tile_name:str, location:Tuple, zoom_start: int = 7): + # https://xyzservices.readthedocs.io/en/stable/gallery.html + # get teh attribtuions from here once we pick the 2-3-4 options + # make esri ocean the default + m = folium.Map(location=location, zoom_start=zoom_start, + tiles='Esri.OceanBasemap', attr="Esri") + #m = folium.Map(location=location, zoom_start=zoom_start) + + attr = "" + if tile_name == 'Open Street Map': + folium.TileLayer('openstreetmap').add_to(m) + pass + + #Esri.OceanBasemap + elif tile_name == 'Esri Ocean': + pass # made this one default () + #attr = "Esri" + #folium.TileLayer('Esri.OceanBasemap', attr=attr).add_to(m) + + elif tile_name == 'Esri Images': + attr = "Esri — Source: Esri, i-cubed, USDA" + #folium.TileLayer('stamenterrain', attr=attr).add_to(m) + folium.TileLayer('Esri.WorldImagery', attr=attr).add_to(m) + elif tile_name == 'Stamen Toner': + attr = "Stamen" + 
folium.TileLayer('stamentoner', attr=attr).add_to(m) + elif tile_name == 'Stamen Watercolor': + attr = "Stamen" + folium.TileLayer('Stadia.StamenWatercolor', attr=attr).add_to(m) + elif tile_name == 'CartoDB Positron': + folium.TileLayer('cartodb positron').add_to(m) + elif tile_name == 'CartoDB Dark_Matter': + folium.TileLayer('cartodb dark_matter').add_to(m) + + #folium.LayerControl().add_to(m) + return m + + + +def present_obs_map(dataset_id:str = "Saving-Willy/Happywhale-kaggle", + data_files:str = "data/train-00000-of-00001.parquet", + dbg_show_extra:bool = False): + ''' + render a map, with a selectable tileset, and show markers for each of the whale + observations + + ''' + # load/download data from huggingface dataset + metadata = load_dataset(dataset_id, data_files=data_files) + + # make a pandas df that is compliant with folium/streamlit maps + _df = pd.DataFrame({ + 'lat': metadata["train"]["latitude"], + 'lon': metadata["train"]["longitude"], + 'species': metadata["train"]["predicted_class"],} + ) + if dbg_show_extra: + # add a few samples to visualise colours + _df.loc[len(_df)] = {'lat': 0, 'lon': 0, 'species': 'rough_toothed_dolphin'} + _df.loc[len(_df)] = {'lat': -3, 'lon': 0, 'species': 'pygmy_killer_whale'} + _df.loc[len(_df)] = {'lat': 45.7, 'lon': -2.6, 'species': 'humpback_whale'} + + ocean_loc = 0, 10 + selected_tile = st.selectbox("Choose a tile set", tile_sets, index=None, placeholder="Choose a tile set...", disabled=False) + map_ = create_map(selected_tile, ocean_loc, zoom_start=2) + + folium.Marker( + location=ocean_loc, + popup="Atlantis", + tooltip="Atlantis", + icon=folium.Icon(color='blue', icon='info-sign') + ).add_to(map_) + + for _, row in _df.iterrows(): + c = whale2color.get(row['species'], 'red') + msg = f"[D] color for {row['species']} is {c}" + m_logger.debug(msg) # depends on m_logger logging level (*not* the main st app's logger) + #m_logger.info(msg) + + kw = {"prefix": "fa", "color": 'gray', "icon_color": c, "icon": "binoculars" } + folium.Marker( + location=[row['lat'], row['lon']], + popup=f"{row['species']} ", + tooltip=row['species'], + icon=folium.Icon(**kw) + ).add_to(map_) + #st.info(f"Added marker for {row['name']} {row['lat']} {row['lon']}") + + st_data = st_folium(map_, width=725) + + # workaround for correctly showing js components in tabs + js_show_zeroheight_iframe( + component_iframe_title="streamlit_folium.st_folium", + height=800, + ) + # this is just debug info -- + #st.info("[D]" + str(metadata.column_names)) + + return st_data \ No newline at end of file diff --git a/call_models/requirements.txt b/call_models/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..08dd3e4eadfb91213d36e799c82fcdc102df7028 --- /dev/null +++ b/call_models/requirements.txt @@ -0,0 +1,17 @@ +transformers +streamlit +huggingface_hub +torch + +pandas +numpy + +datasets + +# for nice map tiles +folium +streamlit_folium + +# for ceatatean +pytorch_lightning +timm diff --git a/call_models/st_logs.py b/call_models/st_logs.py new file mode 100644 index 0000000000000000000000000000000000000000..ae4d9401133aed5c005475c37b9bff0fb409c995 --- /dev/null +++ b/call_models/st_logs.py @@ -0,0 +1,128 @@ +import logging +from datetime import datetime +import re +from collections import deque + +import streamlit as st + +# some discussions with code snippets from: +# https://discuss.streamlit.io/t/capture-and-display-logger-in-ui/69136 + +# configure log parsing (seems to need some tweaking) +_log_n_re = r'\[(\d+)\]' +_log_date_re = 
r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3})' +_log_mod_re = r'(\w+(?:\.\w+)*|__\w+__|<\w+>)' +_log_func_re = r'(\w+|<\w+>)' +_log_level_re = r'(\w+)' +_log_msg_re = '(.*)' +_sep = r' - ' + +log_pattern = re.compile(_log_n_re + _log_date_re + _sep + _log_mod_re + _sep + + _log_func_re + _sep + _log_level_re + _sep + _log_msg_re) + + +class StreamlitLogHandler(logging.Handler): + # Initializes a custom log handler with a Streamlit container for displaying logs + def __init__(self, container, maxlen:int=15, debug:bool=False): + super().__init__() + # Store the Streamlit container for log output + self.container = container + self.debug = debug + self.ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])') # Regex to remove ANSI codes + self.log_area = self.container.empty() # Prepare an empty conatiner for log output + + self.buffer = deque(maxlen=maxlen) + self._n = 0 + + def n_elems(self, verb:bool=False): + ''' return a string with num elements seen and num elements in buffer ''' + if verb: + return f"total: {self._n}|| in buffer:{len(self.buffer)}" + + return f"{self._n}||{len(self.buffer)}" + + def emit(self, record): + self._n += 1 + msg = f"[{self._n}]" + self.format(record) + self.buffer.append(msg) + clean_msg = self.ansi_escape.sub('', msg) # Strip ANSI codes + if self.debug: + self.log_area.markdown(clean_msg) + + def clear_logs(self): + self.log_area.empty() # Clear previous logs + self.buffer.clear() + +# Set up logging to capture all info level logs from the root logger +@st.cache_resource +def setup_logging(level: int=logging.INFO, buffer_len:int=15): + root_logger = logging.getLogger() # Get the root logger + log_container = st.container() # Create a container within which we display logs + handler = StreamlitLogHandler(log_container, maxlen=buffer_len) + handler.setLevel(level) + + formatter = logging.Formatter('%(asctime)s - %(name)s - %(funcName)s - %(levelname)s - %(message)s') + handler.setFormatter(formatter) + root_logger.addHandler(handler) + + #if 'handler' not in st.session_state: + # st.session_state['handler'] = handler + return handler + +def parse_log_buffer(log_contents: deque) -> list: + ''' convert log buffer to a list of dictionaries ''' + j = 0 + records = [] + for line in log_contents: + if line: # Skip empty lines + j+=1 + try: + # regex to parsse log lines, with an example line: + # '[1]2024-11-09 11:19:06,688 - task - run - INFO - 🏃 Running task ' + match = log_pattern.match(line) + if match: + n, timestamp_str, name, func_name, level, message = match.groups() + + # Convert timestamp string to datetime + timestamp = datetime.strptime(timestamp_str, '%Y-%m-%d %H:%M:%S,%f') + + records.append({ + 'timestamp': timestamp, + 'n': n, + 'level': level, + 'module': name, + 'func': func_name, + 'message': message + }) + except Exception as e: + print(f"Failed to parse line: {line}") + print(f"Error: {e}") + continue + return records + +def something(): + '''function to demo adding log entries''' + logger = logging.getLogger(__name__) + logger.setLevel(logging.DEBUG) + logger.debug("debug message") + logger.info("info message") + logger.warning("warning message") + logger.error("error message") + logger.critical("critical message") + + +if __name__ == "__main__": + + # create a logging handler for streamlit + regular python logging module + handler = setup_logging() + + # get buffered log data and parse, ready for display as dataframe + records = parse_log_buffer(handler.buffer) + + c1, c2 = st.columns([1, 3]) + with c1: + button = st.button("do 
something", on_click=something) + with c2: + st.info(f"Length of records: {len(records)}") + #tab = st.table(records) + tab = st.dataframe(records[::-1], use_container_width=True) # scrollable, selectable. diff --git a/call_models/test_upload.py b/call_models/test_upload.py new file mode 100644 index 0000000000000000000000000000000000000000..543ea8f783511d990cee603c527add6502053fed --- /dev/null +++ b/call_models/test_upload.py @@ -0,0 +1,49 @@ +from huggingface_hub import HfApi +import json +import tempfile +import os + +#huggingface_hub + +submission = {'latitude': '3.5', 'longitude': '44', 'author_email': + 'super@whale.org', 'date': '2024-10-25', 'time': '12:07:04.487612', + 'predicted_class': 'bottlenose_dolphin', 'class_overriden': None, + 'image_filename': '000a8f2d5c316a.webp', 'image_md5': + 'd41d8cd98f00b204e9800998ecf8427e'} + +imgname = submission['image_filename'] + +api = HfApi() + + +# generate a tempdirectory to store the image +#tempdir = tempfile.TemporaryDirectory() +# write a tempfile + +# write submission to a tempfile in json format with the name of the image, giving the filename and path as a string + +f = tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) +jstr = json.dumps(submission) +f.write(jstr) +f.close() + + + +#with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f: +# jstr = json.dumps(submission) +# f.write(jstr) +# #print(f.path) + +path_in_repo= f"metadata/{submission['author_email']}/{submission['image_md5']}.json" +print(f"fname: {f.name} | path: {path_in_repo}") +rv = api.upload_file( + path_or_fileobj=f.name, + path_in_repo=path_in_repo, + repo_id="Saving-Willy/Happywhale-kaggle", + repo_type="dataset", +) +print(rv) + + + + \ No newline at end of file diff --git a/call_models/whale_gallery.py b/call_models/whale_gallery.py new file mode 100644 index 0000000000000000000000000000000000000000..ef0ec8a80e4ab7d5e2d99522765227fed7faff9f --- /dev/null +++ b/call_models/whale_gallery.py @@ -0,0 +1,89 @@ +from itertools import cycle +import streamlit as st + +import whale_viewer as sw_wv + +def render_whale_gallery(n_cols:int = 4): + """ + A function to display a gallery of whale images in a grid + """ + def format_whale_name(name): + return name.replace("_", " ").capitalize() + + # make a grid of images, use some css to get more uniform + # https://discuss.streamlit.io/t/grid-of-images-with-the-same-height/10668/12 + # nb: I think there are some community components, need to investigate their usage + st.markdown( + """ + +""", + unsafe_allow_html=True, +) + + cols = cycle(st.columns(n_cols)) + for ix in range(len(sw_wv.df_whale_img_ref)): + img_name = sw_wv.df_whale_img_ref.iloc[ix].loc["WHALE_IMAGES"] + whale_name = format_whale_name(str(sw_wv.df_whale_img_ref.iloc[ix].name)) + url = sw_wv.df_whale_img_ref.iloc[ix].loc['WHALE_REFERENCES'] + image_path = f"images/references/{img_name}" + #next(cols).image(image_path, width=150, caption=f"{whale_name}") + thing = next(cols) + with thing: + with st.container(border=True): + # using the caption for name is most compact but no link. 
+ #st.image(image_path, width=150, caption=f"{whale_name}") + st.image(image_path, width=150) + #st.markdown(f"[{whale_name}]({url})" ) # doesn't seem to allow styling, just do in raw html:w + html = f"" + st.markdown(html, unsafe_allow_html=True) + + + #next(cols).image(image_path, width=150, caption=f"{whale_name}") + + +if __name__ == "__main__": + ''' example usage, with some other elements to help illustrate how + streamlit keys can be used to target specific css properties + ''' + # define a container just to hold a couple of elements + header_cont = st.container(key='swheader') + with header_cont: + c1, c2 = st.columns([2, 3]) + c1.markdown('left') + c2.button("Refresh Gallery (noop)") + # here we make a container to allow filtering css properties + # specific to the gallery (otherwise we get side effects) + tg_cont = st.container(key="swgallery") + with tg_cont: + render_whale_gallery(n_cols=4) + + pass \ No newline at end of file diff --git a/call_models/whale_viewer.py b/call_models/whale_viewer.py new file mode 100644 index 0000000000000000000000000000000000000000..faf70ab8a83459af876b939033a8688e35be85eb --- /dev/null +++ b/call_models/whale_viewer.py @@ -0,0 +1,145 @@ +from typing import List + +from PIL import Image +import pandas as pd + +WHALE_CLASSES = [ + "beluga", + "blue_whale", + "bottlenose_dolphin", + "brydes_whale", + "commersons_dolphin", + "common_dolphin", + "cuviers_beaked_whale", + "dusky_dolphin", + "false_killer_whale", + "fin_whale", + "frasiers_dolphin", + "gray_whale", + "humpback_whale", + "killer_whale", + "long_finned_pilot_whale", + "melon_headed_whale", + "minke_whale", + "pantropic_spotted_dolphin", + "pygmy_killer_whale", + "rough_toothed_dolphin", + "sei_whale", + "short_finned_pilot_whale", + "southern_right_whale", + "spinner_dolphin", + "spotted_dolphin", + "white_sided_dolphin", + ] + +WHALE_IMAGES = [ + "beluga.webp", + "blue-whale.webp", + "bottlenose_dolphin.webp", + "brydes.webp", + "common_dolphin.webp", + "common_dolphin.webp", + "cuviers_beaked_whale.webp", + "common_dolphin.webp", + "false-killer-whale.webp", + "fin-whale.webp", + "fin-whale.webp", + "gray-whale.webp", + "Humpback.webp", + "killer_whale.webp", + "640x427-long-finned-pilot-whale.webp", + "melon.webp", + "minke-whale.webp", + "pantropical-spotted-dolphin.webp", + "pygmy-killer-whale.webp", + "rough-toothed-dolphin.webp", + "sei.webp", + "Whale_Short-Finned_Pilot-markedDW.png", ## Background + "640x427-southern-right-whale.jpg", ## background + "spinner.webp", + "pantropical-spotted-dolphin.webp", ## duplicate also used for + "640x427-atlantic-white-sided-dolphin.jpg", ##background + ] + +WHALE_REFERENCES = [ + "https://www.fisheries.noaa.gov/species/beluga-whale", + "https://www.fisheries.noaa.gov/species/blue-whale", + "https://www.fisheries.noaa.gov/species/common-bottlenose-dolphin", + "https://www.fisheries.noaa.gov/species/brydes-whale", + "https://en.wikipedia.org/wiki/Commerson's_dolphin", + #"commersons_dolphin - reference missing - classification to be verified", ## class matching to be verified + "https://www.fisheries.noaa.gov/species/short-beaked-common-dolphin", + "https://www.fisheries.noaa.gov/species/cuviers-beaked-whale", + "https://en.wikipedia.org/wiki/Dusky_dolphin", + #"dusky_dolphin - reference missing - classification to be verified", ## class matching to be verified + "https://www.fisheries.noaa.gov/species/false-killer-whale", + "https://www.fisheries.noaa.gov/species/fin-whale", + "https://www.fisheries.noaa.gov/species/frasers-dolphin", + 
#"frasiers_dolphin - reference missing - classification to be verified", ## class matching to be verified + "https://www.fisheries.noaa.gov/species/gray-whale", + "https://www.fisheries.noaa.gov/species/humpback-whale", + "https://www.fisheries.noaa.gov/species/killer-whale", + "https://www.fisheries.noaa.gov/species/long-finned-pilot-whale", + "https://www.fisheries.noaa.gov/species/melon-headed-whale", + "https://www.fisheries.noaa.gov/species/minke-whale", + "https://www.fisheries.noaa.gov/species/pantropical-spotted-dolphin", + "https://www.fisheries.noaa.gov/species/pygmy-killer-whale", + "https://www.fisheries.noaa.gov/species/rough-toothed-dolphin", + "https://www.fisheries.noaa.gov/species/sei-whale", + "https://www.fisheries.noaa.gov/species/short-finned-pilot-whale", + "https://www.fisheries.noaa.gov/species/southern-right-whale", + "https://www.fisheries.noaa.gov/species/spinner-dolphin", + "https://www.fisheries.noaa.gov/species/pantropical-spotted-dolphin", + "https://www.fisheries.noaa.gov/species/atlantic-white-sided-dolphin", + ] + +# Create a dataframe +df_whale_img_ref = pd.DataFrame( + { + "WHALE_CLASSES": WHALE_CLASSES, + "WHALE_IMAGES": WHALE_IMAGES, + "WHALE_REFERENCES": WHALE_REFERENCES, + } +).set_index("WHALE_CLASSES") + +def format_whale_name(whale_class:str): + whale_name = whale_class.replace("_", " ").title() + return whale_name + + +def display_whale(whale_classes:List[str], i:int, viewcontainer=None): + """ + Display whale image and reference to the provided viewcontainer. + + Args: + whale_classes (List[str]): A list of whale class names. + i (int): The index of the whale class to display. + viewcontainer: The container to display the whale information. If + not provided, use the current streamlit context (works via + 'with