feat: multi image input
- Dockerfile +42 -0
- README.md +1 -1
- basic_map/app.py +0 -21
- basic_map/app1.py +0 -42
- basic_map/requirements.txt +0 -4
- docs/app.md +0 -5
- requirements.txt +1 -1
- src/input_handling.py +59 -62
- src/main.py +82 -76
Dockerfile
ADDED
@@ -0,0 +1,42 @@
+FROM ubuntu:latest
+
+RUN apt-get update
+RUN apt-get install python3 python3-pip -y
+
+# https://stackoverflow.com/questions/75608323/how-do-i-solve-error-externally-managed-environment-every-time-i-use-pip-3
+# https://veronneau.org/python-311-pip-and-breaking-system-packages.html
+ENV PIP_BREAK_SYSTEM_PACKAGES 1
+
+
+##################################################
+# Ubuntu setup
+##################################################
+
+RUN apt-get update \
+    && apt-get install -y wget \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN apt-get update && apt-get -y upgrade \
+    && apt-get install -y --no-install-recommends \
+        unzip \
+        nano \
+        git \
+        g++ \
+        gcc \
+        htop \
+        zip \
+        ca-certificates \
+    && rm -rf /var/lib/apt/lists/*
+
+##################################################
+# ODTP setup
+##################################################
+
+RUN mkdir /app
+COPY . /saving-willy
+RUN pip3 install --upgrade setuptools
+RUN pip3 install -r /saving-willy/requirements.txt
+
+WORKDIR /saving-willy
+
+ENTRYPOINT bash
README.md
CHANGED
@@ -28,7 +28,7 @@ pip install -r requirements.txt
 ```
 
 ```
-streamlit run
+streamlit run src/main.py
 ```
 
 
basic_map/app.py
DELETED
@@ -1,21 +0,0 @@
-import pandas as pd
-import streamlit as st
-import folium
-
-from streamlit_folium import st_folium
-from streamlit_folium import folium_static
-
-
-visp_loc = 46.295833, 7.883333
-#m = folium.Map(location=visp_loc, zoom_start=9)
-
-
-st.markdown("# :whale: :whale: Cetaceans :red[& friends] :balloon:")
-
-m = folium.Map(location=visp_loc, zoom_start=9,
-               tiles='https://tile.opentopomap.org/{z}/{x}/{y}.png',
-               attr='<a href="https://opentopomap.org/">Open Topo Map</a>')
-
-folium_static(m)
-
-
basic_map/app1.py
DELETED
@@ -1,42 +0,0 @@
-# lets try using map stuff without folium, maybe stlite doesnt support that.
-
-import streamlit as st
-import pandas as pd
-
-# Load data
-f = 'mountains_clr.csv'
-df = pd.read_csv(f).dropna()
-
-print(df)
-
-st.markdown("# :whale: :whale: Cetaceans :red[& friends] :balloon:")
-
-st.markdown("## :mountain: Mountains")
-st.markdown(f"library version: **{st.__version__}**")
-# not sure where my versions are getting pegged from, but we have a 1y spread :(
-# https://github.com/streamlit/streamlit/blob/1.24.1/lib/streamlit/elements/map.py
-# rather hard to find the docs for old versions, no selector unlike many libraries.
-
-visp_loc = 46.295833, 7.883333
-tile_xyz = 'https://tile.opentopomap.org/{z}/{x}/{y}.png'
-tile_attr = '<a href="https://opentopomap.org/">Open Topo Map</a>'
-st.map(df, latitude='lat', longitude='lon', color='color', size='size', zoom=7)
-#, tiles=tile_xyz, attr=tile_attr)
-
-#st.map(df)
-
-#st.map(df, latitude="col1", longitude="col2", size="col3", color="col4")
-
-import numpy as np
-
-df2 = pd.DataFrame(
-    {
-        "col1": np.random.randn(1000) / 50 + 37.76,
-        "col2": np.random.randn(1000) / 50 + -122.4,
-        "col3": np.random.randn(1000) * 100,
-        "col4": np.random.rand(1000, 4).tolist(),
-    }
-)
-#st.map(df, latitude="col1", longitude="col2", size="col3", color="col4")
-
-
basic_map/requirements.txt
DELETED
@@ -1,4 +0,0 @@
-streamlit
-folium
-streamlit-folium
-
docs/app.md
DELETED
@@ -1,5 +0,0 @@
-Here is the documentation for the app code generating the streamlit front-end.
-
-# Streamlit App
-
-::: basic_map.app
requirements.txt
CHANGED
@@ -1,4 +1,4 @@
-numpy==1.
+numpy==1.26.4
 pandas==2.2.3
 
 
src/input_handling.py
CHANGED
@@ -12,9 +12,10 @@ from streamlit.delta_generator import DeltaGenerator
 import cv2
 import numpy as np
 
+import random
+import string
+
 m_logger = logging.getLogger(__name__)
-# we can set the log level locally for funcs in this module
-#g_m_logger.setLevel(logging.DEBUG)
 m_logger.setLevel(logging.INFO)
 
 '''
@@ -22,11 +23,8 @@ A module to setup the input handling for the whale observation guidance tool
 
 both the UI elements (setup_input_UI) and the validation functions.
 '''
-#allowed_image_types = ['webp']
 allowed_image_types = ['jpg', 'jpeg', 'png', 'webp']
 
-import random
-import string
 def generate_random_md5():
     # Generate a random string
     random_string = ''.join(random.choices(string.ascii_letters + string.digits, k=16))
@@ -115,8 +113,6 @@ class InputObservation:
             "author_email": self.author_email,
             "date": self.date,
             "time": self.time,
-            # "date_option": self.date_option,
-            # "time_option": self.time_option,
             "date_option": str(self.date_option),
             "time_option": str(self.time_option),
             "uploaded_filename": self.uploaded_filename
@@ -168,7 +164,8 @@ def is_valid_email(email:str) -> bool:
     return re.match(pattern, email) is not None
 
 # Function to extract date and time from image metadata
-def get_image_datetime(image_file: UploadedFile) -> str | None:
+# def get_image_datetime(image_file: UploadedFile) -> str | None:
+def get_image_datetime(image_file):
     """
     Extracts the original date and time from the EXIF metadata of an uploaded image file.
 
@@ -204,7 +201,6 @@ spoof_metadata = {
     "time": None,
 }
 
-#def display_whale(whale_classes:List[str], i:int, viewcontainer=None):
 def setup_input(
     viewcontainer: DeltaGenerator=None,
     _allowed_image_types: list=None, ) -> InputObservation:
@@ -232,61 +228,62 @@ def setup_input(
 
     viewcontainer.title("Input image and data")
 
-    # 1.
-    uploaded_filename = viewcontainer.file_uploader("Upload an image", type=allowed_image_types)
-    image_datetime = None  # For storing date-time from image
-
-    if uploaded_filename is not None:
-        # Display the uploaded image
-        #image = Image.open(uploaded_filename)
-        # load image using cv2 format, so it is compatible with the ML models
-        file_bytes = np.asarray(bytearray(uploaded_filename.read()), dtype=np.uint8)
-        image = cv2.imdecode(file_bytes, 1)
-
-
-        viewcontainer.image(image, caption='Uploaded Image.', use_column_width=True)
-        # store the image in the session state
-        st.session_state.image = image
-
-
-        # Extract and display image date-time
-        image_datetime = get_image_datetime(uploaded_filename)
-        print(f"[D] image date extracted as {image_datetime}")
-        m_logger.debug(f"image date extracted as {image_datetime} (from {uploaded_filename})")
-
-
-    # 2. Latitude Entry Box
-    latitude = viewcontainer.text_input("Latitude", spoof_metadata.get('latitude', ""))
-    if latitude and not is_valid_number(latitude):
-        viewcontainer.error("Please enter a valid latitude (numerical only).")
-        m_logger.error(f"Invalid latitude entered: {latitude}.")
-    # 3. Longitude Entry Box
-    longitude = viewcontainer.text_input("Longitude", spoof_metadata.get('longitude', ""))
-    if longitude and not is_valid_number(longitude):
-        viewcontainer.error("Please enter a valid longitude (numerical only).")
-        m_logger.error(f"Invalid latitude entered: {latitude}.")
-
-    # 4. Author Box with Email Address Validator
+    # 1. Input the author email
     author_email = viewcontainer.text_input("Author Email", spoof_metadata.get('author_email', ""))
-
     if author_email and not is_valid_email(author_email):
         viewcontainer.error("Please enter a valid email address.")
 
-    #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    # 2. Image Selector
+    uploaded_files = viewcontainer.file_uploader("Upload an image", type=allowed_image_types, accept_multiple_files=True)
+    observations = {}
+    images = {}
+    if uploaded_files is not None:
+        for file in uploaded_files:
+
+            viewcontainer.title(f"Metadata for {file.name}")
+
+            # Display the uploaded image
+            # load image using cv2 format, so it is compatible with the ML models
+            file_bytes = np.asarray(bytearray(file.read()), dtype=np.uint8)
+            filename = file.name
+            image = cv2.imdecode(file_bytes, 1)
+            # Extract and display image date-time
+            image_datetime = None  # For storing date-time from image
+            image_datetime = get_image_datetime(file)
+            m_logger.debug(f"image date extracted as {image_datetime} (from {uploaded_files})")
+
+
+            # 3. Latitude Entry Box
+            latitude = viewcontainer.text_input("Latitude for "+filename, spoof_metadata.get('latitude', ""))
+            if latitude and not is_valid_number(latitude):
+                viewcontainer.error("Please enter a valid latitude (numerical only).")
+                m_logger.error(f"Invalid latitude entered: {latitude}.")
+            # 4. Longitude Entry Box
+            longitude = viewcontainer.text_input("Longitude for "+filename, spoof_metadata.get('longitude', ""))
+            if longitude and not is_valid_number(longitude):
+                viewcontainer.error("Please enter a valid longitude (numerical only).")
+                m_logger.error(f"Invalid longitude entered: {longitude}.")
+            # 5. Date/time
+            ## first from image metadata
+            if image_datetime is not None:
+                time_value = datetime.datetime.strptime(image_datetime, '%Y:%m:%d %H:%M:%S').time()
+                date_value = datetime.datetime.strptime(image_datetime, '%Y:%m:%d %H:%M:%S').date()
+            else:
+                time_value = datetime.datetime.now().time()  # Default to current time
+                date_value = datetime.datetime.now().date()
+
+            ## if not, give user the option to enter manually
+            date_option = st.sidebar.date_input("Date for "+filename, value=date_value)
+            time_option = st.sidebar.time_input("Time for "+filename, time_value)
+
+            observation = InputObservation(image=file, latitude=latitude, longitude=longitude,
+                                           author_email=author_email, date=image_datetime, time=None,
+                                           date_option=date_option, time_option=time_option)
+            observations[file.name] = observation
+            images[file.name] = image
+
+    st.session_state.image = images
+    st.session_state.files = uploaded_files
+
+    return observations
 
src/main.py
CHANGED
@@ -77,14 +77,14 @@ def metadata2md() -> str:
 
     """
     markdown_str = "\n"
-    for key, value in st.session_state.full_data.items():
+    for key, value in st.session_state.public_observation.items():
         markdown_str += f"- **{key}**: {value}\n"
     return markdown_str
 
 
-def push_observation(tab_log:DeltaGenerator=None):
+def push_observations(tab_log:DeltaGenerator=None):
     """
-    Push the observation to the Hugging Face dataset
+    Push the observations to the Hugging Face dataset
 
     Args:
         tab_log (streamlit.container): The container to log messages to. If not provided,
@@ -94,12 +94,12 @@ def push_observation(tab_log:DeltaGenerator=None):
     """
     # we get the data from session state: 1 is the dict 2 is the image.
     # first, lets do an info display (popup)
-    metadata_str = json.dumps(st.session_state.full_data)
+    metadata_str = json.dumps(st.session_state.public_observation)
 
-    st.toast(f"Uploading observation: {metadata_str}", icon="🦭")
+    st.toast(f"Uploading observations: {metadata_str}", icon="🦭")
     tab_log = st.session_state.tab_log
     if tab_log is not None:
-        tab_log.info(f"Uploading observation: {metadata_str}")
+        tab_log.info(f"Uploading observations: {metadata_str}")
 
     # get huggingface api
     import os
@@ -111,7 +111,7 @@ def push_observation(tab_log:DeltaGenerator=None):
     f.close()
     st.info(f"temp file: {f.name} with metadata written...")
 
-    path_in_repo= f"metadata/{st.session_state.full_data['author_email']}/{st.session_state.full_data['image_md5']}.json"
+    path_in_repo= f"metadata/{st.session_state.public_observation['author_email']}/{st.session_state.public_observation['image_md5']}.json"
     msg = f"fname: {f.name} | path: {path_in_repo}"
     print(msg)
     st.warning(msg)
@@ -134,7 +134,7 @@ def main() -> None:
 
     The organisation is as follows:
 
-    1. data input (a new
+    1. data input (new observations) is handled in the sidebar
     2. the rest of the interface is organised in tabs:
 
     - cetacean classifier
@@ -161,12 +161,12 @@ def main() -> None:
     st.session_state.tab_log = tab_log
 
 
-    # create a sidebar, and parse all the input (returned as `observation` object)
-    observation = sw_inp.setup_input(viewcontainer=st.sidebar)
+    # create a sidebar, and parse all the input (returned as `observations` object)
+    observations = sw_inp.setup_input(viewcontainer=st.sidebar)
 
 
     if 0:## WIP
-        # goal of this code is to allow the user to override the ML prediction, before transmitting an
+        # goal of this code is to allow the user to override the ML prediction, before transmitting an observation
        predicted_class = st.sidebar.selectbox("Predicted Class", sw_wv.WHALE_CLASSES)
        override_prediction = st.sidebar.checkbox("Override Prediction")
 
@@ -236,18 +236,13 @@ def main() -> None:
     # Display submitted data
     if st.sidebar.button("Validate"):
         # create a dictionary with the submitted data
-        submitted_data =
-
-
-        #full_data.update(**submitted_data)
-        for k, v in submitted_data.items():
-            st.session_state.full_data[k] = v
+        submitted_data = observations
+        st.session_state.full_data = observations
 
-        #st.write(f"full dict of data: {json.dumps(submitted_data)}")
-        #tab_inference.info(f"{st.session_state.full_data}")
         tab_log.info(f"{st.session_state.full_data}")
 
-        df = pd.DataFrame(submitted_data
+        df = pd.DataFrame(submitted_data)
+        print("Dataframe Shape: ", df.shape)
        with tab_data:
            st.table(df)
 
@@ -259,7 +254,7 @@ def main() -> None:
     # - the model predicts the top 3 most likely species from the input image
     # - these species are shown
     # - the user can override the species prediction using the dropdown
-    # - an
+    # - an observation is uploaded if the user chooses.
 
     if tab_inference.button("Identify with cetacean classifier"):
         #pipe = pipeline("image-classification", model="Saving-Willy/cetacean-classifier", trust_remote_code=True)
@@ -271,44 +266,53 @@ def main() -> None:
             # TODO: cleaner design to disable the button until data input done?
             st.info("Please upload an image first.")
         else:
-
-
-            st.session_state.
-
-
-
-
-
-
-
-
-
-
-
-
-            #
-
-
-
-
-
-
-
+            files = st.session_state.files
+            images = st.session_state.image
+            full_data = st.session_state.full_data
+            for file in files:
+                image = images[file.name]
+                data = full_data[file.name]
+                # run classifier model on `image`, and persistently store the output
+                out = cetacean_classifier(image) # get top 3 matches
+                st.session_state.whale_prediction1 = out['predictions'][0]
+                st.session_state.classify_whale_done = True
+                msg = f"[D]2 classify_whale_done: {st.session_state.classify_whale_done}, whale_prediction1: {st.session_state.whale_prediction1}"
+                # st.info(msg)
+                g_logger.info(msg)
+
+                # dropdown for selecting/overriding the species prediction
+                #st.info(f"[D] classify_whale_done: {st.session_state.classify_whale_done}, whale_prediction1: {st.session_state.whale_prediction1}")
+                if not st.session_state.classify_whale_done:
+                    selected_class = tab_inference.selectbox("Species", sw_wv.WHALE_CLASSES,
+                                                             index=None, placeholder="Species not yet identified...",
+                                                             disabled=True)
+                else:
+                    pred1 = st.session_state.whale_prediction1
+                    # get index of pred1 from WHALE_CLASSES, none if not present
+                    print(f"[D] pred1: {pred1}")
+                    ix = sw_wv.WHALE_CLASSES.index(pred1) if pred1 in sw_wv.WHALE_CLASSES else None
+                    selected_class = tab_inference.selectbox("Species", sw_wv.WHALE_CLASSES, index=ix)
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                data['predicted_class'] = selected_class
+                if selected_class != st.session_state.whale_prediction1:
+                    data['class_overriden'] = selected_class
+
+                st.session_state.public_observation = data
+                st.button("Upload observations to THE INTERNET!", on_click=push_observations)
+                # TODO: the metadata only fills properly if `validate` was clicked.
+                tab_inference.markdown(metadata2md())
+
+                msg = f"[D] full data after inference: {data}"
+                g_logger.debug(msg)
+                print(msg)
+                # TODO: add a link to more info on the model, next to the button.
+
+                whale_classes = out['predictions'][:]
+                # render images for the top 3 (that is what the model api returns)
+                with tab_inference:
+                    st.markdown("## Species detected")
+                    for i in range(len(whale_classes)):
+                        sw_wv.display_whale(whale_classes, i)
 
 
 
@@ -325,27 +329,29 @@ def main() -> None:
 
         if st.session_state.image is None:
             st.info("Please upload an image first.")
-            st.info(str(observation.to_dict()))
+            st.info(str(observations))
 
         else:
             col1, col2 = tab_hotdogs.columns(2)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            for file in st.session_state.files:
+                image = st.session_state.image[file.name]
+                data = st.session_state.full_data[file.name]
+                # display the image (use cached version, no need to reread)
+                col1.image(image, use_column_width=True)
+                # and then run inference on the image
+                hotdog_image = Image.fromarray(image)
+                predictions = pipeline_hot_dog(hotdog_image)
+
+                col2.header("Probabilities")
+                first = True
+                for p in predictions:
+                    col2.subheader(f"{ p['label'] }: { round(p['score'] * 100, 1)}%")
+                    if first:
+                        data['predicted_class'] = p['label']
+                        data['predicted_score'] = round(p['score'] * 100, 1)
+                        first = False
+
+                tab_hotdogs.write(f"Session Data: {json.dumps(data)}")
 
 
 
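For reference, `push_observations` derives its upload path from two keys the observation dict is expected to carry, `author_email` and `image_md5`. A hedged sketch of that path construction, with a plausible completion of `generate_random_md5` from src/input_handling.py (the commit only shows the random-string line; the hashing step and the example email are assumptions):

```python
import hashlib
import json
import random
import string

def generate_random_md5():
    # plausible completion: hash a random 16-char alphanumeric string
    # and return the hex digest (the commit shows only the first line)
    random_string = ''.join(random.choices(string.ascii_letters + string.digits, k=16))
    return hashlib.md5(random_string.encode()).hexdigest()

# hypothetical observation payload carrying the two keys push_observations reads
public_observation = {
    "author_email": "observer@example.com",
    "image_md5": generate_random_md5(),
}
path_in_repo = f"metadata/{public_observation['author_email']}/{public_observation['image_md5']}.json"
print(path_in_repo)   # metadata/observer@example.com/<32 hex chars>.json
print(json.dumps(public_observation))
```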