rmm-ch committed · Commit 821ac40 · unverified · 2 Parent(s): 8ccb11f 8795702

Merge pull request #33 from sdsc-ordes/feat/stateful-workflow

.github/workflows/python-pycov-onPR.yml CHANGED
@@ -24,8 +24,10 @@ jobs:
           if [ -f tests/requirements.txt ]; then pip install -r tests/requirements.txt; fi

       - name: Build coverage files for mishakav commenter action
+        # note this will run all non-visual tests, including the slow end2end ones
+        # - this action is only on PR; the slow ones are skipped on push.
         run: |
-          pytest --junitxml=pytest.xml --cov-report=term-missing:skip-covered --cov=src tests/ | tee pytest-coverage.txt
+          pytest -s -m "not visual" --ignore=tests/visual_selenium --junitxml=pytest.xml --cov-report=term-missing:skip-covered --cov=src tests/ | tee pytest-coverage.txt
           echo "working dir:" && pwd
           echo "files in cwd:" && ls -ltr

@@ -36,4 +38,4 @@ jobs:
           junitxml-path: ./pytest.xml

       #- name: Comment coverage
-      #  uses: coroo/[email protected]
+      #  uses: coroo/[email protected]
.github/workflows/python-pytest.yml CHANGED
@@ -5,7 +5,7 @@ name: Execute tests with pytest

 on:
   push:
-    branches: [ "dev" ]
+    branches: [ "dev" ]
   pull_request:
     branches: [ "dev", "main" ]
 permissions:
@@ -31,6 +31,6 @@ jobs:
     #    flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
     #    # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
     #    flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
-    - name: Test with pytest
+    - name: Run quick tests with pytest
       run: |
-        pytest
+        pytest -m "not slow and not visual" --strict-markers --ignore=tests/visual_selenium
.github/workflows/python-visualtests.yml ADDED
@@ -0,0 +1,53 @@
+name: Visual tests of the app
+
+on:
+  workflow_dispatch:
+  push:
+    branches: [ "feat/stateful-workflow" ]
+  #pull_request:
+  #  branches: [ "dev", "main" ]
+
+
+permissions:
+  contents: read
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v4
+    - name: Set up Python 3.10
+      uses: actions/setup-python@v3
+      with:
+        python-version: "3.10"
+
+
+    - name: install chrome
+      run: |
+        sudo apt-get install -y wget
+        sudo bash -c "wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - \
+          && echo "deb http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google.list"
+        sudo apt-get update && sudo apt-get -y install google-chrome-stable
+
+    - name: Install py dependencies
+      run: |
+        python -m pip install --upgrade pip
+        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+        if [ -f tests/requirements.txt ]; then pip install -r tests/requirements.txt; fi
+        if [ -f tests/visual_selenium/requirements_visual.txt ]; then pip install -r tests/visual_selenium/requirements_visual.txt; fi
+
+    - name: Check the console scripts interface
+      run: |
+        seleniumbase
+        sbase
+    - name: Install chromedriver
+      run: |
+        seleniumbase install chromedriver
+
+
+
+    - name: Run visual tests with pytest
+      # we use --demo to make it slow enough (selenium doesn't wait long enough
+      # otherwise, not one step it consistently fails at.)
+      run: |
+        pytest -m "visual" --strict-markers tests/visual_selenium/ -s --demo
.gitignore CHANGED
@@ -142,6 +142,7 @@ venv.bak/

 # mkdocs documentation
 /site
+docs/site

 # mypy
 .mypy_cache/
docs/{classifier_cetacean.md → classifier_image.md} RENAMED
@@ -1,3 +1,3 @@
 This module documents the cetacean fluke and fin classifier.

-::: src.classifier.cetacean_image
+::: src.classifier.classifier_image
docs/dev_notes.md CHANGED
@@ -44,6 +44,25 @@ mkdocs build -c

 # Testing

+## use of markers
+
+The CI runs with `--strict-markers` so any new marker must be registered in
+`pytest.ini`.
+
+- the basic CI action runs the fast tests only, skipping all tests marked
+  `visual` and `slow`
+- the CI action on PR runs the `slow` tests, but still excluding `visual`.
+- TODO: a new action for the visual tests is to be developed.
+
+Check all tests are marked ok, and that they are filtered correctly by the
+groupings used in CI:
+```bash
+pytest --collect-only -m "not slow and not visual" --strict-markers --ignore=tests/visual_selenium
+pytest --collect-only -m "not visual" --strict-markers --ignore=tests/visual_selenium
+```
+
+
+
 ## local testing
 To run the tests locally, we have the standard dependencies of the project, plus the test runner dependencies.

@@ -74,6 +93,39 @@ pytest --junit-xml=test-results.xml
 pytest --cov-report=lcov --cov=src
 ```

+## local testing for visual tests
+
+We use seleniumbase to test the visual appearance of the app, including the
+presence of elements that appear through the workflow. This testing takes quite
+a long time to execute and is not yet configured with CI.
+
+```bash
+# install packages for app and for visual testing
+pip install -r requirements.txt
+pip install -r tests/visual_selenium/requirements_visual.txt
+```
+
+**Running tests**
+The execution of these tests requires that the site/app is running already.
+
+In one tab:
+```bash
+streamlit run src/main.py
+```
+
+In another tab:
+```bash
+# run just the visual tests
+pytest -m "visual" --strict-markers
+# run in demo mode, using firefox (default is chrome)
+pytest -m "visual" --strict-markers -s --browser=firefox --demo
+
+# the inverse set:
+pytest -m "not slow and not visual" --strict-markers --ignore=tests/visual_selenium
+
+```
+
+

 ## CI testing

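For orientation, a minimal sketch of what a `visual`-marked seleniumbase test could look like. This is not part of the commit: the class name, test name, page check, and URL are hypothetical, and it assumes the streamlit app is already running locally.

```python
# hypothetical sketch only: a seleniumbase test carrying the "visual" marker
import pytest
from seleniumbase import BaseCase

@pytest.mark.visual
class TestVisualSmoke(BaseCase):
    def test_app_serves_a_page(self):
        # assumes `streamlit run src/main.py` is already serving on the default port
        self.open("http://localhost:8501")
        self.assert_element("body")
```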
 
docs/st_logs.md CHANGED
@@ -3,5 +3,5 @@ This module provides utilities to incorporate a standard python logger within st

 # Streamlit log handler

-::: src.st_logs
+::: src.utils.st_logs

mkdocs.yaml CHANGED
@@ -33,7 +33,7 @@ nav:
     - Data extraction and validation: input_validator.md
     - Data Object Class: input_observation.md
     - Classifiers:
-      - Cetacean Fluke & Fin Recognition: classifier_cetacean.md
+      - Cetacean Fluke & Fin Recognition: classifier_image.md
       - (temporary) Hotdog Classifier: hotdog.md
     - Hugging Face Integration:
       - Push Observations to Dataset: hf_push_observations.md
pytest.ini CHANGED
@@ -3,3 +3,8 @@ pythonpath = "src"
 testpaths =
     tests

+markers =
+    component:
+    end2end: following several steps of workflow
+    visual: rendering the site (using selenium)
+    slow: more than 5 seconds
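As an aside (not part of the diff), these registered markers are what the CI selection expressions above key off. A hypothetical test module might apply them like this; the test names are illustrative only:

```python
# hypothetical example: applying the registered markers so that the CI filters
# (-m "not slow and not visual", -m "not visual", -m "visual") pick the right tests
import pytest

@pytest.mark.component
def test_fast_component_check():
    assert True  # runs in every CI job

@pytest.mark.slow
@pytest.mark.end2end
def test_full_workflow():
    assert True  # skipped on push, included in the on-PR coverage job

@pytest.mark.visual
def test_rendering_in_browser():
    assert True  # only collected by the visual-tests workflow
```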
src/apptest/demo_elements.py ADDED
@@ -0,0 +1,25 @@
+# a small library of elements used in testing, presenting some
+# processed data in simple ways that are easily testable via AppTest
+from typing import List
+import streamlit as st
+from input.input_handling import (
+    get_image_datetime, get_image_latlon
+)
+
+def show_uploaded_file_info():
+    if "file_uploader_data" not in st.session_state or \
+        not st.session_state.file_uploader_data:
+
+        st.write("No files uploaded yet")
+        return
+
+    st.write("the buffered files:")
+
+    uploaded_files:List = st.session_state.file_uploader_data
+    for ix, file in enumerate(uploaded_files):
+        image_datetime_raw = get_image_datetime(file)
+        latitude0, longitude0 = get_image_latlon(file)
+        s = f"index: {ix}, name: {file.name}, datetime: {image_datetime_raw}, lat: {latitude0}, lon:{longitude0}"
+        st.text_area(f"{file.name}", value=s, key=f"metadata_{ix}")
+        print(s)
+
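The keyed `text_area` elements created here are what the AppTest-based tests added later in this PR assert against; a trimmed sketch of that pattern (taken from `tests/test_demo_input_sidebar.py`):

```python
# trimmed sketch of the AppTest pattern used by tests/test_demo_input_sidebar.py
from streamlit.testing.v1 import AppTest

at = AppTest.from_file("src/apptest/demo_input_sidebar.py", default_timeout=10).run()
# once files have been buffered (the tests mock streamlit.file_uploader for this),
# each file's metadata appears in a keyed text_area:
#   at.text_area(key="metadata_0").value
```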
src/apptest/demo_input_sidebar.py ADDED
@@ -0,0 +1,44 @@
+# a chunk of the full app, covering the elements presented on the sidebar
+# - this includes both input and workflow items.
+import streamlit as st
+
+# to run streamlit from this subdir, we need the src dir on the path
+# NOTE: pytest doesn't need this to run the tests, but to develop the test
+# harness is hard without running streamlit
+import sys
+from os import path
+# src (parent from here)
+src_dir = path.dirname( path.dirname( path.abspath(__file__) ) )
+sys.path.append(src_dir)
+
+from input.input_handling import (
+    init_input_data_session_states,
+    init_input_container_states,
+    add_input_UI_elements,
+    setup_input,
+)
+from utils.workflow_ui import refresh_progress_display, init_workflow_viz, init_workflow_session_states
+
+from apptest.demo_elements import show_uploaded_file_info
+
+
+
+if __name__ == "__main__":
+
+    init_input_data_session_states()
+    init_input_container_states()
+    init_workflow_session_states()
+
+    init_workflow_viz()
+
+
+    with st.sidebar:
+        refresh_progress_display()
+        # layout handling
+        add_input_UI_elements()
+        # input elements (file upload, text input, etc)
+        setup_input()
+
+    # as a debug, let's add some text_area elements to show the files (no clash
+    # with testing the prod app since we don't use text_area at all)
+    show_uploaded_file_info ()
src/apptest/demo_multifile_upload.py ADDED
@@ -0,0 +1,65 @@
+# a minimal snippet for validating the upload sequence, for testing purposes (with AppTest)
+from typing import List
+import streamlit as st
+
+# to run streamlit from this subdir, we need the src dir on the path
+# NOTE: pytest doesn't need this to run the tests, but to develop the test
+# harness is hard without running streamlit
+import sys
+from os import path
+# src (parent from here)
+src_dir = path.dirname( path.dirname( path.abspath(__file__) ) )
+sys.path.append(src_dir)
+
+# we aim to validate:
+# - user uploads multiple files via FileUploader (with key=file_uploader_data)
+# - they get buffered into session state
+# - some properties are extracted from the files, and are displayed in a visual
+#   element so we can validate them with apptest.
+
+
+from input.input_handling import (
+    spoof_metadata, is_valid_email,
+    get_image_datetime, get_image_latlon,
+    init_input_data_session_states
+)
+
+def buffer_uploaded_files():
+    st.write("buffering files! ")
+    uploaded_files:List = st.session_state.file_uploader_data
+    for ix, file in enumerate(uploaded_files):
+        image_datetime_raw = get_image_datetime(file)
+        latitude0, longitude0 = get_image_latlon(file)
+        #st.write(f"- file {ix}: {file.name}")
+        #st.write(f"  - datetime: {image_datetime_raw}")
+        #st.write(f"  - lat/lon: {latitude0}, {longitude0}")
+        s = f"index: {ix}, name: {file.name}, datetime: {image_datetime_raw}, lat: {latitude0}, lon:{longitude0}"
+        st.text_area(f"{file.name}", value=s, key=f"metadata_{ix}")
+        print(s)
+
+init_input_data_session_states()
+
+with st.sidebar:
+    author_email = st.text_input("Author Email", spoof_metadata.get('author_email', ""),
+                                 key="input_author_email")
+    if author_email and not is_valid_email(author_email):
+        st.error("Please enter a valid email address.")
+
+    st.file_uploader(
+        "Upload one or more images", type=["png", 'jpg', 'jpeg', 'webp'],
+        accept_multiple_files=True,
+        key="file_uploader_data",
+        on_change=buffer_uploaded_files
+    )
+
+# this is the callback that would be triggered by the FileUploader
+# - unfortunately, we get into a mess now
+# - in real app, this runs twice and breaks (because of the duplicate keys)
+# - in the test, if we don't run manually, we don't get the frontend elements to validate
+# - if we remove the on_change, both run ok. but it deviates from the true app.
+# - possible ways forward?
+#   - could we patch the on_change, or substitute the buffer_uploaded_files?
+if (1 and "file_uploader_data" in st.session_state and
+    len(st.session_state.file_uploader_data) ):
+    print(f"buffering files: {len(st.session_state.file_uploader_data)}")
+    buffer_uploaded_files()
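The "patch the on_change / substitute buffer_uploaded_files" question raised in the comment above is what the new tests in this PR work around: they mock `streamlit.file_uploader` and place the mock files into session state themselves. A trimmed sketch of that pattern (the test name here is illustrative; the fixtures that build the mock files are defined in `tests/test_demo_multifile_upload.py`):

```python
# trimmed sketch of the mocking pattern used by tests/test_demo_multifile_upload.py
from unittest.mock import MagicMock, patch
from streamlit.testing.v1 import AppTest

@patch("streamlit.file_uploader")
def test_upload_is_buffered(mock_file_uploader: MagicMock):
    mock_files = []  # in the real tests, a fixture builds MockUploadedFile objects
    mock_file_uploader.return_value = mock_files

    at = AppTest.from_file("src/apptest/demo_multifile_upload.py").run()
    # place the mocked upload result where the app expects it, then re-run
    at.session_state["file_uploader_data"] = mock_files
    at.run()
```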
src/classifier/__init__.py ADDED
File without changes
src/input/__init__.py ADDED
File without changes
src/input/input_handling.py CHANGED
@@ -116,7 +116,7 @@ def buffer_uploaded_files():


     # get files from state
-    uploaded_files = st.session_state.file_uploader_data
+    uploaded_files:List = st.session_state.file_uploader_data

     filenames = []
     images = {}
@@ -127,6 +127,7 @@ def buffer_uploaded_files():
         print(f"[D] processing {ix}th file {filename}. {file.file_id} {file.type} {file.size}")
         # image to np and hash both require reading the file so do together
         image, image_hash = load_file_and_hash(file)
+        print(f"  [D] {ix}th file {filename} hash: {image_hash}")

         filenames.append(filename)
         image_hashes.append(image_hash)
@@ -178,7 +179,7 @@ def metadata_inputs_one_file(file:UploadedFile, image_hash:str, dbg_ix:int=0) ->
         _viewcontainer = st.session_state.container_metadata_inputs
     else:
         _viewcontainer = st.sidebar
-        m_logger.warning(f"[W] `container_metadata_inputs` is None, using sidebar")
+        m_logger.warning("[W] `container_metadata_inputs` is None, using sidebar")



@@ -261,7 +262,7 @@ def _setup_dynamic_inputs() -> None:
     # - and these go into session state

     # load the files from the session state
-    uploaded_files = st.session_state.files
+    uploaded_files:List = st.session_state.files
     hashes = st.session_state.image_hashes
     #images = st.session_state.images
     observations = {}
@@ -368,7 +369,7 @@ def init_input_data_session_states() -> None:
         st.session_state.images = {}

     if "files" not in st.session_state:
-        st.session_state.files = {}
+        st.session_state.files = []

     if "public_observations" not in st.session_state:
         st.session_state.public_observations = {}
src/input/input_validator.py CHANGED
@@ -22,7 +22,7 @@ def generate_random_md5(length:int=16) -> str:
     """

     # Generate a random string
-    random_string = ''.join(random.choices(string.ascii_letters + string.digits, length=16))
+    random_string = ''.join(random.choices(string.ascii_letters + string.digits, k=length))
    # Encode the string and compute its MD5 hash
    md5_hash = hashlib.md5(random_string.encode()).hexdigest()
    return md5_hash
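For context on the one-line fix above: `random.choices` takes its sample size via the keyword `k` (it has no `length` argument), so the old call raised a `TypeError` at runtime. A self-contained check that mirrors the fixed function:

```python
# mirrors the fixed generate_random_md5: `k=length` is the correct keyword
import hashlib
import random
import string

def generate_random_md5_demo(length: int = 16) -> str:
    random_string = ''.join(random.choices(string.ascii_letters + string.digits, k=length))
    return hashlib.md5(random_string.encode()).hexdigest()

assert len(generate_random_md5_demo()) == 32  # an md5 hex digest is always 32 chars
```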
src/main.py CHANGED
@@ -140,7 +140,7 @@ def main() -> None:
         # the goal of this tab is to allow selection of the new obsvation's location by map click/adjust.
         st.markdown("Coming later! :construction:")
         st.markdown(
-            f"""*The goal is to allow interactive definition for the coordinates of a new
+            """*The goal is to allow interactive definition for the coordinates of a new
             observation, by click/drag points on the map.*""")


@@ -226,7 +226,8 @@ def main() -> None:

     if st.session_state.workflow_fsm.is_in_state('data_entry_validated'):
         # show the button, enabled. If pressed, we start the ML model (And advance state)
-        if tab_inference.button("Identify with cetacean classifier"):
+        if tab_inference.button("Identify with cetacean classifier",
+                                key="button_infer_ceteans"):
             cetacean_classifier = AutoModelForImageClassification.from_pretrained(
                 "Saving-Willy/cetacean-classifier",
                 revision=classifier_revision,
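A brief aside on the `key=` addition above (our explanation, not from the commit message): an explicit key gives the button a stable identity, which avoids duplicate-widget clashes if another button ever shares the same label and makes it addressable from test code independently of its label. Minimal illustration in a hypothetical script:

```python
# minimal illustration (hypothetical script): a keyed button has a stable identity
# that does not depend on its label text
import streamlit as st

if st.button("Identify with cetacean classifier", key="button_infer_ceteans"):
    st.write("running classifier...")

# in an AppTest-style test, the key can be used to locate and click the button
```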
src/maps/__init__.py ADDED
File without changes
src/utils/__init__.py ADDED
File without changes
tests/data/rand_images/img_000.jpg ADDED
tests/data/rand_images/img_001.jpg ADDED
tests/data/rand_images/img_002.jpg ADDED
tests/data/rand_images/img_003.jpg ADDED
tests/data/rand_images/img_004.jpg ADDED
tests/test_demo_input_sidebar.py ADDED
@@ -0,0 +1,264 @@
1
+
2
+ from pathlib import Path
3
+ from io import BytesIO
4
+ from PIL import Image
5
+ import numpy as np
6
+
7
+ import pytest
8
+ from unittest.mock import MagicMock, patch
9
+ from streamlit.testing.v1 import AppTest
10
+ from datetime import datetime, timedelta
11
+ import time
12
+
13
+ from input.input_handling import spoof_metadata
14
+ from input.input_observation import InputObservation
15
+ from input.input_handling import buffer_uploaded_files
16
+
17
+ from streamlit.runtime.uploaded_file_manager import UploadedFile
18
+
19
+ from test_demo_multifile_upload import (
20
+ mock_uploadedFile_List_ImageData, mock_uploadedFile,
21
+ MockUploadedFile, )
22
+
23
+
24
+ # decorator that counts the number of times a function is called
25
+ def count_calls(func):
26
+ def wrapper(*args, **kwargs):
27
+ wrapper.called += 1
28
+ return func(*args, **kwargs)
29
+ wrapper.called = 0
30
+ return wrapper
31
+
32
+
33
+ @count_calls
34
+ def wrapped_buffer_uploaded_files(*args, **kwargs):
35
+ import streamlit as st
36
+ uploaded_files = st.session_state.file_uploader_data
37
+ _cprint(f"[I] buffering files in my side-effect! cool | {len(uploaded_files)}")
38
+ for i, (key, img) in enumerate(st.session_state.images.items()):
39
+ _cprint(f" - image {i}: {type(img)} [{key}]")
40
+
41
+ buffer_uploaded_files() # nowcall the real prod func
42
+ _cprint(f"[I] finished the real buffering ! cool | {len(uploaded_files)}")
43
+
44
+
45
+ @count_calls
46
+ def wrapped_buffer_uploaded_files_allowed_once(*args, **kwargs):
47
+ # this is a wrapper that only allows the real function to be called once
48
+ # - this is to prevent the side-effect from being called multiple times
49
+ # - the callback is only invoked when the input data is changed for the
50
+ # real file_uploader object; but due to the 're-run script on interaction'
51
+ # model, the side-effect is called each time.
52
+ import streamlit as st
53
+ uploaded_files = st.session_state.file_uploader_data
54
+ if len(st.session_state.images) != 0:
55
+ _cprint(f"[I] buffering already called before, side-effect! not rerun inner func | {len(uploaded_files)} | {len(st.session_state.images)}")
56
+ for i, (key, img) in enumerate(st.session_state.images.items()):
57
+ _cprint(f" - image {i}: {type(img)} [{key}]")
58
+ return
59
+
60
+ _cprint(f"[I] buffering files in my side-effect! cool | {len(uploaded_files)}")
61
+ for i, (key, img) in enumerate(st.session_state.images.items()):
62
+ _cprint(f" - image {i}: {type(img)} [{key}]")
63
+
64
+ buffer_uploaded_files() # nowcall the real prod func
65
+ _cprint(f"[I] finished the real buffering ! cool | {len(uploaded_files)}")
66
+
67
+
68
+
69
+ # - tests for apptest/demo_input_sidebar
70
+
71
+ # zero test: no inputs
72
+ # -> empty session state
73
+ # -> file_uploader with no files, ready to accept input
74
+ # -> a couple of containers
75
+ # -> not much on the main tab.
76
+
77
+ # many test: list of 2 inputs
78
+ # -> session state with 2 files
79
+ # -> file_uploader with 2 files, ready to accept more
80
+ # -> the metadata container will have two groups inside, with several input elements
81
+ # -> the main tab will have a couple of text_area elements showing the uploaded file metadata
82
+
83
+
84
+ OKBLUE = '\033[94m'
85
+ OKGREEN = '\033[92m'
86
+ OKCYAN = '\033[96m'
87
+ FAIL = '\033[91m'
88
+ PURPLE = '\033[35m'
89
+ ENDC = '\033[0m'
90
+
91
+ def _cprint(msg:str, color:str=OKCYAN):
92
+ print(f"{color}{msg}{ENDC}")
93
+
94
+
95
+ TIMEOUT = 10
96
+ #SCRIPT_UNDER_TEST = "src/main.py"
97
+ SCRIPT_UNDER_TEST = "src/apptest/demo_input_sidebar.py"
98
+
99
+ def verify_initial_session_state(at:AppTest):
100
+ # the initialised states we expect
101
+ # - container_file_uploader exists
102
+ # - container_metadata_inputs exists
103
+ # - observations {}
104
+ # - image_hashes []
105
+ # - images {}
106
+ # - files []
107
+ # - public_observations {}
108
+ assert at.session_state.observations == {}
109
+ assert at.session_state.image_hashes == []
110
+ assert at.session_state.images == {}
111
+ assert at.session_state.files == []
112
+ assert at.session_state.public_observations == {}
113
+ assert "container_file_uploader" in at.session_state
114
+ assert "container_metadata_inputs" in at.session_state
115
+
116
+ def verify_session_state_after_processing_files(at:AppTest, num_files:int):
117
+ # this is after buffering & metadata extraction, but *BEFORE* the ML is run.
118
+
119
+ # now we've processed the files and got metadata, we expect some
120
+ # changes in the elements in the session_state (x=same)
121
+ # x container_file_uploader exists
122
+ # x container_metadata_inputs exists
123
+ # - observations 2 elements, keys -> some hashes. values: InputObservation objects
124
+ # - image_hashes 2 elements, hashes (str) |
125
+ # - images {} 2 elements, keys -> hashes, values -> np.ndarray.
126
+ # - files [] a list of 2 MockUploadedFile objects
127
+ # x public_observations {}
128
+ # I think just verify the sizes and types, we could do a data integrity
129
+ # check on the hashes matching everywhere, but that is far from visual.
130
+
131
+ assert len(at.session_state.observations) == num_files
132
+ for obs in at.session_state.observations.values():
133
+ assert isinstance(obs, InputObservation)
134
+ assert len(at.session_state.image_hashes) == num_files
135
+ for hash in at.session_state.image_hashes:
136
+ assert isinstance(hash, str)
137
+ assert len(at.session_state.images) == num_files
138
+ for img in at.session_state.images.values():
139
+ assert isinstance(img, np.ndarray)
140
+ assert len(at.session_state.image_hashes) == num_files
141
+ for hash in at.session_state.image_hashes:
142
+ assert isinstance(hash, str)
143
+ assert len(at.session_state.files) == num_files
144
+ for file in at.session_state.files:
145
+ assert isinstance(file, MockUploadedFile)
146
+ assert isinstance(file, BytesIO) # cool it looks like the FileUploader.
147
+ #assert isinstance(file, UploadedFile) no... it isn't but bytesIO is the parent class
148
+
149
+ assert at.session_state.public_observations == {}
150
+
151
+ def verify_metadata_in_demo_display(at:AppTest, num_files:int):
152
+ # we can check the metadata display in the main area
153
+ # - this presentation is not part of the normal app, but is a test-only feature
154
+
155
+ if 'src/main.py' in SCRIPT_UNDER_TEST:
156
+ raise ValueError("This test is not valid for the main app, only for unit/component test snippets")
157
+
158
+ # finally we can check the main area, where the metadata is displayed
159
+ # since we uplaoded num_files files, hopefully we get num_files text areas
160
+ assert len(at.text_area) == num_files
161
+ # expecting
162
+ exp0 = "index: 0, name: cakes.jpg, datetime: 2024:10:24 15:59:45, lat: 46.51860277777778, lon:6.562075"
163
+ exp1 = "index: 1, name: cakes_no_exif_datetime.jpg, datetime: None, lat: 46.51860277777778, lon:6.562075"
164
+ exp2 = "index: 2, name: cakes_no_exif_gps.jpg, datetime: 2024:10:24 15:59:45, lat: None, lon:None"
165
+
166
+ assert at.text_area[0].value == exp0
167
+ assert at.text_area[1].value == exp1
168
+ if num_files >= 1:
169
+ assert at.text_area(key='metadata_0').value == exp0
170
+ if num_files >= 2:
171
+ assert at.text_area(key='metadata_1').value == exp1
172
+ if num_files >= 3:
173
+ assert at.text_area(key='metadata_2').value == exp2
174
+
175
+ @pytest.mark.component
176
+ def test_no_input_no_interaction():
177
+
178
+ # zero test: no inputs
179
+ # -> empty session state (ok many initialised, but empty data)
180
+ # -> file_uploader with no files, ready to accept input
181
+ # -> a couple of containers
182
+ # -> not much on the main tab.
183
+
184
+ at = AppTest.from_file(SCRIPT_UNDER_TEST, default_timeout=10).run()
185
+ verify_initial_session_state(at)
186
+
187
+ assert at.session_state.input_author_email == spoof_metadata.get("author_email")
188
+
189
+ # print (f"[I] whole tree: {at._tree}")
190
+ # for elem in at.sidebar.markdown:
191
+ # print("\t", elem.value)
192
+
193
+ # do some basic checks on what is present in the sidebar
194
+ assert len(at.sidebar.divider) == 1
195
+
196
+ # in the sidebar, we have the progress indicator, then the fileuploader and metadata inputs
197
+ # - annoyingly we can't use keys for markdown.
198
+ # - so we are sensitive to the order.
199
+ # - we could grab all the text, and just be content with presence of the target strings
200
+ # anywhere in the sidebar? that would be more robust at least.
201
+ assert "Progress: 0/5" in at.sidebar.markdown[0].value
202
+ assert "st-key-container_file_uploader_id" in at.sidebar.markdown[1].value
203
+ assert "st-key-container_metadata_inputs_id" in at.sidebar.markdown[2].value
204
+ assert "Metadata Inputs... wait for file upload" in at.sidebar.markdown[3].value
205
+
206
+ # there should be 1 input, for the author_email, in this path (no files uploaded)
207
+ assert len(at.sidebar.text_input) == 1
208
+
209
+ # can't check for the presence of containers (they are st.Block elements in the tree)
210
+ # - no way to access the list of them, nor by key/id. nor by getter (unlike
211
+ # images which seem to have an undocumented accessor, "imgs")
212
+ # best we can do is check that the session state ids exist, which is really basic but ok
213
+ assert "container_file_uploader" in at.session_state
214
+ assert "container_metadata_inputs" in at.session_state
215
+ # wow, the keys defined in the constructor are not honoured in session_state, unlike with
216
+ # the text_input elements.
217
+ # code init -- st.container(border=True, key="container_file_uploader_id")
218
+ # so skip these ones for now.
219
+ # assert "container_file_uploader_id" in at.session_state
220
+ # assert "container_metadata_inputs_id" in at.session_state
221
+
222
+ @pytest.mark.component
223
+ @patch("streamlit.file_uploader")
224
+ def test_two_input_files_realdata(mock_file_rv: MagicMock, mock_uploadedFile_List_ImageData):
225
+ # many test: list of 2 inputs
226
+ # -> session state with 2 files
227
+ # -> file_uploader with 2 files, ready to accept more
228
+ # -> the metadata container will have two groups inside, with several input elements
229
+ # -> the main tab will have a couple of text_area elements showing the uploaded file metadata
230
+
231
+
232
+ # Create a list of 2 mock files
233
+ num_files = 2
234
+ mock_files = mock_uploadedFile_List_ImageData(num_files=num_files)
235
+
236
+ # Set the return value of the mocked file_uploader to the list of mock files
237
+ mock_file_rv.return_value = mock_files
238
+
239
+ # Run the Streamlit app
240
+ at = AppTest.from_file(SCRIPT_UNDER_TEST, default_timeout=TIMEOUT).run()
241
+ verify_initial_session_state(at)
242
+
243
+ # put the mocked file_upload into session state, as if it were the result of a file upload, with the key 'file_uploader_data'
244
+ at.session_state["file_uploader_data"] = mock_files
245
+ # the side effect cant run until now (need file_uploader_data to be set)
246
+ mock_file_rv.side_effect = wrapped_buffer_uploaded_files
247
+
248
+ print(f"[I] session state: {at.session_state}")
249
+ at.run()
250
+ print(f"[I] session state: {at.session_state}")
251
+ print(f"full tree: {at._tree}")
252
+
253
+ verify_session_state_after_processing_files(at, num_files)
254
+
255
+ # and then there are plenty of visual elements, based on the image hashes.
256
+ for hash in at.session_state.image_hashes:
257
+ # check that each of the 4 inputs is present
258
+ assert at.sidebar.text_input(key=f"input_latitude_{hash}") is not None
259
+ assert at.sidebar.text_input(key=f"input_longitude_{hash}") is not None
260
+ assert at.sidebar.date_input(key=f"input_date_{hash}") is not None
261
+ assert at.sidebar.time_input(key=f"input_time_{hash}") is not None
262
+
263
+ if 'demo_input_sidebar' in SCRIPT_UNDER_TEST:
264
+ verify_metadata_in_demo_display(at, num_files)
tests/test_demo_multifile_upload.py ADDED
@@ -0,0 +1,282 @@
1
+ from typing import Protocol, runtime_checkable
2
+
3
+ from pathlib import Path
4
+ from io import BytesIO
5
+ from PIL import Image
6
+
7
+ import pytest
8
+ from unittest.mock import MagicMock, patch
9
+ from streamlit.testing.v1 import AppTest
10
+
11
+ # tests for apptest/demo_multifile_upload
12
+ # - the functionality in the test harness is a file_uploader that is configured
13
+ # for multi-file input; and uses a callback to buffer the files into session state.
14
+ # - the handling of individual files includes extracting metadata from the files
15
+ # - a text_area is created for each file, to display the metadata extracted;
16
+ # this deviates from the presentation in the real app, but the extracted info
17
+ # is the same (here we put it all in text which is far easier to validate using AppTest)
18
+ # - the demo also has the author email input
19
+
20
+
21
+ # zero test: no inputs -> empty session state
22
+ # (or maybe even non-existent session state; for file_uploader we are not
23
+ # allowed to initialise the keyed variable, st borks)
24
+
25
+ # many test: list of >=2 inputs -> session state with 2 files
26
+
27
+
28
+ # for expectations
29
+ from input.input_handling import spoof_metadata
30
+ from input.input_validator import get_image_datetime, get_image_latlon
31
+
32
+
33
+ @runtime_checkable
34
+ class UploadedFile(Protocol):
35
+ name: str
36
+ size: int
37
+ type: str
38
+ #RANDO: str
39
+ _file_urls: list
40
+
41
+ def getvalue(self) -> bytes: ...
42
+ def read(self) -> bytes: ...
43
+
44
+
45
+ class MockUploadedFile(BytesIO):
46
+ def __init__(self,
47
+ initial_bytes: bytes,
48
+ *,
49
+ name: str,
50
+ size: int,
51
+ type: str):
52
+ super().__init__(initial_bytes)
53
+ self.name = name # Simulate a filename
54
+ self.size = size # Simulate file size
55
+ self.type = type # Simulate MIME type
56
+ self.file_id = None
57
+
58
+
59
+ @pytest.fixture
60
+ def mock_uploadedFile():
61
+ def _mock_uploadedFile(name: str, size: int, type: str):
62
+ test_data = b'test data'
63
+ # now load some real data, if fname exists
64
+ base = Path(__file__).parent.parent
65
+ fname = Path(base / f"tests/data/{name}")
66
+
67
+ if fname.exists():
68
+ with open(fname, 'rb') as f:
69
+ #test_data = BytesIO(f.read())
70
+ test_data = f.read()
71
+ else:
72
+ #print(f"[DDDD] {name}, {size}, {type} not found")
73
+ raise FileNotFoundError(f"file {fname} not found ({name}, {size}, {type})")
74
+
75
+ return MockUploadedFile(
76
+ test_data, name=name, size=size, type=type,)
77
+
78
+ return _mock_uploadedFile
79
+
80
+
81
+ @pytest.fixture
82
+ def mock_uploadedFileNoRealData():
83
+ class MockGUIClassFakeData(MagicMock):
84
+ def __init__(self, *args, **kwargs):
85
+ super().__init__(*args, **kwargs)
86
+ name = kwargs.get('fname', 'image2.jpg')
87
+ size = kwargs.get('size', 123456)
88
+ type = kwargs.get('type', 'image/jpeg')
89
+ self.bytes_io = MockUploadedFile(
90
+ b"test data", name=name, size=size, type=type)
91
+ self.get_data = MagicMock(return_value=self.bytes_io)
92
+ # it seems unclear to me which member attributes get set by the MockUploadedFile constructor
93
+ # - for some reason, size and type get set, but name does not, and results in
94
+ # <MockGUIClass name='mock.name' id='<12345>'>.
95
+ # so let's sjust explicitly set all the relevant attributes here.
96
+ self.name = name
97
+ self.size = size
98
+ self.type = type
99
+
100
+ return MockGUIClassFakeData
101
+
102
+ @pytest.fixture
103
+ def mock_uploadedFile_List(mock_uploadedFileNoRealData):
104
+ def create_list_of_mocks(num_files=3, **kwargs):
105
+ return [mock_uploadedFileNoRealData(**kwargs) for _ in range(num_files)]
106
+ return create_list_of_mocks
107
+
108
+ @pytest.fixture
109
+ def mock_uploadedFile_List_ImageData(mock_uploadedFile):
110
+ def create_list_of_mocks_realdata(num_files=3, **kwargs):
111
+ print(f"[D] [mock_uploadedFile_List_Img-internal] num_files: {num_files}")
112
+ data = [
113
+ {"name": "cakes.jpg", "size": 1234, "type": "image/jpeg"},
114
+ {"name": "cakes_no_exif_datetime.jpg", "size": 12345, "type": "image/jpeg"},
115
+ {"name": "cakes_no_exif_gps.jpg", "size": 123456, "type": "image/jpeg"},
116
+ ]
117
+
118
+ _the_files = []
119
+ for i in range(num_files):
120
+ _the_files.append( mock_uploadedFile(**data[i]))
121
+
122
+ print(f"========== finished init of {num_files} mock_uploaded files | {len(_the_files)} ==========")
123
+ return _the_files
124
+
125
+ #return [mock_uploadedFile(**kwargs) for _ in range(num_files)]
126
+ return create_list_of_mocks_realdata
127
+
128
+
129
+ # simple tests on the author email input via AppTest
130
+ # - empty input should propagate to session state
131
+ # - invalid email should trigger an error
132
+ def test_no_input_no_interaction():
133
+ with patch.dict(spoof_metadata, {"author_email": None}):
134
+ at = AppTest.from_file("src/apptest/demo_multifile_upload.py").run()
135
+ assert at.session_state.observations == {}
136
+ assert at.session_state.input_author_email == None
137
+
138
+ at = AppTest.from_file("src/apptest/demo_multifile_upload.py").run()
139
+ assert at.session_state.observations == {}
140
+ assert at.session_state.input_author_email == spoof_metadata.get("author_email")
141
+
142
+ def test_bad_email():
143
+ with patch.dict(spoof_metadata, {"author_email": "notanemail"}):
144
+ at = AppTest.from_file("src/apptest/demo_multifile_upload.py").run()
145
+ assert at.session_state.input_author_email == "notanemail"
146
+ assert at.error[0].value == "Please enter a valid email address."
147
+
148
+
149
+ # test when we load real data files, with all properties as per real app
150
+ # - if files loaded correctly and metadata is extracted correctly, we should see the
151
+ # the data in both the session state and in the visual elements.
152
+ @patch("streamlit.file_uploader")
153
+ def test_mockupload_list_realdata(mock_file_rv: MagicMock, mock_uploadedFile_List_ImageData):
154
+ #def test_mockupload_list(mock_file_uploader_rtn: MagicMock, mock_uploadedFile_List):
155
+ num_files = 3
156
+ PRINT_PROPS = False
157
+ # Create a list of n mock files
158
+ mock_files = mock_uploadedFile_List_ImageData(num_files=num_files)
159
+
160
+ # Set the return value of the mocked file_uploader to the list of mock files
161
+ mock_file_rv.return_value = mock_files
162
+
163
+ # Run the Streamlit app
164
+ at = AppTest.from_file("src/apptest/demo_multifile_upload.py").run()
165
+
166
+ # put the mocked file_upload into session state, as if it were the result of a file upload, with the key 'file_uploader_data'
167
+ at.session_state["file_uploader_data"] = mock_files
168
+
169
+ #print(f"[I] session state: {at.session_state}")
170
+ #print(f"[I] uploaded files: {at.session_state.file_uploader_data}")
171
+
172
+ if PRINT_PROPS:
173
+ print(f"[I] uploaded files: ({len(at.session_state.file_uploader_data)}) {at.session_state.file_uploader_data}")
174
+ for _f in at.session_state.file_uploader_data:
175
+ #print(f"\t[I] props: {dir(_f)}")
176
+ print(f" [I] name: {_f.name}")
177
+ print(f"\t[I] size: {_f.size}")
178
+ print(f"\t[I] type: {_f.type}")
179
+ # lets make an image from the data
180
+ im = Image.open(_f)
181
+
182
+ # lets see what metadata we can get to.
183
+ dt = get_image_datetime(_f)
184
+ print(f"\t[I] datetime: {dt}")
185
+ lat, lon = get_image_latlon(_f)
186
+ print(f"\t[I] lat, lon: {lat}, {lon}")
187
+
188
+
189
+ # we expect to get the following info from the files
190
+ # file1:
191
+ # datetime: 2024:10:24 15:59:45
192
+ # lat, lon: 46.51860277777778, 6.562075
193
+ # file2:
194
+ # datetime: None
195
+ # lat, lon: 46.51860277777778, 6.562075
196
+
197
+ # let's run assertions on the backend data (session_state)
198
+ # and then on the front end too (visual elements)
199
+ f1 = at.session_state.file_uploader_data[0]
200
+ f2 = at.session_state.file_uploader_data[1]
201
+
202
+ assert get_image_datetime(f1) == "2024:10:24 15:59:45"
203
+ assert get_image_datetime(f2) == None
204
+ # use a tolerance of 1e-6, assert that the lat, lon is close to 46.5186
205
+ assert abs(get_image_latlon(f1)[0] - 46.51860277777778) < 1e-6
206
+ assert abs(get_image_latlon(f1)[1] - 6.562075) < 1e-6
207
+ assert abs(get_image_latlon(f2)[0] - 46.51860277777778) < 1e-6
208
+ assert abs(get_image_latlon(f2)[1] - 6.562075) < 1e-6
209
+
210
+ # need to run the script top-to-bottom to get the text_area elements
211
+ # since they are dynamically created.
212
+ at.run()
213
+
214
+ # since we uplaoded num_files files, hopefully we get num_files text areas
215
+ assert len(at.text_area) == num_files
216
+ # expecting
217
+ exp0 = "index: 0, name: cakes.jpg, datetime: 2024:10:24 15:59:45, lat: 46.51860277777778, lon:6.562075"
218
+ exp1 = "index: 1, name: cakes_no_exif_datetime.jpg, datetime: None, lat: 46.51860277777778, lon:6.562075"
219
+ exp2 = "index: 2, name: cakes_no_exif_gps.jpg, datetime: 2024:10:24 15:59:45, lat: None, lon:None"
220
+
221
+ assert at.text_area[0].value == exp0
222
+ assert at.text_area[1].value == exp1
223
+ if num_files >= 1:
224
+ assert at.text_area(key='metadata_0').value == exp0
225
+ if num_files >= 2:
226
+ assert at.text_area(key='metadata_1').value == exp1
227
+ if num_files >= 3:
228
+ assert at.text_area(key='metadata_2').value == exp2
229
+
230
+ # {"fname": "cakes.jpg", "size": 1234, "type": "image/jpeg"},
231
+ # {"fname": "cakes_no_exif_datetime.jpg", "size": 12345, "type": "image/jpeg"},
232
+ # {"fname": "cakes_no_exif_gps.jpg", "size": 123456, "type": "image/jpeg"},
233
+ #]
234
+
235
+
236
+ # Verify the behavior in your app
237
+ assert len(at.session_state.file_uploader_data) == num_files
238
+
239
+ assert at.session_state.file_uploader_data[0].size == 1234 # Check properties of the first file
240
+ assert at.session_state.file_uploader_data[1].name == "cakes_no_exif_datetime.jpg"
241
+
242
+
243
+ # this test was a stepping stone; when I was mocking files that didn't have any real data
244
+ # - it helped to explore how properties should be set in the mock object and generator funcs.
245
+ @patch("streamlit.file_uploader")
246
+ def test_mockupload_list(mock_file_uploader_rtn: MagicMock, mock_uploadedFile_List):
247
+ # Create a list of 2 mock files
248
+ mock_files = mock_uploadedFile_List(num_files=2, fname="test.jpg", size=100, type="image/jpeg")
249
+
250
+ # Set the return value of the mocked file_uploader to the list of mock files
251
+ mock_file_uploader_rtn.return_value = mock_files
252
+
253
+ # Run the Streamlit app
254
+ at = AppTest.from_file("src/apptest/demo_multifile_upload.py").run()
255
+
256
+ # put the mocked file_upload into session state, as if it were the result of a file upload, with the key 'file_uploader_data'
257
+ at.session_state["file_uploader_data"] = mock_files
258
+
259
+ #print(f"[I] session state: {at.session_state}")
260
+ #print(f"[I] uploaded files: {at.session_state.file_uploader_data}")
261
+
262
+ if 1:
263
+ print(f"[I] uploaded files: {at.session_state.file_uploader_data}")
264
+ for _f in at.session_state.file_uploader_data:
265
+ print(f"[I] props: {dir(_f)}")
266
+ print(f"[I] name: {_f.name}")
267
+ print(f"[I] size: {_f.size}")
268
+ print(f"[I] type: {_f.type}")
269
+ print(f"[I] data : {type(_f)} | {type(_f.return_value)} | {_f}")
270
+ # lets make an image from it.
271
+ #im = Image.open(_f)
272
+
273
+
274
+
275
+
276
+
277
+ # Verify behavior in the app
278
+ assert len(at.session_state.file_uploader_data) == 2
279
+
280
+ assert at.session_state.file_uploader_data[0].size == 100 # Check properties of the first file
281
+ assert at.session_state.file_uploader_data[1].name == "test.jpg" # Check properties of the second file
282
+
tests/test_input_observation.py ADDED
@@ -0,0 +1,422 @@
1
+ from typing import Protocol, runtime_checkable
2
+ import pytest
3
+ from unittest.mock import MagicMock, patch
4
+
5
+ from io import BytesIO
6
+ #from PIL import Image
7
+ import datetime
8
+ import numpy as np
9
+
10
+ #from streamlit.runtime.uploaded_file_manager import UploadedFile # for type hinting
11
+ #from typing import List, Union
12
+
13
+ from input.input_observation import InputObservation
14
+
15
+ @runtime_checkable
16
+ class UploadedFile(Protocol):
17
+ name: str
18
+ size: int
19
+ type: str
20
+ _file_urls: list
21
+
22
+ def getvalue(self) -> bytes: ...
23
+ def read(self) -> bytes: ...
24
+
25
+
26
+ class MockUploadedFile(BytesIO):
27
+ def __init__(self,
28
+ initial_bytes: bytes,
29
+ *, # enforce keyword-only arguments after now
30
+ name:str,
31
+ size:int,
32
+ type:str):
33
+ #super().__init__(*args, **kwargs)
34
+ super().__init__(initial_bytes)
35
+ self.name = name
36
+ self.size = size
37
+ self.type = type
38
+
39
+ self._file_urls = [None,]
40
+
41
+
42
+ @pytest.fixture
43
+ def mock_uploadedFile():
44
+ class MockGUIClass(MagicMock):
45
+ def __init__(self, *args, **kwargs):
46
+ super().__init__(*args, **kwargs)
47
+ name = kwargs.get('name', 'image2.jpg')
48
+ size = kwargs.get('size', 123456)
49
+ type = kwargs.get('type', 'image/jpeg')
50
+ self.bytes_io = MockUploadedFile(
51
+ b"test data", name=name, size=size, type=type)
52
+ self.get_data = MagicMock(return_value=self.bytes_io)
53
+ return MockGUIClass
54
+
55
+
56
+ # let's first generate a test for the mock_uploaded_file and MockUploadedFile class
57
+ # - test with valid input
58
+ def test_mock_uploaded_file(mock_uploadedFile):
59
+ # setup values for the test (all valid)
60
+ image_name = "test_image.jpg"
61
+ mock_file = mock_uploadedFile(name=image_name).get_data()
62
+
63
+ #print(dir(mock_file))
64
+ assert isinstance(mock_file, BytesIO)
65
+
66
+ assert mock_file.name == image_name
67
+ assert mock_file.size == 123456
68
+ assert mock_file.type == "image/jpeg"
69
+
70
+
71
+ # now we move on to test the class InputObservation
72
+ # - with valid input
73
+ # - with invalid input
74
+ # - with missing input
75
+
76
+ def test_input_observation_valid(mock_uploadedFile):
77
+ # image: ndarray
78
+ # lat, lon: float
79
+ # author_email: str
80
+ # date, time: datetime.date, datetime.time
81
+ #uploaded_file: UploadedFile (need to mock this)
82
+ # image_md5: str
83
+
84
+ # setup values for the test (all valid)
85
+
86
+ author_email = "[email protected]"
87
+ image_name = "test_image.jpg"
88
+ mock_file = mock_uploadedFile(name=image_name).get_data()
89
+
90
+ _date="2023-10-10"
91
+ _time="10:10:10"
92
+ image_datetime_raw = _date + " " + _time
93
+ dt = datetime.datetime.strptime(image_datetime_raw, "%Y-%m-%d %H:%M:%S")
94
+ date = dt.date()
95
+ time = dt.time()
96
+
97
+ ## make a random image with dtype uint8 using np.random.randint
98
+ image = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)
99
+ image_md5 = 'd1d2515e6f6ac4c5ca6dd739d5143cd4' # 32 hex chars.
100
+
101
+ obs = InputObservation(
102
+ image=image,
103
+ latitude=12.34, longitude=56.78, author_email=author_email,
104
+ time=time, date=date,
105
+ uploaded_file=mock_file,
106
+ image_md5=image_md5,
107
+ )
108
+
109
+ assert isinstance(obs.image, np.ndarray)
110
+ assert (obs.image == image).all()
111
+
112
+ assert obs.latitude == 12.34
113
+ assert obs.longitude == 56.78
114
+ assert obs.author_email == author_email
115
+ assert isinstance(obs.date, datetime.date)
116
+ assert isinstance(obs.time, datetime.time)
117
+ assert str(obs.date) == "2023-10-10"
118
+ assert str(obs.time) == "10:10:10"
119
+
120
+ assert obs.uploaded_file.name == image_name
121
+ assert obs.uploaded_file.size == 123456
122
+ assert obs.uploaded_file.type == "image/jpeg"
123
+
124
+ assert isinstance(obs.uploaded_file, BytesIO)
125
+ #assert isinstance(obs.uploaded_file, MockUploadedFile) # is there any point in checking the type of the mock, ?
126
+
127
+
128
+ # a list of tuples (strings that are the keys of "valid_inputs", expected error type)
129
+ # loop over the list, and for each tuple, create a dictionary with all valid inputs, and one invalid input
130
+ # assert that the function raises the expected error type
131
+
132
+ invalid_input_scenarios = [
133
+ ("author_email", TypeError),
134
+ ("image_name", TypeError),
135
+ ("uploaded_file", TypeError),
136
+ ("date", TypeError),
137
+ ("time", TypeError),
138
+ ("image", TypeError),
139
+ ("image_md5", TypeError),
140
+ ]
141
+
142
+ @pytest.mark.parametrize("key, error_type", invalid_input_scenarios)
143
+ def test_input_observation_invalid(key, error_type, mock_uploadedFile):
144
+ # correct datatypes are:
145
+ # - image: ndarray
146
+ # - lat, lon: float
147
+ # - author_email: str
148
+ # - date, time: datetime.date, datetime.time
149
+ # - uploaded_file: UploadedFile (need to mock this)
150
+ # - image_md5: str
151
+
152
+ # the most critical/likely to go wrong would presumably be
153
+ # - date, time (strings not datetime objects)
154
+ # - lat, lon (strings not numbers)
155
+ # - image (not ndarray, maybe accidentally a PIL object or maybe the filename)
156
+ # - uploaded_file (not UploadedFile, maybe a string, or maybe the ndarray)
157
+
158
+ # check it fails when any of the datatypes are wrong,
159
+ # even if the rest are all good want to loop over the inputs, take each one
160
+ # from a bad list, and all others from a good list, and assert fails for
161
+ # each one
162
+
163
+ # set up the good and bad inputs
164
+ _date="2023-10-10"
165
+ _time="10:10:10"
166
+ image_datetime_raw = _date + " " + _time
167
+ fname = "test_image.jpg"
168
+ image = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)
169
+
170
+ dt_ok = datetime.datetime.strptime(image_datetime_raw, "%Y-%m-%d %H:%M:%S")
171
+ valid_inputs = {
172
+ "author_email": "[email protected]",
173
+ "image_name": "test_image.jpg",
174
+ "uploaded_file": mock_uploadedFile(name=fname).get_data(),
175
+ "date": dt_ok.date(),
176
+ "time": dt_ok.time(),
177
+ "image": image,
178
+ "image_md5": 'd1d2515e6f6ac4c5ca6dd739d5143cd4', # 32 hex chars.
179
+ }
180
+ invalid_inputs = {
181
+ "author_email": "@example",
182
+ "image_name": 45,
183
+ "uploaded_file": image,
184
+ "date": _date,
185
+ "time": _time,
186
+ "image": fname,
187
+ "image_md5": 45643
188
+ }
189
+
190
+ # test a valid set of inputs, minus the target key, substituted for something invalid
191
+ inputs = valid_inputs.copy()
192
+ inputs[key] = invalid_inputs[key]
193
+
194
+ with pytest.raises(error_type):
195
+ obs = InputObservation(**inputs)
196
+
197
+ # now test the same key set to None
198
+ inputs = valid_inputs.copy()
199
+ inputs[key] = None
200
+ with pytest.raises(error_type):
201
+ obs = InputObservation(**inputs)
202
+
203
+
204
+ # we can take a similar approach to test equality.
205
+ # here, construct two dicts, each with valid inputs but all elements different.
206
+ # loop over the keys, and construct two InputObservations that differ on that key only.
207
+ # asser the expected output message.
208
+ # ah, it is the diff func that prints a message. Here we just assert boolean.
209
+
210
+ # we currently expect differences on time to be ignored.
211
+ inequality_keys = [
212
+ ("author_email", False),
213
+ ("uploaded_file", False),
214
+ ("date", False),
215
+ #("time", True),
216
+ pytest.param("time", False, marks=pytest.mark.xfail(reason="Time is currently ignored in __eq__")),
217
+ ("image", False),
218
+ ("image_md5", False),
219
+ ]
220
+ @pytest.mark.parametrize("key, expect_equality", inequality_keys)
221
+ def test_input_observation_equality(key, expect_equality, mock_uploadedFile):
222
+
223
+ # set up the two sets of good inputs
224
+ _date1 = "2023-10-10"
225
+ _time1 = "10:10:10"
226
+ image_datetime_raw1 = _date1 + " " + _time1
227
+ fname1 = "test_image.jpg"
228
+ image1 = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)
229
+ dt1 = datetime.datetime.strptime(image_datetime_raw1, "%Y-%m-%d %H:%M:%S")
230
+
231
+ _date2 = "2023-10-11"
232
+ _time2 = "12:13:14"
233
+ image_datetime_raw2 = _date2 + " " + _time2
234
+ fname2 = "test_image.jpg"
235
+ image2 = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)
236
+ dt2 = datetime.datetime.strptime(image_datetime_raw2, "%Y-%m-%d %H:%M:%S")
237
+ valid_inputs1 = {
238
+ "author_email": "[email protected]",
239
+ #"image_name": "test_image.jpg",
240
+ "uploaded_file": mock_uploadedFile(name=fname1).get_data(),
241
+ "date": dt1.date(),
242
+ "time": dt1.time(),
243
+ "image": image1,
244
+ "image_md5": 'd1d2515e6f6ac4c5ca6dd739d5143cd4', # 32 hex chars.
245
+ }
246
+
247
+ valid_inputs2 = {
248
+ "author_email": "[email protected]",
249
+ #"image_name": "another.jpg",
250
+ "uploaded_file": mock_uploadedFile(name=fname2).get_data(),
251
+ "date": dt2.date(),
252
+ "time": dt2.time(),
253
+ "image": image2,
254
+ "image_md5": 'cdb235587bdee5915d6ccfa52ca9f3ac', # 32 hex chars.
255
+ }
256
+
257
+ nearly_same_inputs = valid_inputs1.copy()
258
+ nearly_same_inputs[key] = valid_inputs2[key]
259
+ obs1 = InputObservation(**valid_inputs1)
260
+ obs2 = InputObservation(**nearly_same_inputs)
261
+
262
+ if expect_equality is True:
263
+ assert obs1 == obs2
264
+ else:
265
+ assert obs1 != obs2
266
+
267
+
268
+ # now let's test the setter methods (set_top_predictions, set_selected_class, set_class_overriden)
269
+ # ideally we get a fixture that produces a good / valid InputObservation object
270
+ # and from there, just test the setters + their expected changes / side effects
271
+
272
+ @pytest.fixture
273
+ def good_datadict_for_input_observation(mock_uploadedFile) -> dict:
274
+ # set up the good and bad inputs
275
+ _date="2023-10-10"
276
+ _time="10:10:10"
277
+ image_datetime_raw = _date + " " + _time
278
+ fname = "test_image.jpg"
279
+ image = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)
280
+
281
+ dt_ok = datetime.datetime.strptime(image_datetime_raw, "%Y-%m-%d %H:%M:%S")
282
+ valid_inputs = {
283
+ "author_email": "[email protected]",
284
+ "uploaded_file": mock_uploadedFile(name=fname).get_data(),
285
+ "date": dt_ok.date(),
286
+ "time": dt_ok.time(),
287
+ "image": image,
288
+ "image_md5": 'd1d2515e6f6ac4c5ca6dd739d5143cd4', # 32 hex chars.
289
+ "image_datetime_raw": image_datetime_raw,
290
+ "latitude": 12.34,
291
+ "longitude": 56.78,
292
+
293
+ }
294
+ return valid_inputs
295
+
296
+
297
+ @pytest.fixture
298
+ def good_input_observation(good_datadict_for_input_observation) -> InputObservation:
299
+ observation = InputObservation(**good_datadict_for_input_observation)
300
+
301
+ return observation
302
+
303
+
304
+ #
305
+ def test_input_observation__set_top_predictions_populated(good_input_observation):
306
+ obs = good_input_observation
307
+
308
+ # before setting, expect empty list
309
+ assert obs.top_predictions == []
310
+ assert obs.selected_class == None
311
+
312
+ # set >0,
313
+ # - expect to find the same list in the property/attribute
314
+ # - expect to find the first element in the selected_class
315
+ top_predictions = ["beluga", "blue_whale", "common_dolphin"]
316
+ obs.set_top_predictions(top_predictions)
317
+
318
+ assert len(obs.top_predictions) == 3
319
+ assert obs.top_predictions == top_predictions
320
+ assert obs.selected_class == "beluga"
321
+
322
+ def test_input_observation__set_top_predictions_unpopulated(good_input_observation):
323
+ obs = good_input_observation
324
+
325
+ # before setting, expect empty list
326
+ assert obs.top_predictions == []
327
+ assert obs.selected_class == None
328
+
329
+ # set to empty list,
330
+ # - expect to find the same list in the property/attribute
331
+ # - expect to find selected_class to be None
332
+ top_predictions = []
333
+ obs.set_top_predictions(top_predictions)
334
+
335
+ assert len(obs.top_predictions) == 0
336
+ assert obs.top_predictions == []
337
+ assert obs.selected_class == None
338
+
339
+ def test_input_observation__set_selected_class_default(good_input_observation):
340
+ obs = good_input_observation
341
+
342
+ # before setting, expect empty list
343
+ assert obs.top_predictions == []
344
+ assert obs.selected_class == None
345
+ assert obs.class_overriden == False
346
+
347
+ # set >0, and then set_selected_class to the first element
348
+ # - expect to find the same list in the property/attribute
349
+ # - expect to find the first element in the selected_class
350
+ # - expect class_overriden to be False
351
+ top_predictions = ["beluga", "blue_whale", "common_dolphin"]
352
+ obs.set_top_predictions(top_predictions)
353
+ obs.set_selected_class(top_predictions[0])
354
+
355
+ assert len(obs.top_predictions) == 3
356
+ assert obs.top_predictions == top_predictions
357
+ assert obs.selected_class == "beluga"
358
+
359
+ def test_input_observation__set_selected_class_override(good_input_observation):
360
+ obs = good_input_observation
361
+
362
+ # before setting, expect empty list
363
+ assert obs.top_predictions == []
364
+ assert obs.selected_class == None
365
+ assert obs.class_overriden == False
366
+
367
+ # set >0, and then set_selected_class to something out of list
368
+ # - expect to find the same list in the property/attribute
369
+ # - expect selected_class to be the overriding class
370
+ # - expect class_overriden to be True
371
+ top_predictions = ["beluga", "blue_whale", "common_dolphin"]
372
+ obs.set_top_predictions(top_predictions)
373
+ obs.set_selected_class("brydes_whale")
374
+
375
+ assert len(obs.top_predictions) == 3
376
+ assert obs.top_predictions == top_predictions
377
+ assert obs.selected_class == "brydes_whale"
378
+ assert obs.class_overriden == True
379
+
380
+
381
+ # now we want to test to_dict, make sure it is compliant with the data to be
382
+ # transmitted to the dataset/server
383
+
384
+ def test_input_observation_to_dict(good_datadict_for_input_observation):
385
+ obs = InputObservation(**good_datadict_for_input_observation)
386
+
387
+ # set >0, and then set_selected_class to something out of list
388
+ # - expect to find the same list in the property/attribute
389
+ # - expect selected_class to be the overriding class
390
+ # - expect class_overriden to be True
391
+ top_predictions = ["beluga", "blue_whale", "common_dolphin"]
392
+ selected = "brydes_whale"
393
+ obs.set_top_predictions(top_predictions)
394
+ obs.set_selected_class(selected)
395
+
396
+ # as a first point, we expect the dict to be like the input dict...
397
+ expected_output = good_datadict_for_input_observation.copy()
398
+ # ... with a few changes
399
+ # - date and time get converted to str(date) str(time)
400
+ expected_output["date"] = str(expected_output["date"])
401
+ expected_output["time"] = str(expected_output["time"])
402
+ # - image_filename comes from uploaded_file.name
403
+ expected_output["image_filename"] = expected_output["uploaded_file"].name
404
+ # - uploaded_file and image are not in the transmitted data
405
+ del expected_output["uploaded_file"]
406
+ del expected_output["image"]
407
+ # - the classification results should be as set above
408
+ expected_output["top_prediction"] = top_predictions[0]
409
+ expected_output["selected_class"] = selected
410
+ expected_output["class_overriden"] = True
411
+
412
+ print(obs.to_dict())
413
+ assert obs.to_dict() == expected_output
414
+
415
+ # expected = {
416
+ # 'image_filename': 'test_image.jpg', 'image_md5':
417
+ # 'd1d2515e6f6ac4c5ca6dd739d5143cd4', 'latitude': 12.34, 'longitude':
418
+ # 56.78, 'author_email': '[email protected]', 'image_datetime_raw':
419
+ # '2023-10-10 10:10:10', 'date': '2023-10-10', 'time': '10:10:10',
420
+ # 'selected_class': 'brydes_whale', 'top_prediction': 'beluga',
421
+ # 'class_overriden': True
422
+ # }
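For orientation, here is a minimal sketch of what a to_dict() satisfying the expectations above could look like. The attribute names, and the derivation of image_filename and top_prediction, are assumptions inferred from this test and the commented-out expected dict, not the actual implementation in InputObservation:

    # hypothetical sketch only -- field names inferred from the test expectations above
    def to_dict(self) -> dict:
        return {
            "image_filename": self.uploaded_file.name,   # derived from the uploaded file, not stored separately
            "image_md5": self.image_md5,
            "latitude": self.latitude,
            "longitude": self.longitude,
            "author_email": self.author_email,
            "image_datetime_raw": self.image_datetime_raw,
            "date": str(self.date),                      # date/time serialised as strings for transmission
            "time": str(self.time),
            "selected_class": self.selected_class,
            "top_prediction": self.top_predictions[0] if self.top_predictions else None,
            "class_overriden": self.class_overriden,
        }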
tests/test_input_validator.py ADDED
@@ -0,0 +1,21 @@
1
+ import pytest
2
+ from input.input_validator import generate_random_md5
3
+
4
+ def test_generate_random_md5_length():
5
+ md5_hash = generate_random_md5(16)
6
+ assert len(md5_hash) == 32, "MD5 hash length should be 32 characters"
7
+
8
+ def test_generate_random_md5_uniqueness():
9
+ md5_hash1 = generate_random_md5(16)
10
+ md5_hash2 = generate_random_md5(16)
11
+ assert md5_hash1 != md5_hash2, "MD5 hashes should be unique for different random strings"
12
+
13
+ def test_generate_random_md5_default_length():
14
+ md5_hash = generate_random_md5()
15
+ assert len(md5_hash) == 32, "MD5 hash length should be 32 characters when using default length"
16
+
17
+ def test_generate_random_md5_different_data_lengths():
18
+ md5_hash_8 = generate_random_md5(8)
19
+ md5_hash_32 = generate_random_md5(32)
20
+ assert len(md5_hash_8) == 32, "MD5 hash length should be 32 characters for 8 character input"
21
+ assert len(md5_hash_32) == 32, "MD5 hash length should be 32 characters for 32 character input"
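A helper satisfying these tests only needs to hash a random string of the requested length; a sketch of what generate_random_md5 could look like (assuming it lives in input/input_validator.py, and that the default length of 16 matches the real code):

    import hashlib
    import random
    import string

    def generate_random_md5(length: int = 16) -> str:
        # build a random alphanumeric string of the requested length ...
        random_string = "".join(random.choices(string.ascii_letters + string.digits, k=length))
        # ... and return its MD5 hex digest, which is always 32 hex characters
        return hashlib.md5(random_string.encode("utf-8")).hexdigest()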
tests/test_main.py ADDED
@@ -0,0 +1,298 @@
1
+ import pytest
2
+ from unittest.mock import MagicMock, patch
3
+ from streamlit.testing.v1 import AppTest
4
+ import time
5
+
6
+ from input.input_handling import spoof_metadata
7
+ from input.input_observation import InputObservation
8
+ from input.input_handling import buffer_uploaded_files
9
+
10
+ from streamlit.runtime.uploaded_file_manager import UploadedFile
11
+ from numpy import ndarray
12
+
13
+ from test_demo_multifile_upload import (
14
+ mock_uploadedFile_List_ImageData, mock_uploadedFile,
15
+ MockUploadedFile, )
16
+
17
+
18
+ from test_demo_input_sidebar import (
19
+ verify_initial_session_state, verify_session_state_after_processing_files,
20
+ wrapped_buffer_uploaded_files_allowed_once)
21
+
22
+ from test_demo_input_sidebar import _cprint, OKBLUE, OKGREEN, OKCYAN, FAIL, PURPLE
23
+
24
+ TIMEOUT = 30
25
+ SCRIPT_UNDER_TEST = "src/main.py"
26
+
27
+ def debug_check_images(at:AppTest, msg:str=""):
28
+ _cprint(f"[I] num images in session state {msg}: {len(at.session_state.images)}", OKCYAN)
29
+ for i, (key, img) in enumerate(at.session_state.images.items()):
30
+ #for i, img in enumerate(at.session_state.images.values()):
31
+ #assert isinstance(img, ndarray)
32
+ if isinstance(img, ndarray):
33
+ print(f"image {i}: {img.shape} [{key}]")
34
+ else:
35
+ print(f"image {i}: {type(img)} [{key}]")
36
+
37
+ def nooop(*args, **kwargs):
38
+ _cprint("skipping the buffering -- should only happen once", FAIL)
39
+ raise RuntimeError
40
+ pass
41
+
42
+ @pytest.mark.end2end
43
+ @pytest.mark.slow
44
+ @patch("streamlit.file_uploader")
45
+ def test_click_validate_after_data_entry(mock_file_rv: MagicMock, mock_uploadedFile_List_ImageData):
46
+ # this test goes through several stages of the workflow
47
+ #
48
+
49
+ # 1. get app started
50
+
51
+ # first we need to upload >0 files
52
+ num_files = 2
53
+ mock_files = mock_uploadedFile_List_ImageData(num_files=num_files)
54
+ mock_file_rv.return_value = mock_files
55
+
56
+ t0 = time.time()
57
+ at = AppTest.from_file(SCRIPT_UNDER_TEST, default_timeout=TIMEOUT).run()
58
+ t1 = time.time()
59
+ _cprint(f"[T] time to load: {t1-t0:.2f}s", PURPLE)
60
+ verify_initial_session_state(at)
61
+
62
+ # 1-Test: at this initial state, we expect:
63
+ # - the workflow state is 'doing_data_entry'
64
+ # - the validate button is disabled
65
+ # - the infer button (on main tab) is disabled
66
+ # - note: props of the button: label, value, proto, disabled.
67
+ # don't need to check others here
68
+
69
+ assert at.session_state.workflow_fsm.current_state == 'doing_data_entry'
70
+ assert at.sidebar.button[1].disabled == True
71
+ infer_button = at.tabs[0].button[0]
72
+ assert infer_button.disabled == True
73
+
74
+
75
+ # 2. upload files, and trigger the callback
76
+
77
+ # put the mocked file_upload into session state, as if it were the result of a file upload, with the key 'file_uploader_data'
78
+ at.session_state["file_uploader_data"] = mock_files
79
+ # the side effect can't run until now (need file_uploader_data to be set)
80
+ if wrapped_buffer_uploaded_files_allowed_once.called == 0:
81
+ mock_file_rv.side_effect = wrapped_buffer_uploaded_files_allowed_once
82
+ else:
83
+ mock_file_rv.side_effect = nooop
84
+
85
+ _cprint(f"[I] buffering called {wrapped_buffer_uploaded_files_allowed_once.called} times", OKGREEN)
86
+
87
+ t2 = time.time()
88
+ at.run()
89
+ t3 = time.time()
90
+ _cprint(f"[T] time to run with file processing: {t3-t2:.2f}s", PURPLE)
91
+
92
+ # 2-Test: after uploading the files, we should have:
93
+ # - the workflow state moved on to 'data_entry_complete'
94
+ # - several changes applied to the session_state (handled by verify_session_state_after_processing_files)
95
+ # - the validate button is enabled
96
+ # - the infer button is still disabled
97
+
98
+ verify_session_state_after_processing_files(at, num_files)
99
+ debug_check_images(at, "after processing files")
100
+ _cprint(f"[I] buffering called {wrapped_buffer_uploaded_files_allowed_once.called} times", OKGREEN)
101
+
102
+ assert at.session_state.workflow_fsm.current_state == 'data_entry_complete'
103
+
104
+ assert at.sidebar.button[1].disabled == False
105
+ infer_button = at.tabs[0].button[0]
106
+ assert infer_button.disabled == True
107
+
108
+ print(at.markdown[0])
109
+
110
+ # 3. data entry complete, click the validate button
111
+ at.sidebar.button[1].click().run()
112
+ t4 = time.time()
113
+ _cprint(f"[T] time to run step 3: {t4-t3:.2f}s", PURPLE)
114
+
115
+ # 3-Test: after validating the data, we should have:
116
+ # - the state (backend) should move to data_entry_validated
117
+ # - the UI should show the new state (in sidebar.markdown[0])
118
+ # - the infer button should now be enabled
119
+ # - the validate button should be disabled
120
+
121
+ assert at.session_state.workflow_fsm.current_state == 'data_entry_validated'
122
+ assert "data_entry_validated" in at.sidebar.markdown[0].value
123
+
124
+ # TODO: this part of the test currently fails because the main code doesn't
126
+ # change the button; in this exec path/branch, the button is not rendered at all.
127
+ # so if we did at.run() after the click, the button would be absent entirely!
127
+ # If we don't run, the button is still present in its old state (enabled)
128
+ # for btn in at.sidebar.button:
129
+ # print(f"button: {btn.label} {btn.disabled}")
130
+ # #assert at.sidebar.button[1].disabled == True
131
+
132
+ infer_button = at.tabs[0].button[0]
133
+ assert infer_button.disabled == False
134
+
135
+ debug_check_images(at, "after validation button")
136
+ _cprint(f"[I] buffering called {wrapped_buffer_uploaded_files_allowed_once.called} times", OKGREEN)
137
+
138
+ # # at this point, we want to retrieve the main area, get the tabs child,
139
+ # # and then on the first tab get the first button & check not disabled (will click next step)
140
+ # #print(at._tree)
141
+ # # fragile: assume the first child is 'main'
142
+ # # robust: walk through children until we find the main area
143
+ # # main_area = at._tree.children[0]
144
+ # # main_area = None
145
+ # # for _id, child in at._tree.children.items():
146
+ # # if child.type == 'main':
147
+ # # main_area = child
148
+ # # break
149
+ # # assert main_area is not None
150
+
151
+ # # ah, we can go direct to the tabs. they are only plausible in main. (not supported in sidebar)
152
+ # infer_tab = at.tabs[0]
153
+ # #print(f"tab: {infer_tab}")
154
+ # #print(dir(infer_tab))
155
+ # btn = infer_tab.button[0]
156
+ # print(f"button: {btn}")
157
+ # print(btn.label)
158
+ # print(btn.disabled)
159
+
160
+ # infer_button = at.tabs[0].button[0]
161
+ # assert infer_button.disabled == False
162
+
163
+ # check pre-ML click that we are ready for it.
164
+
165
+ debug_check_images(at, "before clicking infer. ")
166
+ _cprint(f"[I] buffering called {wrapped_buffer_uploaded_files_allowed_once.called} times", OKGREEN)
167
+ TEST_ML = True
168
+ SKIP_CHECK_OVERRIDE = False
169
+ # 4. launch ML inference by clicking the button
170
+ if TEST_ML:
171
+ # infer_button = at.tabs[0].button[0]
172
+ # assert infer_button.disabled == False
173
+ # now test the ML step
174
+ infer_button.click().run()
175
+ t5 = time.time()
176
+ _cprint(f"[T] time to run step 4: {t5-t4:.2f}s", PURPLE)
177
+
178
+ # 4-Test: after clicking the infer button, we should have:
179
+ # - workflow should have moved on to 'ml_classification_completed'
180
+ # - the main tab button should now have new text (confirm species predictions)
181
+ # - we should have the results presented on the main area
182
+ # - 4 image elements per file (the source image plus the 3 prediction images), i.e. num_files*4 in total
183
+ # - 2 dropdown elements (one for each image) + 1 for the page selector
184
+ # - all of the observations should have class_overriden == False
185
+
186
+ assert at.session_state.workflow_fsm.current_state == 'ml_classification_completed'
187
+ # check the observations
188
+ for i, obs in enumerate(at.session_state.observations.values()):
189
+ print(f"obs {i}: {obs}")
190
+ assert isinstance(obs, InputObservation)
191
+ assert obs.class_overriden == False
192
+
193
+ # check the visual elements
194
+ infer_tab = at.tabs[0]
195
+ print(f"tab: {infer_tab}")
196
+ img_elems = infer_tab.get("imgs")
197
+ print(f"imgs: {len(img_elems)}")
198
+ assert len(img_elems) == num_files*4
199
+
200
+ infer_button = infer_tab.button[0]
201
+ assert infer_button.disabled == False
202
+ assert 'Confirm species predictions' in infer_button.label
203
+
204
+ # we have 1 per file, and also one more to select the page of results being shown.
205
+ # - hmm, so we aren't going to see the right number if it goes multipage :(
206
+ # - but this test specifically uses 2 inputs.
207
+ assert len(infer_tab.selectbox) == num_files + 1
208
+
209
+
210
+ # 5. manually override the class of one of the observations
211
+ idx_to_override = 1 # don't forget, we also have the page selector first.
212
+ infer_tab.selectbox[idx_to_override + 1].select_index(20).run() # FRAGILE!
213
+
214
+ # 5-TEST.
215
+ # - expect that all class_overriden are False, except for the one we just set
216
+ # - also expect there still to be num_files*4 images (2+6 per file) etc
217
+ for i, obs in enumerate(at.session_state.observations.values()):
218
+ _cprint(f"obs {i}: {obs.class_overriden} {obs.to_dict()}", OKBLUE)
219
+ assert isinstance(obs, InputObservation)
220
+ if not SKIP_CHECK_OVERRIDE:
221
+ if i == idx_to_override:
222
+ assert obs.class_overriden == True
223
+ else:
224
+ assert obs.class_overriden == False
225
+
226
+ # 6. confirm the species predictions, get ready to allow upload
227
+ infer_tab = at.tabs[0]
228
+ confirm_button = infer_tab.button[0]
229
+ confirm_button.click().run()
230
+ t6 = time.time()
231
+ _cprint(f"[T] time to run step 5: {t6-t5:.2f}s", PURPLE)
232
+
233
+ # 6-TEST. Now we expect to see:
234
+ # - the workflow state should be 'manual_inspection_completed'
235
+ # - the observations should be as per the previous step
236
+ # - the main tab button should now have new text (Upload all observations)
237
+ # - we should have 4n images
238
+ # - we should have only 1 select box (page), (passed stage for overriding class)
239
+
240
+ assert at.session_state.workflow_fsm.current_state == 'manual_inspection_completed'
241
+ for i, obs in enumerate(at.session_state.observations.values()):
242
+ _cprint(f"obs {i}: {obs.class_overriden} {obs.to_dict()}", OKBLUE)
243
+ assert isinstance(obs, InputObservation)
244
+ if not SKIP_CHECK_OVERRIDE:
245
+ if i == idx_to_override:
246
+ assert obs.class_overriden == True
247
+ else:
248
+ assert obs.class_overriden == False
249
+
250
+ # do we have to trigger a manual refresh? no; it seems that sometimes the tests fail, maybe
251
+ # because the script is slow. it is not unique to here: various assertions that usually pass
252
+ # occasionally fail because elements haven't been drawn yet. I suppose the timing handled
253
+ # internally by AppTest is not perfect (selenium offers both implicit and explicit waits,
254
+ # though I didn't look too deeply into whether AppTest also has an explicit wait mechanism)
255
+ # # time.sleep(1)
256
+ # #at.run()
257
+ infer_tab = at.tabs[0]
258
+ upload_button = infer_tab.button[0]
259
+ assert upload_button.disabled == False
260
+ assert 'Upload all observations' in upload_button.label
261
+
262
+ img_elems = infer_tab.get("imgs")
263
+ assert len(img_elems) == num_files*4
264
+
265
+ assert len(infer_tab.selectbox) == 1
266
+
267
+ # 7. upload the observations
268
+ upload_button.click().run()
269
+ t7 = time.time()
270
+ _cprint(f"[T] time to run step 6: {t7-t6:.2f}s", PURPLE)
271
+
272
+ # 7-TEST. Now we expect to see:
273
+ # - workflow state should be 'data_uploaded'
274
+ # - nothing else in the back end should have changed (is that a mistake? should we
275
+ # add a boolean tracking if the observations have been uploaded?)
276
+ # - a toast presented for each observation uploaded
277
+ # - the images should still be there, and 1 select box (page)
278
+ # - no more button on the main area
279
+
280
+ assert at.session_state.workflow_fsm.current_state == 'data_uploaded'
281
+ #print(at.toast)
282
+ assert len(at.toast) == num_files
283
+ infer_tab = at.tabs[0]
284
+
285
+ img_elems = infer_tab.get("imgs")
286
+ assert len(img_elems) == num_files*4
287
+ assert len(infer_tab.selectbox) == 1
288
+ assert len(infer_tab.button) == 0
289
+
290
+
291
+
292
+
293
+
294
+
295
+
296
+
297
+
298
+
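Since the tests in this file use the custom markers end2end and slow (and the selenium suite below uses visual), pytest will warn about unknown markers unless they are registered. A minimal registration sketch in a conftest.py, offered as an assumption; the repository may already declare these markers in pytest.ini or pyproject.toml:

    # conftest.py -- hypothetical sketch; adjust if the markers are registered elsewhere
    def pytest_configure(config):
        config.addinivalue_line("markers", "slow: tests that take a long time to run")
        config.addinivalue_line("markers", "end2end: tests that exercise the full workflow")
        config.addinivalue_line("markers", "visual: browser-based tests driven by selenium")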
tests/visual_selenium/requirements_visual.txt ADDED
@@ -0,0 +1,5 @@
1
+ # visual tests with selenium
2
+ selenium~=4.27.1
3
+ seleniumbase~=4.33.12
4
+
5
+ pytest~=8.3.4
tests/visual_selenium/test_visual_main.py ADDED
@@ -0,0 +1,288 @@
1
+ from pathlib import Path
2
+ import time
3
+ from contextlib import contextmanager
4
+
5
+ import pytest
6
+ from seleniumbase import BaseCase
7
+ from selenium.webdriver.common.by import By
8
+ from selenium.webdriver.support.ui import WebDriverWait
9
+ from selenium.webdriver.support import expected_conditions as EC
10
+
11
+
12
+ BaseCase.main(__name__, __file__)
13
+
14
+ # Set the paths to the images and csv file
15
+ repo_path = Path(__file__).resolve().parents[2]
16
+ imgpath = repo_path / "tests/data/rand_images"
17
+ img_f1 = imgpath / "img_001.jpg"
18
+ img_f2 = imgpath / "img_002.jpg"
19
+ img_f3 = imgpath / "img_003.jpg"
20
+ #csvpath = repo_path / "tests/data/test_csvs"
21
+ #csv_f1 = csvpath / "debian.csv"
22
+
23
+ mk_visible = """
24
+ var input = document.querySelector('[data-testid="stFileUploaderDropzoneInput"]');
25
+ input.style.display = 'block';
26
+ input.style.opacity = '1';
27
+ input.style.visibility = 'visible';
28
+ """
29
+
30
+ PORT = "8501"
31
+
32
+ # - _before_module and run_streamlit taken from
33
+ # https://github.com/randyzwitch/streamlit-folium/blob/master/tests/test_frontend.py
34
+ # example given via the streamlit blog
35
+ # - note: to combine pytest fixtures with unittest-style tests we have to use autouse=True.
36
+ @pytest.fixture(scope="module", autouse=True)
37
+ def _before_module():
38
+ # Run the streamlit app before each module
39
+ with run_streamlit():
40
+ yield
41
+
42
+ @contextmanager
43
+ def run_streamlit():
44
+ """Run the streamlit app at src/main.py on port PORT"""
45
+
46
+ import subprocess
47
+
48
+ p = subprocess.Popen(
49
+ [
50
+ "streamlit",
51
+ "run",
52
+ "src/main.py",
53
+ "--server.port",
54
+ PORT,
55
+ "--server.headless",
56
+ "true",
57
+ ]
58
+ )
59
+
60
+ time.sleep(5)
61
+
62
+ try:
63
+ yield 1
64
+ finally:
65
+ p.kill()
66
+
67
+ def wait_for_element(self, by, selector, timeout=10):
68
+ # example usage:
69
+ # element = self.wait_for_element(By.XPATH, "//p[contains(text(), 'Species for observation')]")
70
+
71
+ return WebDriverWait(self.driver, timeout).until(
72
+ EC.presence_of_element_located((by, selector))
73
+ )
74
+
75
+
76
+ def find_all_button_paths(self):
77
+ buttons = self.find_elements("button")
78
+ for button in buttons:
79
+ print(f"\nButton found:")
80
+ print(f"Text: {button.text.strip()}")
81
+ print(f"HTML: {button.get_attribute('outerHTML')}")
82
+ print("-" * 50)
83
+
84
+ def check_columns_and_images(self, exp_cols:int, exp_imgs:int=4):
85
+ # Find all columns
86
+ columns = self.find_elements("div[class*='stColumn']")
87
+
88
+ # Check number of columns
89
+ assert len(columns) == exp_cols, f"Expected {exp_cols} columns but found {len(columns)}"
90
+
91
+ # Check images in each column
92
+ for i, column in enumerate(columns, 1):
93
+ # Find all images within this column's image containers
94
+ images = self.find_elements(
95
+ f"div[class*='stColumn']:nth-child({i}) div[data-testid='stImageContainer'] img"
96
+ )
97
+
98
+ # Check number of images in this column
99
+ assert len(images) == exp_imgs, f"Column {i} has {len(images)} images instead of {exp_imgs}"
100
+
101
+
102
+ def analyze_species_columns_debug(self):
103
+ # First, just try to find any divs
104
+ all_divs = self.find_elements(By.TAG_NAME, "div")
105
+ print(f"Found {len(all_divs)} total divs")
106
+
107
+ # Then try to find stColumn divs
108
+ column_divs = self.find_elements(By.XPATH, "//div[contains(@class, 'stColumn')]")
109
+ print(f"Found {len(column_divs)} column divs")
110
+
111
+ # Try to find any elements containing our text, without class restrictions
112
+ text_elements = self.find_elements(
113
+ By.XPATH, "//*[contains(text(), 'Species for observation')]"
114
+ )
115
+ print(f"Found {len(text_elements)} elements with 'Species for observation' text")
116
+
117
+ # If we found text elements, print their tag names and class names to help debug
118
+ for elem in text_elements:
119
+ print(f"Tag: {elem.tag_name}, Class: {elem.get_attribute('class')}")
120
+
121
+ def analyze_species_columns(self, exp_cols:int, exp_imgs:int=4, exp_visible:bool=True):
122
+ # Find all columns that contain the specific text pattern
123
+ cur_tab = get_selected_tab(self)
124
+ print(f"Current tab: {cur_tab['text']} ({cur_tab['id']})" )
125
+
126
+ #"div[class*='stColumn']//div[contains(text(), 'Species for observation')]"
127
+ spec_labels = self.find_elements(
128
+ By.XPATH,
129
+ "//p[contains(text(), 'Species for observation')]"
130
+ )
131
+
132
+ # This gets us the text containers, need to go back up to the column
133
+ species_columns = [lbl.find_element(By.XPATH, "./ancestor::div[contains(@class, 'stColumn')]")
134
+ for lbl in spec_labels]
135
+
136
+ print(f" Found {len(species_columns)} species columns (total {len(spec_labels)} species labels)")
137
+ assert len(species_columns) == exp_cols, f"Expected {exp_cols} columns but found {len(species_columns)}"
138
+
139
+
140
+ for i, column in enumerate(species_columns, 1):
141
+ # Get the species number text
142
+ species_text = column.find_element(
143
+ #By.XPATH, ".//div[contains(text(), 'Species for observation')]"
144
+ By.XPATH, ".//p[contains(text(), 'Species for observation')]"
145
+ )
146
+ print(f" Analyzing col {i}:{species_text.text} {species_text.get_attribute('outerHTML')} | ")
147
+
148
+ # Find images in this specific column
149
+ images = column.find_elements(
150
+ By.XPATH, ".//div[@data-testid='stImageContainer']//img"
151
+ )
152
+ print(f" - Contains {len(images)} images (expected: {exp_imgs})")
153
+ assert len(images) == exp_imgs, f"Column {i} has {len(images)} images instead of {exp_imgs}"
154
+
155
+ # now let's refine the search to find the images that are actually displayed
156
+ visible_images = [img for img in column.find_elements(
157
+ By.XPATH, ".//div[@data-testid='stImageContainer']//img"
158
+ ) if img.is_displayed()]
159
+ print(f" - Contains {len(visible_images)} visible images")
160
+ if exp_visible:
161
+ assert len(visible_images) == exp_imgs, f"Column {i} has {len(visible_images)} visible images instead of {exp_imgs}"
162
+ else:
163
+ assert len(visible_images) == 0, f"Column {i} has {len(visible_images)} visible images instead of 0"
164
+
165
+
166
+ # even more strict test for visibility
167
+ # for img in images:
168
+ # style = img.get_attribute('style')
169
+ # computed_style = self.driver.execute_script(
170
+ # "return window.getComputedStyle(arguments[0])", img
171
+ # )
172
+ # print(f"Style: {style}")
173
+ # print(f"Visibility: {computed_style['visibility']}")
174
+ # print(f"Opacity: {computed_style['opacity']}")
175
+
176
+ def get_selected_tab(self):
177
+ selected_tab = self.find_element(
178
+ By.XPATH, "//div[@data-testid='stTabs']//button[@aria-selected='true']"
179
+ )
180
+ # Get the tab text
181
+ tab_text = selected_tab.find_element(By.TAG_NAME, "p").text
182
+ # Get the tab index (might be useful)
183
+ tab_id = selected_tab.get_attribute("id") # Usually ends with "-tab-X" where X is the index
184
+ return {
185
+ "text": tab_text,
186
+ "id": tab_id,
187
+ "element": selected_tab
188
+ }
189
+
190
+ def switch_tab(self, tab_number):
191
+ # Click the tab
192
+ self.click(f"div[data-testid='stTabs'] button[id$='-tab-{tab_number}'] p")
193
+
194
+ # Verify the switch
195
+ selected_tab = get_selected_tab(self)
196
+ if selected_tab["id"].endswith(f"-tab-{tab_number}"):
197
+ print(f"Successfully switched to tab {tab_number}: {selected_tab['text']}")
198
+ else:
199
+ raise Exception(f"Failed to switch to tab {tab_number}, current tab is {selected_tab['text']}")
200
+
201
+ class RecorderTest(BaseCase):
202
+
203
+ @pytest.mark.slow
204
+ @pytest.mark.visual
205
+ def test_species_presentation(self):
206
+ # this test goes through several steps of the workflow, primarily to get to the point
207
+ # that species columns are displayed.
208
+ # - setup steps:
209
+ # - open the app
210
+ # - upload two images
211
+ # - validate the data entry
212
+ # - click the infer button, wait for ML
213
+ # - the real test steps:
214
+ # - check the species columns are displayed
215
+ # - switch to another tab, check the columns are not displayed
216
+ # - switch back to the first tab, check the columns are displayed again
217
+
218
+ self.open("http://localhost:8501/")
219
+ time.sleep(4) # even in demo mode, on the full script this is needed
220
+ # (the folium maps cause the script to rerun, which means wait_for_element finds the element, but
221
+ # the reload is still in progress and this makes the file upload (send_keys) command fail)
222
+
223
+ # make the file_uploader block visible -- for some reason even though we can see it, selenium can't...
224
+ wait_for_element(self, By.CSS_SELECTOR, '[data-testid="stFileUploaderDropzoneInput"]')
225
+ self.execute_script(mk_visible)
226
+ # send a list of files
227
+ self.send_keys(
228
+ 'input[data-testid="stFileUploaderDropzoneInput"]',
229
+ "\n".join([str(img_f1), str(img_f2)]),
230
+ )
231
+
232
+ # advance to the next step, by clicking the validate button (wait for it first)
233
+ wait_for_element(self, By.XPATH, "//button//strong[contains(text(), 'Validate')]")
234
+ self.click('button strong:contains("Validate")')
235
+ # validate the progress via the text display
236
+ self.assert_exact_text("Progress: 2/5. Current: data_entry_validated.", 'div[data-testid="stMarkdownContainer"] p em')
237
+
238
+ # check the tab bar is there, and the titles are correct
239
+ expected_texts = [
240
+ "Cetecean classifier", "Hotdog classifier", "Map",
241
+ "Dev:coordinates", "Log", "Beautiful cetaceans"
242
+ ]
243
+ self.assert_element("div[data-testid='stTabs']")
244
+
245
+ for i, text in enumerate(expected_texts):
246
+ selector = f"div[data-testid='stTabs'] button[id$='-tab-{i}'] p"
247
+ print(f"{i=}, {text=}, {selector=}")
248
+ self.assert_text(text, selector)
249
+ break # just do one, this is slow while debugging
250
+
251
+ # dbg: look for buttons, find out which props will isolate the right one.
252
+ # find_all_button_paths(self)
253
+
254
+ self.assert_element(".st-key-button_infer_ceteans button")
255
+ self.click(".st-key-button_infer_ceteans button")
256
+
257
+ # check the state has advanced
258
+ # NOTE: FOR REMOTE RUN, IT IS STARTING FROM ZERO, SO IT HAS TO DOWNLOAD
259
+ # ALL MODEL FILES -> 60s timeout for this one step
260
+ self.assert_exact_text("Progress: 3/5. Current: ml_classification_completed.",
261
+ 'div[data-testid="stMarkdownContainer"] p em',
262
+ timeout=60)
263
+
264
+ # on the inference tab, check the columns and images are rendered correctly
265
+ # - normally it is selected by default, but we can switch to it to be sure
266
+ # - then we do the test for the right number of columns and images per col,
267
+ # which should be visible
268
+ switch_tab(self, 0)
269
+ analyze_species_columns(self, exp_cols=2, exp_imgs=4, exp_visible=True)
270
+
271
+ # now, we want to select another tab and check something is present,
272
+ # then go back, and re-check the columns and images are re-rendered.
273
+ switch_tab(self, 4)
274
+ assert get_selected_tab(self)["id"].endswith("-tab-4")
275
+
276
+ # now we click the refresh button
277
+ self.click('button[data-testid="stBaseButton-secondary"]')
278
+ # and then select the first tab again
279
+ switch_tab(self, 0)
280
+ assert get_selected_tab(self)["id"].endswith("-tab-0")
281
+ # and check the columns and images are re-rendered
282
+ analyze_species_columns(self, exp_cols=2, exp_imgs=4, exp_visible=True)
283
+
284
+ # now go to some other tab, and check the columns and images are not visible
285
+ switch_tab(self, 2)
286
+ assert get_selected_tab(self)["id"].endswith("-tab-2")
287
+ analyze_species_columns(self, exp_cols=2, exp_imgs=4, exp_visible=False)
288
+