saving-willy-dev / tests /test_input_observation.py
rmm
test: for setters and for to_dict method of InputObservation
5312fc3
raw
history blame
15.2 kB
from typing import Protocol, runtime_checkable
import pytest
from unittest.mock import MagicMock, patch
from io import BytesIO
#from PIL import Image
import datetime
import numpy as np
#from streamlit.runtime.uploaded_file_manager import UploadedFile # for type hinting
#from typing import List, Union
from input.input_observation import InputObservation
@runtime_checkable
class UploadedFile(Protocol):
name: str
size: int
type: str
_file_urls: list
def getvalue(self) -> bytes: ...
def read(self) -> bytes: ...
class MockUploadedFile(BytesIO):
def __init__(self,
initial_bytes: bytes,
*, # enforce keyword-only arguments after now
name:str,
size:int,
type:str):
#super().__init__(*args, **kwargs)
super().__init__(initial_bytes)
self.name = name
self.size = size
self.type = type
self._file_urls = [None,]
@pytest.fixture
def mock_uploadedFile():
class MockGUIClass(MagicMock):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
name = kwargs.get('name', 'image2.jpg')
size = kwargs.get('size', 123456)
type = kwargs.get('type', 'image/jpeg')
self.bytes_io = MockUploadedFile(
b"test data", name=name, size=size, type=type)
self.get_data = MagicMock(return_value=self.bytes_io)
return MockGUIClass
# let's first generate a test for the mock_uploaded_file and MockUploadedFile class
# - test with valid input
def test_mock_uploaded_file(mock_uploadedFile):
# setup values for the test (all valid)
image_name = "test_image.jpg"
mock_file = mock_uploadedFile(name=image_name).get_data()
#print(dir(mock_file))
assert isinstance(mock_file, BytesIO)
assert mock_file.name == image_name
assert mock_file.size == 123456
assert mock_file.type == "image/jpeg"
# now we move on to test the class InputObservation
# - with valid input
# - with invalid input
# - with missing input
def test_input_observation_valid(mock_uploadedFile):
# image: ndarray
# lat, lon: float
# author_email: str
# date, time: datetime.date, datetime.time
#uploaded_file: UploadedFile (need to mock this)
# image_md5: str
# setup values for the test (all valid)
author_email = "[email protected]"
image_name = "test_image.jpg"
mock_file = mock_uploadedFile(name=image_name).get_data()
_date="2023-10-10"
_time="10:10:10"
image_datetime_raw = _date + " " + _time
dt = datetime.datetime.strptime(image_datetime_raw, "%Y-%m-%d %H:%M:%S")
date = dt.date()
time = dt.time()
## make a random image with dtype uint8 using np.random.randint
image = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)
image_md5 = 'd1d2515e6f6ac4c5ca6dd739d5143cd4' # 32 hex chars.
obs = InputObservation(
image=image,
latitude=12.34, longitude=56.78, author_email=author_email,
time=time, date=date,
uploaded_file=mock_file,
image_md5=image_md5,
)
assert isinstance(obs.image, np.ndarray)
assert (obs.image == image).all()
assert obs.latitude == 12.34
assert obs.longitude == 56.78
assert obs.author_email == author_email
assert isinstance(obs.date, datetime.date)
assert isinstance(obs.time, datetime.time)
assert str(obs.date) == "2023-10-10"
assert str(obs.time) == "10:10:10"
assert obs.uploaded_file.name == image_name
assert obs.uploaded_file.size == 123456
assert obs.uploaded_file.type == "image/jpeg"
assert isinstance(obs.uploaded_file, BytesIO)
#assert isinstance(obs.uploaded_file, MockUploadedFile) # is there any point in checking the type of the mock, ?
# a list of tuples (strings that are the keys of "valid_inputs", expected error type)
# loop over the list, and for each tuple, create a dictionary with all valid inputs, and one invalid input
# assert that the function raises the expected error type
invalid_input_scenarios = [
("author_email", TypeError),
("image_name", TypeError),
("uploaded_file", TypeError),
("date", TypeError),
("time", TypeError),
("image", TypeError),
("image_md5", TypeError),
]
@pytest.mark.parametrize("key, error_type", invalid_input_scenarios)
def test_input_observation_invalid(key, error_type, mock_uploadedFile):
# correct datatypes are:
# - image: ndarray
# - lat, lon: float
# - author_email: str
# - date, time: datetime.date, datetime.time
# - uploaded_file: UploadedFile (need to mock this)
# - image_md5: str
# the most critical/likely to go wrong would presumably be
# - date, time (strings not datetime objects)
# - lat, lon (strings not numbers)
# - image (not ndarray, maybe accidentally a PIL object or maybe the filename)
# - uploaded_file (not UploadedFile, maybe a string, or maybe the ndarray)
# check it fails when any of the datatypes are wrong,
# even if the rest are all good want to loop over the inputs, take each one
# from a bad list, and all others from a good list, and assert fails for
# each one
# set up the good and bad inputs
_date="2023-10-10"
_time="10:10:10"
image_datetime_raw = _date + " " + _time
fname = "test_image.jpg"
image = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)
dt_ok = datetime.datetime.strptime(image_datetime_raw, "%Y-%m-%d %H:%M:%S")
valid_inputs = {
"author_email": "[email protected]",
"image_name": "test_image.jpg",
"uploaded_file": mock_uploadedFile(name=fname).get_data(),
"date": dt_ok.date(),
"time": dt_ok.time(),
"image": image,
"image_md5": 'd1d2515e6f6ac4c5ca6dd739d5143cd4', # 32 hex chars.
}
invalid_inputs = {
"author_email": "@example",
"image_name": 45,
"uploaded_file": image,
"date": _date,
"time": _time,
"image": fname,
"image_md5": 45643
}
# test a valid set of inputs, minus the target key, substituted for something invalid
inputs = valid_inputs.copy()
inputs[key] = invalid_inputs[key]
with pytest.raises(error_type):
obs = InputObservation(**inputs)
# now test the same key set to None
inputs = valid_inputs.copy()
inputs[key] = None
with pytest.raises(error_type):
obs = InputObservation(**inputs)
# we can take a similar approach to test equality.
# here, construct two dicts, each with valid inputs but all elements different.
# loop over the keys, and construct two InputObservations that differ on that key only.
# asser the expected output message.
# ah, it is the diff func that prints a message. Here we just assert boolean.
# we currently expect differences on time to be ignored.
inequality_keys = [
("author_email", False),
("uploaded_file", False),
("date", False),
#("time", True),
pytest.param("time", False, marks=pytest.mark.xfail(reason="Time is currently ignored in __eq__")),
("image", False),
("image_md5", False),
]
@pytest.mark.parametrize("key, expect_equality", inequality_keys)
def test_input_observation_equality(key, expect_equality, mock_uploadedFile):
# set up the two sets of good inputs
_date1 = "2023-10-10"
_time1 = "10:10:10"
image_datetime_raw1 = _date1 + " " + _time1
fname1 = "test_image.jpg"
image1 = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)
dt1 = datetime.datetime.strptime(image_datetime_raw1, "%Y-%m-%d %H:%M:%S")
_date2 = "2023-10-11"
_time2 = "12:13:14"
image_datetime_raw2 = _date2 + " " + _time2
fname2 = "test_image.jpg"
image2 = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)
dt2 = datetime.datetime.strptime(image_datetime_raw2, "%Y-%m-%d %H:%M:%S")
valid_inputs1 = {
"author_email": "[email protected]",
#"image_name": "test_image.jpg",
"uploaded_file": mock_uploadedFile(name=fname1).get_data(),
"date": dt1.date(),
"time": dt1.time(),
"image": image1,
"image_md5": 'd1d2515e6f6ac4c5ca6dd739d5143cd4', # 32 hex chars.
}
valid_inputs2 = {
"author_email": "[email protected]",
#"image_name": "another.jpg",
"uploaded_file": mock_uploadedFile(name=fname2).get_data(),
"date": dt2.date(),
"time": dt2.time(),
"image": image2,
"image_md5": 'cdb235587bdee5915d6ccfa52ca9f3ac', # 32 hex chars.
}
nearly_same_inputs = valid_inputs1.copy()
nearly_same_inputs[key] = valid_inputs2[key]
obs1 = InputObservation(**valid_inputs1)
obs2 = InputObservation(**nearly_same_inputs)
if expect_equality is True:
assert obs1 == obs2
else:
assert obs1 != obs2
# now let's test the setter methods (set_top_predictions, set_selected_class, set_class_overriden)
# ideally we get a fixture that produces a good / valid InputObservation object
# and from there, just test the setters + their expected changes / side effects
@pytest.fixture
def good_datadict_for_input_observation(mock_uploadedFile) -> dict:
# set up the good and bad inputs
_date="2023-10-10"
_time="10:10:10"
image_datetime_raw = _date + " " + _time
fname = "test_image.jpg"
image = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)
dt_ok = datetime.datetime.strptime(image_datetime_raw, "%Y-%m-%d %H:%M:%S")
valid_inputs = {
"author_email": "[email protected]",
"uploaded_file": mock_uploadedFile(name=fname).get_data(),
"date": dt_ok.date(),
"time": dt_ok.time(),
"image": image,
"image_md5": 'd1d2515e6f6ac4c5ca6dd739d5143cd4', # 32 hex chars.
"image_datetime_raw": image_datetime_raw,
"latitude": 12.34,
"longitude": 56.78,
}
return valid_inputs
@pytest.fixture
def good_input_observation(good_datadict_for_input_observation) -> InputObservation:
observation = InputObservation(**good_datadict_for_input_observation)
return observation
#
def test_input_observation__set_top_predictions_populated(good_input_observation):
obs = good_input_observation
# before setting, expect empty list
assert obs.top_predictions == []
assert obs.selected_class == None
# set >0,
# - expect to find the same list in the property/attribute
# - expect to find the first element in the selected_class
top_predictions = ["beluga", "blue_whale", "common_dolphin"]
obs.set_top_predictions(top_predictions)
assert len(obs.top_predictions) == 3
assert obs.top_predictions == top_predictions
assert obs.selected_class == "beluga"
def test_input_observation__set_top_predictions_unpopulated(good_input_observation):
obs = good_input_observation
# before setting, expect empty list
assert obs.top_predictions == []
assert obs.selected_class == None
# set to empty list,
# - expect to find the same list in the property/attribute
# - expect to find selected_class to be None
top_predictions = []
obs.set_top_predictions(top_predictions)
assert len(obs.top_predictions) == 0
assert obs.top_predictions == []
assert obs.selected_class == None
def test_input_observation__set_selected_class_default(good_input_observation):
obs = good_input_observation
# before setting, expect empty list
assert obs.top_predictions == []
assert obs.selected_class == None
assert obs.class_overriden == False
# set >0, and then set_selected_class to the first element
# - expect to find the same list in the property/attribute
# - expect to find the first element in the selected_class
# - expect class_overriden to be False
top_predictions = ["beluga", "blue_whale", "common_dolphin"]
obs.set_top_predictions(top_predictions)
obs.set_selected_class(top_predictions[0])
assert len(obs.top_predictions) == 3
assert obs.top_predictions == top_predictions
assert obs.selected_class == "beluga"
def test_input_observation__set_selected_class_override(good_input_observation):
obs = good_input_observation
# before setting, expect empty list
assert obs.top_predictions == []
assert obs.selected_class == None
assert obs.class_overriden == False
# set >0, and then set_selected_class to something out of list
# - expect to find the same list in the property/attribute
# - expect to find the first element in the selected_class
# - expect class_overriden to be False
top_predictions = ["beluga", "blue_whale", "common_dolphin"]
obs.set_top_predictions(top_predictions)
obs.set_selected_class("brydes_whale")
assert len(obs.top_predictions) == 3
assert obs.top_predictions == top_predictions
assert obs.selected_class == "brydes_whale"
assert obs.class_overriden == True
# now we want to test to_dict, make sure it is compliant with the data to be
# transmitted to the dataset/server
def test_input_observation_to_dict(good_datadict_for_input_observation):
obs = InputObservation(**good_datadict_for_input_observation)
# set >0, and then set_selected_class to something out of list
# - expect to find the same list in the property/attribute
# - expect to find the first element in the selected_class
# - expect class_overriden to be False
top_predictions = ["beluga", "blue_whale", "common_dolphin"]
selected = "brydes_whale"
obs.set_top_predictions(top_predictions)
obs.set_selected_class(selected)
# as a first point, we expect the dict to be like the input dict...
expected_output = good_datadict_for_input_observation.copy()
# ... with a few changes
# - date and time get converted to str(date) str(time)
expected_output["date"] = str(expected_output["date"])
expected_output["time"] = str(expected_output["time"])
# - image_filename comes from uploaded_file.name
expected_output["image_filename"] = expected_output["uploaded_file"].name
# - uploaded_file and image are not in the transmitted data
del expected_output["uploaded_file"]
del expected_output["image"]
# - the classification results should be as set above
expected_output["top_prediction"] = top_predictions[0]
expected_output["selected_class"] = selected
expected_output["class_overriden"] = True
print(obs.to_dict())
assert obs.to_dict() == expected_output
# expected = {
# 'image_filename': 'test_image.jpg', 'image_md5':
# 'd1d2515e6f6ac4c5ca6dd739d5143cd4', 'latitude': 12.34, 'longitude':
# 56.78, 'author_email': '[email protected]', 'image_datetime_raw':
# '2023-10-10 10:10:10', 'date': '2023-10-10', 'time': '10:10:10',
# 'selected_class': 'brydes_whale', 'top_prediction': 'beluga',
# 'class_overriden': True
# }