File size: 15,197 Bytes
44d13aa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5312fc3
44d13aa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99171fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5312fc3
a847100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5312fc3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
from typing import Protocol, runtime_checkable
import pytest
from unittest.mock import MagicMock, patch

from io import BytesIO
#from PIL import Image
import datetime
import numpy as np
    
#from streamlit.runtime.uploaded_file_manager import UploadedFile # for type hinting
#from typing import List, Union

from input.input_observation import InputObservation

@runtime_checkable
class UploadedFile(Protocol):
    """Structural type describing the uploaded-file objects used in these tests.

    Any object exposing the attributes and methods below satisfies the
    protocol; because it is runtime-checkable, ``isinstance`` can be used to
    test for their presence.  It is declared locally in place of the
    commented-out streamlit ``UploadedFile`` import.
    """

    # metadata attributes expected on an uploaded file
    name: str
    size: int
    type: str
    _file_urls: list

    def getvalue(self) -> bytes:
        """Return the content as bytes."""
        ...

    def read(self) -> bytes:
        """Read and return bytes from the file."""
        ...


class MockUploadedFile(BytesIO):
    """In-memory byte buffer that also carries uploaded-file metadata.

    Behaves as a ``BytesIO`` initialised with ``initial_bytes`` and additionally
    exposes ``name``, ``size``, ``type`` and ``_file_urls`` attributes.

    Args:
        initial_bytes: content of the buffer.
        name: file name to report (keyword-only).
        size: file size to report (keyword-only); stored as given, it is not
            derived from ``initial_bytes``.
        type: MIME type string to report (keyword-only).
    """

    def __init__(self, initial_bytes: bytes, *, name: str, size: int, type: str):
        super().__init__(initial_bytes)
        # metadata is stored verbatim from the keyword arguments
        self.name, self.size, self.type = name, size, type
        # single placeholder entry
        self._file_urls = [None]


@pytest.fixture
def mock_uploadedFile():
    """Provide a MagicMock subclass standing in for a GUI upload widget.

    The fixture returns the class itself.  Instantiating it (optionally with
    ``name``, ``size`` and ``type`` keyword arguments) builds a
    MockUploadedFile holding ``b"test data"``; calling ``get_data()`` on the
    instance returns that file object.
    """
    class MockGUIClass(MagicMock):
        def __init__(self, *args, **kwargs):
            # all arguments are also forwarded to MagicMock unchanged
            super().__init__(*args, **kwargs)
            self.bytes_io = MockUploadedFile(
                b"test data",
                name=kwargs.get('name', 'image2.jpg'),
                size=kwargs.get('size', 123456),
                type=kwargs.get('type', 'image/jpeg'),
            )
            self.get_data = MagicMock(return_value=self.bytes_io)

    return MockGUIClass


# first, test the mock_uploadedFile fixture and the MockUploadedFile class
# - test with valid input
def test_mock_uploaded_file(mock_uploadedFile):
    """Check the file object produced by the mock_uploadedFile fixture.

    The requested name must be carried through, while size and type fall back
    to the fixture defaults.
    """
    image_name = "test_image.jpg"
    widget = mock_uploadedFile(name=image_name)
    mock_file = widget.get_data()

    assert isinstance(mock_file, BytesIO)

    # name is as requested; size and type are the fixture defaults
    observed = (mock_file.name, mock_file.size, mock_file.type)
    assert observed == (image_name, 123456, "image/jpeg")


# now we move on to test the class InputObservation
# - with valid input
# - with invalid input
# - with missing input

def test_input_observation_valid(mock_uploadedFile):
    """Check that InputObservation stores a complete set of valid inputs.

    Inputs and their types:
    - image: ndarray
    - latitude, longitude: float
    - author_email: str
    - date, time: datetime.date, datetime.time
    - uploaded_file: UploadedFile (mocked via the mock_uploadedFile fixture)
    - image_md5: str
    """
    # setup values for the test (all valid)

    # a well-formed address on the reserved example.com domain
    author_email = "test@example.com"
    image_name = "test_image.jpg"
    mock_file = mock_uploadedFile(name=image_name).get_data()

    _date = "2023-10-10"
    _time = "10:10:10"
    dt = datetime.datetime.strptime(f"{_date} {_time}", "%Y-%m-%d %H:%M:%S")
    date = dt.date()
    time = dt.time()

    # make a random image with dtype uint8
    image = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)
    image_md5 = 'd1d2515e6f6ac4c5ca6dd739d5143cd4'  # 32 hex chars.

    obs = InputObservation(
        image=image,
        latitude=12.34, longitude=56.78, author_email=author_email,
        time=time, date=date,
        uploaded_file=mock_file,
        image_md5=image_md5,
    )

    assert isinstance(obs.image, np.ndarray)
    assert (obs.image == image).all()

    assert obs.latitude == 12.34
    assert obs.longitude == 56.78
    assert obs.author_email == author_email
    assert isinstance(obs.date, datetime.date)
    assert isinstance(obs.time, datetime.time)
    assert str(obs.date) == _date
    assert str(obs.time) == _time

    assert obs.uploaded_file.name == image_name
    assert obs.uploaded_file.size == 123456
    assert obs.uploaded_file.type == "image/jpeg"

    # only the BytesIO base type is checked, not the concrete mock class
    assert isinstance(obs.uploaded_file, BytesIO)


# a list of tuples (strings that are the keys of "valid_inputs", expected error type)
# loop over the list, and for each tuple, create a dictionary with all valid inputs, and one invalid input
# assert that the function raises the expected error type

# every scenario currently expects the same exception type, so the
# (key, expected error) pairs are generated from the list of keys
invalid_input_scenarios = [
    (field, TypeError)
    for field in (
        "author_email",
        "image_name",
        "uploaded_file",
        "date",
        "time",
        "image",
        "image_md5",
    )
]

@pytest.mark.parametrize("key, error_type", invalid_input_scenarios)
def test_input_observation_invalid(key, error_type, mock_uploadedFile):
    """Check that one bad input makes InputObservation construction fail.

    All inputs are valid except ``key``, which is replaced first by a value of
    the wrong kind and then by None; both must raise ``error_type``.

    Correct datatypes are:
    - image: ndarray
    - lat, lon: float
    - author_email: str
    - date, time: datetime.date, datetime.time
    - uploaded_file: UploadedFile (mocked)
    - image_md5: str

    The most critical/likely to go wrong would presumably be
    - date, time (strings not datetime objects)
    - lat, lon (strings not numbers)
    - image (not ndarray, maybe accidentally a PIL object or maybe the filename)
    - uploaded_file (not UploadedFile, maybe a string, or maybe the ndarray)
    """
    # set up the good and bad inputs
    _date = "2023-10-10"
    _time = "10:10:10"
    fname = "test_image.jpg"
    image = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)

    dt_ok = datetime.datetime.strptime(f"{_date} {_time}", "%Y-%m-%d %H:%M:%S")
    # NOTE(review): "image_name" is passed as a keyword here but not in the
    # other tests of this module - confirm InputObservation accepts it,
    # otherwise every case raises TypeError for the unexpected keyword alone.
    valid_inputs = {
        # a well-formed address on the reserved example.com domain
        "author_email": "test@example.com",
        "image_name": "test_image.jpg",
        "uploaded_file": mock_uploadedFile(name=fname).get_data(),
        "date": dt_ok.date(),
        "time": dt_ok.time(),
        "image": image,
        "image_md5": 'd1d2515e6f6ac4c5ca6dd739d5143cd4',  # 32 hex chars.
    }
    invalid_inputs = {
        "author_email": "@example",
        "image_name": 45,
        "uploaded_file": image,
        "date": _date,
        "time": _time,
        "image": fname,
        "image_md5": 45643,
    }

    # a valid set of inputs with the target key substituted: first by
    # something invalid, then by None
    for bad_value in (invalid_inputs[key], None):
        inputs = {**valid_inputs, key: bad_value}
        with pytest.raises(error_type):
            InputObservation(**inputs)
    

# we can take a similar approach to test equality.
# here, construct two dicts, each with valid inputs but all elements different.
# loop over the keys, and construct two InputObservations that differ on that key only.
# it is the diff function that prints a message describing the difference,
# so here we just assert the boolean result of the comparison.

# we currently expect differences on time to be ignored. 
# each key is varied on its own and paired with the expected result of `==`
# (False: the two observations should differ).  "time" is wrapped in an xfail
# marker, with the reason given in the marker itself.
inequality_keys = [
    pytest.param(
        field, False,
        marks=pytest.mark.xfail(reason="Time is currently ignored in __eq__"),
    )
    if field == "time"
    else (field, False)
    for field in (
        "author_email",
        "uploaded_file",
        "date",
        "time",
        "image",
        "image_md5",
    )
]
@pytest.mark.parametrize("key, expect_equality", inequality_keys)
def test_input_observation_equality(key, expect_equality, mock_uploadedFile):
    """Compare two InputObservations whose inputs differ in exactly one key.

    Two complete sets of valid inputs are built in which every element
    differs.  The second observation takes all values from the first set
    except ``key``, which comes from the second set.  ``expect_equality``
    states whether the two observations should then compare equal.
    """
    fmt = "%Y-%m-%d %H:%M:%S"
    dt1 = datetime.datetime.strptime("2023-10-10 10:10:10", fmt)
    dt2 = datetime.datetime.strptime("2023-10-11 12:13:14", fmt)

    # the two sets must differ in every element, including the email address
    # and the uploaded file name, so that each parametrised key really varies
    valid_inputs1 = {
        "author_email": "test@example.com",
        "uploaded_file": mock_uploadedFile(name="test_image.jpg").get_data(),
        "date": dt1.date(),
        "time": dt1.time(),
        "image": np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8),
        "image_md5": 'd1d2515e6f6ac4c5ca6dd739d5143cd4',  # 32 hex chars.
    }

    valid_inputs2 = {
        "author_email": "test2@example.com",
        "uploaded_file": mock_uploadedFile(name="another.jpg").get_data(),
        "date": dt2.date(),
        "time": dt2.time(),
        "image": np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8),
        "image_md5": 'cdb235587bdee5915d6ccfa52ca9f3ac',  # 32 hex chars.
    }

    # identical to the first set apart from the key under test
    nearly_same_inputs = {**valid_inputs1, key: valid_inputs2[key]}
    obs1 = InputObservation(**valid_inputs1)
    obs2 = InputObservation(**nearly_same_inputs)

    if expect_equality:
        assert obs1 == obs2
    else:
        assert obs1 != obs2
    

# now let's test the setter methods (set_top_predictions, set_selected_class, set_class_overriden)
# ideally we get a fixture that produces a good / valid InputObservation object
# and from there, just test the setters + their expected changes / side effects

@pytest.fixture
def good_datadict_for_input_observation(mock_uploadedFile) -> dict:
    """Return a dict of valid keyword arguments for InputObservation.

    The dict holds the author email, a mocked uploaded file, date and time,
    a random uint8 image, its (fixed) md5 string, the raw datetime string the
    date and time were parsed from, and latitude/longitude.
    """
    image_datetime_raw = "2023-10-10 10:10:10"
    dt_ok = datetime.datetime.strptime(image_datetime_raw, "%Y-%m-%d %H:%M:%S")

    return {
        # a well-formed address on the reserved example.com domain
        "author_email": "test@example.com",
        "uploaded_file": mock_uploadedFile(name="test_image.jpg").get_data(),
        "date": dt_ok.date(),
        "time": dt_ok.time(),
        "image": np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8),
        "image_md5": 'd1d2515e6f6ac4c5ca6dd739d5143cd4',  # 32 hex chars.
        "image_datetime_raw": image_datetime_raw,
        "latitude": 12.34,
        "longitude": 56.78,
    }
    

@pytest.fixture
def good_input_observation(good_datadict_for_input_observation) -> InputObservation:
    """Return an InputObservation built from the valid input dict fixture."""
    return InputObservation(**good_datadict_for_input_observation)
    

# 
def test_input_observation__set_top_predictions_populated(good_input_observation):
    """Setting a non-empty prediction list also selects its first element."""
    obs = good_input_observation

    # before setting, expect an empty list and no selected class
    assert obs.top_predictions == []
    assert obs.selected_class is None

    # set >0 predictions,
    # - expect to find the same list in the property/attribute
    # - expect to find the first element in the selected_class
    top_predictions = ["beluga", "blue_whale", "common_dolphin"]
    obs.set_top_predictions(top_predictions)

    assert len(obs.top_predictions) == 3
    assert obs.top_predictions == top_predictions
    assert obs.selected_class == "beluga"
    
def test_input_observation__set_top_predictions_unpopulated(good_input_observation):
    """Setting an empty prediction list leaves the selected class unset."""
    obs = good_input_observation

    # before setting, expect an empty list and no selected class
    assert obs.top_predictions == []
    assert obs.selected_class is None

    # set to empty list,
    # - expect to find the same list in the property/attribute
    # - expect to find selected_class to be None
    top_predictions = []
    obs.set_top_predictions(top_predictions)

    assert len(obs.top_predictions) == 0
    assert obs.top_predictions == []
    assert obs.selected_class is None
    
def test_input_observation__set_selected_class_default(good_input_observation):
    """Selecting the top prediction must not count as an override."""
    obs = good_input_observation

    # before setting, expect an empty list, no selected class, no override
    assert obs.top_predictions == []
    assert obs.selected_class is None
    assert obs.class_overriden is False

    # set >0 predictions, and then set_selected_class to the first element
    # - expect to find the same list in the property/attribute
    # - expect to find the first element in the selected_class
    # - expect class_overriden to be False
    top_predictions = ["beluga", "blue_whale", "common_dolphin"]
    obs.set_top_predictions(top_predictions)
    obs.set_selected_class(top_predictions[0])

    assert len(obs.top_predictions) == 3
    assert obs.top_predictions == top_predictions
    assert obs.selected_class == "beluga"
    # the expectation stated above: choosing the first element is no override
    assert obs.class_overriden is False
   
def test_input_observation__set_selected_class_override(good_input_observation):
    """Selecting a class outside the predictions must flag an override."""
    obs = good_input_observation

    # before setting, expect an empty list, no selected class, no override
    assert obs.top_predictions == []
    assert obs.selected_class is None
    assert obs.class_overriden is False

    # set >0 predictions, and then set_selected_class to something out of list
    # - expect to find the same list in the property/attribute
    # - expect to find the chosen class in the selected_class
    # - expect class_overriden to be True
    top_predictions = ["beluga", "blue_whale", "common_dolphin"]
    obs.set_top_predictions(top_predictions)
    obs.set_selected_class("brydes_whale")

    assert len(obs.top_predictions) == 3
    assert obs.top_predictions == top_predictions
    assert obs.selected_class == "brydes_whale"
    assert obs.class_overriden is True
    
   
# now we want to test to_dict, make sure it is compliant with the data to be
# transmitted to the dataset/server 

def test_input_observation_to_dict(good_datadict_for_input_observation):
    """Check to_dict() against a dict derived from the constructor inputs.

    The observation is given three predictions and a selected class that is
    not among them.  The expected dict is then the input dict with date and
    time stringified, ``image_filename`` taken from the uploaded file's name,
    ``uploaded_file`` and ``image`` dropped, and the classification results
    added.
    """
    source = good_datadict_for_input_observation
    obs = InputObservation(**source)

    ranked = ["beluga", "blue_whale", "common_dolphin"]
    chosen = "brydes_whale"  # deliberately not one of the predictions
    obs.set_top_predictions(ranked)
    obs.set_selected_class(chosen)

    # start from the inputs; the two entries that are not transmitted are
    # removed, and the uploaded file only contributes its name
    expected_output = dict(source)
    uploaded = expected_output.pop("uploaded_file")
    expected_output.pop("image")
    expected_output.update(
        date=str(source["date"]),
        time=str(source["time"]),
        image_filename=uploaded.name,
        top_prediction=ranked[0],
        selected_class=chosen,
        class_overriden=True,
    )

    print(obs.to_dict())
    assert obs.to_dict() == expected_output

    # for orientation, the expected dict has this shape:
    # {
    #     'image_filename': 'test_image.jpg',
    #     'image_md5': 'd1d2515e6f6ac4c5ca6dd739d5143cd4',
    #     'latitude': 12.34, 'longitude': 56.78,
    #     'author_email': <author_email from the input dict>,
    #     'image_datetime_raw': '2023-10-10 10:10:10',
    #     'date': '2023-10-10', 'time': '10:10:10',
    #     'selected_class': 'brydes_whale', 'top_prediction': 'beluga',
    #     'class_overriden': True
    # }