Spaces:
Sleeping
Sleeping
| import hashlib | |
| from input.input_validator import generate_random_md5 | |
| from numpy import ndarray | |
| from streamlit.runtime.uploaded_file_manager import UploadedFile | |
| import datetime | |
| # autogenerated class to hold the input data | |
| class InputObservation: | |
| """ | |
| A class to hold an input observation and associated metadata | |
| Attributes: | |
| image (ndarray): | |
| The image associated with the observation. | |
| latitude (float): | |
| The latitude where the observation was made. | |
| longitude (float): | |
| The longitude where the observation was made. | |
| author_email (str): | |
| The email of the author of the observation. | |
| image_datetime_raw (str): | |
| The datetime extracted from the observation file | |
| date (datetime.date): | |
| Date of the observation | |
| time (datetime.time): | |
| Time of the observation | |
| uploaded_file (UploadedFile): | |
| The uploaded file associated with the observation. | |
| image_md5 (str): | |
| The MD5 hash of the image associated with the observation. | |
| Methods: | |
| __str__(): | |
| Returns a string representation of the observation. | |
| __repr__(): | |
| Returns a string representation of the observation. | |
| __eq__(other): | |
| Checks if two observations are equal. | |
| __ne__(other): | |
| Checks if two observations are not equal. | |
| show_diff(other): | |
| Shows the differences between two observations. | |
| to_dict(): | |
| Converts the observation to a dictionary. | |
| from_dict(data): | |
| Creates an observation from a dictionary. | |
| from_input(input): | |
| Creates an observation from another input observation. | |
| """ | |
| _inst_count = 0 | |
| def __init__( | |
| self, image:ndarray=None, latitude:float=None, longitude:float=None, | |
| author_email:str=None, image_datetime_raw:str=None, | |
| date:datetime.date=None, | |
| time:datetime.time=None, | |
| uploaded_file:UploadedFile=None, image_md5:str=None): | |
| self.image = image | |
| self.latitude = latitude | |
| self.longitude = longitude | |
| self.author_email = author_email | |
| self.image_datetime_raw = image_datetime_raw | |
| self.date = date | |
| self.time = time | |
| self.uploaded_file = uploaded_file | |
| self.image_md5 = image_md5 | |
| # attributes that get set after predictions/processing | |
| self._top_predictions = [] | |
| self._selected_class = None | |
| self._class_overriden = False | |
| InputObservation._inst_count += 1 | |
| self._inst_id = InputObservation._inst_count | |
| #dbg - temporarily give up if hash is not provided | |
| if self.image_md5 is None: | |
| raise ValueError(f"Image MD5 hash is required - {self._inst_id:3}.") | |
| def set_top_predictions(self, top_predictions:list): | |
| self._top_predictions = top_predictions | |
| if len(top_predictions) > 0: | |
| self.set_selected_class(top_predictions[0]) | |
| def set_selected_class(self, selected_class:str): | |
| self._selected_class = selected_class | |
| if selected_class != self._top_predictions[0]: | |
| self.set_class_overriden(True) | |
| def set_class_overriden(self, class_overriden:bool): | |
| self._class_overriden = class_overriden | |
| # add getters for the top_predictions, selected_class and class_overriden | |
| def top_predictions(self): | |
| return self._top_predictions | |
| def selected_class(self): | |
| return self._selected_class | |
| def class_overriden(self): | |
| return self._class_overriden | |
| # add a method to assign the image_md5 only once | |
| def assign_image_md5(self): | |
| raise DeprecationWarning("This method is deprecated. hash is a required constructor argument.") | |
| if not self.image_md5: | |
| self.image_md5 = hashlib.md5(self.uploaded_file.read()).hexdigest() if self.uploaded_file else generate_random_md5() | |
| m_logger.debug(f"[D] Assigned image md5: {self.image_md5} for {self.uploaded_file}") | |
| def __str__(self): | |
| _im_str = "None" if self.image is None else f"image dims: {self.image.shape}" | |
| return ( | |
| f"Observation: {_im_str}, {self.latitude}, {self.longitude}, " | |
| f"{self.author_email}, {self.image_datetime_raw}, {self.date}, " | |
| f"{self.time}, {self.uploaded_file}, {self.image_md5}" | |
| ) | |
| def __repr__(self): | |
| _im_str = "None" if self.image is None else f"image dims: {self.image.shape}" | |
| return ( | |
| f"Observation: " | |
| f"Image: {_im_str}, " | |
| f"Latitude: {self.latitude}, " | |
| f"Longitude: {self.longitude}, " | |
| f"Author Email: {self.author_email}, " | |
| f"raw timestamp: {self.image_datetime_raw}, " | |
| f"Date: {self.date}, " | |
| f"Time: {self.time}, " | |
| f"Uploaded Filename: {self.uploaded_file}" | |
| f"Image MD5 hash: {self.image_md5}" | |
| ) | |
| def __eq__(self, other): | |
| # TODO: ensure this covers all the attributes (some have been added?) | |
| # - except inst_id which is unique | |
| _image_equality = False | |
| if self.image is None or other.image is None: | |
| _image_equality = other.image == self.image | |
| else: # maybe strong assumption: both are correctly ndarray.. should I test types intead? | |
| _image_equality = (self.image == other.image).all() | |
| equality = ( | |
| #self.image == other.image and | |
| _image_equality and | |
| self.latitude == other.latitude and | |
| self.longitude == other.longitude and | |
| self.author_email == other.author_email and | |
| self.image_datetime_raw == other.image_datetime_raw and | |
| self.date == other.date and | |
| # temporarily skip time, it is followed by the clock and that is always differnt | |
| #self.time == other.time and | |
| self.uploaded_file == other.uploaded_file and | |
| self.image_md5 == other.image_md5 | |
| ) | |
| return equality | |
| # define a function show_diff(other) that shows the differences between two observations | |
| # only highlight the differences, if element is the same don't show it | |
| # have a summary at the top that shows if the observations are the same or not | |
| def show_diff(self, other): | |
| """Show the differences between two observations""" | |
| differences = [] | |
| if self.image is None or other.image is None: | |
| if other.image != self.image: | |
| differences.append(f" Image is different. (types mismatch: {type(self.image)} vs {type(other.image)})") | |
| else: | |
| if (self.image != other.image).any(): | |
| cnt = (self.image != other.image).sum() | |
| differences.append(f" Image is different: {cnt} different pixels.") | |
| if self.latitude != other.latitude: | |
| differences.append(f" Latitude is different. (self: {self.latitude}, other: {other.latitude})") | |
| if self.longitude != other.longitude: | |
| differences.append(f" Longitude is different. (self: {self.longitude}, other: {other.longitude})") | |
| if self.author_email != other.author_email: | |
| differences.append(f" Author email is different. (self: {self.author_email}, other: {other.author_email})") | |
| if self.image_datetime_raw != other.image_datetime_raw: | |
| differences.append(f" Date is different. (self: {self.image_datetime_raw}, other: {other.image_datetime_raw})") | |
| if self.date != other.date: | |
| differences.append(f" Date is different. (self: {self.date}, other: {other.date})") | |
| if self.time != other.time: | |
| differences.append(f" Time is different. (self: {self.time}, other: {other.time})") | |
| if self.uploaded_file != other.uploaded_file: | |
| differences.append(" Uploaded filename is different.") | |
| if self.image_md5 != other.image_md5: | |
| differences.append(" Image MD5 hash is different.") | |
| if differences: | |
| print(f"Observations have {len(differences)} differences:") | |
| for diff in differences: | |
| print(diff) | |
| else: | |
| print("Observations are the same.") | |
| def __ne__(self, other): | |
| return not self.__eq__(other) | |
| def to_dict(self): | |
| return { | |
| #"image": self.image, | |
| "image_filename": self.uploaded_file.name if self.uploaded_file else None, | |
| "image_md5": self.image_md5, | |
| #"image_md5": hashlib.md5(self.uploaded_file.read()).hexdigest() if self.uploaded_file else generate_random_md5(), | |
| "latitude": self.latitude, | |
| "longitude": self.longitude, | |
| "author_email": self.author_email, | |
| "image_datetime_raw": self.image_datetime_raw, | |
| "date": str(self.date), | |
| "time": str(self.time), | |
| "selected_class": self._selected_class, | |
| "top_prediction": self._top_predictions[0] if len(self._top_predictions) else None, | |
| "class_overriden": self._class_overriden, | |
| #"uploaded_file": self.uploaded_file # can't serialize this in json, not sent to dataset anyway. | |
| } | |
| def from_dict(cls, data): | |
| return cls( | |
| image=data.get("image"), | |
| latitude=data.get("latitude"), | |
| longitude=data.get("longitude"), | |
| author_email=data.get("author_email"), | |
| image_datetime_raw=data.get("image_datetime_raw"), | |
| date=data.get("date"), | |
| time=data.get("time"), | |
| uploaded_file=data.get("uploaded_file"), | |
| image_hash=data.get("image_md5") | |
| ) | |
| def from_input(cls, input): | |
| return cls( | |
| image=input.image, | |
| latitude=input.latitude, | |
| longitude=input.longitude, | |
| author_email=input.author_email, | |
| image_datetime_raw=input.image_datetime_raw, | |
| date=input.date, | |
| time=input.time, | |
| uploaded_file=input.uploaded_file, | |
| image_hash=input.image_hash | |
| ) | |