Spaces:

rootstrap-org
/

waste-classifier

Sleeping

App Files Files Community

waste-classifier / efficientdet /effdet /data /transforms.py

santit96

Create the streamlit app that classifies the trash in an image into classes

fa84113 over 1 year ago

raw

history blame

8.95 kB

	""" COCO transforms (quick and dirty)

	Hacked together by Ross Wightman
	"""
	import torch
	from PIL import Image
	import numpy as np
	import random
	import math

	# Per-channel (3-value) mean / std triples. The mean is treated as being on a
	# 0-1 scale: resolve_fill_color multiplies it by 255 to build a pad color.
	# NOTE(review): presumably the usual ImageNet and Inception normalization
	# statistics -- confirm against the code that consumes them.
	IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406)
	IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225)
	IMAGENET_INCEPTION_MEAN = (0.5, 0.5, 0.5)
	IMAGENET_INCEPTION_STD = (0.5, 0.5, 0.5)


	class ImageToNumpy:
	    """Transform that turns an image into a channel-first ``uint8`` NumPy array."""

	    def __call__(self, pil_img, annotations: dict):
	        """Convert ``pil_img`` to a CHW ``uint8`` array.

	        Args:
	            pil_img: anything ``np.array`` can convert (e.g. a PIL image).
	            annotations: annotation dict, passed through untouched.

	        Returns:
	            Tuple of (CHW uint8 array, annotations).
	        """
	        pixels = np.array(pil_img, dtype=np.uint8)
	        if pixels.ndim < 3:
	            # Single-channel input: append a trailing channel axis first.
	            pixels = pixels[..., np.newaxis]
	        # HWC to CHW
	        return np.moveaxis(pixels, 2, 0), annotations


	class ImageToTensor:
	    """Transform that turns an image into a channel-first torch tensor."""

	    def __init__(self, dtype=torch.float32):
	        """Remember the tensor dtype the output is cast to."""
	        self.dtype = dtype

	    def __call__(self, pil_img, annotations: dict):
	        """Convert ``pil_img`` to a CHW tensor of ``self.dtype``.

	        Args:
	            pil_img: anything ``np.array`` can convert (e.g. a PIL image).
	            annotations: annotation dict, passed through untouched.

	        Returns:
	            Tuple of (CHW tensor, annotations).
	        """
	        chw = np.array(pil_img, dtype=np.uint8)
	        if chw.ndim < 3:
	            # Single-channel input: append a trailing channel axis first.
	            chw = chw[..., np.newaxis]
	        chw = np.moveaxis(chw, 2, 0)  # HWC to CHW
	        tensor = torch.from_numpy(chw)
	        return tensor.to(dtype=self.dtype), annotations


	def _pil_interp(method):
	    """Map an interpolation name to the matching PIL resampling constant.

	    Recognized names are 'bicubic', 'lanczos' and 'hamming'; every other
	    value (including 'bilinear') yields ``Image.BILINEAR``.
	    """
	    named_filters = (
	        ('bicubic', 'BICUBIC'),
	        ('lanczos', 'LANCZOS'),
	        ('hamming', 'HAMMING'),
	    )
	    for name, attr in named_filters:
	        if method == name:
	            return getattr(Image, attr)
	    # default bilinear, do we want to allow nearest?
	    return Image.BILINEAR


	def clip_boxes_(boxes, img_size):
	    """Clip ``boxes`` in place to the bounds of an image.

	    Args:
	        boxes: NumPy array whose last axis holds 4 coordinates; they are
	            bounded by (height, width, height, width) respectively.
	        img_size: ``(height, width)`` pair giving the upper bounds.

	    Returns:
	        None. ``boxes`` is modified in place.
	    """
	    max_y, max_x = img_size
	    upper = np.array((max_y, max_x, max_y, max_x), dtype=boxes.dtype)
	    np.clip(boxes, 0, upper, out=boxes)


	def clip_boxes(boxes, img_size):
	    """Return a clipped copy of ``boxes``; the input array is left untouched.

	    Args:
	        boxes: array of boxes exposing ``.copy()``.
	        img_size: ``(height, width)`` bounds, forwarded to ``clip_boxes_``.
	    """
	    result = boxes.copy()
	    clip_boxes_(result, img_size)  # in-place clip on the copy only
	    return result


	def _size_tuple(size):
	if isinstance(size, int):
	return size, size
	else:
	assert len(size) == 2
	return size


	class ResizePad:
	    """Aspect-preserving resize onto a fixed-size, fill-colored RGB canvas.

	    The image is scaled uniformly so it fits inside ``target_size`` and is
	    pasted onto a new canvas of exactly ``target_size``; the rest of the
	    canvas keeps ``fill_color``.
	    """

	    def __init__(self, target_size: int, interpolation: str = 'bilinear', fill_color: tuple = (0, 0, 0)):
	        """Store the settings.

	        Args:
	            target_size: int (square) or 2-element (height, width) size.
	            interpolation: interpolation name understood by ``_pil_interp``.
	            fill_color: color used for the padded canvas area.
	        """
	        self.target_size = _size_tuple(target_size)
	        self.interpolation = interpolation
	        self.fill_color = fill_color

	    def __call__(self, img, anno: dict):
	        """Resize and pad ``img``, updating ``anno`` to match.

	        If ``anno`` has a 'bbox' entry, the box array is scaled and clipped
	        in place, then degenerate boxes are dropped from 'bbox' and 'cls'.
	        ``anno['img_scale']`` is set to the factor that maps output
	        coordinates back to the original image.

	        Returns:
	            Tuple of (padded RGB image, anno).
	        """
	        target_h, target_w = self.target_size
	        src_w, src_h = img.size

	        # One uniform scale: the tighter of the two axis ratios.
	        img_scale = min(target_h / src_h, target_w / src_w)
	        scaled_h = int(src_h * img_scale)
	        scaled_w = int(src_w * img_scale)

	        canvas = Image.new("RGB", (target_w, target_h), color=self.fill_color)
	        resized = img.resize((scaled_w, scaled_h), _pil_interp(self.interpolation))
	        canvas.paste(resized)

	        if 'bbox' in anno:
	            # FIXME haven't tested this path since not currently using dataset annotations for train/eval
	            boxes = anno['bbox']
	            boxes[:, :4] *= img_scale  # in place on the caller's array
	            clip_boxes_(boxes, (scaled_h, scaled_w))
	            # Keep only boxes whose min corner is strictly below the max corner.
	            keep = (boxes[:, :2] < boxes[:, 2:4]).all(axis=1)
	            anno['bbox'] = boxes[keep, :]
	            anno['cls'] = anno['cls'][keep]

	        anno['img_scale'] = 1. / img_scale  # back to original

	        return canvas, anno


	class RandomResizePad:
	    """Random-scale resize, random crop and pad to a fixed-size RGB canvas.

	    A scale factor is drawn uniformly from ``scale`` and applied to the
	    target size; the image is resized (aspect preserved) to fit that scaled
	    target. If the result is larger than ``target_size`` a random window of
	    ``target_size`` is cropped out, otherwise the image is padded with
	    ``fill_color``.
	    """

	    def __init__(self, target_size: int, scale: tuple = (0.1, 2.0), interpolation: str = 'bilinear',
	                 fill_color: tuple = (0, 0, 0)):
	        """Store the settings.

	        Args:
	            target_size: int (square) or 2-element (height, width) size.
	            scale: (min, max) range the random scale factor is drawn from.
	            interpolation: interpolation name understood by ``_pil_interp``.
	            fill_color: color used for the padded canvas area.
	        """
	        self.target_size = _size_tuple(target_size)
	        self.scale = scale
	        self.interpolation = interpolation
	        self.fill_color = fill_color

	    def _get_params(self, img):
	        """Draw the random resize / crop parameters for ``img``.

	        Returns:
	            Tuple ``(scaled_h, scaled_w, offset_y, offset_x, img_scale)``.
	        """
	        # Select a random scale factor.
	        scale_factor = random.uniform(*self.scale)
	        scaled_target_height = scale_factor * self.target_size[0]
	        scaled_target_width = scale_factor * self.target_size[1]

	        # Recompute the accurate scale_factor using rounded scaled image size.
	        width, height = img.size
	        img_scale_y = scaled_target_height / height
	        img_scale_x = scaled_target_width / width
	        img_scale = min(img_scale_y, img_scale_x)

	        # Select non-zero random offset (x, y) if scaled image is larger than target size
	        scaled_h = int(height * img_scale)
	        scaled_w = int(width * img_scale)
	        offset_y = scaled_h - self.target_size[0]
	        offset_x = scaled_w - self.target_size[1]
	        offset_y = int(max(0.0, float(offset_y)) * random.uniform(0, 1))
	        offset_x = int(max(0.0, float(offset_x)) * random.uniform(0, 1))
	        return scaled_h, scaled_w, offset_y, offset_x, img_scale

	    def __call__(self, img, anno: dict):
	        """Apply the random resize / crop / pad to ``img`` and update ``anno``.

	        If ``anno`` has a 'bbox' entry, a copy of the boxes is scaled, shifted
	        by the crop offset and clipped to the region of the output that
	        actually contains image content; degenerate boxes are dropped from
	        'bbox' and 'cls'. ``anno['img_scale']`` is set to the factor that
	        maps output coordinates back to the original image.

	        Returns:
	            Tuple of (output RGB image of ``target_size``, anno).
	        """
	        scaled_h, scaled_w, offset_y, offset_x, img_scale = self._get_params(img)

	        interp_method = _pil_interp(self.interpolation)
	        img = img.resize((scaled_w, scaled_h), interp_method)
	        right, lower = min(scaled_w, offset_x + self.target_size[1]), min(scaled_h, offset_y + self.target_size[0])
	        img = img.crop((offset_x, offset_y, right, lower))
	        new_img = Image.new("RGB", (self.target_size[1], self.target_size[0]), color=self.fill_color)
	        new_img.paste(img)

	        if 'bbox' in anno:
	            # FIXME not fully tested
	            bbox = anno['bbox'].copy()  # FIXME copy for debugger inspection, back to inplace
	            bbox[:, :4] *= img_scale
	            box_offset = np.stack([offset_y, offset_x] * 2)
	            bbox -= box_offset
	            # Clip to the cropped region that ended up in the output, not to
	            # the full scaled image: when the scaled image is larger than the
	            # target, the latter would leave boxes extending past the output.
	            visible_h = lower - offset_y
	            visible_w = right - offset_x
	            clip_boxes_(bbox, (visible_h, visible_w))
	            valid_indices = (bbox[:, :2] < bbox[:, 2:4]).all(axis=1)
	            anno['bbox'] = bbox[valid_indices, :]
	            anno['cls'] = anno['cls'][valid_indices]

	        anno['img_scale'] = 1. / img_scale  # back to original

	        return new_img, anno


	class RandomFlip:
	    """Randomly mirror an image (and its boxes) horizontally and/or vertically."""

	    def __init__(self, horizontal=True, vertical=False, prob=0.5):
	        """Store which flips are enabled and the per-flip probability."""
	        self.horizontal = horizontal
	        self.vertical = vertical
	        self.prob = prob

	    def _get_params(self):
	        """Decide which flips to apply.

	        A random number is only drawn for a direction that is enabled.

	        Returns:
	            Tuple ``(do_horizontal, do_vertical)``.
	        """
	        flip_h = False
	        if self.horizontal:
	            flip_h = random.random() < self.prob
	        flip_v = False
	        if self.vertical:
	            flip_v = random.random() < self.prob
	        return flip_h, flip_v

	    def __call__(self, img, annotations: dict):
	        """Flip ``img`` and mirror ``annotations['bbox']`` in place if present.

	        Box columns 1 and 3 are mirrored against the image width, columns
	        0 and 2 against the image height.

	        Returns:
	            Tuple of (possibly flipped image, annotations).
	        """
	        flip_h, flip_v = self._get_params()
	        width, height = img.size

	        # Pick the single transpose operation equivalent to the chosen flips.
	        if flip_h and flip_v:
	            operation = Image.ROTATE_180
	        elif flip_h:
	            operation = Image.FLIP_LEFT_RIGHT
	        elif flip_v:
	            operation = Image.FLIP_TOP_BOTTOM
	        else:
	            return img, annotations

	        img = img.transpose(operation)

	        if 'bbox' in annotations:
	            boxes = annotations['bbox']
	            if flip_h:
	                # New min comes from the old max (and vice versa).
	                boxes[:, [1, 3]] = width - boxes[:, [3, 1]]
	            if flip_v:
	                boxes[:, [0, 2]] = height - boxes[:, [2, 0]]

	        return img, annotations


	def resolve_fill_color(fill_color, img_mean=IMAGENET_DEFAULT_MEAN):
	    """Turn a fill-color specification into a 3-tuple.

	    Args:
	        fill_color: a 3-tuple (returned as is), a value ``int()`` accepts
	            (replicated to three channels), or the string 'mean'.
	        img_mean: per-channel mean on a 0-1 scale; used for 'mean', where
	            each channel is scaled by 255 and rounded.

	    Returns:
	        The resolved color tuple.
	    """
	    if isinstance(fill_color, tuple):
	        assert len(fill_color) == 3
	        return fill_color

	    try:
	        gray = int(fill_color)
	    except ValueError:
	        # Not a number: the only other accepted spelling is 'mean'.
	        assert fill_color == 'mean'
	        return tuple(int(round(255 * channel)) for channel in img_mean)
	    return (gray,) * 3


	class Compose:
	    """Chain several ``(img, annotations)`` transforms into one callable."""

	    def __init__(self, transforms: list):
	        """Keep the transforms; they are applied in list order."""
	        self.transforms = transforms

	    def __call__(self, img, annotations: dict):
	        """Feed ``img`` and ``annotations`` through every transform in turn.

	        Each transform must return an ``(img, annotations)`` pair, which
	        becomes the input of the next one.

	        Returns:
	            The ``(img, annotations)`` pair produced by the last transform,
	            or the inputs themselves when there are no transforms.
	        """
	        for transform in self.transforms:
	            img, annotations = transform(img, annotations)
	        return img, annotations


	def transforms_coco_eval(
	        img_size=224,
	        interpolation='bilinear',
	        use_prefetcher=False,
	        fill_color='mean',
	        mean=IMAGENET_DEFAULT_MEAN,
	        std=IMAGENET_DEFAULT_STD):
	    """Build the evaluation pipeline: ``ResizePad`` then ``ImageToNumpy``.

	    Args:
	        img_size: target size handed to ``ResizePad``.
	        interpolation: interpolation name handed to ``ResizePad``.
	        use_prefetcher: must be truthy; anything else fails the assertion.
	        fill_color: pad color spec, resolved with ``resolve_fill_color``.
	        mean: per-channel mean used when ``fill_color`` is 'mean'.
	        std: accepted but not used by this function.

	    Returns:
	        A ``Compose`` wrapping the two transforms.
	    """
	    pad_color = resolve_fill_color(fill_color, mean)

	    resize = ResizePad(target_size=img_size, interpolation=interpolation, fill_color=pad_color)
	    pipeline = [resize, ImageToNumpy()]

	    assert use_prefetcher, "Only supporting prefetcher usage right now"

	    return Compose(pipeline)


	def transforms_coco_train(
	        img_size=224,
	        interpolation='random',
	        use_prefetcher=False,
	        fill_color='mean',
	        mean=IMAGENET_DEFAULT_MEAN,
	        std=IMAGENET_DEFAULT_STD):
	    """Build the training pipeline: flip, random resize/pad, to NumPy.

	    Args:
	        img_size: target size handed to ``RandomResizePad``.
	        interpolation: interpolation name handed to ``RandomResizePad``.
	        use_prefetcher: must be truthy; anything else fails the assertion.
	        fill_color: pad color spec, resolved with ``resolve_fill_color``.
	        mean: per-channel mean used when ``fill_color`` is 'mean'.
	        std: accepted but not used by this function.

	    Returns:
	        A ``Compose`` wrapping the three transforms.
	    """
	    pad_color = resolve_fill_color(fill_color, mean)

	    flip = RandomFlip(horizontal=True, prob=0.5)
	    resize = RandomResizePad(target_size=img_size, interpolation=interpolation, fill_color=pad_color)
	    pipeline = [flip, resize, ImageToNumpy()]

	    assert use_prefetcher, "Only supporting prefetcher usage right now"

	    return Compose(pipeline)