Spaces:

Nick088
/

Audio-SR

Running on Zero

added audio sr files, adapted them to zerogpu and optimization for memory

fa90792 about 1 year ago

1.36 kB

	# Copyright (c) Meta Platforms, Inc. and affiliates.
	# All rights reserved.

	# This source code is licensed under the license found in the
	# LICENSE file in the root directory of this source tree.

	import math

	import torch

	from torchvision import transforms
	from torchvision.transforms import functional as F


	class RandomResizedCrop(transforms.RandomResizedCrop):
	    """
	    RandomResizedCrop for matching TF/TPU implementation: no for-loop is used.
	    This may lead to results different with torchvision's version.
	    Following BYOL's TF code:
	    https://github.com/deepmind/deepmind-research/blob/master/byol/utils/dataset.py#L206
	    """

	    @staticmethod
	    def get_params(img, scale, ratio):
	        """Sample the parameters of a random crop in a single draw.

	        Unlike a rejection-sampling loop, one candidate is drawn and its
	        width/height are clamped to the image bounds.

	        Args:
	            img: Image accepted by torchvision's image-size helper.
	            scale: ``(min, max)`` range for the crop area, expressed as a
	                fraction of the full image area.
	            ratio: ``(min, max)`` range for the crop aspect ratio
	                (width / height); sampled uniformly in log space.

	        Returns:
	            Tuple ``(i, j, h, w)``: vertical offset, horizontal offset,
	            crop height and crop width.
	        """
	        # Prefer the public helper; the private ``_get_image_size`` is not
	        # available in newer torchvision releases, so only use it as a
	        # fallback for old versions that lack the public name.
	        get_image_size = getattr(F, "get_image_size", None)
	        if get_image_size is None:
	            get_image_size = F._get_image_size
	        width, height = get_image_size(img)
	        area = height * width

	        target_area = area * torch.empty(1).uniform_(scale[0], scale[1]).item()
	        # Draw the aspect ratio uniformly in log space so that e.g. 3/4 and
	        # 4/3 are equally likely.
	        log_ratio = torch.log(torch.tensor(ratio))
	        aspect_ratio = torch.exp(
	            torch.empty(1).uniform_(log_ratio[0], log_ratio[1])
	        ).item()

	        w = int(round(math.sqrt(target_area * aspect_ratio)))
	        h = int(round(math.sqrt(target_area / aspect_ratio)))

	        # Clamp instead of re-sampling: the crop can never exceed the image.
	        w = min(w, width)
	        h = min(h, height)

	        # Offsets are chosen so the crop lies fully inside the image.
	        i = torch.randint(0, height - h + 1, size=(1,)).item()
	        j = torch.randint(0, width - w + 1, size=(1,)).item()

	        return i, j, h, w