unilm/edgelm/fairseq/distributed/tpu_distributed_data_parallel.py
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import torch
from torch import nn

from fairseq.distributed import utils


class TPUDistributedDataParallel(nn.Module):
    """A lightweight DistributedDataParallel-style wrapper for TPUs.

    Unlike torch.nn.parallel.DistributedDataParallel, no backward hooks are
    registered: gradients are only synchronized when ``all_reduce_grads()``
    is called explicitly, typically right before the optimizer step.
    """

    def __init__(self, module, process_group):
        super().__init__()
        self.module = module
        self.process_group = process_group
        self.world_size = utils.get_world_size(self.process_group)

    def forward(self, *inputs, **kwargs):
        # Plain pass-through to the wrapped module.
        return self.module(*inputs, **kwargs)

    def all_reduce_grads(self):
        gradients = []
        for p in self.parameters():
            if not p.requires_grad:
                continue
            if p.grad is None:
                # Parameters that received no gradient still take part in the
                # collective, so materialize a zero gradient for them.
                p.grad = torch.zeros_like(p)
            if p.grad.requires_grad:
                raise RuntimeError(
                    "TPUDistributedDataParallel only works with gradients that don't "
                    "require grad"
                )
            gradients.append(p.grad)

        # Sum the gradients across all replicas in the group and rescale by
        # 1/world_size, i.e. an in-place average of the gradients.
        import torch_xla.core.xla_model as xm

        xm.all_reduce(
            'sum',
            gradients,
            scale=1.0 / self.world_size,
            groups=self.process_group[1],
        )
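
Usage sketch (not part of the original file): because the wrapper registers no backward hooks, gradient averaging must be triggered explicitly after the backward pass. The standalone script below shows one way this might be driven under torch_xla. It is a minimal illustration built on assumptions: a working torch_xla installation, and a ("tpu", [ranks]) process-group tuple inferred from the process_group[1] indexing in all_reduce_grads above rather than from fairseq documentation.

import torch
from torch import nn
import torch_xla.core.xla_model as xm
import torch_xla.distributed.xla_multiprocessing as xmp

from fairseq.distributed.tpu_distributed_data_parallel import (
    TPUDistributedDataParallel,
)


def _mp_fn(index):
    device = xm.xla_device()
    model = nn.Linear(16, 4).to(device)

    # Assumed group layout: element [1] is the `groups` argument that
    # all_reduce_grads() forwards to xm.all_reduce (one group of all cores).
    ranks = list(range(xm.xrt_world_size()))
    wrapped = TPUDistributedDataParallel(model, process_group=("tpu", [ranks]))

    optimizer = torch.optim.SGD(wrapped.parameters(), lr=0.1)
    x = torch.randn(8, 16, device=device)
    y = torch.randn(8, 4, device=device)

    loss = nn.functional.mse_loss(wrapped(x), y)
    loss.backward()
    wrapped.all_reduce_grads()  # explicit gradient average across TPU cores
    optimizer.step()
    xm.mark_step()  # flush the lazily recorded XLA graph


if __name__ == "__main__":
    xmp.spawn(_mp_fn)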