"""Shared optimizer.

The optimizer state is created eagerly and moved into shared memory so that
it can be shared between multiple processes.
"""

import torch


class SharedAdam(torch.optim.Adam):
    """Adam optimizer whose per-parameter state lives in shared memory.

    The state (``step``, ``exp_avg``, ``exp_avg_sq``) is allocated for every
    parameter in the constructor instead of lazily on the first ``step()``
    call, and each state tensor is moved to shared memory with
    ``share_memory_()``.

    Args:
        params: Iterable of parameters or parameter-group dicts, forwarded
            to ``torch.optim.Adam``.
        lr: Learning rate.
        betas: Coefficients for the running averages of the gradient and its
            square. Note the default second coefficient here is ``0.99``.
        eps: Term added to the denominator for numerical stability.
        weight_decay: L2 penalty coefficient.
    """

    def __init__(
        self,
        params,
        lr=1e-3,
        betas=(0.9, 0.99),
        eps=1e-8,
        weight_decay=0,
    ):
        super().__init__(
            params, lr=lr, betas=betas, eps=eps, weight_decay=weight_decay
        )

        # Eager state initialization: the tensors must exist before they can
        # be placed in shared memory.
        for group in self.param_groups:
            for p in group["params"]:
                state = self.state[p]
                # The step counter is a tensor rather than a Python int so
                # that it can be shared like the moment buffers; recent
                # torch.optim.Adam versions also reject non-tensor steps.
                state["step"] = torch.tensor(0.0)
                state["exp_avg"] = torch.zeros_like(p.data)
                state["exp_avg_sq"] = torch.zeros_like(p.data)

                # Share every state tensor in memory.
                for key in ("step", "exp_avg", "exp_avg_sq"):
                    state[key].share_memory_()