""" Functions that use multiple times """ from torch import nn import torch import numpy as np def v_wrap(np_array, dtype=np.float32): if np_array.dtype != dtype: np_array = np_array.astype(dtype) return torch.from_numpy(np_array) def set_init(layers): for layer in layers: nn.init.normal_(layer.weight, mean=0., std=0.1) nn.init.constant_(layer.bias, 0.) def push_and_pull(opt, lnet, gnet, done, s_, bs, ba, br, gamma): if done: v_s_ = 0. # terminal else: v_s_ = lnet.forward(v_wrap(s_[None, :]))[-1].data.numpy()[0, 0] buffer_v_target = [] for r in br[::-1]: # reverse buffer r v_s_ = r + gamma * v_s_ buffer_v_target.append(v_s_) buffer_v_target.reverse() loss = lnet.loss_func( v_wrap(np.vstack(bs)), v_wrap(np.array(ba), dtype=np.int64) if ba[0].dtype == np.int64 else v_wrap(np.vstack(ba)), v_wrap(np.array(buffer_v_target)[:, None])) # calculate local gradients and push local parameters to global opt.zero_grad() loss.backward() for lp, gp in zip(lnet.parameters(), gnet.parameters()): gp._grad = lp.grad opt.step() # pull global parameters lnet.load_state_dict(gnet.state_dict()) def record(global_ep, global_ep_r, ep_r, res_queue, name, goal_word, action, action_number, winning_ep): with global_ep.get_lock(): global_ep.value += 1 with global_ep_r.get_lock(): if global_ep_r.value == 0.: global_ep_r.value = ep_r else: global_ep_r.value = global_ep_r.value * 0.99 + ep_r * 0.01 res_queue.put(global_ep_r.value) if goal_word == action: winning_ep.value += 1 if global_ep.value % 100 == 0: print( name, "Ep:", global_ep.value, "| Ep_r: %.0f" % global_ep_r.value, "| Goal :", goal_word, "| Action: ", action, "| Actions: ", action_number )