AlexK-PL committed
Commit 2fe470f · 1 Parent(s): 689b3d3

Delete loss_scaler.py

Files changed (1)
  1. loss_scaler.py +0 -79
loss_scaler.py DELETED
@@ -1,79 +0,0 @@
-
-
-class LossScaler:
-
-    def __init__(self, scale=1):
-        self.cur_scale = scale
-
-    # `params` is a list / generator of torch.Variable
-    def has_overflow(self, params):
-        return False
-
-    # `x` is a torch.Tensor
-    def _has_inf_or_nan(x):
-        return False
-
-    # `overflow` is boolean indicating whether we overflowed in gradient
-    def update_scale(self, overflow):
-        pass
-
-    @property
-    def loss_scale(self):
-        return self.cur_scale
-
-    def scale_gradient(self, module, grad_in, grad_out):
-        return tuple(self.loss_scale * g for g in grad_in)
-
-    def backward(self, loss):
-        scaled_loss = loss*self.loss_scale
-        scaled_loss.backward()
-
-class DynamicLossScaler:
-
-    def __init__(self,
-                 init_scale=2**32,
-                 scale_factor=2.,
-                 scale_window=1000):
-        self.cur_scale = init_scale
-        self.cur_iter = 0
-        self.last_overflow_iter = -1
-        self.scale_factor = scale_factor
-        self.scale_window = scale_window
-
-    # `params` is a list / generator of torch.Variable
-    def has_overflow(self, params):
-        for p in params:
-            if p.grad is not None and DynamicLossScaler._has_inf_or_nan(p.grad.data):
-                return True
-
-        return False
-
-    # `x` is a torch.Tensor
-    def _has_inf_or_nan(x):
-        cpu_sum = float(x.float().sum())
-        if cpu_sum == float('inf') or cpu_sum == -float('inf') or cpu_sum != cpu_sum:
-            return True
-        return False
-
-    # `overflow` is boolean indicating whether we overflowed in gradient
-    def update_scale(self, overflow):
-        if overflow:
-            #self.cur_scale /= self.scale_factor
-            self.cur_scale = max(self.cur_scale/self.scale_factor, 1)
-            self.last_overflow_iter = self.cur_iter
-        else:
-            if (self.cur_iter - self.last_overflow_iter) % self.scale_window == 0:
-                self.cur_scale *= self.scale_factor
-        # self.cur_scale = 1
-        self.cur_iter += 1
-
-    @property
-    def loss_scale(self):
-        return self.cur_scale
-
-    def scale_gradient(self, module, grad_in, grad_out):
-        return tuple(self.loss_scale * g for g in grad_in)
-
-    def backward(self, loss):
-        scaled_loss = loss*self.loss_scale
-        scaled_loss.backward()
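
For context on what this commit removes: DynamicLossScaler implements the usual dynamic loss-scaling loop for fp16 training, shrinking the scale (down to 1) when a gradient overflows and growing it again after scale_window consecutive clean iterations. Below is a minimal, hypothetical sketch of how such a scaler is typically driven from a training step; the model, optimizer, and loss_fn names are assumptions for illustration and are not part of this repository.

# Hypothetical usage sketch (not from this repo): one mixed-precision
# training step driven by the deleted DynamicLossScaler.
from loss_scaler import DynamicLossScaler  # the module removed by this commit

scaler = DynamicLossScaler(init_scale=2**32, scale_factor=2., scale_window=1000)

def training_step(model, optimizer, loss_fn, inputs, targets):
    optimizer.zero_grad()
    loss = loss_fn(model(inputs), targets)
    # Backpropagate through a scaled loss so small fp16 gradients do not underflow.
    scaler.backward(loss)
    params = [p for p in model.parameters() if p.requires_grad]
    overflow = scaler.has_overflow(params)
    if not overflow:
        # Un-scale gradients before the weight update.
        for p in params:
            if p.grad is not None:
                p.grad.data.div_(scaler.loss_scale)
        optimizer.step()
    # Shrink the scale after an overflow, grow it after a run of clean steps.
    scaler.update_scale(overflow)
    return loss.item(), overflow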