ngwgsang commited on
Commit
dc732d8
·
verified ·
1 Parent(s): 3832b24

Training in progress, epoch 2, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cde463bf311ac805eec2150338f697c9940a5c7a200ccc234d37162b4649aed5
3
  size 442668636
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75562cd31a0f9b43a875d53eafeae39ff9f4884568c984afec548fb0b3a18bea
3
  size 442668636
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec1ab9ed8d43fdcf9ecea3365f7eeaacaae0d08b25b5e24189e5e1a81dde674e
3
  size 885457146
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4737340e44de8f447a156c00d13c226c8d13eaaa3479d55229dea45aefbcbd95
3
  size 885457146
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2477d3715e68f2549c9ecd6a18f4a17a0bfb0a625f50ce4fafa0aa2652affb1c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc930492c5e0375b00eb1faa8503ca1a4cd6495e47aeaa009df65f9bce5b16e3
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:00b7251b468e4d3cb44eba0757f056754012975e532dc253bb53666972923e5b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9707f7f99f72a12c2631595b1bcc8638efdeda09ae580feffc3a464b56550f1
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 7.66294542948405,
3
- "best_model_checkpoint": "./results/checkpoint-916",
4
- "epoch": 1.0,
5
  "eval_steps": 500,
6
- "global_step": 916,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -82,6 +82,81 @@
82
  "eval_samples_per_second": 272.318,
83
  "eval_steps_per_second": 8.511,
84
  "step": 916
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  }
86
  ],
87
  "logging_steps": 100,
@@ -101,7 +176,7 @@
101
  "attributes": {}
102
  }
103
  },
104
- "total_flos": 1927766233338624.0,
105
  "train_batch_size": 32,
106
  "trial_name": null,
107
  "trial_params": null
 
1
  {
2
+ "best_metric": 6.929315567016602,
3
+ "best_model_checkpoint": "./results/checkpoint-1832",
4
+ "epoch": 2.0,
5
  "eval_steps": 500,
6
+ "global_step": 1832,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
82
  "eval_samples_per_second": 272.318,
83
  "eval_steps_per_second": 8.511,
84
  "step": 916
85
+ },
86
+ {
87
+ "epoch": 1.091703056768559,
88
+ "grad_norm": 34.63822555541992,
89
+ "learning_rate": 2.5906113537117905e-05,
90
+ "loss": 6.9324,
91
+ "step": 1000
92
+ },
93
+ {
94
+ "epoch": 1.2008733624454149,
95
+ "grad_norm": 25.136709213256836,
96
+ "learning_rate": 2.5496724890829696e-05,
97
+ "loss": 6.6809,
98
+ "step": 1100
99
+ },
100
+ {
101
+ "epoch": 1.3100436681222707,
102
+ "grad_norm": 29.977298736572266,
103
+ "learning_rate": 2.5087336244541486e-05,
104
+ "loss": 6.6569,
105
+ "step": 1200
106
+ },
107
+ {
108
+ "epoch": 1.4192139737991267,
109
+ "grad_norm": 50.923553466796875,
110
+ "learning_rate": 2.4677947598253277e-05,
111
+ "loss": 6.5877,
112
+ "step": 1300
113
+ },
114
+ {
115
+ "epoch": 1.5283842794759825,
116
+ "grad_norm": 24.49920654296875,
117
+ "learning_rate": 2.4268558951965064e-05,
118
+ "loss": 6.5709,
119
+ "step": 1400
120
+ },
121
+ {
122
+ "epoch": 1.6375545851528384,
123
+ "grad_norm": 36.14987564086914,
124
+ "learning_rate": 2.3859170305676855e-05,
125
+ "loss": 6.4067,
126
+ "step": 1500
127
+ },
128
+ {
129
+ "epoch": 1.7467248908296944,
130
+ "grad_norm": 22.3398380279541,
131
+ "learning_rate": 2.344978165938865e-05,
132
+ "loss": 6.3692,
133
+ "step": 1600
134
+ },
135
+ {
136
+ "epoch": 1.8558951965065502,
137
+ "grad_norm": 23.658458709716797,
138
+ "learning_rate": 2.3040393013100437e-05,
139
+ "loss": 6.3785,
140
+ "step": 1700
141
+ },
142
+ {
143
+ "epoch": 1.965065502183406,
144
+ "grad_norm": 31.021987915039062,
145
+ "learning_rate": 2.2631004366812227e-05,
146
+ "loss": 6.2296,
147
+ "step": 1800
148
+ },
149
+ {
150
+ "epoch": 2.0,
151
+ "eval_avg_mae": 6.929315567016602,
152
+ "eval_loss": 6.92931604385376,
153
+ "eval_mae_lex": 6.660251617431641,
154
+ "eval_mae_sem": 4.739748001098633,
155
+ "eval_mae_syn": 9.387948036193848,
156
+ "eval_runtime": 26.931,
157
+ "eval_samples_per_second": 272.065,
158
+ "eval_steps_per_second": 8.503,
159
+ "step": 1832
160
  }
161
  ],
162
  "logging_steps": 100,
 
176
  "attributes": {}
177
  }
178
  },
179
+ "total_flos": 3855532466677248.0,
180
  "train_batch_size": 32,
181
  "trial_name": null,
182
  "trial_params": null