albertmartinez committed on
Commit
eb4dcbc
·
verified ·
1 Parent(s): d316538

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +13 -13
  2. eval_results.json +7 -7
  3. train_results.json +7 -7
  4. trainer_state.json +81 -49
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "epoch": 3.0,
3
- "eval_f1": 0.8045510528812365,
4
- "eval_loss": 0.6810711622238159,
5
- "eval_runtime": 1951.2993,
6
- "eval_samples": 12908,
7
- "eval_samples_per_second": 6.615,
8
- "eval_steps_per_second": 0.207,
9
- "total_flos": 2.377533515518771e+16,
10
- "train_loss": 0.9083002715438119,
11
- "train_runtime": 52770.4686,
12
- "train_samples": 30117,
13
- "train_samples_per_second": 1.712,
14
- "train_steps_per_second": 0.054
15
  }
 
1
  {
2
+ "epoch": 5.0,
3
+ "eval_f1": 0.7979962127908091,
4
+ "eval_loss": 0.7055376768112183,
5
+ "eval_runtime": 10.4046,
6
+ "eval_samples": 8605,
7
+ "eval_samples_per_second": 827.037,
8
+ "eval_steps_per_second": 12.975,
9
+ "total_flos": 4.52871045169152e+16,
10
+ "train_loss": 0.9906343055923632,
11
+ "train_runtime": 639.5262,
12
+ "train_samples": 34420,
13
+ "train_samples_per_second": 269.105,
14
+ "train_steps_per_second": 4.206
15
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 3.0,
3
- "eval_f1": 0.8045510528812365,
4
- "eval_loss": 0.6810711622238159,
5
- "eval_runtime": 1951.2993,
6
- "eval_samples": 12908,
7
- "eval_samples_per_second": 6.615,
8
- "eval_steps_per_second": 0.207
9
  }
 
1
  {
2
+ "epoch": 5.0,
3
+ "eval_f1": 0.7979962127908091,
4
+ "eval_loss": 0.7055376768112183,
5
+ "eval_runtime": 10.4046,
6
+ "eval_samples": 8605,
7
+ "eval_samples_per_second": 827.037,
8
+ "eval_steps_per_second": 12.975
9
  }
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 3.0,
3
- "total_flos": 2.377533515518771e+16,
4
- "train_loss": 0.9083002715438119,
5
- "train_runtime": 52770.4686,
6
- "train_samples": 30117,
7
- "train_samples_per_second": 1.712,
8
- "train_steps_per_second": 0.054
9
  }
 
1
  {
2
+ "epoch": 5.0,
3
+ "total_flos": 4.52871045169152e+16,
4
+ "train_loss": 0.9906343055923632,
5
+ "train_runtime": 639.5262,
6
+ "train_samples": 34420,
7
+ "train_samples_per_second": 269.105,
8
+ "train_steps_per_second": 4.206
9
  }
trainer_state.json CHANGED
@@ -1,75 +1,107 @@
1
  {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 3.0,
5
- "eval_steps": 300.0,
6
- "global_step": 2826,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 1.0,
13
- "grad_norm": 15.699420928955078,
14
- "learning_rate": 1.6927223719676552e-05,
15
- "loss": 1.5106,
16
- "step": 942
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_f1": 0.7668134917297256,
21
- "eval_loss": 0.8142930269241333,
22
- "eval_runtime": 1955.7939,
23
- "eval_samples_per_second": 6.6,
24
- "eval_steps_per_second": 0.207,
25
- "step": 942
26
  },
27
  {
28
- "epoch": 2.0,
29
- "grad_norm": 4.08961820602417,
30
- "learning_rate": 8.463611859838276e-06,
31
- "loss": 0.7033,
32
- "step": 1884
33
  },
34
  {
35
  "epoch": 2.0,
36
- "eval_f1": 0.7984619452840845,
37
- "eval_loss": 0.6980345845222473,
38
- "eval_runtime": 1952.7483,
39
- "eval_samples_per_second": 6.61,
40
- "eval_steps_per_second": 0.207,
41
- "step": 1884
42
  },
43
  {
44
- "epoch": 3.0,
45
- "grad_norm": 25.8586483001709,
46
- "learning_rate": 0.0,
47
- "loss": 0.511,
48
- "step": 2826
49
  },
50
  {
51
  "epoch": 3.0,
52
- "eval_f1": 0.8045510528812365,
53
- "eval_loss": 0.6810711622238159,
54
- "eval_runtime": 1950.0401,
55
- "eval_samples_per_second": 6.619,
56
- "eval_steps_per_second": 0.207,
57
- "step": 2826
58
  },
59
  {
60
- "epoch": 3.0,
61
- "step": 2826,
62
- "total_flos": 2.377533515518771e+16,
63
- "train_loss": 0.9083002715438119,
64
- "train_runtime": 52770.4686,
65
- "train_samples_per_second": 1.712,
66
- "train_steps_per_second": 0.054
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  }
68
  ],
69
  "logging_steps": 500,
70
- "max_steps": 2826,
71
  "num_input_tokens_seen": 0,
72
- "num_train_epochs": 3,
73
  "save_steps": 500,
74
  "stateful_callbacks": {
75
  "TrainerControl": {
@@ -83,8 +115,8 @@
83
  "attributes": {}
84
  }
85
  },
86
- "total_flos": 2.377533515518771e+16,
87
- "train_batch_size": 32,
88
  "trial_name": null,
89
  "trial_params": null
90
  }
 
1
  {
2
+ "best_metric": 0.7055376768112183,
3
+ "best_model_checkpoint": "./bert-sdg-classification/checkpoint-2690",
4
+ "epoch": 5.0,
5
+ "eval_steps": 500,
6
+ "global_step": 2690,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.929368029739777,
13
+ "grad_norm": 6.427302360534668,
14
+ "learning_rate": 8.3e-06,
15
+ "loss": 2.2299,
16
+ "step": 500
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_f1": 0.711832686368243,
21
+ "eval_loss": 1.0520464181900024,
22
+ "eval_runtime": 10.5177,
23
+ "eval_samples_per_second": 818.142,
24
+ "eval_steps_per_second": 12.835,
25
+ "step": 538
26
  },
27
  {
28
+ "epoch": 1.858736059479554,
29
+ "grad_norm": 4.113575458526611,
30
+ "learning_rate": 8.095693779904307e-06,
31
+ "loss": 0.9383,
32
+ "step": 1000
33
  },
34
  {
35
  "epoch": 2.0,
36
+ "eval_f1": 0.7794402559050377,
37
+ "eval_loss": 0.7799968719482422,
38
+ "eval_runtime": 10.5039,
39
+ "eval_samples_per_second": 819.222,
40
+ "eval_steps_per_second": 12.852,
41
+ "step": 1076
42
  },
43
  {
44
+ "epoch": 2.7881040892193307,
45
+ "grad_norm": 6.160844802856445,
46
+ "learning_rate": 5.70334928229665e-06,
47
+ "loss": 0.7379,
48
+ "step": 1500
49
  },
50
  {
51
  "epoch": 3.0,
52
+ "eval_f1": 0.794722683984857,
53
+ "eval_loss": 0.7253227233886719,
54
+ "eval_runtime": 10.4924,
55
+ "eval_samples_per_second": 820.119,
56
+ "eval_steps_per_second": 12.866,
57
+ "step": 1614
58
  },
59
  {
60
+ "epoch": 3.717472118959108,
61
+ "grad_norm": 6.640861511230469,
62
+ "learning_rate": 3.3110047846889954e-06,
63
+ "loss": 0.6362,
64
+ "step": 2000
65
+ },
66
+ {
67
+ "epoch": 4.0,
68
+ "eval_f1": 0.7964522651875893,
69
+ "eval_loss": 0.7107406854629517,
70
+ "eval_runtime": 10.5173,
71
+ "eval_samples_per_second": 818.179,
72
+ "eval_steps_per_second": 12.836,
73
+ "step": 2152
74
+ },
75
+ {
76
+ "epoch": 4.646840148698884,
77
+ "grad_norm": 5.86486291885376,
78
+ "learning_rate": 9.186602870813398e-07,
79
+ "loss": 0.5779,
80
+ "step": 2500
81
+ },
82
+ {
83
+ "epoch": 5.0,
84
+ "eval_f1": 0.7979962127908091,
85
+ "eval_loss": 0.7055376768112183,
86
+ "eval_runtime": 10.5314,
87
+ "eval_samples_per_second": 817.079,
88
+ "eval_steps_per_second": 12.819,
89
+ "step": 2690
90
+ },
91
+ {
92
+ "epoch": 5.0,
93
+ "step": 2690,
94
+ "total_flos": 4.52871045169152e+16,
95
+ "train_loss": 0.9906343055923632,
96
+ "train_runtime": 639.5262,
97
+ "train_samples_per_second": 269.105,
98
+ "train_steps_per_second": 4.206
99
  }
100
  ],
101
  "logging_steps": 500,
102
+ "max_steps": 2690,
103
  "num_input_tokens_seen": 0,
104
+ "num_train_epochs": 5,
105
  "save_steps": 500,
106
  "stateful_callbacks": {
107
  "TrainerControl": {
 
115
  "attributes": {}
116
  }
117
  },
118
+ "total_flos": 4.52871045169152e+16,
119
+ "train_batch_size": 64,
120
  "trial_name": null,
121
  "trial_params": null
122
  }