kengamd committed on
Commit
27332c3
·
1 Parent(s): 24e2f6b

End of training

Browse files
all_results.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
  "epoch": 3.0,
3
- "eval_loss": 1.8968859910964966,
4
- "eval_runtime": 68.8078,
5
- "eval_samples_per_second": 234.261,
6
- "eval_steps_per_second": 3.662,
7
- "train_loss": 2.6023941732047637,
8
- "train_runtime": 2611.7543,
9
- "train_samples_per_second": 86.401,
10
- "train_steps_per_second": 1.351
11
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_loss": 1.654771089553833,
4
+ "eval_runtime": 151.8615,
5
+ "eval_samples_per_second": 234.003,
6
+ "eval_steps_per_second": 3.661,
7
+ "train_loss": 2.2696795797151794,
8
+ "train_runtime": 5651.4997,
9
+ "train_samples_per_second": 88.029,
10
+ "train_steps_per_second": 1.376
11
  }
eval_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 3.0,
3
- "eval_loss": 1.8968859910964966,
4
- "eval_runtime": 68.8078,
5
- "eval_samples_per_second": 234.261,
6
- "eval_steps_per_second": 3.662
7
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_loss": 1.654771089553833,
4
+ "eval_runtime": 151.8615,
5
+ "eval_samples_per_second": 234.003,
6
+ "eval_steps_per_second": 3.661
7
  }
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 3.0,
3
- "train_loss": 2.6023941732047637,
4
- "train_runtime": 2611.7543,
5
- "train_samples_per_second": 86.401,
6
- "train_steps_per_second": 1.351
7
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "train_loss": 2.2696795797151794,
4
+ "train_runtime": 5651.4997,
5
+ "train_samples_per_second": 88.029,
6
+ "train_steps_per_second": 1.376
7
  }
trainer_state.json CHANGED
@@ -2,66 +2,114 @@
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 3.0,
5
- "global_step": 3528,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.43,
12
- "learning_rate": 4.291383219954649e-05,
13
- "loss": 3.7621,
14
  "step": 500
15
  },
16
  {
17
- "epoch": 0.85,
18
- "learning_rate": 3.5827664399092974e-05,
19
- "loss": 3.2286,
20
  "step": 1000
21
  },
22
  {
23
- "epoch": 1.28,
24
- "learning_rate": 2.8741496598639456e-05,
25
- "loss": 2.7611,
26
  "step": 1500
27
  },
28
  {
29
- "epoch": 1.7,
30
- "learning_rate": 2.1655328798185942e-05,
31
- "loss": 2.497,
32
  "step": 2000
33
  },
34
  {
35
- "epoch": 2.13,
36
- "learning_rate": 1.4569160997732428e-05,
37
- "loss": 2.2218,
38
  "step": 2500
39
  },
40
  {
41
- "epoch": 2.55,
42
- "learning_rate": 7.482993197278912e-06,
43
- "loss": 1.9671,
44
  "step": 3000
45
  },
46
  {
47
- "epoch": 2.98,
48
- "learning_rate": 3.9682539682539683e-07,
49
- "loss": 1.8285,
50
  "step": 3500
51
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  {
53
  "epoch": 3.0,
54
- "step": 3528,
55
- "total_flos": 3.012910885296e+16,
56
- "train_loss": 2.6023941732047637,
57
- "train_runtime": 2611.7543,
58
- "train_samples_per_second": 86.401,
59
- "train_steps_per_second": 1.351
60
  }
61
  ],
62
- "max_steps": 3528,
63
  "num_train_epochs": 3,
64
- "total_flos": 3.012910885296e+16,
65
  "trial_name": null,
66
  "trial_params": null
67
  }
 
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 3.0,
5
+ "global_step": 7776,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.19,
12
+ "learning_rate": 4.6784979423868314e-05,
13
+ "loss": 3.7829,
14
  "step": 500
15
  },
16
  {
17
+ "epoch": 0.39,
18
+ "learning_rate": 4.3569958847736625e-05,
19
+ "loss": 3.3678,
20
  "step": 1000
21
  },
22
  {
23
+ "epoch": 0.58,
24
+ "learning_rate": 4.035493827160494e-05,
25
+ "loss": 2.9585,
26
  "step": 1500
27
  },
28
  {
29
+ "epoch": 0.77,
30
+ "learning_rate": 3.7139917695473254e-05,
31
+ "loss": 2.6975,
32
  "step": 2000
33
  },
34
  {
35
+ "epoch": 0.96,
36
+ "learning_rate": 3.3924897119341565e-05,
37
+ "loss": 2.5295,
38
  "step": 2500
39
  },
40
  {
41
+ "epoch": 1.16,
42
+ "learning_rate": 3.0709876543209876e-05,
43
+ "loss": 2.3339,
44
  "step": 3000
45
  },
46
  {
47
+ "epoch": 1.35,
48
+ "learning_rate": 2.7494855967078194e-05,
49
+ "loss": 2.2302,
50
  "step": 3500
51
  },
52
+ {
53
+ "epoch": 1.54,
54
+ "learning_rate": 2.4279835390946505e-05,
55
+ "loss": 2.1406,
56
+ "step": 4000
57
+ },
58
+ {
59
+ "epoch": 1.74,
60
+ "learning_rate": 2.1064814814814816e-05,
61
+ "loss": 2.0496,
62
+ "step": 4500
63
+ },
64
+ {
65
+ "epoch": 1.93,
66
+ "learning_rate": 1.784979423868313e-05,
67
+ "loss": 1.979,
68
+ "step": 5000
69
+ },
70
+ {
71
+ "epoch": 2.12,
72
+ "learning_rate": 1.463477366255144e-05,
73
+ "loss": 1.8329,
74
+ "step": 5500
75
+ },
76
+ {
77
+ "epoch": 2.31,
78
+ "learning_rate": 1.1419753086419753e-05,
79
+ "loss": 1.7135,
80
+ "step": 6000
81
+ },
82
+ {
83
+ "epoch": 2.51,
84
+ "learning_rate": 8.204732510288066e-06,
85
+ "loss": 1.6597,
86
+ "step": 6500
87
+ },
88
+ {
89
+ "epoch": 2.7,
90
+ "learning_rate": 4.989711934156379e-06,
91
+ "loss": 1.615,
92
+ "step": 7000
93
+ },
94
+ {
95
+ "epoch": 2.89,
96
+ "learning_rate": 1.7746913580246913e-06,
97
+ "loss": 1.5626,
98
+ "step": 7500
99
+ },
100
  {
101
  "epoch": 3.0,
102
+ "step": 7776,
103
+ "total_flos": 6.642431273088e+16,
104
+ "train_loss": 2.2696795797151794,
105
+ "train_runtime": 5651.4997,
106
+ "train_samples_per_second": 88.029,
107
+ "train_steps_per_second": 1.376
108
  }
109
  ],
110
+ "max_steps": 7776,
111
  "num_train_epochs": 3,
112
+ "total_flos": 6.642431273088e+16,
113
  "trial_name": null,
114
  "trial_params": null
115
  }