rakhman-llm commited on
Commit
e193196
·
verified ·
1 Parent(s): 9c93001

Training in progress, step 16000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e4c46a908162b00f3ba9486bea20fb84fb3214d554c60ceeedafb0b57bf2240
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75a86cd786a5d3a1e1f2adcb3f9a3150ccda047965e61deb6383da469625f4c5
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:99c44c9279a3a25aea078d1e884539b925aea0259a9d1b9cea8a21f053a5066d
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d52a7d049b044518630bdac24825b9c5aace7c2129b9296a1f4f71f920000ff
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:13d5ccf4326b5409b6c9f169af8a58a6579e0381579d71b37aaa359b3cba5d5e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c907d42c9636b923c5bcc4fd78594e76582ee7e7c3af9d3ccfeae6b74654c31c
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3bd47cad1117d63c0c537ebb025d165a0cc6ebd76cda442e82a66a6ac283ef01
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cf6ca81cadd867bd2c8ff23d5105fee09afef52498d5c300dfd5de226b319d5
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.0333333333333334,
5
  "eval_steps": 500,
6
- "global_step": 15500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1100,6 +1100,41 @@
1100
  "learning_rate": 1.311288888888889e-05,
1101
  "loss": 0.0703,
1102
  "step": 15500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1103
  }
1104
  ],
1105
  "logging_steps": 100,
@@ -1119,7 +1154,7 @@
1119
  "attributes": {}
1120
  }
1121
  },
1122
- "total_flos": 3.775538921472e+16,
1123
  "train_batch_size": 4,
1124
  "trial_name": null,
1125
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0666666666666667,
5
  "eval_steps": 500,
6
+ "global_step": 16000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1100
  "learning_rate": 1.311288888888889e-05,
1101
  "loss": 0.0703,
1102
  "step": 15500
1103
+ },
1104
+ {
1105
+ "epoch": 1.04,
1106
+ "grad_norm": 0.2654290497303009,
1107
+ "learning_rate": 1.3068444444444446e-05,
1108
+ "loss": 0.0593,
1109
+ "step": 15600
1110
+ },
1111
+ {
1112
+ "epoch": 1.0466666666666666,
1113
+ "grad_norm": 0.2421567589044571,
1114
+ "learning_rate": 1.3024000000000001e-05,
1115
+ "loss": 0.0556,
1116
+ "step": 15700
1117
+ },
1118
+ {
1119
+ "epoch": 1.0533333333333332,
1120
+ "grad_norm": 0.1856231540441513,
1121
+ "learning_rate": 1.2979555555555556e-05,
1122
+ "loss": 0.0619,
1123
+ "step": 15800
1124
+ },
1125
+ {
1126
+ "epoch": 1.06,
1127
+ "grad_norm": 0.11161370575428009,
1128
+ "learning_rate": 1.2935111111111111e-05,
1129
+ "loss": 0.0597,
1130
+ "step": 15900
1131
+ },
1132
+ {
1133
+ "epoch": 1.0666666666666667,
1134
+ "grad_norm": 0.31690728664398193,
1135
+ "learning_rate": 1.2890666666666667e-05,
1136
+ "loss": 0.061,
1137
+ "step": 16000
1138
  }
1139
  ],
1140
  "logging_steps": 100,
 
1154
  "attributes": {}
1155
  }
1156
  },
1157
+ "total_flos": 3.897330499584e+16,
1158
  "train_batch_size": 4,
1159
  "trial_name": null,
1160
  "trial_params": null