Training in progress, step 632, checkpoint
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:139b19928fa97b7c1693b19298b775f6c1f3de936d5316bec307c782a04054c1
 size 1001465824
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:12adce1c2d0ed7d9b98485bf23a9ec1cfa71c5d5145fef3624db221bf4212989
 size 509177556
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f945a76a27af6da77da26bea2b0d33efbecc3a8bfb8cdd31c6f06af07cbed2fe
 size 14244
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:a709aa6a7d0d804b893952095b83e581a0b61a8d7425622873aa0f43ed10f0a0
 size 1064
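The four files above are Git LFS pointer files: each records only the LFS spec version, the sha256 oid of the real blob, and its size in bytes, while the blob itself lives in LFS storage. A minimal sketch (not part of this commit) for checking a downloaded checkpoint file against the oid in its pointer, assuming the files have already been pulled locally:

# Sketch: verify a downloaded LFS blob against the sha256 oid from its pointer.
# The path and expected hash below are taken from the adapter_model.safetensors
# pointer in this diff; adjust for the other files as needed.
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file in chunks so large checkpoints need not fit in memory."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

expected = "139b19928fa97b7c1693b19298b775f6c1f3de936d5316bec307c782a04054c1"
actual = sha256_of("last-checkpoint/adapter_model.safetensors")
print("OK" if actual == expected else f"mismatch: {actual}")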
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
 "best_metric": 1.073432445526123,
 "best_model_checkpoint": "miner_id_24/checkpoint-600",
-"epoch": 0.
+"epoch": 0.11625126460038628,
 "eval_steps": 100,
-"global_step":
+"global_step": 632,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
@@ -4263,6 +4263,230 @@
 "eval_samples_per_second": 3.909,
 "eval_steps_per_second": 0.977,
 "step": 600
+},
+{
+"epoch": 0.11054906649498758,
+"grad_norm": 1.0617674589157104,
+"learning_rate": 1.2232773595138415e-06,
+"loss": 7.3825,
+"step": 601
+},
+{
+"epoch": 0.11073300836935529,
+"grad_norm": 1.2446156740188599,
+"learning_rate": 1.145777792614189e-06,
+"loss": 8.8698,
+"step": 602
+},
+{
+"epoch": 0.11091695024372299,
+"grad_norm": 1.2019675970077515,
+"learning_rate": 1.070800041019504e-06,
+"loss": 8.9471,
+"step": 603
+},
+{
+"epoch": 0.11110089211809068,
+"grad_norm": 1.4310685396194458,
+"learning_rate": 9.98346017445706e-07,
+"loss": 8.777,
+"step": 604
+},
+{
+"epoch": 0.11128483399245838,
+"grad_norm": 1.1273508071899414,
+"learning_rate": 9.284175702272246e-07,
+"loss": 7.8448,
+"step": 605
+},
+{
+"epoch": 0.11146877586682609,
+"grad_norm": 1.1338694095611572,
+"learning_rate": 8.610164832699608e-07,
+"loss": 7.9259,
+"step": 606
+},
+{
+"epoch": 0.11165271774119379,
+"grad_norm": 1.441076397895813,
+"learning_rate": 7.961444760056891e-07,
+"loss": 9.1154,
+"step": 607
+},
+{
+"epoch": 0.11183665961556148,
+"grad_norm": 1.3927642107009888,
+"learning_rate": 7.338032033482711e-07,
+"loss": 7.2621,
+"step": 608
+},
+{
+"epoch": 0.11202060148992918,
+"grad_norm": 1.31500244140625,
+"learning_rate": 6.739942556513889e-07,
+"loss": 7.2625,
+"step": 609
+},
+{
+"epoch": 0.11220454336429689,
+"grad_norm": 1.1202362775802612,
+"learning_rate": 6.167191586679555e-07,
+"loss": 9.2775,
+"step": 610
+},
+{
+"epoch": 0.11238848523866458,
+"grad_norm": 1.0659925937652588,
+"learning_rate": 5.619793735112566e-07,
+"loss": 9.2408,
+"step": 611
+},
+{
+"epoch": 0.11257242711303228,
+"grad_norm": 1.259950041770935,
+"learning_rate": 5.097762966176256e-07,
+"loss": 7.6575,
+"step": 612
+},
+{
+"epoch": 0.11275636898739998,
+"grad_norm": 1.474528193473816,
+"learning_rate": 4.6011125971084924e-07,
+"loss": 8.3618,
+"step": 613
+},
+{
+"epoch": 0.11294031086176769,
+"grad_norm": 1.1022167205810547,
+"learning_rate": 4.129855297681617e-07,
+"loss": 7.7316,
+"step": 614
+},
+{
+"epoch": 0.11312425273613538,
+"grad_norm": 1.2318350076675415,
+"learning_rate": 3.684003089879484e-07,
+"loss": 9.4046,
+"step": 615
+},
+{
+"epoch": 0.11330819461050308,
+"grad_norm": 1.2324371337890625,
+"learning_rate": 3.2635673475910344e-07,
+"loss": 8.4448,
+"step": 616
+},
+{
+"epoch": 0.11349213648487078,
+"grad_norm": 1.224507212638855,
+"learning_rate": 2.8685587963194206e-07,
+"loss": 10.1946,
+"step": 617
+},
+{
+"epoch": 0.11367607835923849,
+"grad_norm": 1.0900365114212036,
+"learning_rate": 2.4989875129091125e-07,
+"loss": 7.4567,
+"step": 618
+},
+{
+"epoch": 0.11386002023360618,
+"grad_norm": 1.2144701480865479,
+"learning_rate": 2.1548629252883256e-07,
+"loss": 9.9855,
+"step": 619
+},
+{
+"epoch": 0.11404396210797388,
+"grad_norm": 1.4588361978530884,
+"learning_rate": 1.8361938122287703e-07,
+"loss": 9.7914,
+"step": 620
+},
+{
+"epoch": 0.11422790398234157,
+"grad_norm": 1.5850588083267212,
+"learning_rate": 1.5429883031217173e-07,
+"loss": 9.6143,
+"step": 621
+},
+{
+"epoch": 0.11441184585670928,
+"grad_norm": 1.0361146926879883,
+"learning_rate": 1.2752538777704992e-07,
+"loss": 6.8237,
+"step": 622
+},
+{
+"epoch": 0.11459578773107698,
+"grad_norm": 1.4321608543395996,
+"learning_rate": 1.0329973661996617e-07,
+"loss": 9.9172,
+"step": 623
+},
+{
+"epoch": 0.11477972960544468,
+"grad_norm": 1.2243539094924927,
+"learning_rate": 8.162249484809925e-08,
+"loss": 10.1801,
+"step": 624
+},
+{
+"epoch": 0.11496367147981237,
+"grad_norm": 0.9993793368339539,
+"learning_rate": 6.249421545755364e-08,
+"loss": 10.4931,
+"step": 625
+},
+{
+"epoch": 0.11514761335418008,
+"grad_norm": 1.1969102621078491,
+"learning_rate": 4.5915386419270736e-08,
+"loss": 9.4625,
+"step": 626
+},
+{
+"epoch": 0.11533155522854778,
+"grad_norm": 1.2495107650756836,
+"learning_rate": 3.188643066656116e-08,
+"loss": 8.6992,
+"step": 627
+},
+{
+"epoch": 0.11551549710291548,
+"grad_norm": 0.9973385334014893,
+"learning_rate": 2.0407706084368815e-08,
+"loss": 8.9503,
+"step": 628
+},
+{
+"epoch": 0.11569943897728317,
+"grad_norm": 1.3621132373809814,
+"learning_rate": 1.1479505500044951e-08,
+"loss": 8.2804,
+"step": 629
+},
+{
+"epoch": 0.11588338085165088,
+"grad_norm": 1.1957571506500244,
+"learning_rate": 5.102056675998501e-09,
+"loss": 9.484,
+"step": 630
+},
+{
+"epoch": 0.11606732272601858,
+"grad_norm": 1.095003604888916,
+"learning_rate": 1.2755223037896892e-09,
+"loss": 8.6529,
+"step": 631
+},
+{
+"epoch": 0.11625126460038628,
+"grad_norm": 1.1216403245925903,
+"learning_rate": 0.0,
+"loss": 10.7205,
+"step": 632
 }
 ],
 "logging_steps": 1,
@@ -4286,12 +4510,12 @@
 "should_evaluate": false,
 "should_log": false,
 "should_save": true,
-"should_training_stop":
+"should_training_stop": true
 },
 "attributes": {}
 }
 },
-"total_flos": 3.
+"total_flos": 3.7507247651998925e+18,
 "train_batch_size": 4,
 "trial_name": null,
 "trial_params": null
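With "should_training_stop" now true, the learning rate decayed to 0.0, and "global_step" at 632, this appears to be the final checkpoint of the run. A minimal sketch (not part of this commit) for inspecting the updated trainer_state.json and the log records appended here, assuming the checkpoint directory has been downloaded locally and that the log entries live under the standard "log_history" key:

# Sketch: read trainer_state.json and list the training-loss records added
# in this commit (steps 601-632). Eval records carry "eval_*" keys instead
# of "loss", so they are skipped by the filter below.
import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print("global_step:", state["global_step"])
print("best_metric:", state["best_metric"])
print("best_model_checkpoint:", state["best_model_checkpoint"])

for record in state["log_history"]:
    if record.get("step", 0) > 600 and "loss" in record:
        print(record["step"], record["loss"], record["learning_rate"])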