romainnn commited on
Commit
3b5e2f6
·
verified ·
1 Parent(s): fa1ec37

Training in progress, step 632, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2b4cc1cef93d033abfc2f87d3b5b9c5dbacd72a5b0dc730c6659bff3c4ed2cb
3
  size 1001465824
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:139b19928fa97b7c1693b19298b775f6c1f3de936d5316bec307c782a04054c1
3
  size 1001465824
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4d3ed47ca30bb1233df822d49b40e5d03f9ce09e404501ed6c579fd67b6e7255
3
  size 509177556
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12adce1c2d0ed7d9b98485bf23a9ec1cfa71c5d5145fef3624db221bf4212989
3
  size 509177556
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:40a9574e57f7a253b64d50c5f5629f324b4a1a5ac841f256ec2dca1bec61ee62
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f945a76a27af6da77da26bea2b0d33efbecc3a8bfb8cdd31c6f06af07cbed2fe
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1feb2c4e4f0ebf5517a21670a0fdcf3d94d1b879845629079677b365bcee24f4
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a709aa6a7d0d804b893952095b83e581a0b61a8d7425622873aa0f43ed10f0a0
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.073432445526123,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-600",
4
- "epoch": 0.11036512462061988,
5
  "eval_steps": 100,
6
- "global_step": 600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4263,6 +4263,230 @@
4263
  "eval_samples_per_second": 3.909,
4264
  "eval_steps_per_second": 0.977,
4265
  "step": 600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4266
  }
4267
  ],
4268
  "logging_steps": 1,
@@ -4286,12 +4510,12 @@
4286
  "should_evaluate": false,
4287
  "should_log": false,
4288
  "should_save": true,
4289
- "should_training_stop": false
4290
  },
4291
  "attributes": {}
4292
  }
4293
  },
4294
- "total_flos": 3.5600972702652826e+18,
4295
  "train_batch_size": 4,
4296
  "trial_name": null,
4297
  "trial_params": null
 
1
  {
2
  "best_metric": 1.073432445526123,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-600",
4
+ "epoch": 0.11625126460038628,
5
  "eval_steps": 100,
6
+ "global_step": 632,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4263
  "eval_samples_per_second": 3.909,
4264
  "eval_steps_per_second": 0.977,
4265
  "step": 600
4266
+ },
4267
+ {
4268
+ "epoch": 0.11054906649498758,
4269
+ "grad_norm": 1.0617674589157104,
4270
+ "learning_rate": 1.2232773595138415e-06,
4271
+ "loss": 7.3825,
4272
+ "step": 601
4273
+ },
4274
+ {
4275
+ "epoch": 0.11073300836935529,
4276
+ "grad_norm": 1.2446156740188599,
4277
+ "learning_rate": 1.145777792614189e-06,
4278
+ "loss": 8.8698,
4279
+ "step": 602
4280
+ },
4281
+ {
4282
+ "epoch": 0.11091695024372299,
4283
+ "grad_norm": 1.2019675970077515,
4284
+ "learning_rate": 1.070800041019504e-06,
4285
+ "loss": 8.9471,
4286
+ "step": 603
4287
+ },
4288
+ {
4289
+ "epoch": 0.11110089211809068,
4290
+ "grad_norm": 1.4310685396194458,
4291
+ "learning_rate": 9.98346017445706e-07,
4292
+ "loss": 8.777,
4293
+ "step": 604
4294
+ },
4295
+ {
4296
+ "epoch": 0.11128483399245838,
4297
+ "grad_norm": 1.1273508071899414,
4298
+ "learning_rate": 9.284175702272246e-07,
4299
+ "loss": 7.8448,
4300
+ "step": 605
4301
+ },
4302
+ {
4303
+ "epoch": 0.11146877586682609,
4304
+ "grad_norm": 1.1338694095611572,
4305
+ "learning_rate": 8.610164832699608e-07,
4306
+ "loss": 7.9259,
4307
+ "step": 606
4308
+ },
4309
+ {
4310
+ "epoch": 0.11165271774119379,
4311
+ "grad_norm": 1.441076397895813,
4312
+ "learning_rate": 7.961444760056891e-07,
4313
+ "loss": 9.1154,
4314
+ "step": 607
4315
+ },
4316
+ {
4317
+ "epoch": 0.11183665961556148,
4318
+ "grad_norm": 1.3927642107009888,
4319
+ "learning_rate": 7.338032033482711e-07,
4320
+ "loss": 7.2621,
4321
+ "step": 608
4322
+ },
4323
+ {
4324
+ "epoch": 0.11202060148992918,
4325
+ "grad_norm": 1.31500244140625,
4326
+ "learning_rate": 6.739942556513889e-07,
4327
+ "loss": 7.2625,
4328
+ "step": 609
4329
+ },
4330
+ {
4331
+ "epoch": 0.11220454336429689,
4332
+ "grad_norm": 1.1202362775802612,
4333
+ "learning_rate": 6.167191586679555e-07,
4334
+ "loss": 9.2775,
4335
+ "step": 610
4336
+ },
4337
+ {
4338
+ "epoch": 0.11238848523866458,
4339
+ "grad_norm": 1.0659925937652588,
4340
+ "learning_rate": 5.619793735112566e-07,
4341
+ "loss": 9.2408,
4342
+ "step": 611
4343
+ },
4344
+ {
4345
+ "epoch": 0.11257242711303228,
4346
+ "grad_norm": 1.259950041770935,
4347
+ "learning_rate": 5.097762966176256e-07,
4348
+ "loss": 7.6575,
4349
+ "step": 612
4350
+ },
4351
+ {
4352
+ "epoch": 0.11275636898739998,
4353
+ "grad_norm": 1.474528193473816,
4354
+ "learning_rate": 4.6011125971084924e-07,
4355
+ "loss": 8.3618,
4356
+ "step": 613
4357
+ },
4358
+ {
4359
+ "epoch": 0.11294031086176769,
4360
+ "grad_norm": 1.1022167205810547,
4361
+ "learning_rate": 4.129855297681617e-07,
4362
+ "loss": 7.7316,
4363
+ "step": 614
4364
+ },
4365
+ {
4366
+ "epoch": 0.11312425273613538,
4367
+ "grad_norm": 1.2318350076675415,
4368
+ "learning_rate": 3.684003089879484e-07,
4369
+ "loss": 9.4046,
4370
+ "step": 615
4371
+ },
4372
+ {
4373
+ "epoch": 0.11330819461050308,
4374
+ "grad_norm": 1.2324371337890625,
4375
+ "learning_rate": 3.2635673475910344e-07,
4376
+ "loss": 8.4448,
4377
+ "step": 616
4378
+ },
4379
+ {
4380
+ "epoch": 0.11349213648487078,
4381
+ "grad_norm": 1.224507212638855,
4382
+ "learning_rate": 2.8685587963194206e-07,
4383
+ "loss": 10.1946,
4384
+ "step": 617
4385
+ },
4386
+ {
4387
+ "epoch": 0.11367607835923849,
4388
+ "grad_norm": 1.0900365114212036,
4389
+ "learning_rate": 2.4989875129091125e-07,
4390
+ "loss": 7.4567,
4391
+ "step": 618
4392
+ },
4393
+ {
4394
+ "epoch": 0.11386002023360618,
4395
+ "grad_norm": 1.2144701480865479,
4396
+ "learning_rate": 2.1548629252883256e-07,
4397
+ "loss": 9.9855,
4398
+ "step": 619
4399
+ },
4400
+ {
4401
+ "epoch": 0.11404396210797388,
4402
+ "grad_norm": 1.4588361978530884,
4403
+ "learning_rate": 1.8361938122287703e-07,
4404
+ "loss": 9.7914,
4405
+ "step": 620
4406
+ },
4407
+ {
4408
+ "epoch": 0.11422790398234157,
4409
+ "grad_norm": 1.5850588083267212,
4410
+ "learning_rate": 1.5429883031217173e-07,
4411
+ "loss": 9.6143,
4412
+ "step": 621
4413
+ },
4414
+ {
4415
+ "epoch": 0.11441184585670928,
4416
+ "grad_norm": 1.0361146926879883,
4417
+ "learning_rate": 1.2752538777704992e-07,
4418
+ "loss": 6.8237,
4419
+ "step": 622
4420
+ },
4421
+ {
4422
+ "epoch": 0.11459578773107698,
4423
+ "grad_norm": 1.4321608543395996,
4424
+ "learning_rate": 1.0329973661996617e-07,
4425
+ "loss": 9.9172,
4426
+ "step": 623
4427
+ },
4428
+ {
4429
+ "epoch": 0.11477972960544468,
4430
+ "grad_norm": 1.2243539094924927,
4431
+ "learning_rate": 8.162249484809925e-08,
4432
+ "loss": 10.1801,
4433
+ "step": 624
4434
+ },
4435
+ {
4436
+ "epoch": 0.11496367147981237,
4437
+ "grad_norm": 0.9993793368339539,
4438
+ "learning_rate": 6.249421545755364e-08,
4439
+ "loss": 10.4931,
4440
+ "step": 625
4441
+ },
4442
+ {
4443
+ "epoch": 0.11514761335418008,
4444
+ "grad_norm": 1.1969102621078491,
4445
+ "learning_rate": 4.5915386419270736e-08,
4446
+ "loss": 9.4625,
4447
+ "step": 626
4448
+ },
4449
+ {
4450
+ "epoch": 0.11533155522854778,
4451
+ "grad_norm": 1.2495107650756836,
4452
+ "learning_rate": 3.188643066656116e-08,
4453
+ "loss": 8.6992,
4454
+ "step": 627
4455
+ },
4456
+ {
4457
+ "epoch": 0.11551549710291548,
4458
+ "grad_norm": 0.9973385334014893,
4459
+ "learning_rate": 2.0407706084368815e-08,
4460
+ "loss": 8.9503,
4461
+ "step": 628
4462
+ },
4463
+ {
4464
+ "epoch": 0.11569943897728317,
4465
+ "grad_norm": 1.3621132373809814,
4466
+ "learning_rate": 1.1479505500044951e-08,
4467
+ "loss": 8.2804,
4468
+ "step": 629
4469
+ },
4470
+ {
4471
+ "epoch": 0.11588338085165088,
4472
+ "grad_norm": 1.1957571506500244,
4473
+ "learning_rate": 5.102056675998501e-09,
4474
+ "loss": 9.484,
4475
+ "step": 630
4476
+ },
4477
+ {
4478
+ "epoch": 0.11606732272601858,
4479
+ "grad_norm": 1.095003604888916,
4480
+ "learning_rate": 1.2755223037896892e-09,
4481
+ "loss": 8.6529,
4482
+ "step": 631
4483
+ },
4484
+ {
4485
+ "epoch": 0.11625126460038628,
4486
+ "grad_norm": 1.1216403245925903,
4487
+ "learning_rate": 0.0,
4488
+ "loss": 10.7205,
4489
+ "step": 632
4490
  }
4491
  ],
4492
  "logging_steps": 1,
 
4510
  "should_evaluate": false,
4511
  "should_log": false,
4512
  "should_save": true,
4513
+ "should_training_stop": true
4514
  },
4515
  "attributes": {}
4516
  }
4517
  },
4518
+ "total_flos": 3.7507247651998925e+18,
4519
  "train_batch_size": 4,
4520
  "trial_name": null,
4521
  "trial_params": null