leixa committed on
Commit
773b11b
·
verified ·
1 Parent(s): 51dfeeb

Training in progress, step 204, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:952d1a882b9cca3451ed6a97b532359cc908cc7e228144834369739e7d673517
3
  size 132164608
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c931d5dcab4934d664c86b7b9f1cc7cd35a706b9856206ace9d697eb010c61d
3
  size 132164608
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ebd9b3809508d84f0b43948535f48349685b3070faa9b0780a120c15d7f6a8d6
3
  size 67487892
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29b3f55f7dbe9de2ff2648d336ff782d60d2eaf477c10452e4200763918493d8
3
  size 67487892
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2009f4880cf0cac08ca9532dea6b28d8de9f65ae7c47b80222df55d29c632810
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6936f48172630f3b195d633d05bbdd084bbd64378cb9f0296e98ae7438be100
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f21ce5519aba36efeb75a8dad39ab6bd85bd42d0ae24cbc1f5cfa5d96741b8bc
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f9839d107756d9c8815de9164f2ebf92c05b3536704a349ca5892084df7663e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.09447068630175048,
5
  "eval_steps": 34,
6
- "global_step": 170,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -447,6 +447,98 @@
447
  "eval_samples_per_second": 7.825,
448
  "eval_steps_per_second": 0.978,
449
  "step": 170
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
450
  }
451
  ],
452
  "logging_steps": 3,
@@ -466,7 +558,7 @@
466
  "attributes": {}
467
  }
468
  },
469
- "total_flos": 3.37712210540544e+17,
470
  "train_batch_size": 8,
471
  "trial_name": null,
472
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.11336482356210058,
5
  "eval_steps": 34,
6
+ "global_step": 204,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
447
  "eval_samples_per_second": 7.825,
448
  "eval_steps_per_second": 0.978,
449
  "step": 170
450
+ },
451
+ {
452
+ "epoch": 0.09502639622117255,
453
+ "grad_norm": 1.5192731618881226,
454
+ "learning_rate": 3.17617799075421e-05,
455
+ "loss": 4.9727,
456
+ "step": 171
457
+ },
458
+ {
459
+ "epoch": 0.09669352597943873,
460
+ "grad_norm": 2.1325037479400635,
461
+ "learning_rate": 3.1178227669141744e-05,
462
+ "loss": 5.287,
463
+ "step": 174
464
+ },
465
+ {
466
+ "epoch": 0.09836065573770492,
467
+ "grad_norm": 1.6394548416137695,
468
+ "learning_rate": 3.0591067519763895e-05,
469
+ "loss": 5.0878,
470
+ "step": 177
471
+ },
472
+ {
473
+ "epoch": 0.1000277854959711,
474
+ "grad_norm": 1.954785704612732,
475
+ "learning_rate": 3.0000642344401113e-05,
476
+ "loss": 5.7474,
477
+ "step": 180
478
+ },
479
+ {
480
+ "epoch": 0.1016949152542373,
481
+ "grad_norm": 1.7333064079284668,
482
+ "learning_rate": 2.9407296934729227e-05,
483
+ "loss": 5.2069,
484
+ "step": 183
485
+ },
486
+ {
487
+ "epoch": 0.10336204501250347,
488
+ "grad_norm": 1.7775465250015259,
489
+ "learning_rate": 2.8811377787758636e-05,
490
+ "loss": 4.8365,
491
+ "step": 186
492
+ },
493
+ {
494
+ "epoch": 0.10502917477076966,
495
+ "grad_norm": 1.766340970993042,
496
+ "learning_rate": 2.8213232903489865e-05,
497
+ "loss": 4.8806,
498
+ "step": 189
499
+ },
500
+ {
501
+ "epoch": 0.10669630452903585,
502
+ "grad_norm": 2.064275026321411,
503
+ "learning_rate": 2.761321158169134e-05,
504
+ "loss": 5.1876,
505
+ "step": 192
506
+ },
507
+ {
508
+ "epoch": 0.10836343428730202,
509
+ "grad_norm": 1.731985330581665,
510
+ "learning_rate": 2.7011664217918154e-05,
511
+ "loss": 4.6924,
512
+ "step": 195
513
+ },
514
+ {
515
+ "epoch": 0.11003056404556821,
516
+ "grad_norm": 1.8852187395095825,
517
+ "learning_rate": 2.6408942098890936e-05,
518
+ "loss": 5.0911,
519
+ "step": 198
520
+ },
521
+ {
522
+ "epoch": 0.1116976938038344,
523
+ "grad_norm": 1.8446505069732666,
524
+ "learning_rate": 2.580539719735433e-05,
525
+ "loss": 5.0379,
526
+ "step": 201
527
+ },
528
+ {
529
+ "epoch": 0.11336482356210058,
530
+ "grad_norm": 1.863871455192566,
531
+ "learning_rate": 2.5201381966534748e-05,
532
+ "loss": 5.3173,
533
+ "step": 204
534
+ },
535
+ {
536
+ "epoch": 0.11336482356210058,
537
+ "eval_loss": 1.3148518800735474,
538
+ "eval_runtime": 387.4809,
539
+ "eval_samples_per_second": 7.822,
540
+ "eval_steps_per_second": 0.978,
541
+ "step": 204
542
  }
543
  ],
544
  "logging_steps": 3,
 
558
  "attributes": {}
559
  }
560
  },
561
+ "total_flos": 4.07623549249536e+17,
562
  "train_batch_size": 8,
563
  "trial_name": null,
564
  "trial_params": null