masatochi commited on
Commit
8db5f47
·
verified ·
1 Parent(s): 81e9a2d

Training in progress, step 140, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:712500642cd5eacaf2d3d13424057784a9a6b504e88ecf383e62482119ab96b6
3
  size 83945296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff33b28108b3679d7be04706c6f792227844ae4246a7458e95f560f317bb2d0d
3
  size 83945296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc54186d6a7952617779177df5659ed1e0adab55bf50305de35608da22734c49
3
  size 43122580
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cef79a8dc424dcfafd9e0823028c49d788f31347b10bfd56f7b6b4e6bc1f8070
3
  size 43122580
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:79354fda314c9103e2d55f6fd0e3e7ec5fe801812e33d2c8e4dd8c180772e09a
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad528149b7ddaf36aa54c3270bc91600bfb527af6fb344cfc62207a9548bc407
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6fc750a6dfb3e5c9f642238b7443b0984a56e79b7c2731a6e152ecfc3e32f4e7
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:671828e69cd8fd42106344a5797cbc78f701a434a6386d9dfacd16451ba179aa
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.06601870529983496,
5
  "eval_steps": 34,
6
- "global_step": 135,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -984,6 +984,49 @@
984
  "learning_rate": 6.387583338128471e-05,
985
  "loss": 1.074,
986
  "step": 135
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
987
  }
988
  ],
989
  "logging_steps": 1,
@@ -1003,7 +1046,7 @@
1003
  "attributes": {}
1004
  }
1005
  },
1006
- "total_flos": 5.992588163992781e+17,
1007
  "train_batch_size": 3,
1008
  "trial_name": null,
1009
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.06846384253316218,
5
  "eval_steps": 34,
6
+ "global_step": 140,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
984
  "learning_rate": 6.387583338128471e-05,
985
  "loss": 1.074,
986
  "step": 135
987
+ },
988
+ {
989
+ "epoch": 0.0665077327465004,
990
+ "grad_norm": 1.2866921424865723,
991
+ "learning_rate": 6.215889499576898e-05,
992
+ "loss": 1.1369,
993
+ "step": 136
994
+ },
995
+ {
996
+ "epoch": 0.0665077327465004,
997
+ "eval_loss": 1.008103847503662,
998
+ "eval_runtime": 1314.805,
999
+ "eval_samples_per_second": 1.965,
1000
+ "eval_steps_per_second": 0.655,
1001
+ "step": 136
1002
+ },
1003
+ {
1004
+ "epoch": 0.06699676019316585,
1005
+ "grad_norm": 1.012360692024231,
1006
+ "learning_rate": 6.0454879312945754e-05,
1007
+ "loss": 0.923,
1008
+ "step": 137
1009
+ },
1010
+ {
1011
+ "epoch": 0.06748578763983129,
1012
+ "grad_norm": 1.1338571310043335,
1013
+ "learning_rate": 5.876436825260967e-05,
1014
+ "loss": 0.8111,
1015
+ "step": 138
1016
+ },
1017
+ {
1018
+ "epoch": 0.06797481508649673,
1019
+ "grad_norm": 1.3135179281234741,
1020
+ "learning_rate": 5.708793912273911e-05,
1021
+ "loss": 1.0341,
1022
+ "step": 139
1023
+ },
1024
+ {
1025
+ "epoch": 0.06846384253316218,
1026
+ "grad_norm": 1.311312198638916,
1027
+ "learning_rate": 5.542616442234618e-05,
1028
+ "loss": 1.0275,
1029
+ "step": 140
1030
  }
1031
  ],
1032
  "logging_steps": 1,
 
1046
  "attributes": {}
1047
  }
1048
  },
1049
+ "total_flos": 6.214535873770291e+17,
1050
  "train_batch_size": 3,
1051
  "trial_name": null,
1052
  "trial_params": null