Training in progress, step 85400, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/adapter_config.json +5 -5
last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +66 -3

last-checkpoint/adapter_config.json CHANGED Viewed

@@ -20,13 +20,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "k_proj",
-    "gate_proj",
-    "o_proj",
     "down_proj",
-    "v_proj",
     "q_proj",
-    "up_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "down_proj",
     "q_proj",
+    "up_proj",
+    "gate_proj",
+    "v_proj",
+    "k_proj",
+    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9c35a93f0fbc67dcff98e79a028ed18bce041ca6c504534cfeba1ffe113541a6
 size 2684416208

 version https://git-lfs.github.com/spec/v1
+oid sha256:c827c6acb286eef9eb5d9fab2316b7545ab03a9b49ef673a99c9760af01f486c
 size 2684416208

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:630b4e45f445964eef38461a3e815468bcb4d79c516277156cdd1f608dc6fd5f
 size 1364844242

 version https://git-lfs.github.com/spec/v1
+oid sha256:ed24af72109d31f584e84102cf570b3ecb488d7c1a351bcbcfffde2791f83406
 size 1364844242

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9729d7d0dcfc381ff20b9d9582e8dcd5a65ff6a31a9fa0a4ab53b8e8735d6817
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:3f17c9ef1f7995726c517252ce76596fff06bfd0bd04d841db28af93fbf681c2
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:998cab517918ddd7b7621f6bc4d2103805ef631de606bc987cf4431529e437c9
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:a35fbe204f24b2fb43e35237525d951bf4c389930c0542629031c4bddc16ea54
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.11628456892642636,
   "eval_steps": 200,
-  "global_step": 83600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2317,6 +2317,69 @@
       "learning_rate": 1.9340168348018822e-05,
       "loss": 1.7144,
       "step": 83600
     }
   ],
   "logging_steps": 200,
@@ -2336,7 +2399,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.596842508550865e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.1187883036640767,
   "eval_steps": 200,
+  "global_step": 85400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.9340168348018822e-05,
       "loss": 1.7144,
       "step": 83600
+    },
+    {
+      "epoch": 0.11656276167505418,
+      "grad_norm": 0.6080924272537231,
+      "learning_rate": 1.933704269142008e-05,
+      "loss": 1.6828,
+      "step": 83800
+    },
+    {
+      "epoch": 0.11684095442368199,
+      "grad_norm": 0.3429834246635437,
+      "learning_rate": 1.9333909902897212e-05,
+      "loss": 1.7374,
+      "step": 84000
+    },
+    {
+      "epoch": 0.11711914717230981,
+      "grad_norm": 0.34908148646354675,
+      "learning_rate": 1.9330769984843144e-05,
+      "loss": 1.7273,
+      "step": 84200
+    },
+    {
+      "epoch": 0.11739733992093762,
+      "grad_norm": 0.47220101952552795,
+      "learning_rate": 1.932762293965624e-05,
+      "loss": 1.6758,
+      "step": 84400
+    },
+    {
+      "epoch": 0.11767553266956543,
+      "grad_norm": 0.5649632215499878,
+      "learning_rate": 1.9324468769740307e-05,
+      "loss": 1.6967,
+      "step": 84600
+    },
+    {
+      "epoch": 0.11795372541819325,
+      "grad_norm": 0.3771503269672394,
+      "learning_rate": 1.932130747750461e-05,
+      "loss": 1.7156,
+      "step": 84800
+    },
+    {
+      "epoch": 0.11823191816682106,
+      "grad_norm": 0.3423559367656708,
+      "learning_rate": 1.9318139065363826e-05,
+      "loss": 1.6854,
+      "step": 85000
+    },
+    {
+      "epoch": 0.11851011091544887,
+      "grad_norm": 0.4594859182834625,
+      "learning_rate": 1.93149635357381e-05,
+      "loss": 1.7195,
+      "step": 85200
+    },
+    {
+      "epoch": 0.1187883036640767,
+      "grad_norm": 0.29249799251556396,
+      "learning_rate": 1.9311780891052998e-05,
+      "loss": 1.6567,
+      "step": 85400
     }
   ],
   "logging_steps": 200,
       "attributes": {}
     }
   },
+  "total_flos": 1.663223037366141e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null