Training in progress, step 85400

Browse files

Files changed (7) hide show

adapter_model.safetensors +1 -1
last-checkpoint/adapter_config.json +5 -5
last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +3 -59

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b6804e9f66223b09bb658d657fc6e13aa50694672ebb634d1c800766bc2fa7b9
 size 2684416208

 version https://git-lfs.github.com/spec/v1
+oid sha256:c827c6acb286eef9eb5d9fab2316b7545ab03a9b49ef673a99c9760af01f486c
 size 2684416208

last-checkpoint/adapter_config.json CHANGED Viewed

@@ -20,13 +20,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "down_proj",
-    "q_proj",
-    "up_proj",
     "gate_proj",
     "v_proj",
-    "k_proj",
-    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "k_proj",
     "gate_proj",
+    "o_proj",
+    "down_proj",
     "v_proj",
+    "q_proj",
+    "up_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b6804e9f66223b09bb658d657fc6e13aa50694672ebb634d1c800766bc2fa7b9
 size 2684416208

 version https://git-lfs.github.com/spec/v1
+oid sha256:9c35a93f0fbc67dcff98e79a028ed18bce041ca6c504534cfeba1ffe113541a6
 size 2684416208

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:356a70458aa868105f82ab5806ad493a4b7a2b1a5f4f312d5fad9f6e2d84bd5f
 size 1364844242

 version https://git-lfs.github.com/spec/v1
+oid sha256:630b4e45f445964eef38461a3e815468bcb4d79c516277156cdd1f608dc6fd5f
 size 1364844242

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:146268fced5a4a83c4515dbc3b480e5b723bda9119bcafea221f6ab6c5493a08
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:9729d7d0dcfc381ff20b9d9582e8dcd5a65ff6a31a9fa0a4ab53b8e8735d6817
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f11268aacf97b04105141009394a27dfe91653458e6c3f073d55a42d01c2ddb3
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:998cab517918ddd7b7621f6bc4d2103805ef631de606bc987cf4431529e437c9
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.11851011091544887,
   "eval_steps": 200,
-  "global_step": 85200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2317,62 +2317,6 @@
       "learning_rate": 1.9340168348018822e-05,
       "loss": 1.7144,
       "step": 83600
-    },
-    {
-      "epoch": 0.11656276167505418,
-      "grad_norm": 0.6080924272537231,
-      "learning_rate": 1.933704269142008e-05,
-      "loss": 1.6828,
-      "step": 83800
-    },
-    {
-      "epoch": 0.11684095442368199,
-      "grad_norm": 0.3429834246635437,
-      "learning_rate": 1.9333909902897212e-05,
-      "loss": 1.7374,
-      "step": 84000
-    },
-    {
-      "epoch": 0.11711914717230981,
-      "grad_norm": 0.34908148646354675,
-      "learning_rate": 1.9330769984843144e-05,
-      "loss": 1.7273,
-      "step": 84200
-    },
-    {
-      "epoch": 0.11739733992093762,
-      "grad_norm": 0.47220101952552795,
-      "learning_rate": 1.932762293965624e-05,
-      "loss": 1.6758,
-      "step": 84400
-    },
-    {
-      "epoch": 0.11767553266956543,
-      "grad_norm": 0.5649632215499878,
-      "learning_rate": 1.9324468769740307e-05,
-      "loss": 1.6967,
-      "step": 84600
-    },
-    {
-      "epoch": 0.11795372541819325,
-      "grad_norm": 0.3771503269672394,
-      "learning_rate": 1.932130747750461e-05,
-      "loss": 1.7156,
-      "step": 84800
-    },
-    {
-      "epoch": 0.11823191816682106,
-      "grad_norm": 0.3423559367656708,
-      "learning_rate": 1.9318139065363826e-05,
-      "loss": 1.6854,
-      "step": 85000
-    },
-    {
-      "epoch": 0.11851011091544887,
-      "grad_norm": 0.4594859182834625,
-      "learning_rate": 1.93149635357381e-05,
-      "loss": 1.7195,
-      "step": 85200
     }
   ],
   "logging_steps": 200,
@@ -2392,7 +2336,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.6558717875730022e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.11628456892642636,
   "eval_steps": 200,
+  "global_step": 83600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.9340168348018822e-05,
       "loss": 1.7144,
       "step": 83600
     }
   ],
   "logging_steps": 200,
       "attributes": {}
     }
   },
+  "total_flos": 1.596842508550865e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null