Training in progress, step 87200

Browse files

Files changed (7) hide show

adapter_model.safetensors +1 -1
last-checkpoint/adapter_config.json +5 -5
last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +3 -59

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:07df0acbf4b216f18a99849dcac2678b65a361a83ed5b92ae43775c5a8692726
 size 2684416208

 version https://git-lfs.github.com/spec/v1
+oid sha256:e2266f5450f07ca58be26969588d2309a083856c6f1fbfcfef2944823461d4b8
 size 2684416208

last-checkpoint/adapter_config.json CHANGED Viewed

@@ -20,13 +20,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "up_proj",
-    "q_proj",
-    "o_proj",
     "down_proj",
-    "k_proj",
     "gate_proj",
-    "v_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "down_proj",
+    "q_proj",
+    "up_proj",
     "gate_proj",
+    "v_proj",
+    "k_proj",
+    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:07df0acbf4b216f18a99849dcac2678b65a361a83ed5b92ae43775c5a8692726
 size 2684416208

 version https://git-lfs.github.com/spec/v1
+oid sha256:c827c6acb286eef9eb5d9fab2316b7545ab03a9b49ef673a99c9760af01f486c
 size 2684416208

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fd04d428ba931b4786b903d4e7211dddf0ca3b8451539b3bb709c84d413332d7
 size 1364844242

 version https://git-lfs.github.com/spec/v1
+oid sha256:ed24af72109d31f584e84102cf570b3ecb488d7c1a351bcbcfffde2791f83406
 size 1364844242

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fcb9c617bf418ff95f76a670463e6c8bf9bd7f093d6c516b2c7d33144a185253
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:3f17c9ef1f7995726c517252ce76596fff06bfd0bd04d841db28af93fbf681c2
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ff131d8ecf56f15770f427617b305e326f9e9ad6f84803505e3515ea7f9525ea
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:a35fbe204f24b2fb43e35237525d951bf4c389930c0542629031c4bddc16ea54
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.12101384565309921,
   "eval_steps": 200,
-  "global_step": 87000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2380,62 +2380,6 @@
       "learning_rate": 1.9311780891052998e-05,
       "loss": 1.6567,
       "step": 85400
-    },
-    {
-      "epoch": 0.1190664964127045,
-      "grad_norm": 0.23755620419979095,
-      "learning_rate": 1.930859113373952e-05,
-      "loss": 1.7054,
-      "step": 85600
-    },
-    {
-      "epoch": 0.11934468916133233,
-      "grad_norm": 0.29518914222717285,
-      "learning_rate": 1.9305394266234104e-05,
-      "loss": 1.6406,
-      "step": 85800
-    },
-    {
-      "epoch": 0.11962288190996014,
-      "grad_norm": 0.5197004675865173,
-      "learning_rate": 1.9302190290978622e-05,
-      "loss": 1.6807,
-      "step": 86000
-    },
-    {
-      "epoch": 0.11990107465858794,
-      "grad_norm": 0.2740679979324341,
-      "learning_rate": 1.929897921042036e-05,
-      "loss": 1.6977,
-      "step": 86200
-    },
-    {
-      "epoch": 0.12017926740721577,
-      "grad_norm": 0.33021771907806396,
-      "learning_rate": 1.9295761027012046e-05,
-      "loss": 1.6943,
-      "step": 86400
-    },
-    {
-      "epoch": 0.12045746015584358,
-      "grad_norm": 0.32778891921043396,
-      "learning_rate": 1.929253574321183e-05,
-      "loss": 1.6941,
-      "step": 86600
-    },
-    {
-      "epoch": 0.12073565290447139,
-      "grad_norm": 0.3531610369682312,
-      "learning_rate": 1.9289303361483284e-05,
-      "loss": 1.7031,
-      "step": 86800
-    },
-    {
-      "epoch": 0.12101384565309921,
-      "grad_norm": 0.4716193377971649,
-      "learning_rate": 1.9286063884295397e-05,
-      "loss": 1.668,
-      "step": 87000
     }
   ],
   "logging_steps": 200,
@@ -2455,7 +2399,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.7217876571997307e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.1187883036640767,
   "eval_steps": 200,
+  "global_step": 85400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.9311780891052998e-05,
       "loss": 1.6567,
       "step": 85400
     }
   ],
   "logging_steps": 200,
       "attributes": {}
     }
   },
+  "total_flos": 1.663223037366141e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null