MohamedAhmedAE commited on
Commit
8b39bc1
·
verified ·
1 Parent(s): 97ebc0e

Training in progress, step 87200

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07df0acbf4b216f18a99849dcac2678b65a361a83ed5b92ae43775c5a8692726
3
  size 2684416208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2266f5450f07ca58be26969588d2309a083856c6f1fbfcfef2944823461d4b8
3
  size 2684416208
last-checkpoint/adapter_config.json CHANGED
@@ -20,13 +20,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "up_proj",
24
- "q_proj",
25
- "o_proj",
26
  "down_proj",
27
- "k_proj",
 
28
  "gate_proj",
29
- "v_proj"
 
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
 
 
23
  "down_proj",
24
+ "q_proj",
25
+ "up_proj",
26
  "gate_proj",
27
+ "v_proj",
28
+ "k_proj",
29
+ "o_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07df0acbf4b216f18a99849dcac2678b65a361a83ed5b92ae43775c5a8692726
3
  size 2684416208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c827c6acb286eef9eb5d9fab2316b7545ab03a9b49ef673a99c9760af01f486c
3
  size 2684416208
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd04d428ba931b4786b903d4e7211dddf0ca3b8451539b3bb709c84d413332d7
3
  size 1364844242
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed24af72109d31f584e84102cf570b3ecb488d7c1a351bcbcfffde2791f83406
3
  size 1364844242
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fcb9c617bf418ff95f76a670463e6c8bf9bd7f093d6c516b2c7d33144a185253
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f17c9ef1f7995726c517252ce76596fff06bfd0bd04d841db28af93fbf681c2
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff131d8ecf56f15770f427617b305e326f9e9ad6f84803505e3515ea7f9525ea
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a35fbe204f24b2fb43e35237525d951bf4c389930c0542629031c4bddc16ea54
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.12101384565309921,
5
  "eval_steps": 200,
6
- "global_step": 87000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2380,62 +2380,6 @@
2380
  "learning_rate": 1.9311780891052998e-05,
2381
  "loss": 1.6567,
2382
  "step": 85400
2383
- },
2384
- {
2385
- "epoch": 0.1190664964127045,
2386
- "grad_norm": 0.23755620419979095,
2387
- "learning_rate": 1.930859113373952e-05,
2388
- "loss": 1.7054,
2389
- "step": 85600
2390
- },
2391
- {
2392
- "epoch": 0.11934468916133233,
2393
- "grad_norm": 0.29518914222717285,
2394
- "learning_rate": 1.9305394266234104e-05,
2395
- "loss": 1.6406,
2396
- "step": 85800
2397
- },
2398
- {
2399
- "epoch": 0.11962288190996014,
2400
- "grad_norm": 0.5197004675865173,
2401
- "learning_rate": 1.9302190290978622e-05,
2402
- "loss": 1.6807,
2403
- "step": 86000
2404
- },
2405
- {
2406
- "epoch": 0.11990107465858794,
2407
- "grad_norm": 0.2740679979324341,
2408
- "learning_rate": 1.929897921042036e-05,
2409
- "loss": 1.6977,
2410
- "step": 86200
2411
- },
2412
- {
2413
- "epoch": 0.12017926740721577,
2414
- "grad_norm": 0.33021771907806396,
2415
- "learning_rate": 1.9295761027012046e-05,
2416
- "loss": 1.6943,
2417
- "step": 86400
2418
- },
2419
- {
2420
- "epoch": 0.12045746015584358,
2421
- "grad_norm": 0.32778891921043396,
2422
- "learning_rate": 1.929253574321183e-05,
2423
- "loss": 1.6941,
2424
- "step": 86600
2425
- },
2426
- {
2427
- "epoch": 0.12073565290447139,
2428
- "grad_norm": 0.3531610369682312,
2429
- "learning_rate": 1.9289303361483284e-05,
2430
- "loss": 1.7031,
2431
- "step": 86800
2432
- },
2433
- {
2434
- "epoch": 0.12101384565309921,
2435
- "grad_norm": 0.4716193377971649,
2436
- "learning_rate": 1.9286063884295397e-05,
2437
- "loss": 1.668,
2438
- "step": 87000
2439
  }
2440
  ],
2441
  "logging_steps": 200,
@@ -2455,7 +2399,7 @@
2455
  "attributes": {}
2456
  }
2457
  },
2458
- "total_flos": 1.7217876571997307e+18,
2459
  "train_batch_size": 2,
2460
  "trial_name": null,
2461
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.1187883036640767,
5
  "eval_steps": 200,
6
+ "global_step": 85400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2380
  "learning_rate": 1.9311780891052998e-05,
2381
  "loss": 1.6567,
2382
  "step": 85400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2383
  }
2384
  ],
2385
  "logging_steps": 200,
 
2399
  "attributes": {}
2400
  }
2401
  },
2402
+ "total_flos": 1.663223037366141e+18,
2403
  "train_batch_size": 2,
2404
  "trial_name": null,
2405
  "trial_params": null