MohamedAhmedAE committed (verified) · Commit 656c354 · 1 parent: f6e455d

Training in progress, step 240600

adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:3f82faa289f66768e706b151dd1c8d787876e82e420c483f6f3a866380c150db
+ oid sha256:ad2c04e4c9d9778549e502f8f4d5e5c7678fc1dcb6dbaa7898e81a74d789ffe6
  size 1342238560
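
adapter_model.safetensors is stored through Git LFS, so the commit only touches the pointer file: the sha256 oid changes while the payload size stays 1342238560 bytes. Below is a minimal sketch, using plain hashlib and hypothetical local paths (not part of this repo), for checking a downloaded blob against such a pointer.

```python
# Minimal sketch: verify a downloaded LFS object against its pointer file.
# The paths are hypothetical; point them at the pointer text and the resolved
# adapter_model.safetensors on local disk.
import hashlib

def parse_lfs_pointer(pointer_path):
    """Read 'oid sha256:<hex>' and 'size <bytes>' from a Git LFS pointer file."""
    fields = {}
    with open(pointer_path, "r") as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    algo, _, expected_hex = fields["oid"].partition(":")
    return algo, expected_hex, int(fields["size"])

def verify(pointer_path, blob_path):
    algo, expected_hex, expected_size = parse_lfs_pointer(pointer_path)
    assert algo == "sha256", f"unexpected hash algorithm: {algo}"
    digest = hashlib.sha256()
    size = 0
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
            digest.update(chunk)
            size += len(chunk)
    return digest.hexdigest() == expected_hex and size == expected_size

if __name__ == "__main__":
    # For the new pointer above this should match oid ad2c04e4... and size 1342238560.
    print(verify("adapter_model.safetensors.pointer", "adapter_model.safetensors"))
```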
last-checkpoint/adapter_config.json CHANGED
@@ -23,13 +23,13 @@
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
+ "v_proj",
  "down_proj",
- "q_proj",
  "k_proj",
- "up_proj",
+ "o_proj",
  "gate_proj",
- "v_proj",
- "o_proj"
+ "up_proj",
+ "q_proj"
  ],
  "task_type": "CAUSAL_LM",
  "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:3f82faa289f66768e706b151dd1c8d787876e82e420c483f6f3a866380c150db
+ oid sha256:f8e264d1dffd90f8b073222ccd33bf0ff291438d0b5aa7db5c240c57952d4d76
  size 1342238560
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1f1801241fecb4d578d99565a821ed56d5d1517ebbd2254f63b66234b590deea
+ oid sha256:167d57d3a8d308860c7c950b22c7af59b934e02a360c176ef4df5959b274a8c7
  size 683268498
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:3306dd7802c8aec32f05706233922edc69e128370046e1eacbc69f3e786fdc2a
+ oid sha256:37ad8afbe49229feb93ffb8650328b92bba97953081ca74875c92d4fdc80a4a4
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c0f07a7a3d31c4c7ca96168e5cd97a141768e249d2dc3e853a16616d7c5fe5f6
+ oid sha256:7d8504145a858461a123d740b2f06c2a9c70189e8b166f234b84ae255029b46d
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.16719395820568334,
+ "epoch": 0.16274287113198796,
  "eval_steps": 500,
- "global_step": 240400,
+ "global_step": 234000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -8197,230 +8197,6 @@
  "learning_rate": 1.9772518872973653e-05,
  "loss": 1.6526,
  "step": 234000
- },
- {
- "epoch": 0.16288196760304094,
- "grad_norm": 3.734306812286377,
- "learning_rate": 1.9772131974642406e-05,
- "loss": 1.6493,
- "step": 234200
- },
- {
- "epoch": 0.16302106407409392,
- "grad_norm": 6.192078590393066,
- "learning_rate": 1.9771744753017348e-05,
- "loss": 1.6857,
- "step": 234400
- },
- {
- "epoch": 0.1631601605451469,
- "grad_norm": 2.771817207336426,
- "learning_rate": 1.977135720811697e-05,
- "loss": 1.6534,
- "step": 234600
- },
- {
- "epoch": 0.16329925701619988,
- "grad_norm": 4.116189479827881,
- "learning_rate": 1.9770969339959763e-05,
- "loss": 1.6348,
- "step": 234800
- },
- {
- "epoch": 0.16343835348725286,
- "grad_norm": 6.450043678283691,
- "learning_rate": 1.9770581148564254e-05,
- "loss": 1.6792,
- "step": 235000
- },
- {
- "epoch": 0.16357744995830584,
- "grad_norm": 3.6712119579315186,
- "learning_rate": 1.9770192633948966e-05,
- "loss": 1.6559,
- "step": 235200
- },
- {
- "epoch": 0.16371654642935882,
- "grad_norm": 3.8811490535736084,
- "learning_rate": 1.976980379613245e-05,
- "loss": 1.6473,
- "step": 235400
- },
- {
- "epoch": 0.1638556429004118,
- "grad_norm": 8.544036865234375,
- "learning_rate": 1.9769414635133272e-05,
- "loss": 1.6666,
- "step": 235600
- },
- {
- "epoch": 0.16399473937146478,
- "grad_norm": 2.6414921283721924,
- "learning_rate": 1.9769025150970004e-05,
- "loss": 1.7163,
- "step": 235800
- },
- {
- "epoch": 0.16413383584251776,
- "grad_norm": 3.8313961029052734,
- "learning_rate": 1.976863534366124e-05,
- "loss": 1.6673,
- "step": 236000
- },
- {
- "epoch": 0.16427293231357074,
- "grad_norm": 4.338851451873779,
- "learning_rate": 1.97682452132256e-05,
- "loss": 1.6587,
- "step": 236200
- },
- {
- "epoch": 0.16441202878462371,
- "grad_norm": 5.920814514160156,
- "learning_rate": 1.9767854759681694e-05,
- "loss": 1.7192,
- "step": 236400
- },
- {
- "epoch": 0.1645511252556767,
- "grad_norm": 7.062288761138916,
- "learning_rate": 1.976746398304817e-05,
- "loss": 1.6747,
- "step": 236600
- },
- {
- "epoch": 0.16469022172672967,
- "grad_norm": 4.87226676940918,
- "learning_rate": 1.976707288334368e-05,
- "loss": 1.7216,
- "step": 236800
- },
- {
- "epoch": 0.16482931819778265,
- "grad_norm": 4.253633499145508,
- "learning_rate": 1.9766681460586894e-05,
- "loss": 1.6602,
- "step": 237000
- },
- {
- "epoch": 0.16496841466883563,
- "grad_norm": 5.2997822761535645,
- "learning_rate": 1.9766289714796502e-05,
- "loss": 1.6209,
- "step": 237200
- },
- {
- "epoch": 0.1651075111398886,
- "grad_norm": 8.48527717590332,
- "learning_rate": 1.97658976459912e-05,
- "loss": 1.6526,
- "step": 237400
- },
- {
- "epoch": 0.16524660761094162,
- "grad_norm": 3.7595603466033936,
- "learning_rate": 1.9765505254189708e-05,
- "loss": 1.718,
- "step": 237600
- },
- {
- "epoch": 0.1653857040819946,
- "grad_norm": 2.8959290981292725,
- "learning_rate": 1.9765112539410758e-05,
- "loss": 1.729,
- "step": 237800
- },
- {
- "epoch": 0.16552480055304758,
- "grad_norm": 3.3761868476867676,
- "learning_rate": 1.97647195016731e-05,
- "loss": 1.6648,
- "step": 238000
- },
- {
- "epoch": 0.16566389702410056,
- "grad_norm": 6.8370585441589355,
- "learning_rate": 1.9764326140995496e-05,
- "loss": 1.6535,
- "step": 238200
- },
- {
- "epoch": 0.16580299349515354,
- "grad_norm": 4.386465072631836,
- "learning_rate": 1.976393245739672e-05,
- "loss": 1.6181,
- "step": 238400
- },
- {
- "epoch": 0.16594208996620652,
- "grad_norm": 2.054741144180298,
- "learning_rate": 1.9763538450895576e-05,
- "loss": 1.6094,
- "step": 238600
- },
- {
- "epoch": 0.1660811864372595,
- "grad_norm": 4.956938743591309,
- "learning_rate": 1.976314412151086e-05,
- "loss": 1.7039,
- "step": 238800
- },
- {
- "epoch": 0.16622028290831248,
- "grad_norm": 3.4034650325775146,
- "learning_rate": 1.976274946926141e-05,
- "loss": 1.7075,
- "step": 239000
- },
- {
- "epoch": 0.16635937937936546,
- "grad_norm": 5.052691459655762,
- "learning_rate": 1.976235449416606e-05,
- "loss": 1.606,
- "step": 239200
- },
- {
- "epoch": 0.16649847585041844,
- "grad_norm": 5.3564372062683105,
- "learning_rate": 1.9761959196243662e-05,
- "loss": 1.637,
- "step": 239400
- },
- {
- "epoch": 0.16663757232147142,
- "grad_norm": 4.567344665527344,
- "learning_rate": 1.9761563575513093e-05,
- "loss": 1.6433,
- "step": 239600
- },
- {
- "epoch": 0.1667766687925244,
- "grad_norm": 6.281781196594238,
- "learning_rate": 1.9761167631993237e-05,
- "loss": 1.6451,
- "step": 239800
- },
- {
- "epoch": 0.16691576526357738,
- "grad_norm": 3.993034839630127,
- "learning_rate": 1.9760771365703e-05,
- "loss": 1.6326,
- "step": 240000
- },
- {
- "epoch": 0.16705486173463036,
- "grad_norm": 6.029495716094971,
- "learning_rate": 1.9760374776661288e-05,
- "loss": 1.6587,
- "step": 240200
- },
- {
- "epoch": 0.16719395820568334,
- "grad_norm": 4.002114772796631,
- "learning_rate": 1.9759977864887044e-05,
- "loss": 1.6484,
- "step": 240400
  }
  ],
  "logging_steps": 200,
@@ -8440,7 +8216,7 @@
  "attributes": {}
  }
  },
- "total_flos": 3.1998534751710167e+18,
+ "total_flos": 3.114872184179589e+18,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:31159c9e3ece420d10b679508751f56bfb33866580a857e3a293714f0a805ecb
+ oid sha256:804b58f86f1ce339812f5b2ce6cb00d1866f6589fe10723387689d878ffcc627
  size 6840
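
training_args.bin is a pickled transformers.TrainingArguments object, so its LFS oid changes whenever any serialized argument changes even though the size stays 6840 bytes. A minimal sketch for inspecting it locally; it is a pickle, so only load files from a source you trust, and newer torch releases require overriding the weights_only default:

```python
# Minimal sketch: load the checkpoint's serialized TrainingArguments.
# Hypothetical local path; requires torch and transformers installed.
import torch

args = torch.load("last-checkpoint/training_args.bin", weights_only=False)
print(type(args).__name__)               # e.g. TrainingArguments
print(args.logging_steps)                # compare with "logging_steps": 200 above
print(args.per_device_train_batch_size)  # compare with "train_batch_size": 1 above
```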