Training in progress, step 240600
- adapter_model.safetensors +1 -1
- last-checkpoint/adapter_config.json +4 -4
- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +3 -227
- last-checkpoint/training_args.bin +1 -1
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ad2c04e4c9d9778549e502f8f4d5e5c7678fc1dcb6dbaa7898e81a74d789ffe6
 size 1342238560
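The two pointer lines (oid and size) fully identify the stored blob. As an illustration only, not part of this commit, the sketch below uses a hypothetical helper `verify_lfs_object` to recompute the SHA-256 of a locally downloaded adapter_model.safetensors and compare it against the new pointer values above.

```python
# Hypothetical helper: confirm a local file matches the Git LFS pointer
# (oid/size) recorded in the diff above.
import hashlib

def verify_lfs_object(path, expected_oid, expected_size):
    h = hashlib.sha256()
    size = 0
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
            size += len(chunk)
    return h.hexdigest() == expected_oid and size == expected_size

# Values taken from the new pointer above.
ok = verify_lfs_object(
    "adapter_model.safetensors",
    "ad2c04e4c9d9778549e502f8f4d5e5c7678fc1dcb6dbaa7898e81a74d789ffe6",
    1342238560,
)
print("pointer matches file:", ok)
```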
last-checkpoint/adapter_config.json
CHANGED
@@ -23,13 +23,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "v_proj",
     "down_proj",
-    "q_proj",
     "k_proj",
-    "
+    "o_proj",
     "gate_proj",
-    "
-    "
+    "up_proj",
+    "q_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
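The updated target_modules list is the kind of setting produced when the adapter is first created. Below is a minimal sketch, assuming the PEFT library, of a LoraConfig that would serialize to this list; the rank, alpha, and dropout values are illustrative placeholders, not values recorded in this repo.

```python
# Sketch of a LoraConfig matching the updated target_modules above.
from peft import LoraConfig

config = LoraConfig(
    r=16,             # placeholder rank, not taken from this repo
    lora_alpha=32,    # placeholder scaling
    lora_dropout=0.05,  # placeholder dropout
    target_modules=[
        "v_proj", "down_proj", "k_proj",
        "o_proj", "gate_proj", "up_proj", "q_proj",
    ],
    task_type="CAUSAL_LM",
)
config.save_pretrained("last-checkpoint")  # writes adapter_config.json
```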
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f8e264d1dffd90f8b073222ccd33bf0ff291438d0b5aa7db5c240c57952d4d76
 size 1342238560
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:167d57d3a8d308860c7c950b22c7af59b934e02a360c176ef4df5959b274a8c7
 size 683268498
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:37ad8afbe49229feb93ffb8650328b92bba97953081ca74875c92d4fdc80a4a4
 size 14244
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:7d8504145a858461a123d740b2f06c2a9c70189e8b166f234b84ae255029b46d
 size 1064
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.16274287113198796,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 234000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -8197,230 +8197,6 @@
       "learning_rate": 1.9772518872973653e-05,
       "loss": 1.6526,
       "step": 234000
-    },
-    {
-      "epoch": 0.16288196760304094,
-      "grad_norm": 3.734306812286377,
-      "learning_rate": 1.9772131974642406e-05,
-      "loss": 1.6493,
-      "step": 234200
-    },
-    {
-      "epoch": 0.16302106407409392,
-      "grad_norm": 6.192078590393066,
-      "learning_rate": 1.9771744753017348e-05,
-      "loss": 1.6857,
-      "step": 234400
-    },
-    {
-      "epoch": 0.1631601605451469,
-      "grad_norm": 2.771817207336426,
-      "learning_rate": 1.977135720811697e-05,
-      "loss": 1.6534,
-      "step": 234600
-    },
-    {
-      "epoch": 0.16329925701619988,
-      "grad_norm": 4.116189479827881,
-      "learning_rate": 1.9770969339959763e-05,
-      "loss": 1.6348,
-      "step": 234800
-    },
-    {
-      "epoch": 0.16343835348725286,
-      "grad_norm": 6.450043678283691,
-      "learning_rate": 1.9770581148564254e-05,
-      "loss": 1.6792,
-      "step": 235000
-    },
-    {
-      "epoch": 0.16357744995830584,
-      "grad_norm": 3.6712119579315186,
-      "learning_rate": 1.9770192633948966e-05,
-      "loss": 1.6559,
-      "step": 235200
-    },
-    {
-      "epoch": 0.16371654642935882,
-      "grad_norm": 3.8811490535736084,
-      "learning_rate": 1.976980379613245e-05,
-      "loss": 1.6473,
-      "step": 235400
-    },
-    {
-      "epoch": 0.1638556429004118,
-      "grad_norm": 8.544036865234375,
-      "learning_rate": 1.9769414635133272e-05,
-      "loss": 1.6666,
-      "step": 235600
-    },
-    {
-      "epoch": 0.16399473937146478,
-      "grad_norm": 2.6414921283721924,
-      "learning_rate": 1.9769025150970004e-05,
-      "loss": 1.7163,
-      "step": 235800
-    },
-    {
-      "epoch": 0.16413383584251776,
-      "grad_norm": 3.8313961029052734,
-      "learning_rate": 1.976863534366124e-05,
-      "loss": 1.6673,
-      "step": 236000
-    },
-    {
-      "epoch": 0.16427293231357074,
-      "grad_norm": 4.338851451873779,
-      "learning_rate": 1.97682452132256e-05,
-      "loss": 1.6587,
-      "step": 236200
-    },
-    {
-      "epoch": 0.16441202878462371,
-      "grad_norm": 5.920814514160156,
-      "learning_rate": 1.9767854759681694e-05,
-      "loss": 1.7192,
-      "step": 236400
-    },
-    {
-      "epoch": 0.1645511252556767,
-      "grad_norm": 7.062288761138916,
-      "learning_rate": 1.976746398304817e-05,
-      "loss": 1.6747,
-      "step": 236600
-    },
-    {
-      "epoch": 0.16469022172672967,
-      "grad_norm": 4.87226676940918,
-      "learning_rate": 1.976707288334368e-05,
-      "loss": 1.7216,
-      "step": 236800
-    },
-    {
-      "epoch": 0.16482931819778265,
-      "grad_norm": 4.253633499145508,
-      "learning_rate": 1.9766681460586894e-05,
-      "loss": 1.6602,
-      "step": 237000
-    },
-    {
-      "epoch": 0.16496841466883563,
-      "grad_norm": 5.2997822761535645,
-      "learning_rate": 1.9766289714796502e-05,
-      "loss": 1.6209,
-      "step": 237200
-    },
-    {
-      "epoch": 0.1651075111398886,
-      "grad_norm": 8.48527717590332,
-      "learning_rate": 1.97658976459912e-05,
-      "loss": 1.6526,
-      "step": 237400
-    },
-    {
-      "epoch": 0.16524660761094162,
-      "grad_norm": 3.7595603466033936,
-      "learning_rate": 1.9765505254189708e-05,
-      "loss": 1.718,
-      "step": 237600
-    },
-    {
-      "epoch": 0.1653857040819946,
-      "grad_norm": 2.8959290981292725,
-      "learning_rate": 1.9765112539410758e-05,
-      "loss": 1.729,
-      "step": 237800
-    },
-    {
-      "epoch": 0.16552480055304758,
-      "grad_norm": 3.3761868476867676,
-      "learning_rate": 1.97647195016731e-05,
-      "loss": 1.6648,
-      "step": 238000
-    },
-    {
-      "epoch": 0.16566389702410056,
-      "grad_norm": 6.8370585441589355,
-      "learning_rate": 1.9764326140995496e-05,
-      "loss": 1.6535,
-      "step": 238200
-    },
-    {
-      "epoch": 0.16580299349515354,
-      "grad_norm": 4.386465072631836,
-      "learning_rate": 1.976393245739672e-05,
-      "loss": 1.6181,
-      "step": 238400
-    },
-    {
-      "epoch": 0.16594208996620652,
-      "grad_norm": 2.054741144180298,
-      "learning_rate": 1.9763538450895576e-05,
-      "loss": 1.6094,
-      "step": 238600
-    },
-    {
-      "epoch": 0.1660811864372595,
-      "grad_norm": 4.956938743591309,
-      "learning_rate": 1.976314412151086e-05,
-      "loss": 1.7039,
-      "step": 238800
-    },
-    {
-      "epoch": 0.16622028290831248,
-      "grad_norm": 3.4034650325775146,
-      "learning_rate": 1.976274946926141e-05,
-      "loss": 1.7075,
-      "step": 239000
-    },
-    {
-      "epoch": 0.16635937937936546,
-      "grad_norm": 5.052691459655762,
-      "learning_rate": 1.976235449416606e-05,
-      "loss": 1.606,
-      "step": 239200
-    },
-    {
-      "epoch": 0.16649847585041844,
-      "grad_norm": 5.3564372062683105,
-      "learning_rate": 1.9761959196243662e-05,
-      "loss": 1.637,
-      "step": 239400
-    },
-    {
-      "epoch": 0.16663757232147142,
-      "grad_norm": 4.567344665527344,
-      "learning_rate": 1.9761563575513093e-05,
-      "loss": 1.6433,
-      "step": 239600
-    },
-    {
-      "epoch": 0.1667766687925244,
-      "grad_norm": 6.281781196594238,
-      "learning_rate": 1.9761167631993237e-05,
-      "loss": 1.6451,
-      "step": 239800
-    },
-    {
-      "epoch": 0.16691576526357738,
-      "grad_norm": 3.993034839630127,
-      "learning_rate": 1.9760771365703e-05,
-      "loss": 1.6326,
-      "step": 240000
-    },
-    {
-      "epoch": 0.16705486173463036,
-      "grad_norm": 6.029495716094971,
-      "learning_rate": 1.9760374776661288e-05,
-      "loss": 1.6587,
-      "step": 240200
-    },
-    {
-      "epoch": 0.16719395820568334,
-      "grad_norm": 4.002114772796631,
-      "learning_rate": 1.9759977864887044e-05,
-      "loss": 1.6484,
-      "step": 240400
     }
   ],
   "logging_steps": 200,
@@ -8440,7 +8216,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.
+  "total_flos": 3.114872184179589e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null
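The new trainer_state.json records a resume point of epoch 0.16274287113198796 at global_step 234000, with log_history trimmed back to that step (logging_steps is 200 in this run). A minimal sketch for sanity-checking this, assuming the checkpoint has been pulled locally into last-checkpoint/:

```python
# Load the trimmed trainer state and print the resume point plus the
# last few logged training entries.
import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print("epoch:", state["epoch"])              # 0.16274287113198796
print("global_step:", state["global_step"])  # 234000
for entry in state["log_history"][-3:]:
    print(entry["step"], entry["loss"], entry["learning_rate"])
```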
last-checkpoint/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:804b58f86f1ce339812f5b2ce6cb00d1866f6589fe10723387689d878ffcc627
 size 6840