Training in progress, step 98800
Browse files

- adapter_model.safetensors +1 -1
- last-checkpoint/adapter_config.json +4 -4
- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +3 -1081
- last-checkpoint/training_args.bin +1 -1
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:b35cb4877d8334962276c7da8c84c133d65ddb9b2d7e1db7e303689c89c692ca
 size 360740440
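These binary files are stored via Git LFS, so the diff shows only the pointer file; the oid is the SHA-256 digest of the blob's contents. A minimal sketch, assuming the file has been downloaded locally, for checking a blob against the pointer shown above:

import hashlib

# Hash the downloaded blob in chunks and compare against the
# "oid sha256:..." line of the Git LFS pointer in the diff above.
def lfs_oid(path: str, chunk_size: int = 1 << 20) -> str:
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        while chunk := f.read(chunk_size):
            digest.update(chunk)
    return digest.hexdigest()

assert lfs_oid("adapter_model.safetensors") == (
    "b35cb4877d8334962276c7da8c84c133d65ddb9b2d7e1db7e303689c89c692ca"
)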
last-checkpoint/adapter_config.json
CHANGED
@@ -23,13 +23,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "q_proj",
     "gate_proj",
+    "q_proj",
+    "up_proj",
+    "down_proj",
     "o_proj",
     "k_proj",
-    "up_proj",
-    "v_proj",
-    "down_proj"
+    "v_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
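The target_modules change is only a reordering of the same seven projection layers; peft treats the list as a set, so the adapted modules are unchanged. For context, a hedged sketch of how a LoRA config with these targets is typically declared with the peft library (r and lora_alpha are illustrative assumptions, not values read from this repo):

from peft import LoraConfig

# Illustrative only: r and lora_alpha are assumed, not taken from this checkpoint.
config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["gate_proj", "q_proj", "up_proj", "down_proj",
                    "o_proj", "k_proj", "v_proj"],
    task_type="CAUSAL_LM",
    use_dora=False,
)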
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f4de4264c63a59dce7704a8c5a40a2eabb335ae0f4c08793186a7dac53476b3e
 size 360740440

last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:21ac6e3bc175827fd1e8dd90d0622a6f7ecc2ec3bd02f6aab9cb49b3e69b310d
 size 184019218

last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ef2b75f883be9ec7671e357b5847cf1010397479859ed73d1431ac8ed8c32dc8
 size 14244

last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:3208921042601e649749f7b0ba9884e81041a7072e9d9acf88a51bc50dfd8b97
 size 1064
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.04715370368696061,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 67800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2380,1084 +2380,6 @@
       "learning_rate": 1.998080708864831e-05,
       "loss": 1.9318,
       "step": 67800
-    },
-    { "epoch": 0.04729280015801359, "grad_norm": 3.2300922870635986, "learning_rate": 1.998069374127872e-05, "loss": 1.8863, "step": 68000 },
-    { "epoch": 0.04743189662906657, "grad_norm": 5.228055000305176, "learning_rate": 1.9980580060658826e-05, "loss": 1.9176, "step": 68200 },
-    { "epoch": 0.04757099310011955, "grad_norm": 3.513674736022949, "learning_rate": 1.9980466046794044e-05, "loss": 1.9543, "step": 68400 },
-    { "epoch": 0.04771008957117254, "grad_norm": 5.115189552307129, "learning_rate": 1.998035169968983e-05, "loss": 1.9288, "step": 68600 },
-    { "epoch": 0.04784918604222552, "grad_norm": 8.184946060180664, "learning_rate": 1.998023701935163e-05, "loss": 1.9589, "step": 68800 },
-    { "epoch": 0.047988282513278496, "grad_norm": 4.300464630126953, "learning_rate": 1.998012200578493e-05, "loss": 1.9337, "step": 69000 },
-    { "epoch": 0.048127378984331476, "grad_norm": 4.387246131896973, "learning_rate": 1.998000665899521e-05, "loss": 1.9173, "step": 69200 },
-    { "epoch": 0.04826647545538446, "grad_norm": 5.051788330078125, "learning_rate": 1.9979890978987976e-05, "loss": 1.9104, "step": 69400 },
-    { "epoch": 0.04840557192643744, "grad_norm": 9.470113754272461, "learning_rate": 1.9979774965768762e-05, "loss": 1.9325, "step": 69600 },
-    { "epoch": 0.04854466839749042, "grad_norm": 4.497230529785156, "learning_rate": 1.99796586193431e-05, "loss": 1.9815, "step": 69800 },
-    { "epoch": 0.0486837648685434, "grad_norm": 4.782131671905518, "learning_rate": 1.997954193971655e-05, "loss": 1.902, "step": 70000 },
-    { "epoch": 0.04882286133959638, "grad_norm": 5.133215427398682, "learning_rate": 1.997942492689467e-05, "loss": 1.9728, "step": 70200 },
-    { "epoch": 0.04896195781064937, "grad_norm": 4.0771942138671875, "learning_rate": 1.9979307580883048e-05, "loss": 1.9499, "step": 70400 },
-    { "epoch": 0.04910105428170235, "grad_norm": 5.347265720367432, "learning_rate": 1.9979189901687298e-05, "loss": 1.943, "step": 70600 },
-    { "epoch": 0.04924015075275533, "grad_norm": 8.083806037902832, "learning_rate": 1.9979071889313028e-05, "loss": 1.9202, "step": 70800 },
-    { "epoch": 0.049379247223808306, "grad_norm": 5.8364386558532715, "learning_rate": 1.9978953543765876e-05, "loss": 1.9831, "step": 71000 },
-    { "epoch": 0.04951834369486129, "grad_norm": 4.9726152420043945, "learning_rate": 1.9978834865051492e-05, "loss": 1.9912, "step": 71200 },
-    { "epoch": 0.04965744016591427, "grad_norm": 3.8392043113708496, "learning_rate": 1.9978715853175532e-05, "loss": 1.9531, "step": 71400 },
-    { "epoch": 0.04979653663696725, "grad_norm": 8.026137351989746, "learning_rate": 1.997859650814369e-05, "loss": 1.8849, "step": 71600 },
-    { "epoch": 0.04993563310802023, "grad_norm": 5.833446979522705, "learning_rate": 1.997847682996165e-05, "loss": 2.0134, "step": 71800 },
-    { "epoch": 0.05007472957907321, "grad_norm": 3.1947782039642334, "learning_rate": 1.9978356818635146e-05, "loss": 1.9404, "step": 72000 },
-    { "epoch": 0.0502138260501262, "grad_norm": 3.642251968383789, "learning_rate": 1.997823647416989e-05, "loss": 1.914, "step": 72200 },
-    { "epoch": 0.05035292252117918, "grad_norm": 4.345871448516846, "learning_rate": 1.997811579657163e-05, "loss": 1.8788, "step": 72400 },
-    { "epoch": 0.05049201899223216, "grad_norm": 8.562602043151855, "learning_rate": 1.997799478584613e-05, "loss": 1.9885, "step": 72600 },
-    { "epoch": 0.05063111546328514, "grad_norm": 3.9181063175201416, "learning_rate": 1.997787344199917e-05, "loss": 1.9824, "step": 72800 },
-    { "epoch": 0.050770211934338116, "grad_norm": 5.295780181884766, "learning_rate": 1.997775176503653e-05, "loss": 1.9066, "step": 73000 },
-    { "epoch": 0.0509093084053911, "grad_norm": 5.285375118255615, "learning_rate": 1.9977629754964036e-05, "loss": 1.917, "step": 73200 },
-    { "epoch": 0.05104840487644408, "grad_norm": 5.803408622741699, "learning_rate": 1.99775074117875e-05, "loss": 1.9555, "step": 73400 },
-    { "epoch": 0.05118750134749706, "grad_norm": 4.541822910308838, "learning_rate": 1.9977384735512765e-05, "loss": 1.889, "step": 73600 },
-    { "epoch": 0.05132659781855004, "grad_norm": 5.164264678955078, "learning_rate": 1.9977261726145692e-05, "loss": 1.8705, "step": 73800 },
-    { "epoch": 0.05146569428960303, "grad_norm": 5.912387847900391, "learning_rate": 1.997713838369215e-05, "loss": 1.959, "step": 74000 },
-    { "epoch": 0.05160479076065601, "grad_norm": 4.968830585479736, "learning_rate": 1.9977014708158027e-05, "loss": 1.9991, "step": 74200 },
-    { "epoch": 0.05174388723170899, "grad_norm": 4.360265731811523, "learning_rate": 1.997689069954923e-05, "loss": 1.9133, "step": 74400 },
-    { "epoch": 0.05188298370276197, "grad_norm": 4.5406999588012695, "learning_rate": 1.997676635787167e-05, "loss": 1.9622, "step": 74600 },
-    { "epoch": 0.05202208017381495, "grad_norm": 6.6085615158081055, "learning_rate": 1.9976641683131293e-05, "loss": 1.9628, "step": 74800 },
-    { "epoch": 0.05216117664486793, "grad_norm": 3.78121018409729, "learning_rate": 1.9976516675334044e-05, "loss": 1.9271, "step": 75000 },
-    { "epoch": 0.05230027311592091, "grad_norm": 4.836414813995361, "learning_rate": 1.99763913344859e-05, "loss": 1.9079, "step": 75200 },
-    { "epoch": 0.05243936958697389, "grad_norm": 4.3670759201049805, "learning_rate": 1.997626566059284e-05, "loss": 1.9355, "step": 75400 },
-    { "epoch": 0.05257846605802687, "grad_norm": 4.553194999694824, "learning_rate": 1.9976139653660858e-05, "loss": 1.9888, "step": 75600 },
-    { "epoch": 0.05271756252907986, "grad_norm": 5.623137474060059, "learning_rate": 1.997601331369597e-05, "loss": 1.9318, "step": 75800 },
-    { "epoch": 0.05285665900013284, "grad_norm": 5.056840896606445, "learning_rate": 1.9975886640704223e-05, "loss": 1.9439, "step": 76000 },
-    { "epoch": 0.05299575547118582, "grad_norm": 5.085750579833984, "learning_rate": 1.9975759634691644e-05, "loss": 1.9081, "step": 76200 },
-    { "epoch": 0.0531348519422388, "grad_norm": 6.532809257507324, "learning_rate": 1.9975632295664304e-05, "loss": 1.9654, "step": 76400 },
-    { "epoch": 0.05327394841329178, "grad_norm": 5.280889987945557, "learning_rate": 1.9975504623628285e-05, "loss": 1.9278, "step": 76600 },
-    { "epoch": 0.053413044884344764, "grad_norm": 4.8631591796875, "learning_rate": 1.9975376618589682e-05, "loss": 1.9152, "step": 76800 },
-    { "epoch": 0.053552141355397744, "grad_norm": 4.5169901847839355, "learning_rate": 1.9975248280554598e-05, "loss": 1.9628, "step": 77000 },
-    { "epoch": 0.05369123782645072, "grad_norm": 7.0524373054504395, "learning_rate": 1.9975119609529164e-05, "loss": 1.9577, "step": 77200 },
-    { "epoch": 0.0538303342975037, "grad_norm": 3.7885243892669678, "learning_rate": 1.9974990605519528e-05, "loss": 1.953, "step": 77400 },
-    { "epoch": 0.05396943076855669, "grad_norm": 4.719120979309082, "learning_rate": 1.997486126853184e-05, "loss": 1.908, "step": 77600 },
-    { "epoch": 0.05410852723960967, "grad_norm": 3.3026559352874756, "learning_rate": 1.9974731598572283e-05, "loss": 1.9467, "step": 77800 },
-    { "epoch": 0.05424762371066265, "grad_norm": 3.9477977752685547, "learning_rate": 1.997460159564704e-05, "loss": 1.8798, "step": 78000 },
-    { "epoch": 0.05438672018171563, "grad_norm": 5.302116870880127, "learning_rate": 1.997447125976232e-05, "loss": 1.9372, "step": 78200 },
-    { "epoch": 0.05452581665276861, "grad_norm": 3.8946664333343506, "learning_rate": 1.9974340590924342e-05, "loss": 1.9257, "step": 78400 },
-    { "epoch": 0.054664913123821594, "grad_norm": 5.974159240722656, "learning_rate": 1.997420958913935e-05, "loss": 1.8942, "step": 78600 },
-    { "epoch": 0.054804009594874574, "grad_norm": 5.002580642700195, "learning_rate": 1.9974078254413595e-05, "loss": 1.9036, "step": 78800 },
-    { "epoch": 0.054943106065927554, "grad_norm": 5.091346263885498, "learning_rate": 1.9973946586753347e-05, "loss": 1.9415, "step": 79000 },
-    { "epoch": 0.05508220253698053, "grad_norm": 3.594421625137329, "learning_rate": 1.997381458616489e-05, "loss": 2.0063, "step": 79200 },
-    { "epoch": 0.05522129900803352, "grad_norm": 4.16220760345459, "learning_rate": 1.9973682252654528e-05, "loss": 1.9647, "step": 79400 },
-    { "epoch": 0.0553603954790865, "grad_norm": 4.97799015045166, "learning_rate": 1.9973549586228574e-05, "loss": 1.9203, "step": 79600 },
-    { "epoch": 0.05549949195013948, "grad_norm": 6.2803521156311035, "learning_rate": 1.9973416586893366e-05, "loss": 1.9479, "step": 79800 },
-    { "epoch": 0.05563858842119246, "grad_norm": 6.788886547088623, "learning_rate": 1.9973283254655252e-05, "loss": 1.8882, "step": 80000 },
-    { "epoch": 0.05577768489224544, "grad_norm": 4.553073406219482, "learning_rate": 1.99731495895206e-05, "loss": 1.9328, "step": 80200 },
-    { "epoch": 0.055916781363298425, "grad_norm": 4.717394828796387, "learning_rate": 1.9973015591495787e-05, "loss": 1.88, "step": 80400 },
-    { "epoch": 0.056055877834351404, "grad_norm": 5.210158824920654, "learning_rate": 1.9972881260587207e-05, "loss": 1.9725, "step": 80600 },
-    { "epoch": 0.056194974305404384, "grad_norm": 4.219607353210449, "learning_rate": 1.997274659680128e-05, "loss": 1.9263, "step": 80800 },
-    { "epoch": 0.056334070776457364, "grad_norm": 4.876551151275635, "learning_rate": 1.997261160014443e-05, "loss": 1.9166, "step": 81000 },
-    { "epoch": 0.05647316724751035, "grad_norm": 4.498875617980957, "learning_rate": 1.9972476270623103e-05, "loss": 1.9293, "step": 81200 },
-    { "epoch": 0.05661226371856333, "grad_norm": 4.708183288574219, "learning_rate": 1.9972340608243763e-05, "loss": 1.8991, "step": 81400 },
-    { "epoch": 0.05675136018961631, "grad_norm": 4.428593158721924, "learning_rate": 1.997220461301288e-05, "loss": 1.8605, "step": 81600 },
-    { "epoch": 0.05689045666066929, "grad_norm": 7.380921363830566, "learning_rate": 1.997206828493695e-05, "loss": 1.9595, "step": 81800 },
-    { "epoch": 0.05702955313172227, "grad_norm": 4.452649116516113, "learning_rate": 1.9971931624022477e-05, "loss": 1.9898, "step": 82000 },
-    { "epoch": 0.057168649602775255, "grad_norm": 5.155123233795166, "learning_rate": 1.997179463027599e-05, "loss": 1.9956, "step": 82200 },
-    { "epoch": 0.057307746073828235, "grad_norm": 4.984164237976074, "learning_rate": 1.997165730370403e-05, "loss": 1.9642, "step": 82400 },
-    { "epoch": 0.057446842544881214, "grad_norm": 4.363987445831299, "learning_rate": 1.9971519644313147e-05, "loss": 1.9301, "step": 82600 },
-    { "epoch": 0.057585939015934194, "grad_norm": 4.11600399017334, "learning_rate": 1.9971381652109915e-05, "loss": 1.9154, "step": 82800 },
-    { "epoch": 0.05772503548698718, "grad_norm": 4.238101005554199, "learning_rate": 1.9971243327100923e-05, "loss": 1.9414, "step": 83000 },
-    { "epoch": 0.05786413195804016, "grad_norm": 4.86619234085083, "learning_rate": 1.9971104669292777e-05, "loss": 1.9366, "step": 83200 },
-    { "epoch": 0.05800322842909314, "grad_norm": 4.146606922149658, "learning_rate": 1.997096567869209e-05, "loss": 1.9064, "step": 83400 },
-    { "epoch": 0.05814232490014612, "grad_norm": 6.376154899597168, "learning_rate": 1.99708263553055e-05, "loss": 1.951, "step": 83600 },
-    { "epoch": 0.0582814213711991, "grad_norm": 4.6015625, "learning_rate": 1.997068669913966e-05, "loss": 1.9448, "step": 83800 },
-    { "epoch": 0.058420517842252086, "grad_norm": 6.202853202819824, "learning_rate": 1.9970546710201236e-05, "loss": 1.9763, "step": 84000 },
-    { "epoch": 0.058559614313305065, "grad_norm": 4.391757965087891, "learning_rate": 1.9970406388496907e-05, "loss": 1.8496, "step": 84200 },
-    { "epoch": 0.058698710784358045, "grad_norm": 7.166893482208252, "learning_rate": 1.9970265734033377e-05, "loss": 1.9595, "step": 84400 },
-    { "epoch": 0.058837807255411025, "grad_norm": 5.879612922668457, "learning_rate": 1.997012474681736e-05, "loss": 1.9344, "step": 84600 },
-    { "epoch": 0.05897690372646401, "grad_norm": 4.643247127532959, "learning_rate": 1.9969983426855583e-05, "loss": 1.936, "step": 84800 },
-    { "epoch": 0.05911600019751699, "grad_norm": 3.9626338481903076, "learning_rate": 1.9969841774154797e-05, "loss": 1.951, "step": 85000 },
-    { "epoch": 0.05925509666856997, "grad_norm": 6.601691246032715, "learning_rate": 1.9969699788721763e-05, "loss": 1.9287, "step": 85200 },
-    { "epoch": 0.05939419313962295, "grad_norm": 4.563479423522949, "learning_rate": 1.9969557470563257e-05, "loss": 1.8866, "step": 85400 },
-    { "epoch": 0.05953328961067593, "grad_norm": 8.620516777038574, "learning_rate": 1.9969414819686076e-05, "loss": 1.914, "step": 85600 },
-    { "epoch": 0.059672386081728916, "grad_norm": 7.413761615753174, "learning_rate": 1.996927183609703e-05, "loss": 1.9449, "step": 85800 },
-    { "epoch": 0.059811482552781896, "grad_norm": 6.8260016441345215, "learning_rate": 1.9969128519802942e-05, "loss": 1.9363, "step": 86000 },
-    { "epoch": 0.059950579023834875, "grad_norm": 4.007237434387207, "learning_rate": 1.9968984870810654e-05, "loss": 1.9422, "step": 86200 },
-    { "epoch": 0.060089675494887855, "grad_norm": 3.5244007110595703, "learning_rate": 1.9968840889127022e-05, "loss": 1.9647, "step": 86400 },
-    { "epoch": 0.060228771965940835, "grad_norm": 5.591712474822998, "learning_rate": 1.996869657475893e-05, "loss": 1.9003, "step": 86600 },
-    { "epoch": 0.06036786843699382, "grad_norm": 6.790228366851807, "learning_rate": 1.9968551927713252e-05, "loss": 1.9424, "step": 86800 },
-    { "epoch": 0.0605069649080468, "grad_norm": 7.662969589233398, "learning_rate": 1.9968406947996906e-05, "loss": 1.9268, "step": 87000 },
-    { "epoch": 0.06064606137909978, "grad_norm": 7.937498092651367, "learning_rate": 1.9968261635616807e-05, "loss": 1.9033, "step": 87200 },
-    { "epoch": 0.06078515785015276, "grad_norm": 3.6962380409240723, "learning_rate": 1.9968115990579892e-05, "loss": 1.9427, "step": 87400 },
-    { "epoch": 0.06092425432120575, "grad_norm": 8.26564884185791, "learning_rate": 1.996797001289312e-05, "loss": 1.9067, "step": 87600 },
-    { "epoch": 0.061063350792258726, "grad_norm": 6.891269683837891, "learning_rate": 1.996782370256345e-05, "loss": 1.8875, "step": 87800 },
-    { "epoch": 0.061202447263311706, "grad_norm": 3.3418822288513184, "learning_rate": 1.996767705959787e-05, "loss": 2.0397, "step": 88000 },
-    { "epoch": 0.061341543734364685, "grad_norm": 4.370149612426758, "learning_rate": 1.9967530084003388e-05, "loss": 1.976, "step": 88200 },
-    { "epoch": 0.061480640205417665, "grad_norm": 6.564484596252441, "learning_rate": 1.9967382775787013e-05, "loss": 1.9072, "step": 88400 },
-    { "epoch": 0.06161973667647065, "grad_norm": 4.912674903869629, "learning_rate": 1.9967235134955777e-05, "loss": 1.9404, "step": 88600 },
-    { "epoch": 0.06175883314752363, "grad_norm": 7.586604118347168, "learning_rate": 1.996708716151673e-05, "loss": 1.9895, "step": 88800 },
-    { "epoch": 0.06189792961857661, "grad_norm": 4.773323059082031, "learning_rate": 1.996693885547694e-05, "loss": 1.9679, "step": 89000 },
-    { "epoch": 0.06203702608962959, "grad_norm": 3.4076714515686035, "learning_rate": 1.9966790216843476e-05, "loss": 1.9246, "step": 89200 },
-    { "epoch": 0.06217612256068258, "grad_norm": 4.651917934417725, "learning_rate": 1.996664124562345e-05, "loss": 1.9442, "step": 89400 },
-    { "epoch": 0.06231521903173556, "grad_norm": 5.476116180419922, "learning_rate": 1.9966491941823962e-05, "loss": 1.9255, "step": 89600 },
-    { "epoch": 0.062454315502788536, "grad_norm": 5.208308696746826, "learning_rate": 1.996634230545214e-05, "loss": 1.9196, "step": 89800 },
-    { "epoch": 0.06259341197384152, "grad_norm": 5.869366645812988, "learning_rate": 1.9966192336515128e-05, "loss": 1.9648, "step": 90000 },
-    { "epoch": 0.0627325084448945, "grad_norm": 4.808291912078857, "learning_rate": 1.9966042035020093e-05, "loss": 1.9309, "step": 90200 },
-    { "epoch": 0.06287160491594748, "grad_norm": 4.214413166046143, "learning_rate": 1.9965891400974203e-05, "loss": 1.9012, "step": 90400 },
-    { "epoch": 0.06301070138700046, "grad_norm": 5.237168312072754, "learning_rate": 1.996574043438465e-05, "loss": 1.8358, "step": 90600 },
-    { "epoch": 0.06314979785805344, "grad_norm": 4.680624961853027, "learning_rate": 1.996558913525864e-05, "loss": 1.8669, "step": 90800 },
-    { "epoch": 0.06328889432910642, "grad_norm": 7.541216850280762, "learning_rate": 1.9965437503603396e-05, "loss": 1.9261, "step": 91000 },
-    { "epoch": 0.0634279908001594, "grad_norm": 5.40226936340332, "learning_rate": 1.996528553942616e-05, "loss": 1.9315, "step": 91200 },
-    { "epoch": 0.06356708727121238, "grad_norm": 5.167973518371582, "learning_rate": 1.9965133242734188e-05, "loss": 1.9366, "step": 91400 },
-    { "epoch": 0.06370618374226537, "grad_norm": 4.3975510597229, "learning_rate": 1.9964980613534744e-05, "loss": 1.93, "step": 91600 },
-    { "epoch": 0.06384528021331835, "grad_norm": 4.667608737945557, "learning_rate": 1.9964827651835115e-05, "loss": 1.9256, "step": 91800 },
-    { "epoch": 0.06398437668437133, "grad_norm": 5.848495960235596, "learning_rate": 1.9964674357642614e-05, "loss": 1.9243, "step": 92000 },
-    { "epoch": 0.06412347315542431, "grad_norm": 5.80579948425293, "learning_rate": 1.9964520730964544e-05, "loss": 1.9286, "step": 92200 },
-    { "epoch": 0.06426256962647729, "grad_norm": 4.185863971710205, "learning_rate": 1.9964366771808244e-05, "loss": 1.8637, "step": 92400 },
-    { "epoch": 0.06440166609753027, "grad_norm": 7.295627117156982, "learning_rate": 1.9964212480181067e-05, "loss": 1.9646, "step": 92600 },
-    { "epoch": 0.06454076256858325, "grad_norm": 4.519957542419434, "learning_rate": 1.9964057856090382e-05, "loss": 1.8934, "step": 92800 },
-    { "epoch": 0.06467985903963623, "grad_norm": 4.814670085906982, "learning_rate": 1.9963902899543565e-05, "loss": 1.8971, "step": 93000 },
-    { "epoch": 0.06481895551068921, "grad_norm": 4.680553913116455, "learning_rate": 1.996374761054801e-05, "loss": 1.972, "step": 93200 },
-    { "epoch": 0.06495805198174219, "grad_norm": 5.095190525054932, "learning_rate": 1.996359198911114e-05, "loss": 1.9197, "step": 93400 },
-    { "epoch": 0.06509714845279518, "grad_norm": 6.054664134979248, "learning_rate": 1.9963436035240377e-05, "loss": 1.9458, "step": 93600 },
-    { "epoch": 0.06523624492384816, "grad_norm": 3.4787888526916504, "learning_rate": 1.9963279748943166e-05, "loss": 1.9688, "step": 93800 },
-    { "epoch": 0.06537534139490114, "grad_norm": 4.545236587524414, "learning_rate": 1.996312313022697e-05, "loss": 1.9123, "step": 94000 },
-    { "epoch": 0.06551443786595412, "grad_norm": 4.842303276062012, "learning_rate": 1.9962966179099264e-05, "loss": 1.9129, "step": 94200 },
-    { "epoch": 0.0656535343370071, "grad_norm": 5.7866363525390625, "learning_rate": 1.9962808895567545e-05, "loss": 1.9368, "step": 94400 },
-    { "epoch": 0.06579263080806008, "grad_norm": 5.4849772453308105, "learning_rate": 1.996265127963932e-05, "loss": 1.9761, "step": 94600 },
-    { "epoch": 0.06593172727911306, "grad_norm": 5.275039196014404, "learning_rate": 1.996249333132211e-05, "loss": 1.9699, "step": 94800 },
-    { "epoch": 0.06607082375016604, "grad_norm": 4.070720672607422, "learning_rate": 1.996233505062346e-05, "loss": 1.933, "step": 95000 },
-    { "epoch": 0.06620992022121902, "grad_norm": 6.514541149139404, "learning_rate": 1.9962176437550923e-05, "loss": 1.9931, "step": 95200 },
-    { "epoch": 0.06634901669227201, "grad_norm": 4.82623291015625, "learning_rate": 1.9962017492112068e-05, "loss": 1.9025, "step": 95400 },
-    { "epoch": 0.066488113163325, "grad_norm": 5.900183200836182, "learning_rate": 1.996185821431449e-05, "loss": 1.843, "step": 95600 },
-    { "epoch": 0.06662720963437797, "grad_norm": 6.181861877441406, "learning_rate": 1.9961698604165786e-05, "loss": 1.9491, "step": 95800 },
-    { "epoch": 0.06676630610543095, "grad_norm": 3.4571354389190674, "learning_rate": 1.996153866167358e-05, "loss": 1.9481, "step": 96000 },
-    { "epoch": 0.06690540257648393, "grad_norm": 5.187012195587158, "learning_rate": 1.996137838684551e-05, "loss": 1.9242, "step": 96200 },
-    { "epoch": 0.06704449904753691, "grad_norm": 3.7097227573394775, "learning_rate": 1.9961217779689218e-05, "loss": 1.9269, "step": 96400 },
-    { "epoch": 0.06718359551858989, "grad_norm": 4.761998176574707, "learning_rate": 1.9961056840212385e-05, "loss": 1.8955, "step": 96600 },
-    { "epoch": 0.06732269198964287, "grad_norm": 4.146270751953125, "learning_rate": 1.9960895568422676e-05, "loss": 1.937, "step": 96800 },
-    { "epoch": 0.06746178846069585, "grad_norm": 5.371631145477295, "learning_rate": 1.9960733964327807e-05, "loss": 1.9091, "step": 97000 },
-    { "epoch": 0.06760088493174884, "grad_norm": 4.294393539428711, "learning_rate": 1.9960572027935482e-05, "loss": 1.9376, "step": 97200 },
-    { "epoch": 0.06773998140280182, "grad_norm": 5.409416675567627, "learning_rate": 1.9960409759253436e-05, "loss": 1.982, "step": 97400 },
-    { "epoch": 0.0678790778738548, "grad_norm": 4.644226551055908, "learning_rate": 1.996024715828942e-05, "loss": 1.9178, "step": 97600 },
-    { "epoch": 0.06801817434490778, "grad_norm": 4.132181644439697, "learning_rate": 1.9960084225051184e-05, "loss": 1.9435, "step": 97800 },
-    { "epoch": 0.06815727081596076, "grad_norm": 4.070497512817383, "learning_rate": 1.9959920959546515e-05, "loss": 1.8981, "step": 98000 },
-    { "epoch": 0.06829636728701374, "grad_norm": 5.847506046295166, "learning_rate": 1.9959757361783212e-05, "loss": 1.9199, "step": 98200 },
-    { "epoch": 0.06843546375806672, "grad_norm": 4.320737838745117, "learning_rate": 1.995959343176907e-05, "loss": 1.8637, "step": 98400 },
-    { "epoch": 0.0685745602291197, "grad_norm": 5.623171329498291, "learning_rate": 1.9959429169511926e-05, "loss": 1.9098, "step": 98600
     }
   ],
   "logging_steps": 200,
@@ -3477,7 +2399,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.
+  "total_flos": 1.2273570791604634e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null
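Only the log_history slice up to global_step 67800 survives in the new trainer_state.json. A minimal sketch, assuming the checkpoint has been downloaded locally, for inspecting what remains (the keys match those visible in the diff above):

import json

# Load the pruned trainer state and print the last few retained log records.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(state["epoch"], state["global_step"])  # 0.04715370368696061, 67800
for record in state["log_history"][-3:]:
    print(record["step"], record["loss"], record["learning_rate"])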
last-checkpoint/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:de8f5d7c6388c07217b7475e4a370096f3af94e2a444c9da31d132205474b950
 size 6840
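Unlike the other binaries, training_args.bin is a pickled transformers.TrainingArguments object rather than a tensor file. A hedged sketch for reading it back:

import torch

# TrainingArguments is a full Python object, so recent PyTorch versions
# (2.6+, where weights_only defaults to True) need weights_only=False.
args = torch.load("last-checkpoint/training_args.bin", weights_only=False)
print(args.per_device_train_batch_size, args.logging_steps)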