{
  "best_metric": 1.403451919555664,
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
  "epoch": 0.02408405334617816,
  "eval_steps": 50,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00012042026673089081,
      "grad_norm": 51.93952178955078,
      "learning_rate": 1e-05,
      "loss": 8.9258,
      "step": 1
    },
    {
      "epoch": 0.00012042026673089081,
      "eval_loss": 2.869473695755005,
      "eval_runtime": 1613.5833,
      "eval_samples_per_second": 8.668,
      "eval_steps_per_second": 2.167,
      "step": 1
    },
    {
      "epoch": 0.00024084053346178162,
      "grad_norm": 60.6890869140625,
      "learning_rate": 2e-05,
      "loss": 9.6361,
      "step": 2
    },
    {
      "epoch": 0.00036126080019267245,
      "grad_norm": 49.853965759277344,
      "learning_rate": 3e-05,
      "loss": 9.3639,
      "step": 3
    },
    {
      "epoch": 0.00048168106692356323,
      "grad_norm": 44.821109771728516,
      "learning_rate": 4e-05,
      "loss": 9.3872,
      "step": 4
    },
    {
      "epoch": 0.0006021013336544541,
      "grad_norm": 52.650264739990234,
      "learning_rate": 5e-05,
      "loss": 8.6714,
      "step": 5
    },
    {
      "epoch": 0.0007225216003853449,
      "grad_norm": 48.42646408081055,
      "learning_rate": 6e-05,
      "loss": 8.5036,
      "step": 6
    },
    {
      "epoch": 0.0008429418671162356,
      "grad_norm": 41.25947952270508,
      "learning_rate": 7e-05,
      "loss": 7.4928,
      "step": 7
    },
    {
      "epoch": 0.0009633621338471265,
      "grad_norm": 35.24884033203125,
      "learning_rate": 8e-05,
      "loss": 7.9738,
      "step": 8
    },
    {
      "epoch": 0.0010837824005780173,
      "grad_norm": 56.005191802978516,
      "learning_rate": 9e-05,
      "loss": 7.6931,
      "step": 9
    },
    {
      "epoch": 0.0012042026673089081,
      "grad_norm": 55.710784912109375,
      "learning_rate": 0.0001,
      "loss": 8.2487,
      "step": 10
    },
    {
      "epoch": 0.001324622934039799,
      "grad_norm": 30.49041748046875,
      "learning_rate": 9.999316524962345e-05,
      "loss": 7.5405,
      "step": 11
    },
    {
      "epoch": 0.0014450432007706898,
      "grad_norm": 27.38810157775879,
      "learning_rate": 9.997266286704631e-05,
      "loss": 7.4251,
      "step": 12
    },
    {
      "epoch": 0.0015654634675015804,
      "grad_norm": 33.649085998535156,
      "learning_rate": 9.993849845741524e-05,
      "loss": 7.4058,
      "step": 13
    },
    {
      "epoch": 0.0016858837342324713,
      "grad_norm": 25.100475311279297,
      "learning_rate": 9.989068136093873e-05,
      "loss": 7.6971,
      "step": 14
    },
    {
      "epoch": 0.001806304000963362,
      "grad_norm": 29.93914031982422,
      "learning_rate": 9.98292246503335e-05,
      "loss": 5.9399,
      "step": 15
    },
    {
      "epoch": 0.001926724267694253,
      "grad_norm": 46.186336517333984,
      "learning_rate": 9.975414512725057e-05,
      "loss": 6.8558,
      "step": 16
    },
    {
      "epoch": 0.0020471445344251438,
      "grad_norm": 25.15791130065918,
      "learning_rate": 9.966546331768191e-05,
      "loss": 6.7501,
      "step": 17
    },
    {
      "epoch": 0.0021675648011560346,
      "grad_norm": 31.10689353942871,
      "learning_rate": 9.956320346634876e-05,
      "loss": 7.1959,
      "step": 18
    },
    {
      "epoch": 0.0022879850678869254,
      "grad_norm": 28.72784423828125,
      "learning_rate": 9.944739353007344e-05,
      "loss": 6.7444,
      "step": 19
    },
    {
      "epoch": 0.0024084053346178163,
      "grad_norm": 30.710111618041992,
      "learning_rate": 9.931806517013612e-05,
      "loss": 7.9429,
      "step": 20
    },
    {
      "epoch": 0.002528825601348707,
      "grad_norm": 25.784887313842773,
      "learning_rate": 9.917525374361912e-05,
      "loss": 7.3364,
      "step": 21
    },
    {
      "epoch": 0.002649245868079598,
      "grad_norm": 28.78515625,
      "learning_rate": 9.901899829374047e-05,
      "loss": 6.82,
      "step": 22
    },
    {
      "epoch": 0.0027696661348104888,
      "grad_norm": 39.985713958740234,
      "learning_rate": 9.884934153917997e-05,
      "loss": 7.9684,
      "step": 23
    },
    {
      "epoch": 0.0028900864015413796,
      "grad_norm": 28.817001342773438,
      "learning_rate": 9.86663298624003e-05,
      "loss": 7.268,
      "step": 24
    },
    {
      "epoch": 0.00301050666827227,
      "grad_norm": 29.924957275390625,
      "learning_rate": 9.847001329696653e-05,
      "loss": 7.3812,
      "step": 25
    },
    {
      "epoch": 0.003130926935003161,
      "grad_norm": 25.788951873779297,
      "learning_rate": 9.826044551386744e-05,
      "loss": 7.6471,
      "step": 26
    },
    {
      "epoch": 0.0032513472017340517,
      "grad_norm": 41.178375244140625,
      "learning_rate": 9.803768380684242e-05,
      "loss": 7.2271,
      "step": 27
    },
    {
      "epoch": 0.0033717674684649425,
      "grad_norm": 44.785343170166016,
      "learning_rate": 9.780178907671789e-05,
      "loss": 8.1423,
      "step": 28
    },
    {
      "epoch": 0.0034921877351958334,
      "grad_norm": 61.3057861328125,
      "learning_rate": 9.755282581475769e-05,
      "loss": 7.6013,
      "step": 29
    },
    {
      "epoch": 0.003612608001926724,
      "grad_norm": 38.59844970703125,
      "learning_rate": 9.729086208503174e-05,
      "loss": 7.5294,
      "step": 30
    },
    {
      "epoch": 0.003733028268657615,
      "grad_norm": 39.695777893066406,
      "learning_rate": 9.701596950580806e-05,
      "loss": 6.7086,
      "step": 31
    },
    {
      "epoch": 0.003853448535388506,
      "grad_norm": 21.75993537902832,
      "learning_rate": 9.672822322997305e-05,
      "loss": 6.1454,
      "step": 32
    },
    {
      "epoch": 0.003973868802119396,
      "grad_norm": 23.870563507080078,
      "learning_rate": 9.642770192448536e-05,
      "loss": 5.9876,
      "step": 33
    },
    {
      "epoch": 0.0040942890688502875,
      "grad_norm": 20.87793731689453,
      "learning_rate": 9.611448774886924e-05,
      "loss": 5.3824,
      "step": 34
    },
    {
      "epoch": 0.004214709335581178,
      "grad_norm": 18.55045509338379,
      "learning_rate": 9.578866633275288e-05,
      "loss": 4.9466,
      "step": 35
    },
    {
      "epoch": 0.004335129602312069,
      "grad_norm": 19.357398986816406,
      "learning_rate": 9.545032675245813e-05,
      "loss": 5.4976,
      "step": 36
    },
    {
      "epoch": 0.00445554986904296,
      "grad_norm": 19.688756942749023,
      "learning_rate": 9.509956150664796e-05,
      "loss": 5.8604,
      "step": 37
    },
    {
      "epoch": 0.004575970135773851,
      "grad_norm": 27.641071319580078,
      "learning_rate": 9.473646649103818e-05,
      "loss": 5.2527,
      "step": 38
    },
    {
      "epoch": 0.004696390402504741,
      "grad_norm": 25.47074317932129,
      "learning_rate": 9.43611409721806e-05,
      "loss": 5.385,
      "step": 39
    },
    {
      "epoch": 0.0048168106692356326,
      "grad_norm": 20.537351608276367,
      "learning_rate": 9.397368756032445e-05,
      "loss": 5.119,
      "step": 40
    },
    {
      "epoch": 0.004937230935966523,
      "grad_norm": 21.654861450195312,
      "learning_rate": 9.357421218136386e-05,
      "loss": 4.6382,
      "step": 41
    },
    {
      "epoch": 0.005057651202697414,
      "grad_norm": 22.191814422607422,
      "learning_rate": 9.316282404787871e-05,
      "loss": 5.1202,
      "step": 42
    },
    {
      "epoch": 0.005178071469428305,
      "grad_norm": 19.029094696044922,
      "learning_rate": 9.273963562927695e-05,
      "loss": 5.2586,
      "step": 43
    },
    {
      "epoch": 0.005298491736159196,
      "grad_norm": 18.765365600585938,
      "learning_rate": 9.230476262104677e-05,
      "loss": 4.8687,
      "step": 44
    },
    {
      "epoch": 0.005418912002890086,
      "grad_norm": 17.537826538085938,
      "learning_rate": 9.185832391312644e-05,
      "loss": 4.6707,
      "step": 45
    },
    {
      "epoch": 0.0055393322696209776,
      "grad_norm": 24.50990867614746,
      "learning_rate": 9.140044155740101e-05,
      "loss": 5.1278,
      "step": 46
    },
    {
      "epoch": 0.005659752536351868,
      "grad_norm": 25.02142906188965,
      "learning_rate": 9.093124073433463e-05,
      "loss": 5.2466,
      "step": 47
    },
    {
      "epoch": 0.005780172803082759,
      "grad_norm": 22.66439437866211,
      "learning_rate": 9.045084971874738e-05,
      "loss": 5.0619,
      "step": 48
    },
    {
      "epoch": 0.00590059306981365,
      "grad_norm": 22.0762939453125,
      "learning_rate": 8.995939984474624e-05,
      "loss": 5.0657,
      "step": 49
    },
    {
      "epoch": 0.00602101333654454,
      "grad_norm": 25.61806297302246,
      "learning_rate": 8.945702546981969e-05,
      "loss": 4.1101,
      "step": 50
    },
    {
      "epoch": 0.00602101333654454,
      "eval_loss": 1.7742722034454346,
      "eval_runtime": 1624.1807,
      "eval_samples_per_second": 8.612,
      "eval_steps_per_second": 2.153,
      "step": 50
    },
    {
      "epoch": 0.006141433603275431,
      "grad_norm": 47.616703033447266,
      "learning_rate": 8.894386393810563e-05,
      "loss": 6.9972,
      "step": 51
    },
    {
      "epoch": 0.006261853870006322,
      "grad_norm": 27.753664016723633,
      "learning_rate": 8.842005554284296e-05,
      "loss": 6.9823,
      "step": 52
    },
    {
      "epoch": 0.006382274136737213,
      "grad_norm": 25.53302574157715,
      "learning_rate": 8.788574348801675e-05,
      "loss": 6.5077,
      "step": 53
    },
    {
      "epoch": 0.006502694403468103,
      "grad_norm": 29.319658279418945,
      "learning_rate": 8.73410738492077e-05,
      "loss": 6.7296,
      "step": 54
    },
    {
      "epoch": 0.006623114670198995,
      "grad_norm": 25.24061393737793,
      "learning_rate": 8.678619553365659e-05,
      "loss": 5.6474,
      "step": 55
    },
    {
      "epoch": 0.006743534936929885,
      "grad_norm": 21.104772567749023,
      "learning_rate": 8.622126023955446e-05,
      "loss": 5.9952,
      "step": 56
    },
    {
      "epoch": 0.006863955203660776,
      "grad_norm": 20.799846649169922,
      "learning_rate": 8.564642241456986e-05,
      "loss": 6.0229,
      "step": 57
    },
    {
      "epoch": 0.006984375470391667,
      "grad_norm": 15.64926528930664,
      "learning_rate": 8.506183921362443e-05,
      "loss": 5.9788,
      "step": 58
    },
    {
      "epoch": 0.007104795737122558,
      "grad_norm": 19.641931533813477,
      "learning_rate": 8.44676704559283e-05,
      "loss": 6.0515,
      "step": 59
    },
    {
      "epoch": 0.007225216003853448,
      "grad_norm": 23.41942024230957,
      "learning_rate": 8.386407858128706e-05,
      "loss": 6.7682,
      "step": 60
    },
    {
      "epoch": 0.00734563627058434,
      "grad_norm": 17.75417709350586,
      "learning_rate": 8.32512286056924e-05,
      "loss": 6.5946,
      "step": 61
    },
    {
      "epoch": 0.00746605653731523,
      "grad_norm": 20.17172622680664,
      "learning_rate": 8.262928807620843e-05,
      "loss": 6.3011,
      "step": 62
    },
    {
      "epoch": 0.007586476804046121,
      "grad_norm": 18.58847999572754,
      "learning_rate": 8.199842702516583e-05,
      "loss": 6.2966,
      "step": 63
    },
    {
      "epoch": 0.007706897070777012,
      "grad_norm": 17.919443130493164,
      "learning_rate": 8.135881792367686e-05,
      "loss": 5.8059,
      "step": 64
    },
    {
      "epoch": 0.007827317337507902,
      "grad_norm": 19.31273651123047,
      "learning_rate": 8.07106356344834e-05,
      "loss": 6.4417,
      "step": 65
    },
    {
      "epoch": 0.007947737604238793,
      "grad_norm": 25.046363830566406,
      "learning_rate": 8.005405736415126e-05,
      "loss": 6.5747,
      "step": 66
    },
    {
      "epoch": 0.008068157870969685,
      "grad_norm": 23.319303512573242,
      "learning_rate": 7.938926261462366e-05,
      "loss": 6.6744,
      "step": 67
    },
    {
      "epoch": 0.008188578137700575,
      "grad_norm": 22.34912872314453,
      "learning_rate": 7.871643313414718e-05,
      "loss": 6.3949,
      "step": 68
    },
    {
      "epoch": 0.008308998404431465,
      "grad_norm": 39.79279327392578,
      "learning_rate": 7.803575286758364e-05,
      "loss": 6.6698,
      "step": 69
    },
    {
      "epoch": 0.008429418671162356,
      "grad_norm": 20.666879653930664,
      "learning_rate": 7.734740790612136e-05,
      "loss": 6.3727,
      "step": 70
    },
    {
      "epoch": 0.008549838937893248,
      "grad_norm": 22.23097038269043,
      "learning_rate": 7.66515864363997e-05,
      "loss": 6.7035,
      "step": 71
    },
    {
      "epoch": 0.008670259204624138,
      "grad_norm": 20.92945098876953,
      "learning_rate": 7.594847868906076e-05,
      "loss": 6.7143,
      "step": 72
    },
    {
      "epoch": 0.008790679471355029,
      "grad_norm": 22.52288055419922,
      "learning_rate": 7.52382768867422e-05,
      "loss": 7.2588,
      "step": 73
    },
    {
      "epoch": 0.00891109973808592,
      "grad_norm": 26.24117660522461,
      "learning_rate": 7.452117519152542e-05,
      "loss": 7.2383,
      "step": 74
    },
    {
      "epoch": 0.009031520004816811,
      "grad_norm": 23.615114212036133,
      "learning_rate": 7.379736965185368e-05,
      "loss": 7.0135,
      "step": 75
    },
    {
      "epoch": 0.009151940271547702,
      "grad_norm": 31.925701141357422,
      "learning_rate": 7.30670581489344e-05,
      "loss": 7.4073,
      "step": 76
    },
    {
      "epoch": 0.009272360538278592,
      "grad_norm": 27.146015167236328,
      "learning_rate": 7.233044034264034e-05,
      "loss": 6.8409,
      "step": 77
    },
    {
      "epoch": 0.009392780805009483,
      "grad_norm": 27.483409881591797,
      "learning_rate": 7.158771761692464e-05,
      "loss": 8.3212,
      "step": 78
    },
    {
      "epoch": 0.009513201071740375,
      "grad_norm": 28.326284408569336,
      "learning_rate": 7.083909302476453e-05,
      "loss": 7.1594,
      "step": 79
    },
    {
      "epoch": 0.009633621338471265,
      "grad_norm": 31.824556350708008,
      "learning_rate": 7.008477123264848e-05,
      "loss": 5.7829,
      "step": 80
    },
    {
      "epoch": 0.009754041605202156,
      "grad_norm": 19.395477294921875,
      "learning_rate": 6.932495846462261e-05,
      "loss": 6.1882,
      "step": 81
    },
    {
      "epoch": 0.009874461871933046,
      "grad_norm": 21.194475173950195,
      "learning_rate": 6.855986244591104e-05,
      "loss": 5.6866,
      "step": 82
    },
    {
      "epoch": 0.009994882138663936,
      "grad_norm": 29.51766014099121,
      "learning_rate": 6.778969234612584e-05,
      "loss": 6.1513,
      "step": 83
    },
    {
      "epoch": 0.010115302405394828,
      "grad_norm": 16.740205764770508,
      "learning_rate": 6.701465872208216e-05,
      "loss": 4.9738,
      "step": 84
    },
    {
      "epoch": 0.010235722672125719,
      "grad_norm": 14.368413925170898,
      "learning_rate": 6.623497346023418e-05,
      "loss": 4.8067,
      "step": 85
    },
    {
      "epoch": 0.01035614293885661,
      "grad_norm": 14.684015274047852,
      "learning_rate": 6.545084971874738e-05,
      "loss": 4.3561,
      "step": 86
    },
    {
      "epoch": 0.0104765632055875,
      "grad_norm": 15.932342529296875,
      "learning_rate": 6.466250186922325e-05,
      "loss": 4.7409,
      "step": 87
    },
    {
      "epoch": 0.010596983472318392,
      "grad_norm": 16.245309829711914,
      "learning_rate": 6.387014543809223e-05,
      "loss": 4.7687,
      "step": 88
    },
    {
      "epoch": 0.010717403739049282,
      "grad_norm": 16.515871047973633,
      "learning_rate": 6.307399704769099e-05,
      "loss": 4.7443,
      "step": 89
    },
    {
      "epoch": 0.010837824005780173,
      "grad_norm": 41.284793853759766,
      "learning_rate": 6.227427435703997e-05,
      "loss": 4.6509,
      "step": 90
    },
    {
      "epoch": 0.010958244272511063,
      "grad_norm": 19.552005767822266,
      "learning_rate": 6.147119600233758e-05,
      "loss": 4.4044,
      "step": 91
    },
    {
      "epoch": 0.011078664539241955,
      "grad_norm": 18.036300659179688,
      "learning_rate": 6.066498153718735e-05,
      "loss": 5.0476,
      "step": 92
    },
    {
      "epoch": 0.011199084805972846,
      "grad_norm": 17.481563568115234,
      "learning_rate": 5.985585137257401e-05,
      "loss": 4.7952,
      "step": 93
    },
    {
      "epoch": 0.011319505072703736,
      "grad_norm": 14.768298149108887,
      "learning_rate": 5.90440267166055e-05,
      "loss": 4.1727,
      "step": 94
    },
    {
      "epoch": 0.011439925339434626,
      "grad_norm": 18.626522064208984,
      "learning_rate": 5.8229729514036705e-05,
      "loss": 5.0798,
      "step": 95
    },
    {
      "epoch": 0.011560345606165518,
      "grad_norm": 16.772817611694336,
      "learning_rate": 5.74131823855921e-05,
      "loss": 4.5098,
      "step": 96
    },
    {
      "epoch": 0.011680765872896409,
      "grad_norm": 17.050249099731445,
      "learning_rate": 5.6594608567103456e-05,
      "loss": 4.3496,
      "step": 97
    },
    {
      "epoch": 0.0118011861396273,
      "grad_norm": 19.16489028930664,
      "learning_rate": 5.577423184847932e-05,
      "loss": 4.4216,
      "step": 98
    },
    {
      "epoch": 0.01192160640635819,
      "grad_norm": 20.456478118896484,
      "learning_rate": 5.495227651252315e-05,
      "loss": 4.1681,
      "step": 99
    },
    {
      "epoch": 0.01204202667308908,
      "grad_norm": 17.859432220458984,
      "learning_rate": 5.4128967273616625e-05,
      "loss": 4.5742,
      "step": 100
    },
    {
      "epoch": 0.01204202667308908,
      "eval_loss": 1.48580801486969,
      "eval_runtime": 1625.6909,
      "eval_samples_per_second": 8.604,
      "eval_steps_per_second": 2.151,
      "step": 100
    },
    {
      "epoch": 0.012162446939819972,
      "grad_norm": 16.177490234375,
      "learning_rate": 5.330452921628497e-05,
      "loss": 5.4763,
      "step": 101
    },
    {
      "epoch": 0.012282867206550863,
      "grad_norm": 16.521148681640625,
      "learning_rate": 5.247918773366112e-05,
      "loss": 5.9816,
      "step": 102
    },
    {
      "epoch": 0.012403287473281753,
      "grad_norm": 14.81554126739502,
      "learning_rate": 5.165316846586541e-05,
      "loss": 5.6084,
      "step": 103
    },
    {
      "epoch": 0.012523707740012643,
      "grad_norm": 13.736647605895996,
      "learning_rate": 5.0826697238317935e-05,
      "loss": 5.2882,
      "step": 104
    },
    {
      "epoch": 0.012644128006743536,
      "grad_norm": 18.304468154907227,
      "learning_rate": 5e-05,
      "loss": 5.8812,
      "step": 105
    },
    {
      "epoch": 0.012764548273474426,
      "grad_norm": 18.765226364135742,
      "learning_rate": 4.917330276168208e-05,
      "loss": 5.8377,
      "step": 106
    },
    {
      "epoch": 0.012884968540205316,
      "grad_norm": 15.608325958251953,
      "learning_rate": 4.834683153413459e-05,
      "loss": 5.9864,
      "step": 107
    },
    {
      "epoch": 0.013005388806936207,
      "grad_norm": 14.807425498962402,
      "learning_rate": 4.7520812266338885e-05,
      "loss": 5.8354,
      "step": 108
    },
    {
      "epoch": 0.013125809073667099,
      "grad_norm": 17.752962112426758,
      "learning_rate": 4.669547078371504e-05,
      "loss": 6.3213,
      "step": 109
    },
    {
      "epoch": 0.01324622934039799,
      "grad_norm": 16.0859432220459,
      "learning_rate": 4.5871032726383386e-05,
      "loss": 6.3305,
      "step": 110
    },
    {
      "epoch": 0.01336664960712888,
      "grad_norm": 17.609752655029297,
      "learning_rate": 4.504772348747687e-05,
      "loss": 6.2805,
      "step": 111
    },
    {
      "epoch": 0.01348706987385977,
      "grad_norm": 18.305526733398438,
      "learning_rate": 4.4225768151520694e-05,
      "loss": 6.1058,
      "step": 112
    },
    {
      "epoch": 0.013607490140590662,
      "grad_norm": 15.66598129272461,
      "learning_rate": 4.3405391432896555e-05,
      "loss": 6.2276,
      "step": 113
    },
    {
      "epoch": 0.013727910407321553,
      "grad_norm": 18.535301208496094,
      "learning_rate": 4.2586817614407895e-05,
      "loss": 6.6646,
      "step": 114
    },
    {
      "epoch": 0.013848330674052443,
      "grad_norm": 20.5335693359375,
      "learning_rate": 4.17702704859633e-05,
      "loss": 6.6563,
      "step": 115
    },
    {
      "epoch": 0.013968750940783333,
      "grad_norm": 18.792814254760742,
      "learning_rate": 4.095597328339452e-05,
      "loss": 6.7214,
      "step": 116
    },
    {
      "epoch": 0.014089171207514224,
      "grad_norm": 18.318065643310547,
      "learning_rate": 4.0144148627425993e-05,
      "loss": 6.6231,
      "step": 117
    },
    {
      "epoch": 0.014209591474245116,
      "grad_norm": 15.907029151916504,
      "learning_rate": 3.933501846281267e-05,
      "loss": 6.3682,
      "step": 118
    },
    {
      "epoch": 0.014330011740976006,
      "grad_norm": 17.899784088134766,
      "learning_rate": 3.852880399766243e-05,
      "loss": 6.7449,
      "step": 119
    },
    {
      "epoch": 0.014450432007706897,
      "grad_norm": 20.180458068847656,
      "learning_rate": 3.772572564296005e-05,
      "loss": 7.5801,
      "step": 120
    },
    {
      "epoch": 0.014570852274437787,
      "grad_norm": 30.31849479675293,
      "learning_rate": 3.6926002952309016e-05,
      "loss": 7.2028,
      "step": 121
    },
    {
      "epoch": 0.01469127254116868,
      "grad_norm": 21.378841400146484,
      "learning_rate": 3.612985456190778e-05,
      "loss": 7.0255,
      "step": 122
    },
    {
      "epoch": 0.01481169280789957,
      "grad_norm": 18.160831451416016,
      "learning_rate": 3.533749813077677e-05,
      "loss": 7.235,
      "step": 123
    },
    {
      "epoch": 0.01493211307463046,
      "grad_norm": 19.299564361572266,
      "learning_rate": 3.4549150281252636e-05,
      "loss": 6.5939,
      "step": 124
    },
    {
      "epoch": 0.01505253334136135,
      "grad_norm": 22.814464569091797,
      "learning_rate": 3.3765026539765834e-05,
      "loss": 7.6817,
      "step": 125
    },
    {
      "epoch": 0.015172953608092243,
      "grad_norm": 19.72553253173828,
      "learning_rate": 3.298534127791785e-05,
      "loss": 7.2951,
      "step": 126
    },
    {
      "epoch": 0.015293373874823133,
      "grad_norm": 27.33616828918457,
      "learning_rate": 3.221030765387417e-05,
      "loss": 8.9711,
      "step": 127
    },
    {
      "epoch": 0.015413794141554023,
      "grad_norm": 17.395479202270508,
      "learning_rate": 3.144013755408895e-05,
      "loss": 5.0004,
      "step": 128
    },
    {
      "epoch": 0.015534214408284914,
      "grad_norm": 23.039718627929688,
      "learning_rate": 3.0675041535377405e-05,
      "loss": 5.8222,
      "step": 129
    },
    {
      "epoch": 0.015654634675015804,
      "grad_norm": 18.903642654418945,
      "learning_rate": 2.991522876735154e-05,
      "loss": 5.7666,
      "step": 130
    },
    {
      "epoch": 0.015775054941746695,
      "grad_norm": 17.474811553955078,
      "learning_rate": 2.916090697523549e-05,
      "loss": 5.5946,
      "step": 131
    },
    {
      "epoch": 0.015895475208477585,
      "grad_norm": 15.69929027557373,
      "learning_rate": 2.8412282383075363e-05,
      "loss": 5.485,
      "step": 132
    },
    {
      "epoch": 0.01601589547520848,
      "grad_norm": 14.750075340270996,
      "learning_rate": 2.766955965735968e-05,
      "loss": 5.3102,
      "step": 133
    },
    {
      "epoch": 0.01613631574193937,
      "grad_norm": 14.924285888671875,
      "learning_rate": 2.693294185106562e-05,
      "loss": 5.0332,
      "step": 134
    },
    {
      "epoch": 0.01625673600867026,
      "grad_norm": 18.159025192260742,
      "learning_rate": 2.6202630348146324e-05,
      "loss": 5.2681,
      "step": 135
    },
    {
      "epoch": 0.01637715627540115,
      "grad_norm": 17.145938873291016,
      "learning_rate": 2.547882480847461e-05,
      "loss": 4.6567,
      "step": 136
    },
    {
      "epoch": 0.01649757654213204,
      "grad_norm": 18.082134246826172,
      "learning_rate": 2.476172311325783e-05,
      "loss": 4.8821,
      "step": 137
    },
    {
      "epoch": 0.01661799680886293,
      "grad_norm": 16.63163185119629,
      "learning_rate": 2.405152131093926e-05,
      "loss": 5.1819,
      "step": 138
    },
    {
      "epoch": 0.01673841707559382,
      "grad_norm": 13.199299812316895,
      "learning_rate": 2.3348413563600325e-05,
      "loss": 4.443,
      "step": 139
    },
    {
      "epoch": 0.016858837342324712,
      "grad_norm": 12.945577621459961,
      "learning_rate": 2.2652592093878666e-05,
      "loss": 4.4436,
      "step": 140
    },
    {
      "epoch": 0.016979257609055606,
      "grad_norm": 14.055652618408203,
      "learning_rate": 2.196424713241637e-05,
      "loss": 4.9036,
      "step": 141
    },
    {
      "epoch": 0.017099677875786496,
      "grad_norm": 14.148738861083984,
      "learning_rate": 2.128356686585282e-05,
      "loss": 4.2584,
      "step": 142
    },
    {
      "epoch": 0.017220098142517386,
      "grad_norm": 15.948631286621094,
      "learning_rate": 2.061073738537635e-05,
      "loss": 4.9077,
      "step": 143
    },
    {
      "epoch": 0.017340518409248277,
      "grad_norm": 14.8971586227417,
      "learning_rate": 1.9945942635848748e-05,
      "loss": 5.1003,
      "step": 144
    },
    {
      "epoch": 0.017460938675979167,
      "grad_norm": 17.40970802307129,
      "learning_rate": 1.928936436551661e-05,
      "loss": 4.8937,
      "step": 145
    },
    {
      "epoch": 0.017581358942710058,
      "grad_norm": 14.42086410522461,
      "learning_rate": 1.8641182076323148e-05,
      "loss": 4.09,
      "step": 146
    },
    {
      "epoch": 0.017701779209440948,
      "grad_norm": 13.88598346710205,
      "learning_rate": 1.800157297483417e-05,
      "loss": 4.2703,
      "step": 147
    },
    {
      "epoch": 0.01782219947617184,
      "grad_norm": 17.1777286529541,
      "learning_rate": 1.7370711923791567e-05,
      "loss": 4.753,
      "step": 148
    },
    {
      "epoch": 0.01794261974290273,
      "grad_norm": 22.961992263793945,
      "learning_rate": 1.6748771394307585e-05,
      "loss": 6.3804,
      "step": 149
    },
    {
      "epoch": 0.018063040009633623,
      "grad_norm": 20.709712982177734,
      "learning_rate": 1.6135921418712956e-05,
      "loss": 5.3321,
      "step": 150
    },
    {
      "epoch": 0.018063040009633623,
      "eval_loss": 1.4118815660476685,
      "eval_runtime": 1624.7275,
      "eval_samples_per_second": 8.609,
      "eval_steps_per_second": 2.152,
      "step": 150
    },
    {
      "epoch": 0.018183460276364513,
      "grad_norm": 15.155057907104492,
      "learning_rate": 1.553232954407171e-05,
      "loss": 5.3383,
      "step": 151
    },
    {
      "epoch": 0.018303880543095404,
      "grad_norm": 16.10227394104004,
      "learning_rate": 1.4938160786375572e-05,
      "loss": 5.5482,
      "step": 152
    },
    {
      "epoch": 0.018424300809826294,
      "grad_norm": 14.932662010192871,
      "learning_rate": 1.435357758543015e-05,
      "loss": 5.9801,
      "step": 153
    },
    {
      "epoch": 0.018544721076557184,
      "grad_norm": 14.543388366699219,
      "learning_rate": 1.3778739760445552e-05,
      "loss": 6.0454,
      "step": 154
    },
    {
      "epoch": 0.018665141343288075,
      "grad_norm": 13.57608413696289,
      "learning_rate": 1.3213804466343421e-05,
      "loss": 5.3455,
      "step": 155
    },
    {
      "epoch": 0.018785561610018965,
      "grad_norm": 14.566596984863281,
      "learning_rate": 1.2658926150792322e-05,
      "loss": 5.8565,
      "step": 156
    },
    {
      "epoch": 0.018905981876749856,
      "grad_norm": 16.764638900756836,
      "learning_rate": 1.2114256511983274e-05,
      "loss": 6.2423,
      "step": 157
    },
    {
      "epoch": 0.01902640214348075,
      "grad_norm": 13.570207595825195,
      "learning_rate": 1.157994445715706e-05,
      "loss": 5.244,
      "step": 158
    },
    {
      "epoch": 0.01914682241021164,
      "grad_norm": 15.006532669067383,
      "learning_rate": 1.1056136061894384e-05,
      "loss": 5.8121,
      "step": 159
    },
    {
      "epoch": 0.01926724267694253,
      "grad_norm": 16.283781051635742,
      "learning_rate": 1.0542974530180327e-05,
      "loss": 5.7995,
      "step": 160
    },
    {
      "epoch": 0.01938766294367342,
      "grad_norm": 16.97859764099121,
      "learning_rate": 1.0040600155253765e-05,
      "loss": 6.0864,
      "step": 161
    },
    {
      "epoch": 0.01950808321040431,
      "grad_norm": 16.730764389038086,
      "learning_rate": 9.549150281252633e-06,
      "loss": 6.1446,
      "step": 162
    },
    {
      "epoch": 0.0196285034771352,
      "grad_norm": 21.55950164794922,
      "learning_rate": 9.068759265665384e-06,
      "loss": 6.9383,
      "step": 163
    },
    {
      "epoch": 0.019748923743866092,
      "grad_norm": 15.633070945739746,
      "learning_rate": 8.599558442598998e-06,
      "loss": 5.7383,
      "step": 164
    },
    {
      "epoch": 0.019869344010596982,
      "grad_norm": 15.562016487121582,
      "learning_rate": 8.141676086873572e-06,
      "loss": 6.0859,
      "step": 165
    },
    {
      "epoch": 0.019989764277327873,
      "grad_norm": 16.343961715698242,
      "learning_rate": 7.695237378953223e-06,
      "loss": 6.5659,
      "step": 166
    },
    {
      "epoch": 0.020110184544058766,
      "grad_norm": 15.342779159545898,
      "learning_rate": 7.260364370723044e-06,
      "loss": 5.9446,
      "step": 167
    },
    {
      "epoch": 0.020230604810789657,
      "grad_norm": 17.29735565185547,
      "learning_rate": 6.837175952121306e-06,
      "loss": 6.8397,
      "step": 168
    },
    {
      "epoch": 0.020351025077520547,
      "grad_norm": 18.98801612854004,
      "learning_rate": 6.425787818636131e-06,
      "loss": 7.2175,
      "step": 169
    },
    {
      "epoch": 0.020471445344251438,
      "grad_norm": 18.11264419555664,
      "learning_rate": 6.026312439675552e-06,
      "loss": 7.3273,
      "step": 170
    },
    {
      "epoch": 0.020591865610982328,
      "grad_norm": 17.66663932800293,
      "learning_rate": 5.6388590278194096e-06,
      "loss": 5.952,
      "step": 171
    },
    {
      "epoch": 0.02071228587771322,
      "grad_norm": 16.35909652709961,
      "learning_rate": 5.263533508961827e-06,
      "loss": 6.7025,
      "step": 172
    },
    {
      "epoch": 0.02083270614444411,
      "grad_norm": 19.418554306030273,
      "learning_rate": 4.900438493352055e-06,
      "loss": 7.5946,
      "step": 173
    },
    {
      "epoch": 0.020953126411175,
      "grad_norm": 17.824628829956055,
      "learning_rate": 4.549673247541875e-06,
      "loss": 6.5499,
      "step": 174
    },
    {
      "epoch": 0.021073546677905893,
      "grad_norm": 18.88226318359375,
      "learning_rate": 4.2113336672471245e-06,
      "loss": 7.4214,
      "step": 175
    },
    {
      "epoch": 0.021193966944636784,
      "grad_norm": 21.682384490966797,
      "learning_rate": 3.885512251130763e-06,
      "loss": 7.1637,
      "step": 176
    },
    {
      "epoch": 0.021314387211367674,
      "grad_norm": 23.528362274169922,
      "learning_rate": 3.5722980755146517e-06,
      "loss": 8.0835,
      "step": 177
    },
    {
      "epoch": 0.021434807478098564,
      "grad_norm": 29.83986473083496,
      "learning_rate": 3.271776770026963e-06,
      "loss": 9.481,
      "step": 178
    },
    {
      "epoch": 0.021555227744829455,
      "grad_norm": 19.354703903198242,
      "learning_rate": 2.9840304941919415e-06,
      "loss": 5.6199,
      "step": 179
    },
    {
      "epoch": 0.021675648011560345,
      "grad_norm": 13.174699783325195,
      "learning_rate": 2.7091379149682685e-06,
      "loss": 4.2994,
      "step": 180
    },
    {
      "epoch": 0.021796068278291236,
      "grad_norm": 22.209993362426758,
      "learning_rate": 2.4471741852423237e-06,
      "loss": 5.7409,
      "step": 181
    },
    {
      "epoch": 0.021916488545022126,
      "grad_norm": 13.499473571777344,
      "learning_rate": 2.1982109232821178e-06,
      "loss": 5.0113,
      "step": 182
    },
    {
      "epoch": 0.022036908811753016,
      "grad_norm": 16.739299774169922,
      "learning_rate": 1.962316193157593e-06,
      "loss": 5.207,
      "step": 183
    },
    {
      "epoch": 0.02215732907848391,
      "grad_norm": 14.593423843383789,
      "learning_rate": 1.7395544861325718e-06,
      "loss": 5.5588,
      "step": 184
    },
    {
      "epoch": 0.0222777493452148,
      "grad_norm": 18.718555450439453,
      "learning_rate": 1.5299867030334814e-06,
      "loss": 5.8553,
      "step": 185
    },
    {
      "epoch": 0.02239816961194569,
      "grad_norm": 14.348808288574219,
      "learning_rate": 1.333670137599713e-06,
      "loss": 5.3079,
      "step": 186
    },
    {
      "epoch": 0.02251858987867658,
      "grad_norm": 13.264379501342773,
      "learning_rate": 1.1506584608200367e-06,
      "loss": 4.1346,
      "step": 187
    },
    {
      "epoch": 0.022639010145407472,
      "grad_norm": 14.749774932861328,
      "learning_rate": 9.810017062595322e-07,
      "loss": 4.5342,
      "step": 188
    },
    {
      "epoch": 0.022759430412138362,
      "grad_norm": 13.615583419799805,
      "learning_rate": 8.247462563808817e-07,
      "loss": 4.5825,
      "step": 189
    },
    {
      "epoch": 0.022879850678869253,
      "grad_norm": 14.845542907714844,
      "learning_rate": 6.819348298638839e-07,
      "loss": 4.9798,
      "step": 190
    },
    {
      "epoch": 0.023000270945600143,
      "grad_norm": 13.39441204071045,
      "learning_rate": 5.526064699265753e-07,
      "loss": 4.9815,
      "step": 191
    },
    {
      "epoch": 0.023120691212331037,
      "grad_norm": 15.792435646057129,
      "learning_rate": 4.367965336512403e-07,
      "loss": 4.9431,
      "step": 192
    },
    {
      "epoch": 0.023241111479061927,
      "grad_norm": 11.874963760375977,
      "learning_rate": 3.3453668231809286e-07,
      "loss": 4.2139,
      "step": 193
    },
    {
      "epoch": 0.023361531745792818,
      "grad_norm": 13.765231132507324,
      "learning_rate": 2.458548727494292e-07,
      "loss": 4.555,
      "step": 194
    },
    {
      "epoch": 0.023481952012523708,
      "grad_norm": 12.91434383392334,
      "learning_rate": 1.7077534966650766e-07,
      "loss": 4.3116,
      "step": 195
    },
    {
      "epoch": 0.0236023722792546,
      "grad_norm": 15.871813774108887,
      "learning_rate": 1.0931863906127327e-07,
      "loss": 4.6468,
      "step": 196
    },
    {
      "epoch": 0.02372279254598549,
      "grad_norm": 14.559955596923828,
      "learning_rate": 6.150154258476315e-08,
      "loss": 4.6366,
      "step": 197
    },
    {
      "epoch": 0.02384321281271638,
      "grad_norm": 14.423089981079102,
      "learning_rate": 2.7337132953697554e-08,
      "loss": 4.4505,
      "step": 198
    },
    {
      "epoch": 0.02396363307944727,
      "grad_norm": 16.981185913085938,
      "learning_rate": 6.834750376549792e-09,
      "loss": 4.2474,
      "step": 199
    },
    {
      "epoch": 0.02408405334617816,
      "grad_norm": 15.380423545837402,
      "learning_rate": 0.0,
      "loss": 4.0541,
      "step": 200
    },
    {
      "epoch": 0.02408405334617816,
      "eval_loss": 1.403451919555664,
      "eval_runtime": 1622.9561,
      "eval_samples_per_second": 8.618,
      "eval_steps_per_second": 2.155,
      "step": 200
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4.475451310300201e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}