|
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 97.62050030506407,
|
|
"eval_steps": 500,
|
|
"global_step": 20000,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.09762050030506407,
|
|
"grad_norm": 0.5707364082336426,
|
|
"learning_rate": 4e-05,
|
|
"loss": 2.2566,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.19524100061012814,
|
|
"grad_norm": 0.7393651008605957,
|
|
"learning_rate": 8e-05,
|
|
"loss": 2.203,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.2928615009151922,
|
|
"grad_norm": 0.8320680856704712,
|
|
"learning_rate": 0.00012,
|
|
"loss": 1.9533,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.3904820012202563,
|
|
"grad_norm": 1.2113606929779053,
|
|
"learning_rate": 0.00016,
|
|
"loss": 1.8878,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.4881025015253203,
|
|
"grad_norm": 1.4215683937072754,
|
|
"learning_rate": 0.0002,
|
|
"loss": 1.7688,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.5857230018303844,
|
|
"grad_norm": 1.8242409229278564,
|
|
"learning_rate": 0.0001998688524590164,
|
|
"loss": 1.7308,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.6833435021354485,
|
|
"grad_norm": 1.6889506578445435,
|
|
"learning_rate": 0.0001997377049180328,
|
|
"loss": 1.6553,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.7809640024405126,
|
|
"grad_norm": 2.1998350620269775,
|
|
"learning_rate": 0.00019960655737704918,
|
|
"loss": 1.5954,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.8785845027455765,
|
|
"grad_norm": 1.5626238584518433,
|
|
"learning_rate": 0.0001994754098360656,
|
|
"loss": 1.5732,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.9762050030506406,
|
|
"grad_norm": 2.350252389907837,
|
|
"learning_rate": 0.00019934426229508198,
|
|
"loss": 1.5001,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 1.0738255033557047,
|
|
"grad_norm": 1.7499576807022095,
|
|
"learning_rate": 0.00019921311475409837,
|
|
"loss": 1.5056,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 1.1714460036607688,
|
|
"grad_norm": 2.5555903911590576,
|
|
"learning_rate": 0.00019908196721311476,
|
|
"loss": 1.3946,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 1.2690665039658329,
|
|
"grad_norm": 2.44691801071167,
|
|
"learning_rate": 0.00019895081967213115,
|
|
"loss": 1.4672,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 1.366687004270897,
|
|
"grad_norm": 1.75604248046875,
|
|
"learning_rate": 0.00019881967213114757,
|
|
"loss": 1.4255,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 1.4643075045759608,
|
|
"grad_norm": 2.170323610305786,
|
|
"learning_rate": 0.00019868852459016393,
|
|
"loss": 1.3473,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 1.561928004881025,
|
|
"grad_norm": 2.3095695972442627,
|
|
"learning_rate": 0.00019855737704918035,
|
|
"loss": 1.3471,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 1.659548505186089,
|
|
"grad_norm": 2.668290138244629,
|
|
"learning_rate": 0.00019842622950819674,
|
|
"loss": 1.2861,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 1.757169005491153,
|
|
"grad_norm": 2.395897150039673,
|
|
"learning_rate": 0.00019829508196721313,
|
|
"loss": 1.2914,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 1.8547895057962172,
|
|
"grad_norm": 2.7138261795043945,
|
|
"learning_rate": 0.00019816393442622951,
|
|
"loss": 1.3455,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 1.9524100061012812,
|
|
"grad_norm": 2.28128981590271,
|
|
"learning_rate": 0.0001980327868852459,
|
|
"loss": 1.2907,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 2.0500305064063453,
|
|
"grad_norm": 2.284174919128418,
|
|
"learning_rate": 0.00019790163934426232,
|
|
"loss": 1.2074,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 2.1476510067114094,
|
|
"grad_norm": 2.7497994899749756,
|
|
"learning_rate": 0.00019777049180327868,
|
|
"loss": 1.2135,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 2.2452715070164735,
|
|
"grad_norm": 3.0772790908813477,
|
|
"learning_rate": 0.0001976393442622951,
|
|
"loss": 1.1622,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 2.3428920073215376,
|
|
"grad_norm": 2.3680825233459473,
|
|
"learning_rate": 0.0001975081967213115,
|
|
"loss": 1.2219,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 2.4405125076266017,
|
|
"grad_norm": 3.132049798965454,
|
|
"learning_rate": 0.00019737704918032788,
|
|
"loss": 1.2523,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 2.5381330079316657,
|
|
"grad_norm": 2.8542871475219727,
|
|
"learning_rate": 0.00019724590163934427,
|
|
"loss": 1.1163,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 2.63575350823673,
|
|
"grad_norm": 2.5308499336242676,
|
|
"learning_rate": 0.00019711475409836066,
|
|
"loss": 1.1624,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 2.733374008541794,
|
|
"grad_norm": 2.3678996562957764,
|
|
"learning_rate": 0.00019698360655737707,
|
|
"loss": 1.1856,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 2.830994508846858,
|
|
"grad_norm": 3.0295815467834473,
|
|
"learning_rate": 0.00019685245901639344,
|
|
"loss": 1.1193,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 2.9286150091519216,
|
|
"grad_norm": 3.7163286209106445,
|
|
"learning_rate": 0.00019672131147540985,
|
|
"loss": 1.1479,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 3.026235509456986,
|
|
"grad_norm": 2.840359926223755,
|
|
"learning_rate": 0.00019659016393442624,
|
|
"loss": 1.1554,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 3.1238560097620502,
|
|
"grad_norm": 2.579162836074829,
|
|
"learning_rate": 0.00019645901639344263,
|
|
"loss": 1.0088,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 3.221476510067114,
|
|
"grad_norm": 2.772268295288086,
|
|
"learning_rate": 0.00019632786885245902,
|
|
"loss": 1.0412,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 3.319097010372178,
|
|
"grad_norm": 3.2339484691619873,
|
|
"learning_rate": 0.0001961967213114754,
|
|
"loss": 1.043,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 3.416717510677242,
|
|
"grad_norm": 3.7858259677886963,
|
|
"learning_rate": 0.00019606557377049183,
|
|
"loss": 1.1187,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 3.514338010982306,
|
|
"grad_norm": 3.17150616645813,
|
|
"learning_rate": 0.00019593442622950822,
|
|
"loss": 1.0731,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 3.61195851128737,
|
|
"grad_norm": 2.9254567623138428,
|
|
"learning_rate": 0.0001958032786885246,
|
|
"loss": 1.0242,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 3.7095790115924343,
|
|
"grad_norm": 2.4400126934051514,
|
|
"learning_rate": 0.000195672131147541,
|
|
"loss": 1.0911,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 3.8071995118974984,
|
|
"grad_norm": 2.7202625274658203,
|
|
"learning_rate": 0.00019554098360655738,
|
|
"loss": 1.0451,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 3.9048200122025625,
|
|
"grad_norm": 3.0309741497039795,
|
|
"learning_rate": 0.00019540983606557377,
|
|
"loss": 1.0188,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 4.002440512507627,
|
|
"grad_norm": 4.291023254394531,
|
|
"learning_rate": 0.00019527868852459016,
|
|
"loss": 1.0858,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 4.100061012812691,
|
|
"grad_norm": 3.4548487663269043,
|
|
"learning_rate": 0.00019514754098360658,
|
|
"loss": 0.9553,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 4.197681513117755,
|
|
"grad_norm": 2.882427453994751,
|
|
"learning_rate": 0.00019501639344262297,
|
|
"loss": 0.9491,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 4.295302013422819,
|
|
"grad_norm": 3.2026467323303223,
|
|
"learning_rate": 0.00019488524590163936,
|
|
"loss": 0.9529,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 4.392922513727883,
|
|
"grad_norm": 2.676392078399658,
|
|
"learning_rate": 0.00019475409836065575,
|
|
"loss": 0.9296,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 4.490543014032947,
|
|
"grad_norm": 2.8527650833129883,
|
|
"learning_rate": 0.00019462295081967214,
|
|
"loss": 0.9826,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 4.588163514338011,
|
|
"grad_norm": 2.832995891571045,
|
|
"learning_rate": 0.00019449180327868855,
|
|
"loss": 0.9337,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 4.685784014643075,
|
|
"grad_norm": 2.9931800365448,
|
|
"learning_rate": 0.00019436065573770491,
|
|
"loss": 0.9455,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 4.783404514948139,
|
|
"grad_norm": 3.4157402515411377,
|
|
"learning_rate": 0.00019422950819672133,
|
|
"loss": 1.0286,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 4.881025015253203,
|
|
"grad_norm": 2.846972942352295,
|
|
"learning_rate": 0.00019409836065573772,
|
|
"loss": 0.9536,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 4.978645515558267,
|
|
"grad_norm": 3.4363763332366943,
|
|
"learning_rate": 0.0001939672131147541,
|
|
"loss": 0.967,
|
|
"step": 1020
|
|
},
|
|
{
|
|
"epoch": 5.0762660158633315,
|
|
"grad_norm": 3.347905397415161,
|
|
"learning_rate": 0.0001938360655737705,
|
|
"loss": 0.9291,
|
|
"step": 1040
|
|
},
|
|
{
|
|
"epoch": 5.173886516168396,
|
|
"grad_norm": 3.016284704208374,
|
|
"learning_rate": 0.0001937049180327869,
|
|
"loss": 0.8722,
|
|
"step": 1060
|
|
},
|
|
{
|
|
"epoch": 5.27150701647346,
|
|
"grad_norm": 3.5366976261138916,
|
|
"learning_rate": 0.0001935737704918033,
|
|
"loss": 0.8839,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"epoch": 5.369127516778524,
|
|
"grad_norm": 3.550899028778076,
|
|
"learning_rate": 0.00019344262295081967,
|
|
"loss": 0.8386,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 5.466748017083588,
|
|
"grad_norm": 3.651362180709839,
|
|
"learning_rate": 0.00019331147540983608,
|
|
"loss": 0.8998,
|
|
"step": 1120
|
|
},
|
|
{
|
|
"epoch": 5.564368517388652,
|
|
"grad_norm": 2.9918806552886963,
|
|
"learning_rate": 0.00019318032786885247,
|
|
"loss": 0.8472,
|
|
"step": 1140
|
|
},
|
|
{
|
|
"epoch": 5.661989017693716,
|
|
"grad_norm": 3.7066636085510254,
|
|
"learning_rate": 0.00019304918032786886,
|
|
"loss": 0.8966,
|
|
"step": 1160
|
|
},
|
|
{
|
|
"epoch": 5.75960951799878,
|
|
"grad_norm": 3.1231799125671387,
|
|
"learning_rate": 0.00019291803278688525,
|
|
"loss": 0.8989,
|
|
"step": 1180
|
|
},
|
|
{
|
|
"epoch": 5.857230018303844,
|
|
"grad_norm": 3.0318100452423096,
|
|
"learning_rate": 0.00019278688524590164,
|
|
"loss": 0.9222,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 5.954850518608908,
|
|
"grad_norm": 3.3124337196350098,
|
|
"learning_rate": 0.00019265573770491806,
|
|
"loss": 0.8827,
|
|
"step": 1220
|
|
},
|
|
{
|
|
"epoch": 6.052471018913972,
|
|
"grad_norm": 3.1748123168945312,
|
|
"learning_rate": 0.00019252459016393442,
|
|
"loss": 0.845,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"epoch": 6.150091519219036,
|
|
"grad_norm": 3.320768356323242,
|
|
"learning_rate": 0.00019239344262295084,
|
|
"loss": 0.7315,
|
|
"step": 1260
|
|
},
|
|
{
|
|
"epoch": 6.2477120195241005,
|
|
"grad_norm": 4.226164817810059,
|
|
"learning_rate": 0.00019226229508196723,
|
|
"loss": 0.7999,
|
|
"step": 1280
|
|
},
|
|
{
|
|
"epoch": 6.345332519829164,
|
|
"grad_norm": 3.12298583984375,
|
|
"learning_rate": 0.00019213114754098362,
|
|
"loss": 0.8337,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 6.442953020134228,
|
|
"grad_norm": 3.4633209705352783,
|
|
"learning_rate": 0.000192,
|
|
"loss": 0.7834,
|
|
"step": 1320
|
|
},
|
|
{
|
|
"epoch": 6.540573520439292,
|
|
"grad_norm": 3.116576671600342,
|
|
"learning_rate": 0.0001918688524590164,
|
|
"loss": 0.8386,
|
|
"step": 1340
|
|
},
|
|
{
|
|
"epoch": 6.638194020744356,
|
|
"grad_norm": 3.255690336227417,
|
|
"learning_rate": 0.0001917377049180328,
|
|
"loss": 0.902,
|
|
"step": 1360
|
|
},
|
|
{
|
|
"epoch": 6.73581452104942,
|
|
"grad_norm": 4.117364406585693,
|
|
"learning_rate": 0.00019160655737704917,
|
|
"loss": 0.7897,
|
|
"step": 1380
|
|
},
|
|
{
|
|
"epoch": 6.833435021354484,
|
|
"grad_norm": 3.813042163848877,
|
|
"learning_rate": 0.0001914754098360656,
|
|
"loss": 0.7975,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 6.931055521659548,
|
|
"grad_norm": 3.2562716007232666,
|
|
"learning_rate": 0.00019134426229508198,
|
|
"loss": 0.885,
|
|
"step": 1420
|
|
},
|
|
{
|
|
"epoch": 7.028676021964612,
|
|
"grad_norm": 3.0337297916412354,
|
|
"learning_rate": 0.00019121311475409837,
|
|
"loss": 0.7571,
|
|
"step": 1440
|
|
},
|
|
{
|
|
"epoch": 7.126296522269676,
|
|
"grad_norm": 3.4187958240509033,
|
|
"learning_rate": 0.00019108196721311476,
|
|
"loss": 0.7317,
|
|
"step": 1460
|
|
},
|
|
{
|
|
"epoch": 7.22391702257474,
|
|
"grad_norm": 3.2497494220733643,
|
|
"learning_rate": 0.00019095081967213115,
|
|
"loss": 0.6953,
|
|
"step": 1480
|
|
},
|
|
{
|
|
"epoch": 7.3215375228798045,
|
|
"grad_norm": 3.452319383621216,
|
|
"learning_rate": 0.00019081967213114756,
|
|
"loss": 0.7026,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 7.419158023184869,
|
|
"grad_norm": 4.487509250640869,
|
|
"learning_rate": 0.00019068852459016395,
|
|
"loss": 0.7552,
|
|
"step": 1520
|
|
},
|
|
{
|
|
"epoch": 7.516778523489933,
|
|
"grad_norm": 3.4719717502593994,
|
|
"learning_rate": 0.00019055737704918034,
|
|
"loss": 0.7426,
|
|
"step": 1540
|
|
},
|
|
{
|
|
"epoch": 7.614399023794997,
|
|
"grad_norm": 3.9266843795776367,
|
|
"learning_rate": 0.00019042622950819673,
|
|
"loss": 0.762,
|
|
"step": 1560
|
|
},
|
|
{
|
|
"epoch": 7.712019524100061,
|
|
"grad_norm": 3.0717546939849854,
|
|
"learning_rate": 0.00019029508196721312,
|
|
"loss": 0.7574,
|
|
"step": 1580
|
|
},
|
|
{
|
|
"epoch": 7.809640024405125,
|
|
"grad_norm": 3.793149948120117,
|
|
"learning_rate": 0.0001901639344262295,
|
|
"loss": 0.7817,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 7.907260524710189,
|
|
"grad_norm": 3.288499593734741,
|
|
"learning_rate": 0.0001900327868852459,
|
|
"loss": 0.7759,
|
|
"step": 1620
|
|
},
|
|
{
|
|
"epoch": 8.004881025015253,
|
|
"grad_norm": 2.6891720294952393,
|
|
"learning_rate": 0.00018990163934426232,
|
|
"loss": 0.8212,
|
|
"step": 1640
|
|
},
|
|
{
|
|
"epoch": 8.102501525320317,
|
|
"grad_norm": 3.331376075744629,
|
|
"learning_rate": 0.0001897704918032787,
|
|
"loss": 0.6659,
|
|
"step": 1660
|
|
},
|
|
{
|
|
"epoch": 8.200122025625381,
|
|
"grad_norm": 3.3468194007873535,
|
|
"learning_rate": 0.0001896393442622951,
|
|
"loss": 0.6458,
|
|
"step": 1680
|
|
},
|
|
{
|
|
"epoch": 8.297742525930445,
|
|
"grad_norm": 4.916980266571045,
|
|
"learning_rate": 0.00018950819672131148,
|
|
"loss": 0.6748,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 8.39536302623551,
|
|
"grad_norm": 3.4678823947906494,
|
|
"learning_rate": 0.00018937704918032787,
|
|
"loss": 0.6349,
|
|
"step": 1720
|
|
},
|
|
{
|
|
"epoch": 8.492983526540574,
|
|
"grad_norm": 4.540040493011475,
|
|
"learning_rate": 0.0001892459016393443,
|
|
"loss": 0.7256,
|
|
"step": 1740
|
|
},
|
|
{
|
|
"epoch": 8.590604026845638,
|
|
"grad_norm": 3.6092071533203125,
|
|
"learning_rate": 0.00018911475409836065,
|
|
"loss": 0.6708,
|
|
"step": 1760
|
|
},
|
|
{
|
|
"epoch": 8.688224527150702,
|
|
"grad_norm": 4.159832954406738,
|
|
"learning_rate": 0.00018898360655737707,
|
|
"loss": 0.6974,
|
|
"step": 1780
|
|
},
|
|
{
|
|
"epoch": 8.785845027455766,
|
|
"grad_norm": 3.980943202972412,
|
|
"learning_rate": 0.00018885245901639346,
|
|
"loss": 0.7268,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 8.88346552776083,
|
|
"grad_norm": 3.106861114501953,
|
|
"learning_rate": 0.00018872131147540985,
|
|
"loss": 0.7583,
|
|
"step": 1820
|
|
},
|
|
{
|
|
"epoch": 8.981086028065894,
|
|
"grad_norm": 2.672461748123169,
|
|
"learning_rate": 0.00018859016393442624,
|
|
"loss": 0.7199,
|
|
"step": 1840
|
|
},
|
|
{
|
|
"epoch": 9.078706528370958,
|
|
"grad_norm": 3.1480796337127686,
|
|
"learning_rate": 0.00018845901639344263,
|
|
"loss": 0.6583,
|
|
"step": 1860
|
|
},
|
|
{
|
|
"epoch": 9.176327028676022,
|
|
"grad_norm": 3.3770973682403564,
|
|
"learning_rate": 0.00018832786885245904,
|
|
"loss": 0.6317,
|
|
"step": 1880
|
|
},
|
|
{
|
|
"epoch": 9.273947528981086,
|
|
"grad_norm": 3.452083110809326,
|
|
"learning_rate": 0.0001881967213114754,
|
|
"loss": 0.6166,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 9.37156802928615,
|
|
"grad_norm": 3.6544485092163086,
|
|
"learning_rate": 0.00018806557377049182,
|
|
"loss": 0.6128,
|
|
"step": 1920
|
|
},
|
|
{
|
|
"epoch": 9.469188529591214,
|
|
"grad_norm": 3.462320566177368,
|
|
"learning_rate": 0.0001879344262295082,
|
|
"loss": 0.6442,
|
|
"step": 1940
|
|
},
|
|
{
|
|
"epoch": 9.566809029896278,
|
|
"grad_norm": 3.0532991886138916,
|
|
"learning_rate": 0.0001878032786885246,
|
|
"loss": 0.6631,
|
|
"step": 1960
|
|
},
|
|
{
|
|
"epoch": 9.664429530201343,
|
|
"grad_norm": 4.109907627105713,
|
|
"learning_rate": 0.000187672131147541,
|
|
"loss": 0.6634,
|
|
"step": 1980
|
|
},
|
|
{
|
|
"epoch": 9.762050030506407,
|
|
"grad_norm": 3.7715537548065186,
|
|
"learning_rate": 0.00018754098360655738,
|
|
"loss": 0.6004,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 9.85967053081147,
|
|
"grad_norm": 4.588443756103516,
|
|
"learning_rate": 0.0001874098360655738,
|
|
"loss": 0.6816,
|
|
"step": 2020
|
|
},
|
|
{
|
|
"epoch": 9.957291031116535,
|
|
"grad_norm": 3.2902603149414062,
|
|
"learning_rate": 0.00018727868852459016,
|
|
"loss": 0.682,
|
|
"step": 2040
|
|
},
|
|
{
|
|
"epoch": 10.054911531421599,
|
|
"grad_norm": 3.070920705795288,
|
|
"learning_rate": 0.00018714754098360657,
|
|
"loss": 0.6007,
|
|
"step": 2060
|
|
},
|
|
{
|
|
"epoch": 10.152532031726663,
|
|
"grad_norm": 3.761850357055664,
|
|
"learning_rate": 0.00018701639344262296,
|
|
"loss": 0.5478,
|
|
"step": 2080
|
|
},
|
|
{
|
|
"epoch": 10.250152532031727,
|
|
"grad_norm": 2.9650540351867676,
|
|
"learning_rate": 0.00018688524590163935,
|
|
"loss": 0.5792,
|
|
"step": 2100
|
|
},
|
|
{
|
|
"epoch": 10.347773032336791,
|
|
"grad_norm": 3.3296761512756348,
|
|
"learning_rate": 0.00018675409836065574,
|
|
"loss": 0.5849,
|
|
"step": 2120
|
|
},
|
|
{
|
|
"epoch": 10.445393532641855,
|
|
"grad_norm": 4.055027008056641,
|
|
"learning_rate": 0.00018662295081967213,
|
|
"loss": 0.5417,
|
|
"step": 2140
|
|
},
|
|
{
|
|
"epoch": 10.54301403294692,
|
|
"grad_norm": 3.581878662109375,
|
|
"learning_rate": 0.00018649180327868855,
|
|
"loss": 0.6285,
|
|
"step": 2160
|
|
},
|
|
{
|
|
"epoch": 10.640634533251983,
|
|
"grad_norm": 3.667356491088867,
|
|
"learning_rate": 0.00018636065573770494,
|
|
"loss": 0.5767,
|
|
"step": 2180
|
|
},
|
|
{
|
|
"epoch": 10.738255033557047,
|
|
"grad_norm": 4.004351615905762,
|
|
"learning_rate": 0.00018622950819672133,
|
|
"loss": 0.6339,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 10.835875533862112,
|
|
"grad_norm": 3.4327890872955322,
|
|
"learning_rate": 0.00018609836065573772,
|
|
"loss": 0.6258,
|
|
"step": 2220
|
|
},
|
|
{
|
|
"epoch": 10.933496034167176,
|
|
"grad_norm": 3.3382508754730225,
|
|
"learning_rate": 0.0001859672131147541,
|
|
"loss": 0.641,
|
|
"step": 2240
|
|
},
|
|
{
|
|
"epoch": 11.03111653447224,
|
|
"grad_norm": 2.9254095554351807,
|
|
"learning_rate": 0.0001858360655737705,
|
|
"loss": 0.5932,
|
|
"step": 2260
|
|
},
|
|
{
|
|
"epoch": 11.128737034777304,
|
|
"grad_norm": 4.12520694732666,
|
|
"learning_rate": 0.00018570491803278688,
|
|
"loss": 0.5007,
|
|
"step": 2280
|
|
},
|
|
{
|
|
"epoch": 11.226357535082368,
|
|
"grad_norm": 3.348695993423462,
|
|
"learning_rate": 0.0001855737704918033,
|
|
"loss": 0.4967,
|
|
"step": 2300
|
|
},
|
|
{
|
|
"epoch": 11.323978035387432,
|
|
"grad_norm": 3.62343430519104,
|
|
"learning_rate": 0.0001854426229508197,
|
|
"loss": 0.5312,
|
|
"step": 2320
|
|
},
|
|
{
|
|
"epoch": 11.421598535692496,
|
|
"grad_norm": 3.4885001182556152,
|
|
"learning_rate": 0.00018531147540983608,
|
|
"loss": 0.5675,
|
|
"step": 2340
|
|
},
|
|
{
|
|
"epoch": 11.51921903599756,
|
|
"grad_norm": 4.001424789428711,
|
|
"learning_rate": 0.00018518032786885247,
|
|
"loss": 0.5654,
|
|
"step": 2360
|
|
},
|
|
{
|
|
"epoch": 11.616839536302624,
|
|
"grad_norm": 3.180741310119629,
|
|
"learning_rate": 0.00018504918032786886,
|
|
"loss": 0.5473,
|
|
"step": 2380
|
|
},
|
|
{
|
|
"epoch": 11.714460036607688,
|
|
"grad_norm": 3.3385472297668457,
|
|
"learning_rate": 0.00018491803278688527,
|
|
"loss": 0.601,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 11.812080536912752,
|
|
"grad_norm": 3.1975746154785156,
|
|
"learning_rate": 0.00018478688524590164,
|
|
"loss": 0.559,
|
|
"step": 2420
|
|
},
|
|
{
|
|
"epoch": 11.909701037217816,
|
|
"grad_norm": 4.126131057739258,
|
|
"learning_rate": 0.00018465573770491805,
|
|
"loss": 0.517,
|
|
"step": 2440
|
|
},
|
|
{
|
|
"epoch": 12.00732153752288,
|
|
"grad_norm": 3.1047425270080566,
|
|
"learning_rate": 0.00018452459016393444,
|
|
"loss": 0.5853,
|
|
"step": 2460
|
|
},
|
|
{
|
|
"epoch": 12.104942037827945,
|
|
"grad_norm": 4.153341293334961,
|
|
"learning_rate": 0.0001843934426229508,
|
|
"loss": 0.4683,
|
|
"step": 2480
|
|
},
|
|
{
|
|
"epoch": 12.202562538133009,
|
|
"grad_norm": 3.550233840942383,
|
|
"learning_rate": 0.00018426229508196722,
|
|
"loss": 0.4802,
|
|
"step": 2500
|
|
},
|
|
{
|
|
"epoch": 12.300183038438073,
|
|
"grad_norm": 3.3238728046417236,
|
|
"learning_rate": 0.0001841311475409836,
|
|
"loss": 0.5219,
|
|
"step": 2520
|
|
},
|
|
{
|
|
"epoch": 12.397803538743137,
|
|
"grad_norm": 4.092185974121094,
|
|
"learning_rate": 0.00018400000000000003,
|
|
"loss": 0.4783,
|
|
"step": 2540
|
|
},
|
|
{
|
|
"epoch": 12.495424039048201,
|
|
"grad_norm": 3.4403531551361084,
|
|
"learning_rate": 0.0001838688524590164,
|
|
"loss": 0.5247,
|
|
"step": 2560
|
|
},
|
|
{
|
|
"epoch": 12.593044539353265,
|
|
"grad_norm": 4.026747226715088,
|
|
"learning_rate": 0.0001837377049180328,
|
|
"loss": 0.4838,
|
|
"step": 2580
|
|
},
|
|
{
|
|
"epoch": 12.690665039658327,
|
|
"grad_norm": 2.7140889167785645,
|
|
"learning_rate": 0.0001836065573770492,
|
|
"loss": 0.5417,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 12.788285539963393,
|
|
"grad_norm": 4.172866344451904,
|
|
"learning_rate": 0.00018347540983606558,
|
|
"loss": 0.5133,
|
|
"step": 2620
|
|
},
|
|
{
|
|
"epoch": 12.885906040268456,
|
|
"grad_norm": 4.674078464508057,
|
|
"learning_rate": 0.00018334426229508197,
|
|
"loss": 0.5093,
|
|
"step": 2640
|
|
},
|
|
{
|
|
"epoch": 12.98352654057352,
|
|
"grad_norm": 3.6748011112213135,
|
|
"learning_rate": 0.00018321311475409836,
|
|
"loss": 0.5365,
|
|
"step": 2660
|
|
},
|
|
{
|
|
"epoch": 13.081147040878584,
|
|
"grad_norm": 5.05755615234375,
|
|
"learning_rate": 0.00018308196721311478,
|
|
"loss": 0.4356,
|
|
"step": 2680
|
|
},
|
|
{
|
|
"epoch": 13.178767541183648,
|
|
"grad_norm": 3.9897286891937256,
|
|
"learning_rate": 0.00018295081967213114,
|
|
"loss": 0.4228,
|
|
"step": 2700
|
|
},
|
|
{
|
|
"epoch": 13.276388041488712,
|
|
"grad_norm": 3.8581581115722656,
|
|
"learning_rate": 0.00018281967213114756,
|
|
"loss": 0.4019,
|
|
"step": 2720
|
|
},
|
|
{
|
|
"epoch": 13.374008541793776,
|
|
"grad_norm": 3.3325164318084717,
|
|
"learning_rate": 0.00018268852459016395,
|
|
"loss": 0.4715,
|
|
"step": 2740
|
|
},
|
|
{
|
|
"epoch": 13.47162904209884,
|
|
"grad_norm": 3.3978488445281982,
|
|
"learning_rate": 0.00018255737704918034,
|
|
"loss": 0.4634,
|
|
"step": 2760
|
|
},
|
|
{
|
|
"epoch": 13.569249542403904,
|
|
"grad_norm": 3.491314172744751,
|
|
"learning_rate": 0.00018242622950819673,
|
|
"loss": 0.4942,
|
|
"step": 2780
|
|
},
|
|
{
|
|
"epoch": 13.666870042708968,
|
|
"grad_norm": 3.275783061981201,
|
|
"learning_rate": 0.00018229508196721312,
|
|
"loss": 0.4807,
|
|
"step": 2800
|
|
},
|
|
{
|
|
"epoch": 13.764490543014032,
|
|
"grad_norm": 4.8541717529296875,
|
|
"learning_rate": 0.00018216393442622953,
|
|
"loss": 0.4684,
|
|
"step": 2820
|
|
},
|
|
{
|
|
"epoch": 13.862111043319096,
|
|
"grad_norm": 4.203775882720947,
|
|
"learning_rate": 0.00018203278688524592,
|
|
"loss": 0.515,
|
|
"step": 2840
|
|
},
|
|
{
|
|
"epoch": 13.95973154362416,
|
|
"grad_norm": 3.885730504989624,
|
|
"learning_rate": 0.0001819016393442623,
|
|
"loss": 0.5263,
|
|
"step": 2860
|
|
},
|
|
{
|
|
"epoch": 14.057352043929225,
|
|
"grad_norm": 4.247331619262695,
|
|
"learning_rate": 0.0001817704918032787,
|
|
"loss": 0.4553,
|
|
"step": 2880
|
|
},
|
|
{
|
|
"epoch": 14.154972544234289,
|
|
"grad_norm": 4.659804344177246,
|
|
"learning_rate": 0.0001816393442622951,
|
|
"loss": 0.403,
|
|
"step": 2900
|
|
},
|
|
{
|
|
"epoch": 14.252593044539353,
|
|
"grad_norm": 3.525756359100342,
|
|
"learning_rate": 0.00018150819672131148,
|
|
"loss": 0.4128,
|
|
"step": 2920
|
|
},
|
|
{
|
|
"epoch": 14.350213544844417,
|
|
"grad_norm": 3.7535457611083984,
|
|
"learning_rate": 0.00018137704918032787,
|
|
"loss": 0.3969,
|
|
"step": 2940
|
|
},
|
|
{
|
|
"epoch": 14.44783404514948,
|
|
"grad_norm": 3.828665256500244,
|
|
"learning_rate": 0.00018124590163934429,
|
|
"loss": 0.4321,
|
|
"step": 2960
|
|
},
|
|
{
|
|
"epoch": 14.545454545454545,
|
|
"grad_norm": 3.7969393730163574,
|
|
"learning_rate": 0.00018111475409836067,
|
|
"loss": 0.4382,
|
|
"step": 2980
|
|
},
|
|
{
|
|
"epoch": 14.643075045759609,
|
|
"grad_norm": 4.983769416809082,
|
|
"learning_rate": 0.00018098360655737704,
|
|
"loss": 0.4656,
|
|
"step": 3000
|
|
},
|
|
{
|
|
"epoch": 14.740695546064673,
|
|
"grad_norm": 3.585157632827759,
|
|
"learning_rate": 0.00018085245901639345,
|
|
"loss": 0.4576,
|
|
"step": 3020
|
|
},
|
|
{
|
|
"epoch": 14.838316046369737,
|
|
"grad_norm": 3.859992265701294,
|
|
"learning_rate": 0.00018072131147540984,
|
|
"loss": 0.4712,
|
|
"step": 3040
|
|
},
|
|
{
|
|
"epoch": 14.935936546674801,
|
|
"grad_norm": 3.2431323528289795,
|
|
"learning_rate": 0.00018059016393442626,
|
|
"loss": 0.4526,
|
|
"step": 3060
|
|
},
|
|
{
|
|
"epoch": 15.033557046979865,
|
|
"grad_norm": 3.1578800678253174,
|
|
"learning_rate": 0.00018045901639344262,
|
|
"loss": 0.4129,
|
|
"step": 3080
|
|
},
|
|
{
|
|
"epoch": 15.13117754728493,
|
|
"grad_norm": 4.049219608306885,
|
|
"learning_rate": 0.00018032786885245904,
|
|
"loss": 0.3485,
|
|
"step": 3100
|
|
},
|
|
{
|
|
"epoch": 15.228798047589994,
|
|
"grad_norm": 3.642855167388916,
|
|
"learning_rate": 0.00018019672131147543,
|
|
"loss": 0.3607,
|
|
"step": 3120
|
|
},
|
|
{
|
|
"epoch": 15.326418547895058,
|
|
"grad_norm": 3.5239570140838623,
|
|
"learning_rate": 0.0001800655737704918,
|
|
"loss": 0.3844,
|
|
"step": 3140
|
|
},
|
|
{
|
|
"epoch": 15.424039048200122,
|
|
"grad_norm": 2.894887685775757,
|
|
"learning_rate": 0.0001799344262295082,
|
|
"loss": 0.4111,
|
|
"step": 3160
|
|
},
|
|
{
|
|
"epoch": 15.521659548505186,
|
|
"grad_norm": 3.1862661838531494,
|
|
"learning_rate": 0.0001798032786885246,
|
|
"loss": 0.4,
|
|
"step": 3180
|
|
},
|
|
{
|
|
"epoch": 15.61928004881025,
|
|
"grad_norm": 3.169221878051758,
|
|
"learning_rate": 0.000179672131147541,
|
|
"loss": 0.3914,
|
|
"step": 3200
|
|
},
|
|
{
|
|
"epoch": 15.716900549115314,
|
|
"grad_norm": 4.413252830505371,
|
|
"learning_rate": 0.00017954098360655737,
|
|
"loss": 0.4268,
|
|
"step": 3220
|
|
},
|
|
{
|
|
"epoch": 15.814521049420378,
|
|
"grad_norm": 4.1161208152771,
|
|
"learning_rate": 0.0001794098360655738,
|
|
"loss": 0.4261,
|
|
"step": 3240
|
|
},
|
|
{
|
|
"epoch": 15.912141549725442,
|
|
"grad_norm": 3.044705867767334,
|
|
"learning_rate": 0.00017927868852459018,
|
|
"loss": 0.4628,
|
|
"step": 3260
|
|
},
|
|
{
|
|
"epoch": 16.009762050030506,
|
|
"grad_norm": 3.2134783267974854,
|
|
"learning_rate": 0.00017914754098360657,
|
|
"loss": 0.4368,
|
|
"step": 3280
|
|
},
|
|
{
|
|
"epoch": 16.107382550335572,
|
|
"grad_norm": 3.8068933486938477,
|
|
"learning_rate": 0.00017901639344262296,
|
|
"loss": 0.2993,
|
|
"step": 3300
|
|
},
|
|
{
|
|
"epoch": 16.205003050640634,
|
|
"grad_norm": 4.097984313964844,
|
|
"learning_rate": 0.00017888524590163935,
|
|
"loss": 0.3458,
|
|
"step": 3320
|
|
},
|
|
{
|
|
"epoch": 16.3026235509457,
|
|
"grad_norm": 3.5351762771606445,
|
|
"learning_rate": 0.00017875409836065576,
|
|
"loss": 0.3449,
|
|
"step": 3340
|
|
},
|
|
{
|
|
"epoch": 16.400244051250763,
|
|
"grad_norm": 3.3672287464141846,
|
|
"learning_rate": 0.00017862295081967213,
|
|
"loss": 0.3708,
|
|
"step": 3360
|
|
},
|
|
{
|
|
"epoch": 16.49786455155583,
|
|
"grad_norm": 3.6430435180664062,
|
|
"learning_rate": 0.00017849180327868852,
|
|
"loss": 0.369,
|
|
"step": 3380
|
|
},
|
|
{
|
|
"epoch": 16.59548505186089,
|
|
"grad_norm": 4.193872928619385,
|
|
"learning_rate": 0.00017836065573770493,
|
|
"loss": 0.4063,
|
|
"step": 3400
|
|
},
|
|
{
|
|
"epoch": 16.693105552165953,
|
|
"grad_norm": 3.818737506866455,
|
|
"learning_rate": 0.00017822950819672132,
|
|
"loss": 0.3777,
|
|
"step": 3420
|
|
},
|
|
{
|
|
"epoch": 16.79072605247102,
|
|
"grad_norm": 3.7047250270843506,
|
|
"learning_rate": 0.0001780983606557377,
|
|
"loss": 0.3939,
|
|
"step": 3440
|
|
},
|
|
{
|
|
"epoch": 16.888346552776085,
|
|
"grad_norm": 4.005129337310791,
|
|
"learning_rate": 0.0001779672131147541,
|
|
"loss": 0.4125,
|
|
"step": 3460
|
|
},
|
|
{
|
|
"epoch": 16.985967053081147,
|
|
"grad_norm": 4.845798015594482,
|
|
"learning_rate": 0.00017783606557377052,
|
|
"loss": 0.428,
|
|
"step": 3480
|
|
},
|
|
{
|
|
"epoch": 17.08358755338621,
|
|
"grad_norm": 4.9272894859313965,
|
|
"learning_rate": 0.0001777049180327869,
|
|
"loss": 0.3169,
|
|
"step": 3500
|
|
},
|
|
{
|
|
"epoch": 17.181208053691275,
|
|
"grad_norm": 3.334076166152954,
|
|
"learning_rate": 0.00017757377049180327,
|
|
"loss": 0.336,
|
|
"step": 3520
|
|
},
|
|
{
|
|
"epoch": 17.278828553996338,
|
|
"grad_norm": 3.2157669067382812,
|
|
"learning_rate": 0.00017744262295081969,
|
|
"loss": 0.3612,
|
|
"step": 3540
|
|
},
|
|
{
|
|
"epoch": 17.376449054301403,
|
|
"grad_norm": 3.595738410949707,
|
|
"learning_rate": 0.00017731147540983607,
|
|
"loss": 0.3627,
|
|
"step": 3560
|
|
},
|
|
{
|
|
"epoch": 17.474069554606466,
|
|
"grad_norm": 4.483953475952148,
|
|
"learning_rate": 0.00017718032786885246,
|
|
"loss": 0.316,
|
|
"step": 3580
|
|
},
|
|
{
|
|
"epoch": 17.57169005491153,
|
|
"grad_norm": 3.636593818664551,
|
|
"learning_rate": 0.00017704918032786885,
|
|
"loss": 0.3751,
|
|
"step": 3600
|
|
},
|
|
{
|
|
"epoch": 17.669310555216594,
|
|
"grad_norm": 3.6285219192504883,
|
|
"learning_rate": 0.00017691803278688527,
|
|
"loss": 0.3695,
|
|
"step": 3620
|
|
},
|
|
{
|
|
"epoch": 17.76693105552166,
|
|
"grad_norm": 3.8501439094543457,
|
|
"learning_rate": 0.00017678688524590166,
|
|
"loss": 0.3525,
|
|
"step": 3640
|
|
},
|
|
{
|
|
"epoch": 17.864551555826722,
|
|
"grad_norm": 3.410341739654541,
|
|
"learning_rate": 0.00017665573770491802,
|
|
"loss": 0.3672,
|
|
"step": 3660
|
|
},
|
|
{
|
|
"epoch": 17.962172056131788,
|
|
"grad_norm": 3.972015380859375,
|
|
"learning_rate": 0.00017652459016393444,
|
|
"loss": 0.3551,
|
|
"step": 3680
|
|
},
|
|
{
|
|
"epoch": 18.05979255643685,
|
|
"grad_norm": 4.821686744689941,
|
|
"learning_rate": 0.00017639344262295083,
|
|
"loss": 0.334,
|
|
"step": 3700
|
|
},
|
|
{
|
|
"epoch": 18.157413056741916,
|
|
"grad_norm": 3.1545865535736084,
|
|
"learning_rate": 0.00017626229508196724,
|
|
"loss": 0.307,
|
|
"step": 3720
|
|
},
|
|
{
|
|
"epoch": 18.25503355704698,
|
|
"grad_norm": 3.6867642402648926,
|
|
"learning_rate": 0.0001761311475409836,
|
|
"loss": 0.2859,
|
|
"step": 3740
|
|
},
|
|
{
|
|
"epoch": 18.352654057352044,
|
|
"grad_norm": 3.3141942024230957,
|
|
"learning_rate": 0.00017600000000000002,
|
|
"loss": 0.341,
|
|
"step": 3760
|
|
},
|
|
{
|
|
"epoch": 18.450274557657107,
|
|
"grad_norm": 3.5395824909210205,
|
|
"learning_rate": 0.0001758688524590164,
|
|
"loss": 0.3412,
|
|
"step": 3780
|
|
},
|
|
{
|
|
"epoch": 18.547895057962172,
|
|
"grad_norm": 3.7669973373413086,
|
|
"learning_rate": 0.00017573770491803277,
|
|
"loss": 0.3216,
|
|
"step": 3800
|
|
},
|
|
{
|
|
"epoch": 18.645515558267235,
|
|
"grad_norm": 3.6290788650512695,
|
|
"learning_rate": 0.0001756065573770492,
|
|
"loss": 0.3222,
|
|
"step": 3820
|
|
},
|
|
{
|
|
"epoch": 18.7431360585723,
|
|
"grad_norm": 3.3772177696228027,
|
|
"learning_rate": 0.00017547540983606558,
|
|
"loss": 0.3301,
|
|
"step": 3840
|
|
},
|
|
{
|
|
"epoch": 18.840756558877363,
|
|
"grad_norm": 4.57468843460083,
|
|
"learning_rate": 0.000175344262295082,
|
|
"loss": 0.3289,
|
|
"step": 3860
|
|
},
|
|
{
|
|
"epoch": 18.93837705918243,
|
|
"grad_norm": 4.921297073364258,
|
|
"learning_rate": 0.00017521311475409836,
|
|
"loss": 0.3456,
|
|
"step": 3880
|
|
},
|
|
{
|
|
"epoch": 19.03599755948749,
|
|
"grad_norm": 3.4319562911987305,
|
|
"learning_rate": 0.00017508196721311475,
|
|
"loss": 0.3273,
|
|
"step": 3900
|
|
},
|
|
{
|
|
"epoch": 19.133618059792557,
|
|
"grad_norm": 3.177849054336548,
|
|
"learning_rate": 0.00017495081967213116,
|
|
"loss": 0.2475,
|
|
"step": 3920
|
|
},
|
|
{
|
|
"epoch": 19.23123856009762,
|
|
"grad_norm": 3.6201579570770264,
|
|
"learning_rate": 0.00017481967213114753,
|
|
"loss": 0.2825,
|
|
"step": 3940
|
|
},
|
|
{
|
|
"epoch": 19.328859060402685,
|
|
"grad_norm": 3.9348137378692627,
|
|
"learning_rate": 0.00017468852459016394,
|
|
"loss": 0.2999,
|
|
"step": 3960
|
|
},
|
|
{
|
|
"epoch": 19.426479560707747,
|
|
"grad_norm": 3.8892483711242676,
|
|
"learning_rate": 0.00017455737704918033,
|
|
"loss": 0.2946,
|
|
"step": 3980
|
|
},
|
|
{
|
|
"epoch": 19.524100061012813,
|
|
"grad_norm": 3.8046462535858154,
|
|
"learning_rate": 0.00017442622950819675,
|
|
"loss": 0.3273,
|
|
"step": 4000
|
|
},
|
|
{
|
|
"epoch": 19.621720561317876,
|
|
"grad_norm": 3.8215136528015137,
|
|
"learning_rate": 0.0001742950819672131,
|
|
"loss": 0.3057,
|
|
"step": 4020
|
|
},
|
|
{
|
|
"epoch": 19.71934106162294,
|
|
"grad_norm": 4.213250637054443,
|
|
"learning_rate": 0.0001741639344262295,
|
|
"loss": 0.3168,
|
|
"step": 4040
|
|
},
|
|
{
|
|
"epoch": 19.816961561928004,
|
|
"grad_norm": 4.45851469039917,
|
|
"learning_rate": 0.00017403278688524592,
|
|
"loss": 0.3483,
|
|
"step": 4060
|
|
},
|
|
{
|
|
"epoch": 19.91458206223307,
|
|
"grad_norm": 3.3840818405151367,
|
|
"learning_rate": 0.0001739016393442623,
|
|
"loss": 0.316,
|
|
"step": 4080
|
|
},
|
|
{
|
|
"epoch": 20.012202562538132,
|
|
"grad_norm": 2.586810350418091,
|
|
"learning_rate": 0.0001737704918032787,
|
|
"loss": 0.3167,
|
|
"step": 4100
|
|
},
|
|
{
|
|
"epoch": 20.109823062843198,
|
|
"grad_norm": 2.4695241451263428,
|
|
"learning_rate": 0.00017363934426229509,
|
|
"loss": 0.2693,
|
|
"step": 4120
|
|
},
|
|
{
|
|
"epoch": 20.20744356314826,
|
|
"grad_norm": 3.0270965099334717,
|
|
"learning_rate": 0.0001735081967213115,
|
|
"loss": 0.2699,
|
|
"step": 4140
|
|
},
|
|
{
|
|
"epoch": 20.305064063453326,
|
|
"grad_norm": 3.3233821392059326,
|
|
"learning_rate": 0.00017337704918032786,
|
|
"loss": 0.261,
|
|
"step": 4160
|
|
},
|
|
{
|
|
"epoch": 20.40268456375839,
|
|
"grad_norm": 3.30206561088562,
|
|
"learning_rate": 0.00017324590163934425,
|
|
"loss": 0.2843,
|
|
"step": 4180
|
|
},
|
|
{
|
|
"epoch": 20.500305064063454,
|
|
"grad_norm": 3.157876968383789,
|
|
"learning_rate": 0.00017311475409836067,
|
|
"loss": 0.2794,
|
|
"step": 4200
|
|
},
|
|
{
|
|
"epoch": 20.597925564368516,
|
|
"grad_norm": 2.9424281120300293,
|
|
"learning_rate": 0.00017298360655737706,
|
|
"loss": 0.3006,
|
|
"step": 4220
|
|
},
|
|
{
|
|
"epoch": 20.695546064673582,
|
|
"grad_norm": 4.470980167388916,
|
|
"learning_rate": 0.00017285245901639345,
|
|
"loss": 0.3001,
|
|
"step": 4240
|
|
},
|
|
{
|
|
"epoch": 20.793166564978645,
|
|
"grad_norm": 5.169465065002441,
|
|
"learning_rate": 0.00017272131147540984,
|
|
"loss": 0.2855,
|
|
"step": 4260
|
|
},
|
|
{
|
|
"epoch": 20.89078706528371,
|
|
"grad_norm": 4.266078472137451,
|
|
"learning_rate": 0.00017259016393442625,
|
|
"loss": 0.3039,
|
|
"step": 4280
|
|
},
|
|
{
|
|
"epoch": 20.988407565588773,
|
|
"grad_norm": 3.450538158416748,
|
|
"learning_rate": 0.00017245901639344264,
|
|
"loss": 0.3151,
|
|
"step": 4300
|
|
},
|
|
{
|
|
"epoch": 21.08602806589384,
|
|
"grad_norm": 3.0616092681884766,
|
|
"learning_rate": 0.000172327868852459,
|
|
"loss": 0.2414,
|
|
"step": 4320
|
|
},
|
|
{
|
|
"epoch": 21.1836485661989,
|
|
"grad_norm": 2.623687267303467,
|
|
"learning_rate": 0.00017219672131147542,
|
|
"loss": 0.2257,
|
|
"step": 4340
|
|
},
|
|
{
|
|
"epoch": 21.281269066503967,
|
|
"grad_norm": 3.4922776222229004,
|
|
"learning_rate": 0.0001720655737704918,
|
|
"loss": 0.2405,
|
|
"step": 4360
|
|
},
|
|
{
|
|
"epoch": 21.37888956680903,
|
|
"grad_norm": 3.7959911823272705,
|
|
"learning_rate": 0.0001719344262295082,
|
|
"loss": 0.2732,
|
|
"step": 4380
|
|
},
|
|
{
|
|
"epoch": 21.476510067114095,
|
|
"grad_norm": 3.0746493339538574,
|
|
"learning_rate": 0.0001718032786885246,
|
|
"loss": 0.2592,
|
|
"step": 4400
|
|
},
|
|
{
|
|
"epoch": 21.574130567419157,
|
|
"grad_norm": 4.570422649383545,
|
|
"learning_rate": 0.00017167213114754098,
|
|
"loss": 0.2742,
|
|
"step": 4420
|
|
},
|
|
{
|
|
"epoch": 21.671751067724223,
|
|
"grad_norm": 4.301755905151367,
|
|
"learning_rate": 0.0001715409836065574,
|
|
"loss": 0.2843,
|
|
"step": 4440
|
|
},
|
|
{
|
|
"epoch": 21.769371568029285,
|
|
"grad_norm": 3.110278367996216,
|
|
"learning_rate": 0.00017140983606557376,
|
|
"loss": 0.2863,
|
|
"step": 4460
|
|
},
|
|
{
|
|
"epoch": 21.86699206833435,
|
|
"grad_norm": 4.253392219543457,
|
|
"learning_rate": 0.00017127868852459018,
|
|
"loss": 0.2968,
|
|
"step": 4480
|
|
},
|
|
{
|
|
"epoch": 21.964612568639414,
|
|
"grad_norm": 3.6689677238464355,
|
|
"learning_rate": 0.00017114754098360656,
|
|
"loss": 0.2918,
|
|
"step": 4500
|
|
},
|
|
{
|
|
"epoch": 22.06223306894448,
|
|
"grad_norm": 3.50288462638855,
|
|
"learning_rate": 0.00017101639344262298,
|
|
"loss": 0.2567,
|
|
"step": 4520
|
|
},
|
|
{
|
|
"epoch": 22.15985356924954,
|
|
"grad_norm": 3.2581892013549805,
|
|
"learning_rate": 0.00017088524590163934,
|
|
"loss": 0.2029,
|
|
"step": 4540
|
|
},
|
|
{
|
|
"epoch": 22.257474069554608,
|
|
"grad_norm": 3.4691686630249023,
|
|
"learning_rate": 0.00017075409836065573,
|
|
"loss": 0.2299,
|
|
"step": 4560
|
|
},
|
|
{
|
|
"epoch": 22.35509456985967,
|
|
"grad_norm": 4.3092427253723145,
|
|
"learning_rate": 0.00017062295081967215,
|
|
"loss": 0.2578,
|
|
"step": 4580
|
|
},
|
|
{
|
|
"epoch": 22.452715070164736,
|
|
"grad_norm": 3.8213038444519043,
|
|
"learning_rate": 0.0001704918032786885,
|
|
"loss": 0.2766,
|
|
"step": 4600
|
|
},
|
|
{
|
|
"epoch": 22.550335570469798,
|
|
"grad_norm": 3.121110200881958,
|
|
"learning_rate": 0.00017036065573770493,
|
|
"loss": 0.2612,
|
|
"step": 4620
|
|
},
|
|
{
|
|
"epoch": 22.647956070774864,
|
|
"grad_norm": 2.7411513328552246,
|
|
"learning_rate": 0.00017022950819672132,
|
|
"loss": 0.2459,
|
|
"step": 4640
|
|
},
|
|
{
|
|
"epoch": 22.745576571079926,
|
|
"grad_norm": 3.85884428024292,
|
|
"learning_rate": 0.00017009836065573773,
|
|
"loss": 0.2757,
|
|
"step": 4660
|
|
},
|
|
{
|
|
"epoch": 22.843197071384992,
|
|
"grad_norm": 3.128300189971924,
|
|
"learning_rate": 0.0001699672131147541,
|
|
"loss": 0.2498,
|
|
"step": 4680
|
|
},
|
|
{
|
|
"epoch": 22.940817571690054,
|
|
"grad_norm": 2.7686431407928467,
|
|
"learning_rate": 0.00016983606557377049,
|
|
"loss": 0.2608,
|
|
"step": 4700
|
|
},
|
|
{
|
|
"epoch": 23.03843807199512,
|
|
"grad_norm": 3.427732229232788,
|
|
"learning_rate": 0.0001697049180327869,
|
|
"loss": 0.2449,
|
|
"step": 4720
|
|
},
|
|
{
|
|
"epoch": 23.136058572300183,
|
|
"grad_norm": 3.1296660900115967,
|
|
"learning_rate": 0.0001695737704918033,
|
|
"loss": 0.2177,
|
|
"step": 4740
|
|
},
|
|
{
|
|
"epoch": 23.23367907260525,
|
|
"grad_norm": 3.503549575805664,
|
|
"learning_rate": 0.00016944262295081968,
|
|
"loss": 0.2174,
|
|
"step": 4760
|
|
},
|
|
{
|
|
"epoch": 23.33129957291031,
|
|
"grad_norm": 3.4126474857330322,
|
|
"learning_rate": 0.00016931147540983607,
|
|
"loss": 0.2354,
|
|
"step": 4780
|
|
},
|
|
{
|
|
"epoch": 23.428920073215377,
|
|
"grad_norm": 3.743910074234009,
|
|
"learning_rate": 0.00016918032786885249,
|
|
"loss": 0.2471,
|
|
"step": 4800
|
|
},
|
|
{
|
|
"epoch": 23.52654057352044,
|
|
"grad_norm": 3.852505683898926,
|
|
"learning_rate": 0.00016904918032786885,
|
|
"loss": 0.2558,
|
|
"step": 4820
|
|
},
|
|
{
|
|
"epoch": 23.624161073825505,
|
|
"grad_norm": 3.120332956314087,
|
|
"learning_rate": 0.00016891803278688524,
|
|
"loss": 0.2273,
|
|
"step": 4840
|
|
},
|
|
{
|
|
"epoch": 23.721781574130567,
|
|
"grad_norm": 3.634793758392334,
|
|
"learning_rate": 0.00016878688524590165,
|
|
"loss": 0.2452,
|
|
"step": 4860
|
|
},
|
|
{
|
|
"epoch": 23.819402074435633,
|
|
"grad_norm": 3.3393948078155518,
|
|
"learning_rate": 0.00016865573770491804,
|
|
"loss": 0.2503,
|
|
"step": 4880
|
|
},
|
|
{
|
|
"epoch": 23.917022574740695,
|
|
"grad_norm": 2.6347219944000244,
|
|
"learning_rate": 0.00016852459016393443,
|
|
"loss": 0.2315,
|
|
"step": 4900
|
|
},
|
|
{
|
|
"epoch": 24.01464307504576,
|
|
"grad_norm": 2.7050418853759766,
|
|
"learning_rate": 0.00016839344262295082,
|
|
"loss": 0.2638,
|
|
"step": 4920
|
|
},
|
|
{
|
|
"epoch": 24.112263575350823,
|
|
"grad_norm": 2.6256654262542725,
|
|
"learning_rate": 0.0001682622950819672,
|
|
"loss": 0.2156,
|
|
"step": 4940
|
|
},
|
|
{
|
|
"epoch": 24.20988407565589,
|
|
"grad_norm": 3.150125741958618,
|
|
"learning_rate": 0.00016813114754098363,
|
|
"loss": 0.2109,
|
|
"step": 4960
|
|
},
|
|
{
|
|
"epoch": 24.30750457596095,
|
|
"grad_norm": 2.8489086627960205,
|
|
"learning_rate": 0.000168,
|
|
"loss": 0.2092,
|
|
"step": 4980
|
|
},
|
|
{
|
|
"epoch": 24.405125076266017,
|
|
"grad_norm": 2.6634905338287354,
|
|
"learning_rate": 0.0001678688524590164,
|
|
"loss": 0.2152,
|
|
"step": 5000
|
|
},
|
|
{
|
|
"epoch": 24.50274557657108,
|
|
"grad_norm": 3.6262013912200928,
|
|
"learning_rate": 0.0001677377049180328,
|
|
"loss": 0.2023,
|
|
"step": 5020
|
|
},
|
|
{
|
|
"epoch": 24.600366076876146,
|
|
"grad_norm": 3.2124197483062744,
|
|
"learning_rate": 0.00016760655737704919,
|
|
"loss": 0.231,
|
|
"step": 5040
|
|
},
|
|
{
|
|
"epoch": 24.697986577181208,
|
|
"grad_norm": 3.997265100479126,
|
|
"learning_rate": 0.00016747540983606558,
|
|
"loss": 0.2249,
|
|
"step": 5060
|
|
},
|
|
{
|
|
"epoch": 24.795607077486274,
|
|
"grad_norm": 3.5564634799957275,
|
|
"learning_rate": 0.00016734426229508196,
|
|
"loss": 0.2337,
|
|
"step": 5080
|
|
},
|
|
{
|
|
"epoch": 24.893227577791336,
|
|
"grad_norm": 3.023794651031494,
|
|
"learning_rate": 0.00016721311475409838,
|
|
"loss": 0.2523,
|
|
"step": 5100
|
|
},
|
|
{
|
|
"epoch": 24.990848078096402,
|
|
"grad_norm": 3.3614046573638916,
|
|
"learning_rate": 0.00016708196721311474,
|
|
"loss": 0.2524,
|
|
"step": 5120
|
|
},
|
|
{
|
|
"epoch": 25.088468578401464,
|
|
"grad_norm": 2.63189435005188,
|
|
"learning_rate": 0.00016695081967213116,
|
|
"loss": 0.1942,
|
|
"step": 5140
|
|
},
|
|
{
|
|
"epoch": 25.18608907870653,
|
|
"grad_norm": 3.4099018573760986,
|
|
"learning_rate": 0.00016681967213114755,
|
|
"loss": 0.1751,
|
|
"step": 5160
|
|
},
|
|
{
|
|
"epoch": 25.283709579011592,
|
|
"grad_norm": 3.533768653869629,
|
|
"learning_rate": 0.00016668852459016397,
|
|
"loss": 0.1953,
|
|
"step": 5180
|
|
},
|
|
{
|
|
"epoch": 25.381330079316655,
|
|
"grad_norm": 3.330538511276245,
|
|
"learning_rate": 0.00016655737704918033,
|
|
"loss": 0.2124,
|
|
"step": 5200
|
|
},
|
|
{
|
|
"epoch": 25.47895057962172,
|
|
"grad_norm": 3.8602449893951416,
|
|
"learning_rate": 0.00016642622950819672,
|
|
"loss": 0.209,
|
|
"step": 5220
|
|
},
|
|
{
|
|
"epoch": 25.576571079926783,
|
|
"grad_norm": 3.290445566177368,
|
|
"learning_rate": 0.00016629508196721313,
|
|
"loss": 0.2002,
|
|
"step": 5240
|
|
},
|
|
{
|
|
"epoch": 25.67419158023185,
|
|
"grad_norm": 3.143129825592041,
|
|
"learning_rate": 0.0001661639344262295,
|
|
"loss": 0.2376,
|
|
"step": 5260
|
|
},
|
|
{
|
|
"epoch": 25.77181208053691,
|
|
"grad_norm": 4.425613880157471,
|
|
"learning_rate": 0.0001660327868852459,
|
|
"loss": 0.2255,
|
|
"step": 5280
|
|
},
|
|
{
|
|
"epoch": 25.869432580841977,
|
|
"grad_norm": 5.267921447753906,
|
|
"learning_rate": 0.0001659016393442623,
|
|
"loss": 0.2334,
|
|
"step": 5300
|
|
},
|
|
{
|
|
"epoch": 25.96705308114704,
|
|
"grad_norm": 3.738940715789795,
|
|
"learning_rate": 0.0001657704918032787,
|
|
"loss": 0.2354,
|
|
"step": 5320
|
|
},
|
|
{
|
|
"epoch": 26.064673581452105,
|
|
"grad_norm": 2.8138153553009033,
|
|
"learning_rate": 0.00016563934426229508,
|
|
"loss": 0.1706,
|
|
"step": 5340
|
|
},
|
|
{
|
|
"epoch": 26.162294081757167,
|
|
"grad_norm": 3.58404278755188,
|
|
"learning_rate": 0.00016550819672131147,
|
|
"loss": 0.199,
|
|
"step": 5360
|
|
},
|
|
{
|
|
"epoch": 26.259914582062233,
|
|
"grad_norm": 3.536153554916382,
|
|
"learning_rate": 0.00016537704918032789,
|
|
"loss": 0.1991,
|
|
"step": 5380
|
|
},
|
|
{
|
|
"epoch": 26.357535082367296,
|
|
"grad_norm": 2.7656655311584473,
|
|
"learning_rate": 0.00016524590163934428,
|
|
"loss": 0.179,
|
|
"step": 5400
|
|
},
|
|
{
|
|
"epoch": 26.45515558267236,
|
|
"grad_norm": 3.2351813316345215,
|
|
"learning_rate": 0.00016511475409836067,
|
|
"loss": 0.2142,
|
|
"step": 5420
|
|
},
|
|
{
|
|
"epoch": 26.552776082977424,
|
|
"grad_norm": 4.154785633087158,
|
|
"learning_rate": 0.00016498360655737705,
|
|
"loss": 0.2007,
|
|
"step": 5440
|
|
},
|
|
{
|
|
"epoch": 26.65039658328249,
|
|
"grad_norm": 3.327596664428711,
|
|
"learning_rate": 0.00016485245901639344,
|
|
"loss": 0.1934,
|
|
"step": 5460
|
|
},
|
|
{
|
|
"epoch": 26.748017083587552,
|
|
"grad_norm": 3.7125210762023926,
|
|
"learning_rate": 0.00016472131147540983,
|
|
"loss": 0.2013,
|
|
"step": 5480
|
|
},
|
|
{
|
|
"epoch": 26.845637583892618,
|
|
"grad_norm": 3.177676200866699,
|
|
"learning_rate": 0.00016459016393442622,
|
|
"loss": 0.2156,
|
|
"step": 5500
|
|
},
|
|
{
|
|
"epoch": 26.94325808419768,
|
|
"grad_norm": 5.258444786071777,
|
|
"learning_rate": 0.00016445901639344264,
|
|
"loss": 0.2068,
|
|
"step": 5520
|
|
},
|
|
{
|
|
"epoch": 27.040878584502746,
|
|
"grad_norm": 2.750441074371338,
|
|
"learning_rate": 0.00016432786885245903,
|
|
"loss": 0.2059,
|
|
"step": 5540
|
|
},
|
|
{
|
|
"epoch": 27.13849908480781,
|
|
"grad_norm": 2.5060107707977295,
|
|
"learning_rate": 0.00016419672131147542,
|
|
"loss": 0.1696,
|
|
"step": 5560
|
|
},
|
|
{
|
|
"epoch": 27.236119585112874,
|
|
"grad_norm": 3.026069164276123,
|
|
"learning_rate": 0.0001640655737704918,
|
|
"loss": 0.1874,
|
|
"step": 5580
|
|
},
|
|
{
|
|
"epoch": 27.333740085417936,
|
|
"grad_norm": 3.3759682178497314,
|
|
"learning_rate": 0.0001639344262295082,
|
|
"loss": 0.1936,
|
|
"step": 5600
|
|
},
|
|
{
|
|
"epoch": 27.431360585723002,
|
|
"grad_norm": 4.374515533447266,
|
|
"learning_rate": 0.0001638032786885246,
|
|
"loss": 0.1839,
|
|
"step": 5620
|
|
},
|
|
{
|
|
"epoch": 27.528981086028065,
|
|
"grad_norm": 3.855091094970703,
|
|
"learning_rate": 0.00016367213114754098,
|
|
"loss": 0.1998,
|
|
"step": 5640
|
|
},
|
|
{
|
|
"epoch": 27.62660158633313,
|
|
"grad_norm": 3.3890366554260254,
|
|
"learning_rate": 0.0001635409836065574,
|
|
"loss": 0.1956,
|
|
"step": 5660
|
|
},
|
|
{
|
|
"epoch": 27.724222086638193,
|
|
"grad_norm": 3.516590118408203,
|
|
"learning_rate": 0.00016340983606557378,
|
|
"loss": 0.1784,
|
|
"step": 5680
|
|
},
|
|
{
|
|
"epoch": 27.82184258694326,
|
|
"grad_norm": 3.740670680999756,
|
|
"learning_rate": 0.00016327868852459017,
|
|
"loss": 0.1957,
|
|
"step": 5700
|
|
},
|
|
{
|
|
"epoch": 27.91946308724832,
|
|
"grad_norm": 5.360738754272461,
|
|
"learning_rate": 0.00016314754098360656,
|
|
"loss": 0.219,
|
|
"step": 5720
|
|
},
|
|
{
|
|
"epoch": 28.017083587553387,
|
|
"grad_norm": 2.9260716438293457,
|
|
"learning_rate": 0.00016301639344262295,
|
|
"loss": 0.1956,
|
|
"step": 5740
|
|
},
|
|
{
|
|
"epoch": 28.11470408785845,
|
|
"grad_norm": 2.962049722671509,
|
|
"learning_rate": 0.00016288524590163937,
|
|
"loss": 0.1514,
|
|
"step": 5760
|
|
},
|
|
{
|
|
"epoch": 28.212324588163515,
|
|
"grad_norm": 3.02143931388855,
|
|
"learning_rate": 0.00016275409836065573,
|
|
"loss": 0.1586,
|
|
"step": 5780
|
|
},
|
|
{
|
|
"epoch": 28.309945088468577,
|
|
"grad_norm": 3.138089895248413,
|
|
"learning_rate": 0.00016262295081967214,
|
|
"loss": 0.1706,
|
|
"step": 5800
|
|
},
|
|
{
|
|
"epoch": 28.407565588773643,
|
|
"grad_norm": 3.2356488704681396,
|
|
"learning_rate": 0.00016249180327868853,
|
|
"loss": 0.1794,
|
|
"step": 5820
|
|
},
|
|
{
|
|
"epoch": 28.505186089078705,
|
|
"grad_norm": 4.481720447540283,
|
|
"learning_rate": 0.00016236065573770492,
|
|
"loss": 0.1871,
|
|
"step": 5840
|
|
},
|
|
{
|
|
"epoch": 28.60280658938377,
|
|
"grad_norm": 3.6736230850219727,
|
|
"learning_rate": 0.0001622295081967213,
|
|
"loss": 0.1649,
|
|
"step": 5860
|
|
},
|
|
{
|
|
"epoch": 28.700427089688834,
|
|
"grad_norm": 4.480273723602295,
|
|
"learning_rate": 0.0001620983606557377,
|
|
"loss": 0.1837,
|
|
"step": 5880
|
|
},
|
|
{
|
|
"epoch": 28.7980475899939,
|
|
"grad_norm": 3.7047078609466553,
|
|
"learning_rate": 0.00016196721311475412,
|
|
"loss": 0.2099,
|
|
"step": 5900
|
|
},
|
|
{
|
|
"epoch": 28.89566809029896,
|
|
"grad_norm": 3.0603742599487305,
|
|
"learning_rate": 0.00016183606557377048,
|
|
"loss": 0.2015,
|
|
"step": 5920
|
|
},
|
|
{
|
|
"epoch": 28.993288590604028,
|
|
"grad_norm": 3.1472814083099365,
|
|
"learning_rate": 0.0001617049180327869,
|
|
"loss": 0.2016,
|
|
"step": 5940
|
|
},
|
|
{
|
|
"epoch": 29.09090909090909,
|
|
"grad_norm": 2.6735455989837646,
|
|
"learning_rate": 0.0001615737704918033,
|
|
"loss": 0.1586,
|
|
"step": 5960
|
|
},
|
|
{
|
|
"epoch": 29.188529591214156,
|
|
"grad_norm": 3.0757932662963867,
|
|
"learning_rate": 0.00016144262295081968,
|
|
"loss": 0.1466,
|
|
"step": 5980
|
|
},
|
|
{
|
|
"epoch": 29.286150091519218,
|
|
"grad_norm": 3.696460008621216,
|
|
"learning_rate": 0.00016131147540983607,
|
|
"loss": 0.1411,
|
|
"step": 6000
|
|
},
|
|
{
|
|
"epoch": 29.383770591824284,
|
|
"grad_norm": 3.981374979019165,
|
|
"learning_rate": 0.00016118032786885245,
|
|
"loss": 0.1703,
|
|
"step": 6020
|
|
},
|
|
{
|
|
"epoch": 29.481391092129346,
|
|
"grad_norm": 3.1289327144622803,
|
|
"learning_rate": 0.00016104918032786887,
|
|
"loss": 0.1708,
|
|
"step": 6040
|
|
},
|
|
{
|
|
"epoch": 29.579011592434412,
|
|
"grad_norm": 2.825981855392456,
|
|
"learning_rate": 0.00016091803278688526,
|
|
"loss": 0.1731,
|
|
"step": 6060
|
|
},
|
|
{
|
|
"epoch": 29.676632092739474,
|
|
"grad_norm": 3.0257232189178467,
|
|
"learning_rate": 0.00016078688524590165,
|
|
"loss": 0.1799,
|
|
"step": 6080
|
|
},
|
|
{
|
|
"epoch": 29.77425259304454,
|
|
"grad_norm": 3.0449230670928955,
|
|
"learning_rate": 0.00016065573770491804,
|
|
"loss": 0.1776,
|
|
"step": 6100
|
|
},
|
|
{
|
|
"epoch": 29.871873093349603,
|
|
"grad_norm": 3.108060121536255,
|
|
"learning_rate": 0.00016052459016393443,
|
|
"loss": 0.1839,
|
|
"step": 6120
|
|
},
|
|
{
|
|
"epoch": 29.96949359365467,
|
|
"grad_norm": 3.475794792175293,
|
|
"learning_rate": 0.00016039344262295082,
|
|
"loss": 0.2044,
|
|
"step": 6140
|
|
},
|
|
{
|
|
"epoch": 30.06711409395973,
|
|
"grad_norm": 2.7312111854553223,
|
|
"learning_rate": 0.0001602622950819672,
|
|
"loss": 0.1509,
|
|
"step": 6160
|
|
},
|
|
{
|
|
"epoch": 30.164734594264797,
|
|
"grad_norm": 2.8779571056365967,
|
|
"learning_rate": 0.00016013114754098362,
|
|
"loss": 0.1387,
|
|
"step": 6180
|
|
},
|
|
{
|
|
"epoch": 30.26235509456986,
|
|
"grad_norm": 4.596887111663818,
|
|
"learning_rate": 0.00016,
|
|
"loss": 0.1625,
|
|
"step": 6200
|
|
},
|
|
{
|
|
"epoch": 30.359975594874925,
|
|
"grad_norm": 2.2877578735351562,
|
|
"learning_rate": 0.0001598688524590164,
|
|
"loss": 0.1611,
|
|
"step": 6220
|
|
},
|
|
{
|
|
"epoch": 30.457596095179987,
|
|
"grad_norm": 2.9951751232147217,
|
|
"learning_rate": 0.0001597377049180328,
|
|
"loss": 0.1579,
|
|
"step": 6240
|
|
},
|
|
{
|
|
"epoch": 30.555216595485053,
|
|
"grad_norm": 2.9729604721069336,
|
|
"learning_rate": 0.00015960655737704918,
|
|
"loss": 0.1522,
|
|
"step": 6260
|
|
},
|
|
{
|
|
"epoch": 30.652837095790115,
|
|
"grad_norm": 3.4268481731414795,
|
|
"learning_rate": 0.0001594754098360656,
|
|
"loss": 0.1673,
|
|
"step": 6280
|
|
},
|
|
{
|
|
"epoch": 30.75045759609518,
|
|
"grad_norm": 3.1322598457336426,
|
|
"learning_rate": 0.00015934426229508196,
|
|
"loss": 0.1769,
|
|
"step": 6300
|
|
},
|
|
{
|
|
"epoch": 30.848078096400243,
|
|
"grad_norm": 3.1607165336608887,
|
|
"learning_rate": 0.00015921311475409838,
|
|
"loss": 0.1795,
|
|
"step": 6320
|
|
},
|
|
{
|
|
"epoch": 30.94569859670531,
|
|
"grad_norm": 3.9670820236206055,
|
|
"learning_rate": 0.00015908196721311477,
|
|
"loss": 0.1714,
|
|
"step": 6340
|
|
},
|
|
{
|
|
"epoch": 31.04331909701037,
|
|
"grad_norm": 3.0311009883880615,
|
|
"learning_rate": 0.00015895081967213116,
|
|
"loss": 0.1546,
|
|
"step": 6360
|
|
},
|
|
{
|
|
"epoch": 31.140939597315437,
|
|
"grad_norm": 2.023759126663208,
|
|
"learning_rate": 0.00015881967213114754,
|
|
"loss": 0.1351,
|
|
"step": 6380
|
|
},
|
|
{
|
|
"epoch": 31.2385600976205,
|
|
"grad_norm": 2.746744155883789,
|
|
"learning_rate": 0.00015868852459016393,
|
|
"loss": 0.1446,
|
|
"step": 6400
|
|
},
|
|
{
|
|
"epoch": 31.336180597925566,
|
|
"grad_norm": 2.9396300315856934,
|
|
"learning_rate": 0.00015855737704918035,
|
|
"loss": 0.1505,
|
|
"step": 6420
|
|
},
|
|
{
|
|
"epoch": 31.433801098230628,
|
|
"grad_norm": 4.066432476043701,
|
|
"learning_rate": 0.0001584262295081967,
|
|
"loss": 0.1576,
|
|
"step": 6440
|
|
},
|
|
{
|
|
"epoch": 31.531421598535694,
|
|
"grad_norm": 3.7003040313720703,
|
|
"learning_rate": 0.00015829508196721313,
|
|
"loss": 0.1629,
|
|
"step": 6460
|
|
},
|
|
{
|
|
"epoch": 31.629042098840756,
|
|
"grad_norm": 3.1914453506469727,
|
|
"learning_rate": 0.00015816393442622952,
|
|
"loss": 0.1497,
|
|
"step": 6480
|
|
},
|
|
{
|
|
"epoch": 31.726662599145822,
|
|
"grad_norm": 3.582771062850952,
|
|
"learning_rate": 0.0001580327868852459,
|
|
"loss": 0.1535,
|
|
"step": 6500
|
|
},
|
|
{
|
|
"epoch": 31.824283099450884,
|
|
"grad_norm": 2.601606845855713,
|
|
"learning_rate": 0.0001579016393442623,
|
|
"loss": 0.1692,
|
|
"step": 6520
|
|
},
|
|
{
|
|
"epoch": 31.92190359975595,
|
|
"grad_norm": 3.206125020980835,
|
|
"learning_rate": 0.0001577704918032787,
|
|
"loss": 0.1529,
|
|
"step": 6540
|
|
},
|
|
{
|
|
"epoch": 32.01952410006101,
|
|
"grad_norm": 2.8698012828826904,
|
|
"learning_rate": 0.0001576393442622951,
|
|
"loss": 0.1804,
|
|
"step": 6560
|
|
},
|
|
{
|
|
"epoch": 32.117144600366075,
|
|
"grad_norm": 2.53930401802063,
|
|
"learning_rate": 0.00015750819672131147,
|
|
"loss": 0.1227,
|
|
"step": 6580
|
|
},
|
|
{
|
|
"epoch": 32.214765100671144,
|
|
"grad_norm": 2.9354329109191895,
|
|
"learning_rate": 0.00015737704918032788,
|
|
"loss": 0.1299,
|
|
"step": 6600
|
|
},
|
|
{
|
|
"epoch": 32.31238560097621,
|
|
"grad_norm": 3.0723047256469727,
|
|
"learning_rate": 0.00015724590163934427,
|
|
"loss": 0.1467,
|
|
"step": 6620
|
|
},
|
|
{
|
|
"epoch": 32.41000610128127,
|
|
"grad_norm": 2.8421099185943604,
|
|
"learning_rate": 0.00015711475409836066,
|
|
"loss": 0.1468,
|
|
"step": 6640
|
|
},
|
|
{
|
|
"epoch": 32.50762660158633,
|
|
"grad_norm": 3.185232400894165,
|
|
"learning_rate": 0.00015698360655737705,
|
|
"loss": 0.1601,
|
|
"step": 6660
|
|
},
|
|
{
|
|
"epoch": 32.6052471018914,
|
|
"grad_norm": 3.1354596614837646,
|
|
"learning_rate": 0.00015685245901639344,
|
|
"loss": 0.1466,
|
|
"step": 6680
|
|
},
|
|
{
|
|
"epoch": 32.70286760219646,
|
|
"grad_norm": 3.695125102996826,
|
|
"learning_rate": 0.00015672131147540986,
|
|
"loss": 0.1535,
|
|
"step": 6700
|
|
},
|
|
{
|
|
"epoch": 32.800488102501525,
|
|
"grad_norm": 3.092844009399414,
|
|
"learning_rate": 0.00015659016393442622,
|
|
"loss": 0.1625,
|
|
"step": 6720
|
|
},
|
|
{
|
|
"epoch": 32.89810860280659,
|
|
"grad_norm": 3.8023808002471924,
|
|
"learning_rate": 0.00015645901639344263,
|
|
"loss": 0.1623,
|
|
"step": 6740
|
|
},
|
|
{
|
|
"epoch": 32.99572910311166,
|
|
"grad_norm": 2.934077739715576,
|
|
"learning_rate": 0.00015632786885245902,
|
|
"loss": 0.1629,
|
|
"step": 6760
|
|
},
|
|
{
|
|
"epoch": 33.09334960341672,
|
|
"grad_norm": 2.633199453353882,
|
|
"learning_rate": 0.0001561967213114754,
|
|
"loss": 0.1267,
|
|
"step": 6780
|
|
},
|
|
{
|
|
"epoch": 33.19097010372178,
|
|
"grad_norm": 2.3645660877227783,
|
|
"learning_rate": 0.0001560655737704918,
|
|
"loss": 0.124,
|
|
"step": 6800
|
|
},
|
|
{
|
|
"epoch": 33.288590604026844,
|
|
"grad_norm": 3.1801674365997314,
|
|
"learning_rate": 0.0001559344262295082,
|
|
"loss": 0.1207,
|
|
"step": 6820
|
|
},
|
|
{
|
|
"epoch": 33.38621110433191,
|
|
"grad_norm": 3.998974323272705,
|
|
"learning_rate": 0.0001558032786885246,
|
|
"loss": 0.1397,
|
|
"step": 6840
|
|
},
|
|
{
|
|
"epoch": 33.483831604636975,
|
|
"grad_norm": 2.8653316497802734,
|
|
"learning_rate": 0.000155672131147541,
|
|
"loss": 0.1366,
|
|
"step": 6860
|
|
},
|
|
{
|
|
"epoch": 33.58145210494204,
|
|
"grad_norm": 3.3324031829833984,
|
|
"learning_rate": 0.0001555409836065574,
|
|
"loss": 0.1418,
|
|
"step": 6880
|
|
},
|
|
{
|
|
"epoch": 33.6790726052471,
|
|
"grad_norm": 2.5941219329833984,
|
|
"learning_rate": 0.00015540983606557378,
|
|
"loss": 0.1431,
|
|
"step": 6900
|
|
},
|
|
{
|
|
"epoch": 33.77669310555217,
|
|
"grad_norm": 3.388183116912842,
|
|
"learning_rate": 0.00015527868852459017,
|
|
"loss": 0.15,
|
|
"step": 6920
|
|
},
|
|
{
|
|
"epoch": 33.87431360585723,
|
|
"grad_norm": 2.9687812328338623,
|
|
"learning_rate": 0.00015514754098360656,
|
|
"loss": 0.1471,
|
|
"step": 6940
|
|
},
|
|
{
|
|
"epoch": 33.971934106162294,
|
|
"grad_norm": 3.783984899520874,
|
|
"learning_rate": 0.00015501639344262294,
|
|
"loss": 0.1822,
|
|
"step": 6960
|
|
},
|
|
{
|
|
"epoch": 34.06955460646736,
|
|
"grad_norm": 2.6898393630981445,
|
|
"learning_rate": 0.00015488524590163936,
|
|
"loss": 0.1217,
|
|
"step": 6980
|
|
},
|
|
{
|
|
"epoch": 34.16717510677242,
|
|
"grad_norm": 2.337120532989502,
|
|
"learning_rate": 0.00015475409836065575,
|
|
"loss": 0.1261,
|
|
"step": 7000
|
|
},
|
|
{
|
|
"epoch": 34.26479560707749,
|
|
"grad_norm": 2.8725621700286865,
|
|
"learning_rate": 0.00015462295081967214,
|
|
"loss": 0.1376,
|
|
"step": 7020
|
|
},
|
|
{
|
|
"epoch": 34.36241610738255,
|
|
"grad_norm": 2.8058671951293945,
|
|
"learning_rate": 0.00015449180327868853,
|
|
"loss": 0.1307,
|
|
"step": 7040
|
|
},
|
|
{
|
|
"epoch": 34.46003660768761,
|
|
"grad_norm": 2.90006685256958,
|
|
"learning_rate": 0.00015436065573770492,
|
|
"loss": 0.1364,
|
|
"step": 7060
|
|
},
|
|
{
|
|
"epoch": 34.557657107992675,
|
|
"grad_norm": 4.001739978790283,
|
|
"learning_rate": 0.00015422950819672133,
|
|
"loss": 0.1416,
|
|
"step": 7080
|
|
},
|
|
{
|
|
"epoch": 34.655277608297745,
|
|
"grad_norm": 3.0421645641326904,
|
|
"learning_rate": 0.0001540983606557377,
|
|
"loss": 0.137,
|
|
"step": 7100
|
|
},
|
|
{
|
|
"epoch": 34.75289810860281,
|
|
"grad_norm": 3.429624080657959,
|
|
"learning_rate": 0.00015396721311475411,
|
|
"loss": 0.1524,
|
|
"step": 7120
|
|
},
|
|
{
|
|
"epoch": 34.85051860890787,
|
|
"grad_norm": 2.7907767295837402,
|
|
"learning_rate": 0.0001538360655737705,
|
|
"loss": 0.1344,
|
|
"step": 7140
|
|
},
|
|
{
|
|
"epoch": 34.94813910921293,
|
|
"grad_norm": 2.5428404808044434,
|
|
"learning_rate": 0.0001537049180327869,
|
|
"loss": 0.1329,
|
|
"step": 7160
|
|
},
|
|
{
|
|
"epoch": 35.045759609518,
|
|
"grad_norm": 3.0310592651367188,
|
|
"learning_rate": 0.00015357377049180328,
|
|
"loss": 0.1289,
|
|
"step": 7180
|
|
},
|
|
{
|
|
"epoch": 35.14338010982306,
|
|
"grad_norm": 2.757962226867676,
|
|
"learning_rate": 0.00015344262295081967,
|
|
"loss": 0.1148,
|
|
"step": 7200
|
|
},
|
|
{
|
|
"epoch": 35.241000610128125,
|
|
"grad_norm": 2.47044038772583,
|
|
"learning_rate": 0.0001533114754098361,
|
|
"loss": 0.1234,
|
|
"step": 7220
|
|
},
|
|
{
|
|
"epoch": 35.33862111043319,
|
|
"grad_norm": 2.766308546066284,
|
|
"learning_rate": 0.00015318032786885245,
|
|
"loss": 0.1154,
|
|
"step": 7240
|
|
},
|
|
{
|
|
"epoch": 35.43624161073826,
|
|
"grad_norm": 3.6302130222320557,
|
|
"learning_rate": 0.00015304918032786887,
|
|
"loss": 0.1323,
|
|
"step": 7260
|
|
},
|
|
{
|
|
"epoch": 35.53386211104332,
|
|
"grad_norm": 3.4185805320739746,
|
|
"learning_rate": 0.00015291803278688526,
|
|
"loss": 0.1409,
|
|
"step": 7280
|
|
},
|
|
{
|
|
"epoch": 35.63148261134838,
|
|
"grad_norm": 2.238168954849243,
|
|
"learning_rate": 0.00015278688524590165,
|
|
"loss": 0.1216,
|
|
"step": 7300
|
|
},
|
|
{
|
|
"epoch": 35.729103111653444,
|
|
"grad_norm": 3.636718988418579,
|
|
"learning_rate": 0.00015265573770491803,
|
|
"loss": 0.1431,
|
|
"step": 7320
|
|
},
|
|
{
|
|
"epoch": 35.82672361195851,
|
|
"grad_norm": 3.1300854682922363,
|
|
"learning_rate": 0.00015252459016393442,
|
|
"loss": 0.1472,
|
|
"step": 7340
|
|
},
|
|
{
|
|
"epoch": 35.924344112263576,
|
|
"grad_norm": 3.463369846343994,
|
|
"learning_rate": 0.00015239344262295084,
|
|
"loss": 0.1461,
|
|
"step": 7360
|
|
},
|
|
{
|
|
"epoch": 36.02196461256864,
|
|
"grad_norm": 4.020668983459473,
|
|
"learning_rate": 0.0001522622950819672,
|
|
"loss": 0.1276,
|
|
"step": 7380
|
|
},
|
|
{
|
|
"epoch": 36.1195851128737,
|
|
"grad_norm": 3.0597307682037354,
|
|
"learning_rate": 0.00015213114754098362,
|
|
"loss": 0.1137,
|
|
"step": 7400
|
|
},
|
|
{
|
|
"epoch": 36.21720561317877,
|
|
"grad_norm": 2.3329412937164307,
|
|
"learning_rate": 0.000152,
|
|
"loss": 0.1271,
|
|
"step": 7420
|
|
},
|
|
{
|
|
"epoch": 36.31482611348383,
|
|
"grad_norm": 3.5883936882019043,
|
|
"learning_rate": 0.0001518688524590164,
|
|
"loss": 0.1112,
|
|
"step": 7440
|
|
},
|
|
{
|
|
"epoch": 36.412446613788894,
|
|
"grad_norm": 2.7755954265594482,
|
|
"learning_rate": 0.0001517377049180328,
|
|
"loss": 0.1123,
|
|
"step": 7460
|
|
},
|
|
{
|
|
"epoch": 36.51006711409396,
|
|
"grad_norm": 3.594489336013794,
|
|
"learning_rate": 0.00015160655737704918,
|
|
"loss": 0.1338,
|
|
"step": 7480
|
|
},
|
|
{
|
|
"epoch": 36.607687614399026,
|
|
"grad_norm": 2.8571972846984863,
|
|
"learning_rate": 0.0001514754098360656,
|
|
"loss": 0.1256,
|
|
"step": 7500
|
|
},
|
|
{
|
|
"epoch": 36.70530811470409,
|
|
"grad_norm": 3.561603307723999,
|
|
"learning_rate": 0.00015134426229508198,
|
|
"loss": 0.1239,
|
|
"step": 7520
|
|
},
|
|
{
|
|
"epoch": 36.80292861500915,
|
|
"grad_norm": 3.265953302383423,
|
|
"learning_rate": 0.00015121311475409837,
|
|
"loss": 0.1348,
|
|
"step": 7540
|
|
},
|
|
{
|
|
"epoch": 36.90054911531421,
|
|
"grad_norm": 3.5381383895874023,
|
|
"learning_rate": 0.00015108196721311476,
|
|
"loss": 0.1402,
|
|
"step": 7560
|
|
},
|
|
{
|
|
"epoch": 36.99816961561928,
|
|
"grad_norm": 3.6995930671691895,
|
|
"learning_rate": 0.00015095081967213115,
|
|
"loss": 0.1323,
|
|
"step": 7580
|
|
},
|
|
{
|
|
"epoch": 37.095790115924345,
|
|
"grad_norm": 2.1127350330352783,
|
|
"learning_rate": 0.00015081967213114754,
|
|
"loss": 0.1045,
|
|
"step": 7600
|
|
},
|
|
{
|
|
"epoch": 37.19341061622941,
|
|
"grad_norm": 2.4780166149139404,
|
|
"learning_rate": 0.00015068852459016393,
|
|
"loss": 0.1173,
|
|
"step": 7620
|
|
},
|
|
{
|
|
"epoch": 37.29103111653447,
|
|
"grad_norm": 2.4735465049743652,
|
|
"learning_rate": 0.00015055737704918035,
|
|
"loss": 0.1158,
|
|
"step": 7640
|
|
},
|
|
{
|
|
"epoch": 37.38865161683954,
|
|
"grad_norm": 4.070529937744141,
|
|
"learning_rate": 0.00015042622950819673,
|
|
"loss": 0.1222,
|
|
"step": 7660
|
|
},
|
|
{
|
|
"epoch": 37.4862721171446,
|
|
"grad_norm": 2.87009596824646,
|
|
"learning_rate": 0.00015029508196721312,
|
|
"loss": 0.1237,
|
|
"step": 7680
|
|
},
|
|
{
|
|
"epoch": 37.58389261744966,
|
|
"grad_norm": 1.9312514066696167,
|
|
"learning_rate": 0.00015016393442622951,
|
|
"loss": 0.1277,
|
|
"step": 7700
|
|
},
|
|
{
|
|
"epoch": 37.681513117754726,
|
|
"grad_norm": 2.3069088459014893,
|
|
"learning_rate": 0.0001500327868852459,
|
|
"loss": 0.1204,
|
|
"step": 7720
|
|
},
|
|
{
|
|
"epoch": 37.779133618059795,
|
|
"grad_norm": 3.6415116786956787,
|
|
"learning_rate": 0.00014990163934426232,
|
|
"loss": 0.1251,
|
|
"step": 7740
|
|
},
|
|
{
|
|
"epoch": 37.87675411836486,
|
|
"grad_norm": 2.912848472595215,
|
|
"learning_rate": 0.00014977049180327868,
|
|
"loss": 0.1277,
|
|
"step": 7760
|
|
},
|
|
{
|
|
"epoch": 37.97437461866992,
|
|
"grad_norm": 3.3791749477386475,
|
|
"learning_rate": 0.0001496393442622951,
|
|
"loss": 0.1215,
|
|
"step": 7780
|
|
},
|
|
{
|
|
"epoch": 38.07199511897498,
|
|
"grad_norm": 3.6682169437408447,
|
|
"learning_rate": 0.0001495081967213115,
|
|
"loss": 0.1131,
|
|
"step": 7800
|
|
},
|
|
{
|
|
"epoch": 38.16961561928005,
|
|
"grad_norm": 2.891191005706787,
|
|
"learning_rate": 0.00014937704918032788,
|
|
"loss": 0.1105,
|
|
"step": 7820
|
|
},
|
|
{
|
|
"epoch": 38.267236119585114,
|
|
"grad_norm": 2.662431478500366,
|
|
"learning_rate": 0.00014924590163934427,
|
|
"loss": 0.1144,
|
|
"step": 7840
|
|
},
|
|
{
|
|
"epoch": 38.364856619890176,
|
|
"grad_norm": 3.193325996398926,
|
|
"learning_rate": 0.00014911475409836066,
|
|
"loss": 0.1102,
|
|
"step": 7860
|
|
},
|
|
{
|
|
"epoch": 38.46247712019524,
|
|
"grad_norm": 2.497185230255127,
|
|
"learning_rate": 0.00014898360655737707,
|
|
"loss": 0.101,
|
|
"step": 7880
|
|
},
|
|
{
|
|
"epoch": 38.56009762050031,
|
|
"grad_norm": 2.6551053524017334,
|
|
"learning_rate": 0.00014885245901639343,
|
|
"loss": 0.1178,
|
|
"step": 7900
|
|
},
|
|
{
|
|
"epoch": 38.65771812080537,
|
|
"grad_norm": 3.329808473587036,
|
|
"learning_rate": 0.00014872131147540985,
|
|
"loss": 0.11,
|
|
"step": 7920
|
|
},
|
|
{
|
|
"epoch": 38.75533862111043,
|
|
"grad_norm": 2.581897258758545,
|
|
"learning_rate": 0.00014859016393442624,
|
|
"loss": 0.1228,
|
|
"step": 7940
|
|
},
|
|
{
|
|
"epoch": 38.852959121415495,
|
|
"grad_norm": 2.805368423461914,
|
|
"learning_rate": 0.00014845901639344263,
|
|
"loss": 0.1249,
|
|
"step": 7960
|
|
},
|
|
{
|
|
"epoch": 38.950579621720564,
|
|
"grad_norm": 2.7007319927215576,
|
|
"learning_rate": 0.00014832786885245902,
|
|
"loss": 0.1319,
|
|
"step": 7980
|
|
},
|
|
{
|
|
"epoch": 39.04820012202563,
|
|
"grad_norm": 2.029555082321167,
|
|
"learning_rate": 0.0001481967213114754,
|
|
"loss": 0.1132,
|
|
"step": 8000
|
|
},
|
|
{
|
|
"epoch": 39.14582062233069,
|
|
"grad_norm": 1.6910994052886963,
|
|
"learning_rate": 0.00014806557377049182,
|
|
"loss": 0.0968,
|
|
"step": 8020
|
|
},
|
|
{
|
|
"epoch": 39.24344112263575,
|
|
"grad_norm": 2.706334114074707,
|
|
"learning_rate": 0.0001479344262295082,
|
|
"loss": 0.1014,
|
|
"step": 8040
|
|
},
|
|
{
|
|
"epoch": 39.34106162294082,
|
|
"grad_norm": 2.848043441772461,
|
|
"learning_rate": 0.0001478032786885246,
|
|
"loss": 0.114,
|
|
"step": 8060
|
|
},
|
|
{
|
|
"epoch": 39.43868212324588,
|
|
"grad_norm": 2.6262383460998535,
|
|
"learning_rate": 0.000147672131147541,
|
|
"loss": 0.1144,
|
|
"step": 8080
|
|
},
|
|
{
|
|
"epoch": 39.536302623550945,
|
|
"grad_norm": 2.2859811782836914,
|
|
"learning_rate": 0.00014754098360655738,
|
|
"loss": 0.1151,
|
|
"step": 8100
|
|
},
|
|
{
|
|
"epoch": 39.63392312385601,
|
|
"grad_norm": 2.8703114986419678,
|
|
"learning_rate": 0.00014740983606557377,
|
|
"loss": 0.1084,
|
|
"step": 8120
|
|
},
|
|
{
|
|
"epoch": 39.73154362416108,
|
|
"grad_norm": 2.1996986865997314,
|
|
"learning_rate": 0.00014727868852459016,
|
|
"loss": 0.1268,
|
|
"step": 8140
|
|
},
|
|
{
|
|
"epoch": 39.82916412446614,
|
|
"grad_norm": 2.9079976081848145,
|
|
"learning_rate": 0.00014714754098360658,
|
|
"loss": 0.1184,
|
|
"step": 8160
|
|
},
|
|
{
|
|
"epoch": 39.9267846247712,
|
|
"grad_norm": 3.584907293319702,
|
|
"learning_rate": 0.00014701639344262297,
|
|
"loss": 0.131,
|
|
"step": 8180
|
|
},
|
|
{
|
|
"epoch": 40.024405125076264,
|
|
"grad_norm": 2.2893424034118652,
|
|
"learning_rate": 0.00014688524590163936,
|
|
"loss": 0.1111,
|
|
"step": 8200
|
|
},
|
|
{
|
|
"epoch": 40.12202562538133,
|
|
"grad_norm": 3.5768001079559326,
|
|
"learning_rate": 0.00014675409836065575,
|
|
"loss": 0.0993,
|
|
"step": 8220
|
|
},
|
|
{
|
|
"epoch": 40.219646125686396,
|
|
"grad_norm": 4.004862308502197,
|
|
"learning_rate": 0.00014662295081967214,
|
|
"loss": 0.1048,
|
|
"step": 8240
|
|
},
|
|
{
|
|
"epoch": 40.31726662599146,
|
|
"grad_norm": 2.0014472007751465,
|
|
"learning_rate": 0.00014649180327868852,
|
|
"loss": 0.1003,
|
|
"step": 8260
|
|
},
|
|
{
|
|
"epoch": 40.41488712629652,
|
|
"grad_norm": 2.492799997329712,
|
|
"learning_rate": 0.00014636065573770491,
|
|
"loss": 0.102,
|
|
"step": 8280
|
|
},
|
|
{
|
|
"epoch": 40.51250762660159,
|
|
"grad_norm": 2.779287099838257,
|
|
"learning_rate": 0.00014622950819672133,
|
|
"loss": 0.1035,
|
|
"step": 8300
|
|
},
|
|
{
|
|
"epoch": 40.61012812690665,
|
|
"grad_norm": 3.0162174701690674,
|
|
"learning_rate": 0.00014609836065573772,
|
|
"loss": 0.1112,
|
|
"step": 8320
|
|
},
|
|
{
|
|
"epoch": 40.707748627211714,
|
|
"grad_norm": 3.1944477558135986,
|
|
"learning_rate": 0.0001459672131147541,
|
|
"loss": 0.1167,
|
|
"step": 8340
|
|
},
|
|
{
|
|
"epoch": 40.80536912751678,
|
|
"grad_norm": 2.849069833755493,
|
|
"learning_rate": 0.0001458360655737705,
|
|
"loss": 0.1111,
|
|
"step": 8360
|
|
},
|
|
{
|
|
"epoch": 40.902989627821846,
|
|
"grad_norm": 3.3523714542388916,
|
|
"learning_rate": 0.0001457049180327869,
|
|
"loss": 0.1157,
|
|
"step": 8380
|
|
},
|
|
{
|
|
"epoch": 41.00061012812691,
|
|
"grad_norm": 3.1365504264831543,
|
|
"learning_rate": 0.0001455737704918033,
|
|
"loss": 0.1228,
|
|
"step": 8400
|
|
},
|
|
{
|
|
"epoch": 41.09823062843197,
|
|
"grad_norm": 2.1375391483306885,
|
|
"learning_rate": 0.00014544262295081967,
|
|
"loss": 0.0856,
|
|
"step": 8420
|
|
},
|
|
{
|
|
"epoch": 41.19585112873703,
|
|
"grad_norm": 2.6286661624908447,
|
|
"learning_rate": 0.00014531147540983608,
|
|
"loss": 0.0944,
|
|
"step": 8440
|
|
},
|
|
{
|
|
"epoch": 41.2934716290421,
|
|
"grad_norm": 1.7328729629516602,
|
|
"learning_rate": 0.00014518032786885247,
|
|
"loss": 0.0933,
|
|
"step": 8460
|
|
},
|
|
{
|
|
"epoch": 41.391092129347165,
|
|
"grad_norm": 2.313880205154419,
|
|
"learning_rate": 0.00014504918032786886,
|
|
"loss": 0.1063,
|
|
"step": 8480
|
|
},
|
|
{
|
|
"epoch": 41.48871262965223,
|
|
"grad_norm": 2.3604514598846436,
|
|
"learning_rate": 0.00014491803278688525,
|
|
"loss": 0.0963,
|
|
"step": 8500
|
|
},
|
|
{
|
|
"epoch": 41.58633312995729,
|
|
"grad_norm": 2.1344621181488037,
|
|
"learning_rate": 0.00014478688524590164,
|
|
"loss": 0.1084,
|
|
"step": 8520
|
|
},
|
|
{
|
|
"epoch": 41.68395363026236,
|
|
"grad_norm": 3.828547239303589,
|
|
"learning_rate": 0.00014465573770491806,
|
|
"loss": 0.112,
|
|
"step": 8540
|
|
},
|
|
{
|
|
"epoch": 41.78157413056742,
|
|
"grad_norm": 2.653571367263794,
|
|
"learning_rate": 0.00014452459016393442,
|
|
"loss": 0.1241,
|
|
"step": 8560
|
|
},
|
|
{
|
|
"epoch": 41.87919463087248,
|
|
"grad_norm": 2.482323408126831,
|
|
"learning_rate": 0.00014439344262295084,
|
|
"loss": 0.1096,
|
|
"step": 8580
|
|
},
|
|
{
|
|
"epoch": 41.976815131177545,
|
|
"grad_norm": 2.309413433074951,
|
|
"learning_rate": 0.00014426229508196722,
|
|
"loss": 0.1191,
|
|
"step": 8600
|
|
},
|
|
{
|
|
"epoch": 42.074435631482615,
|
|
"grad_norm": 2.5040862560272217,
|
|
"learning_rate": 0.00014413114754098361,
|
|
"loss": 0.0932,
|
|
"step": 8620
|
|
},
|
|
{
|
|
"epoch": 42.17205613178768,
|
|
"grad_norm": 2.5942790508270264,
|
|
"learning_rate": 0.000144,
|
|
"loss": 0.0934,
|
|
"step": 8640
|
|
},
|
|
{
|
|
"epoch": 42.26967663209274,
|
|
"grad_norm": 2.0345799922943115,
|
|
"learning_rate": 0.0001438688524590164,
|
|
"loss": 0.0913,
|
|
"step": 8660
|
|
},
|
|
{
|
|
"epoch": 42.3672971323978,
|
|
"grad_norm": 2.216151714324951,
|
|
"learning_rate": 0.0001437377049180328,
|
|
"loss": 0.0966,
|
|
"step": 8680
|
|
},
|
|
{
|
|
"epoch": 42.464917632702864,
|
|
"grad_norm": 2.8628721237182617,
|
|
"learning_rate": 0.00014360655737704917,
|
|
"loss": 0.102,
|
|
"step": 8700
|
|
},
|
|
{
|
|
"epoch": 42.56253813300793,
|
|
"grad_norm": 3.280900239944458,
|
|
"learning_rate": 0.0001434754098360656,
|
|
"loss": 0.0922,
|
|
"step": 8720
|
|
},
|
|
{
|
|
"epoch": 42.660158633312996,
|
|
"grad_norm": 2.90677809715271,
|
|
"learning_rate": 0.00014334426229508198,
|
|
"loss": 0.1027,
|
|
"step": 8740
|
|
},
|
|
{
|
|
"epoch": 42.75777913361806,
|
|
"grad_norm": 3.169517755508423,
|
|
"learning_rate": 0.00014321311475409837,
|
|
"loss": 0.112,
|
|
"step": 8760
|
|
},
|
|
{
|
|
"epoch": 42.85539963392312,
|
|
"grad_norm": 2.617112159729004,
|
|
"learning_rate": 0.00014308196721311476,
|
|
"loss": 0.1063,
|
|
"step": 8780
|
|
},
|
|
{
|
|
"epoch": 42.95302013422819,
|
|
"grad_norm": 2.3260226249694824,
|
|
"learning_rate": 0.00014295081967213115,
|
|
"loss": 0.1129,
|
|
"step": 8800
|
|
},
|
|
{
|
|
"epoch": 43.05064063453325,
|
|
"grad_norm": 2.255563735961914,
|
|
"learning_rate": 0.00014281967213114756,
|
|
"loss": 0.0943,
|
|
"step": 8820
|
|
},
|
|
{
|
|
"epoch": 43.148261134838314,
|
|
"grad_norm": 2.7513527870178223,
|
|
"learning_rate": 0.00014268852459016395,
|
|
"loss": 0.0929,
|
|
"step": 8840
|
|
},
|
|
{
|
|
"epoch": 43.24588163514338,
|
|
"grad_norm": 1.92955482006073,
|
|
"learning_rate": 0.00014255737704918034,
|
|
"loss": 0.0871,
|
|
"step": 8860
|
|
},
|
|
{
|
|
"epoch": 43.343502135448446,
|
|
"grad_norm": 2.5168724060058594,
|
|
"learning_rate": 0.00014242622950819673,
|
|
"loss": 0.0915,
|
|
"step": 8880
|
|
},
|
|
{
|
|
"epoch": 43.44112263575351,
|
|
"grad_norm": 2.497450351715088,
|
|
"learning_rate": 0.00014229508196721312,
|
|
"loss": 0.094,
|
|
"step": 8900
|
|
},
|
|
{
|
|
"epoch": 43.53874313605857,
|
|
"grad_norm": 2.645716667175293,
|
|
"learning_rate": 0.0001421639344262295,
|
|
"loss": 0.1039,
|
|
"step": 8920
|
|
},
|
|
{
|
|
"epoch": 43.63636363636363,
|
|
"grad_norm": 2.7844698429107666,
|
|
"learning_rate": 0.0001420327868852459,
|
|
"loss": 0.0979,
|
|
"step": 8940
|
|
},
|
|
{
|
|
"epoch": 43.7339841366687,
|
|
"grad_norm": 2.3445496559143066,
|
|
"learning_rate": 0.00014190163934426231,
|
|
"loss": 0.0936,
|
|
"step": 8960
|
|
},
|
|
{
|
|
"epoch": 43.831604636973765,
|
|
"grad_norm": 3.3879520893096924,
|
|
"learning_rate": 0.0001417704918032787,
|
|
"loss": 0.1147,
|
|
"step": 8980
|
|
},
|
|
{
|
|
"epoch": 43.92922513727883,
|
|
"grad_norm": 2.5144670009613037,
|
|
"learning_rate": 0.0001416393442622951,
|
|
"loss": 0.1064,
|
|
"step": 9000
|
|
},
|
|
{
|
|
"epoch": 44.02684563758389,
|
|
"grad_norm": 1.741097092628479,
|
|
"learning_rate": 0.00014150819672131148,
|
|
"loss": 0.0966,
|
|
"step": 9020
|
|
},
|
|
{
|
|
"epoch": 44.12446613788896,
|
|
"grad_norm": 2.3388848304748535,
|
|
"learning_rate": 0.00014137704918032787,
|
|
"loss": 0.0877,
|
|
"step": 9040
|
|
},
|
|
{
|
|
"epoch": 44.22208663819402,
|
|
"grad_norm": 1.8241106271743774,
|
|
"learning_rate": 0.00014124590163934426,
|
|
"loss": 0.0937,
|
|
"step": 9060
|
|
},
|
|
{
|
|
"epoch": 44.31970713849908,
|
|
"grad_norm": 2.50146222114563,
|
|
"learning_rate": 0.00014111475409836065,
|
|
"loss": 0.0877,
|
|
"step": 9080
|
|
},
|
|
{
|
|
"epoch": 44.417327638804146,
|
|
"grad_norm": 2.029155969619751,
|
|
"learning_rate": 0.00014098360655737707,
|
|
"loss": 0.0841,
|
|
"step": 9100
|
|
},
|
|
{
|
|
"epoch": 44.514948139109215,
|
|
"grad_norm": 3.042222738265991,
|
|
"learning_rate": 0.00014085245901639346,
|
|
"loss": 0.0956,
|
|
"step": 9120
|
|
},
|
|
{
|
|
"epoch": 44.61256863941428,
|
|
"grad_norm": 3.1099298000335693,
|
|
"learning_rate": 0.00014072131147540985,
|
|
"loss": 0.1026,
|
|
"step": 9140
|
|
},
|
|
{
|
|
"epoch": 44.71018913971934,
|
|
"grad_norm": 2.7761776447296143,
|
|
"learning_rate": 0.00014059016393442624,
|
|
"loss": 0.0994,
|
|
"step": 9160
|
|
},
|
|
{
|
|
"epoch": 44.8078096400244,
|
|
"grad_norm": 4.281795501708984,
|
|
"learning_rate": 0.00014045901639344262,
|
|
"loss": 0.0995,
|
|
"step": 9180
|
|
},
|
|
{
|
|
"epoch": 44.90543014032947,
|
|
"grad_norm": 1.9752144813537598,
|
|
"learning_rate": 0.00014032786885245904,
|
|
"loss": 0.1037,
|
|
"step": 9200
|
|
},
|
|
{
|
|
"epoch": 45.003050640634534,
|
|
"grad_norm": 2.5847785472869873,
|
|
"learning_rate": 0.0001401967213114754,
|
|
"loss": 0.1025,
|
|
"step": 9220
|
|
},
|
|
{
|
|
"epoch": 45.100671140939596,
|
|
"grad_norm": 3.0032763481140137,
|
|
"learning_rate": 0.00014006557377049182,
|
|
"loss": 0.0794,
|
|
"step": 9240
|
|
},
|
|
{
|
|
"epoch": 45.19829164124466,
|
|
"grad_norm": 3.35851788520813,
|
|
"learning_rate": 0.0001399344262295082,
|
|
"loss": 0.091,
|
|
"step": 9260
|
|
},
|
|
{
|
|
"epoch": 45.29591214154973,
|
|
"grad_norm": 2.5405397415161133,
|
|
"learning_rate": 0.0001398032786885246,
|
|
"loss": 0.0839,
|
|
"step": 9280
|
|
},
|
|
{
|
|
"epoch": 45.39353264185479,
|
|
"grad_norm": 3.7618603706359863,
|
|
"learning_rate": 0.000139672131147541,
|
|
"loss": 0.0909,
|
|
"step": 9300
|
|
},
|
|
{
|
|
"epoch": 45.49115314215985,
|
|
"grad_norm": 2.1094727516174316,
|
|
"learning_rate": 0.00013954098360655738,
|
|
"loss": 0.0884,
|
|
"step": 9320
|
|
},
|
|
{
|
|
"epoch": 45.588773642464915,
|
|
"grad_norm": 2.060983657836914,
|
|
"learning_rate": 0.0001394098360655738,
|
|
"loss": 0.0918,
|
|
"step": 9340
|
|
},
|
|
{
|
|
"epoch": 45.686394142769984,
|
|
"grad_norm": 2.426604747772217,
|
|
"learning_rate": 0.00013927868852459016,
|
|
"loss": 0.0943,
|
|
"step": 9360
|
|
},
|
|
{
|
|
"epoch": 45.78401464307505,
|
|
"grad_norm": 2.716404438018799,
|
|
"learning_rate": 0.00013914754098360657,
|
|
"loss": 0.0985,
|
|
"step": 9380
|
|
},
|
|
{
|
|
"epoch": 45.88163514338011,
|
|
"grad_norm": 2.1184298992156982,
|
|
"learning_rate": 0.00013901639344262296,
|
|
"loss": 0.0941,
|
|
"step": 9400
|
|
},
|
|
{
|
|
"epoch": 45.97925564368517,
|
|
"grad_norm": 2.4884073734283447,
|
|
"learning_rate": 0.00013888524590163935,
|
|
"loss": 0.1097,
|
|
"step": 9420
|
|
},
|
|
{
|
|
"epoch": 46.07687614399024,
|
|
"grad_norm": 2.272751808166504,
|
|
"learning_rate": 0.00013875409836065574,
|
|
"loss": 0.088,
|
|
"step": 9440
|
|
},
|
|
{
|
|
"epoch": 46.1744966442953,
|
|
"grad_norm": 2.296656847000122,
|
|
"learning_rate": 0.00013862295081967213,
|
|
"loss": 0.0813,
|
|
"step": 9460
|
|
},
|
|
{
|
|
"epoch": 46.272117144600365,
|
|
"grad_norm": 2.6703407764434814,
|
|
"learning_rate": 0.00013849180327868855,
|
|
"loss": 0.0837,
|
|
"step": 9480
|
|
},
|
|
{
|
|
"epoch": 46.36973764490543,
|
|
"grad_norm": 3.1443161964416504,
|
|
"learning_rate": 0.0001383606557377049,
|
|
"loss": 0.0876,
|
|
"step": 9500
|
|
},
|
|
{
|
|
"epoch": 46.4673581452105,
|
|
"grad_norm": 2.6503586769104004,
|
|
"learning_rate": 0.00013822950819672133,
|
|
"loss": 0.0835,
|
|
"step": 9520
|
|
},
|
|
{
|
|
"epoch": 46.56497864551556,
|
|
"grad_norm": 2.3241240978240967,
|
|
"learning_rate": 0.00013809836065573771,
|
|
"loss": 0.0996,
|
|
"step": 9540
|
|
},
|
|
{
|
|
"epoch": 46.66259914582062,
|
|
"grad_norm": 2.8960094451904297,
|
|
"learning_rate": 0.0001379672131147541,
|
|
"loss": 0.0904,
|
|
"step": 9560
|
|
},
|
|
{
|
|
"epoch": 46.760219646125684,
|
|
"grad_norm": 3.4559171199798584,
|
|
"learning_rate": 0.0001378360655737705,
|
|
"loss": 0.0906,
|
|
"step": 9580
|
|
},
|
|
{
|
|
"epoch": 46.85784014643075,
|
|
"grad_norm": 2.325892925262451,
|
|
"learning_rate": 0.00013770491803278688,
|
|
"loss": 0.1012,
|
|
"step": 9600
|
|
},
|
|
{
|
|
"epoch": 46.955460646735816,
|
|
"grad_norm": 2.833704948425293,
|
|
"learning_rate": 0.0001375737704918033,
|
|
"loss": 0.0947,
|
|
"step": 9620
|
|
},
|
|
{
|
|
"epoch": 47.05308114704088,
|
|
"grad_norm": 2.060310125350952,
|
|
"learning_rate": 0.0001374426229508197,
|
|
"loss": 0.0861,
|
|
"step": 9640
|
|
},
|
|
{
|
|
"epoch": 47.15070164734594,
|
|
"grad_norm": 2.1415886878967285,
|
|
"learning_rate": 0.00013731147540983608,
|
|
"loss": 0.0775,
|
|
"step": 9660
|
|
},
|
|
{
|
|
"epoch": 47.24832214765101,
|
|
"grad_norm": 1.9478198289871216,
|
|
"learning_rate": 0.00013718032786885247,
|
|
"loss": 0.082,
|
|
"step": 9680
|
|
},
|
|
{
|
|
"epoch": 47.34594264795607,
|
|
"grad_norm": 4.712246417999268,
|
|
"learning_rate": 0.00013704918032786886,
|
|
"loss": 0.0753,
|
|
"step": 9700
|
|
},
|
|
{
|
|
"epoch": 47.443563148261134,
|
|
"grad_norm": 2.2505266666412354,
|
|
"learning_rate": 0.00013691803278688525,
|
|
"loss": 0.0809,
|
|
"step": 9720
|
|
},
|
|
{
|
|
"epoch": 47.5411836485662,
|
|
"grad_norm": 2.4680440425872803,
|
|
"learning_rate": 0.00013678688524590164,
|
|
"loss": 0.0923,
|
|
"step": 9740
|
|
},
|
|
{
|
|
"epoch": 47.638804148871266,
|
|
"grad_norm": 2.5806798934936523,
|
|
"learning_rate": 0.00013665573770491805,
|
|
"loss": 0.0938,
|
|
"step": 9760
|
|
},
|
|
{
|
|
"epoch": 47.73642464917633,
|
|
"grad_norm": 2.5440475940704346,
|
|
"learning_rate": 0.00013652459016393444,
|
|
"loss": 0.0911,
|
|
"step": 9780
|
|
},
|
|
{
|
|
"epoch": 47.83404514948139,
|
|
"grad_norm": 2.316375732421875,
|
|
"learning_rate": 0.00013639344262295083,
|
|
"loss": 0.0958,
|
|
"step": 9800
|
|
},
|
|
{
|
|
"epoch": 47.93166564978645,
|
|
"grad_norm": 2.1451222896575928,
|
|
"learning_rate": 0.00013626229508196722,
|
|
"loss": 0.0971,
|
|
"step": 9820
|
|
},
|
|
{
|
|
"epoch": 48.02928615009152,
|
|
"grad_norm": 2.381439685821533,
|
|
"learning_rate": 0.0001361311475409836,
|
|
"loss": 0.0944,
|
|
"step": 9840
|
|
},
|
|
{
|
|
"epoch": 48.126906650396585,
|
|
"grad_norm": 2.9612882137298584,
|
|
"learning_rate": 0.00013600000000000003,
|
|
"loss": 0.0676,
|
|
"step": 9860
|
|
},
|
|
{
|
|
"epoch": 48.22452715070165,
|
|
"grad_norm": 1.7020941972732544,
|
|
"learning_rate": 0.0001358688524590164,
|
|
"loss": 0.0835,
|
|
"step": 9880
|
|
},
|
|
{
|
|
"epoch": 48.32214765100671,
|
|
"grad_norm": 1.820823311805725,
|
|
"learning_rate": 0.0001357377049180328,
|
|
"loss": 0.0773,
|
|
"step": 9900
|
|
},
|
|
{
|
|
"epoch": 48.41976815131178,
|
|
"grad_norm": 1.7456953525543213,
|
|
"learning_rate": 0.0001356065573770492,
|
|
"loss": 0.0847,
|
|
"step": 9920
|
|
},
|
|
{
|
|
"epoch": 48.51738865161684,
|
|
"grad_norm": 2.277359962463379,
|
|
"learning_rate": 0.00013547540983606556,
|
|
"loss": 0.0879,
|
|
"step": 9940
|
|
},
|
|
{
|
|
"epoch": 48.6150091519219,
|
|
"grad_norm": 1.9201923608779907,
|
|
"learning_rate": 0.00013534426229508197,
|
|
"loss": 0.0886,
|
|
"step": 9960
|
|
},
|
|
{
|
|
"epoch": 48.712629652226966,
|
|
"grad_norm": 2.5674726963043213,
|
|
"learning_rate": 0.00013521311475409836,
|
|
"loss": 0.0828,
|
|
"step": 9980
|
|
},
|
|
{
|
|
"epoch": 48.810250152532035,
|
|
"grad_norm": 2.4883131980895996,
|
|
"learning_rate": 0.00013508196721311478,
|
|
"loss": 0.0949,
|
|
"step": 10000
|
|
},
|
|
{
|
|
"epoch": 48.9078706528371,
|
|
"grad_norm": 2.472350835800171,
|
|
"learning_rate": 0.00013495081967213114,
|
|
"loss": 0.0868,
|
|
"step": 10020
|
|
},
|
|
{
|
|
"epoch": 49.00549115314216,
|
|
"grad_norm": 2.2226836681365967,
|
|
"learning_rate": 0.00013481967213114756,
|
|
"loss": 0.0984,
|
|
"step": 10040
|
|
},
|
|
{
|
|
"epoch": 49.10311165344722,
|
|
"grad_norm": 1.5109550952911377,
|
|
"learning_rate": 0.00013468852459016395,
|
|
"loss": 0.0608,
|
|
"step": 10060
|
|
},
|
|
{
|
|
"epoch": 49.20073215375229,
|
|
"grad_norm": 2.532590627670288,
|
|
"learning_rate": 0.00013455737704918034,
|
|
"loss": 0.0695,
|
|
"step": 10080
|
|
},
|
|
{
|
|
"epoch": 49.298352654057354,
|
|
"grad_norm": 2.6609067916870117,
|
|
"learning_rate": 0.00013442622950819673,
|
|
"loss": 0.082,
|
|
"step": 10100
|
|
},
|
|
{
|
|
"epoch": 49.395973154362416,
|
|
"grad_norm": 2.5846164226531982,
|
|
"learning_rate": 0.00013429508196721311,
|
|
"loss": 0.08,
|
|
"step": 10120
|
|
},
|
|
{
|
|
"epoch": 49.49359365466748,
|
|
"grad_norm": 2.4516260623931885,
|
|
"learning_rate": 0.00013416393442622953,
|
|
"loss": 0.0796,
|
|
"step": 10140
|
|
},
|
|
{
|
|
"epoch": 49.59121415497255,
|
|
"grad_norm": 2.357771158218384,
|
|
"learning_rate": 0.0001340327868852459,
|
|
"loss": 0.0823,
|
|
"step": 10160
|
|
},
|
|
{
|
|
"epoch": 49.68883465527761,
|
|
"grad_norm": 2.575505018234253,
|
|
"learning_rate": 0.0001339016393442623,
|
|
"loss": 0.0899,
|
|
"step": 10180
|
|
},
|
|
{
|
|
"epoch": 49.78645515558267,
|
|
"grad_norm": 2.2076785564422607,
|
|
"learning_rate": 0.0001337704918032787,
|
|
"loss": 0.0822,
|
|
"step": 10200
|
|
},
|
|
{
|
|
"epoch": 49.884075655887735,
|
|
"grad_norm": 2.361694097518921,
|
|
"learning_rate": 0.0001336393442622951,
|
|
"loss": 0.0913,
|
|
"step": 10220
|
|
},
|
|
{
|
|
"epoch": 49.981696156192804,
|
|
"grad_norm": 2.3063478469848633,
|
|
"learning_rate": 0.00013350819672131148,
|
|
"loss": 0.0919,
|
|
"step": 10240
|
|
},
|
|
{
|
|
"epoch": 50.079316656497866,
|
|
"grad_norm": 1.9585156440734863,
|
|
"learning_rate": 0.00013337704918032787,
|
|
"loss": 0.0767,
|
|
"step": 10260
|
|
},
|
|
{
|
|
"epoch": 50.17693715680293,
|
|
"grad_norm": 3.1279983520507812,
|
|
"learning_rate": 0.00013324590163934428,
|
|
"loss": 0.0755,
|
|
"step": 10280
|
|
},
|
|
{
|
|
"epoch": 50.27455765710799,
|
|
"grad_norm": 2.0095512866973877,
|
|
"learning_rate": 0.00013311475409836067,
|
|
"loss": 0.0735,
|
|
"step": 10300
|
|
},
|
|
{
|
|
"epoch": 50.37217815741306,
|
|
"grad_norm": 3.011828899383545,
|
|
"learning_rate": 0.00013298360655737706,
|
|
"loss": 0.0766,
|
|
"step": 10320
|
|
},
|
|
{
|
|
"epoch": 50.46979865771812,
|
|
"grad_norm": 2.0386946201324463,
|
|
"learning_rate": 0.00013285245901639345,
|
|
"loss": 0.0738,
|
|
"step": 10340
|
|
},
|
|
{
|
|
"epoch": 50.567419158023185,
|
|
"grad_norm": 2.0549607276916504,
|
|
"learning_rate": 0.00013272131147540984,
|
|
"loss": 0.089,
|
|
"step": 10360
|
|
},
|
|
{
|
|
"epoch": 50.66503965832825,
|
|
"grad_norm": 2.816814422607422,
|
|
"learning_rate": 0.00013259016393442623,
|
|
"loss": 0.0849,
|
|
"step": 10380
|
|
},
|
|
{
|
|
"epoch": 50.76266015863331,
|
|
"grad_norm": 2.2058887481689453,
|
|
"learning_rate": 0.00013245901639344262,
|
|
"loss": 0.0842,
|
|
"step": 10400
|
|
},
|
|
{
|
|
"epoch": 50.86028065893838,
|
|
"grad_norm": 2.2805745601654053,
|
|
"learning_rate": 0.00013232786885245904,
|
|
"loss": 0.0888,
|
|
"step": 10420
|
|
},
|
|
{
|
|
"epoch": 50.95790115924344,
|
|
"grad_norm": 2.602515697479248,
|
|
"learning_rate": 0.00013219672131147543,
|
|
"loss": 0.0867,
|
|
"step": 10440
|
|
},
|
|
{
|
|
"epoch": 51.0555216595485,
|
|
"grad_norm": 1.9022042751312256,
|
|
"learning_rate": 0.0001320655737704918,
|
|
"loss": 0.0748,
|
|
"step": 10460
|
|
},
|
|
{
|
|
"epoch": 51.153142159853566,
|
|
"grad_norm": 2.4302797317504883,
|
|
"learning_rate": 0.0001319344262295082,
|
|
"loss": 0.0693,
|
|
"step": 10480
|
|
},
|
|
{
|
|
"epoch": 51.250762660158635,
|
|
"grad_norm": 1.5306838750839233,
|
|
"learning_rate": 0.0001318032786885246,
|
|
"loss": 0.0748,
|
|
"step": 10500
|
|
},
|
|
{
|
|
"epoch": 51.3483831604637,
|
|
"grad_norm": 2.035921812057495,
|
|
"learning_rate": 0.000131672131147541,
|
|
"loss": 0.0728,
|
|
"step": 10520
|
|
},
|
|
{
|
|
"epoch": 51.44600366076876,
|
|
"grad_norm": 1.8543293476104736,
|
|
"learning_rate": 0.00013154098360655737,
|
|
"loss": 0.0752,
|
|
"step": 10540
|
|
},
|
|
{
|
|
"epoch": 51.54362416107382,
|
|
"grad_norm": 2.405411720275879,
|
|
"learning_rate": 0.0001314098360655738,
|
|
"loss": 0.0775,
|
|
"step": 10560
|
|
},
|
|
{
|
|
"epoch": 51.64124466137889,
|
|
"grad_norm": 2.767051935195923,
|
|
"learning_rate": 0.00013127868852459018,
|
|
"loss": 0.075,
|
|
"step": 10580
|
|
},
|
|
{
|
|
"epoch": 51.738865161683954,
|
|
"grad_norm": 2.200438976287842,
|
|
"learning_rate": 0.00013114754098360654,
|
|
"loss": 0.084,
|
|
"step": 10600
|
|
},
|
|
{
|
|
"epoch": 51.836485661989016,
|
|
"grad_norm": 2.7286131381988525,
|
|
"learning_rate": 0.00013101639344262296,
|
|
"loss": 0.0863,
|
|
"step": 10620
|
|
},
|
|
{
|
|
"epoch": 51.93410616229408,
|
|
"grad_norm": 1.557892084121704,
|
|
"learning_rate": 0.00013088524590163935,
|
|
"loss": 0.0832,
|
|
"step": 10640
|
|
},
|
|
{
|
|
"epoch": 52.03172666259915,
|
|
"grad_norm": 1.6213957071304321,
|
|
"learning_rate": 0.00013075409836065576,
|
|
"loss": 0.0835,
|
|
"step": 10660
|
|
},
|
|
{
|
|
"epoch": 52.12934716290421,
|
|
"grad_norm": 1.9538192749023438,
|
|
"learning_rate": 0.00013062295081967213,
|
|
"loss": 0.0632,
|
|
"step": 10680
|
|
},
|
|
{
|
|
"epoch": 52.22696766320927,
|
|
"grad_norm": 1.5224443674087524,
|
|
"learning_rate": 0.00013049180327868854,
|
|
"loss": 0.0691,
|
|
"step": 10700
|
|
},
|
|
{
|
|
"epoch": 52.324588163514335,
|
|
"grad_norm": 2.0320827960968018,
|
|
"learning_rate": 0.00013036065573770493,
|
|
"loss": 0.0777,
|
|
"step": 10720
|
|
},
|
|
{
|
|
"epoch": 52.422208663819404,
|
|
"grad_norm": 2.9247395992279053,
|
|
"learning_rate": 0.00013022950819672132,
|
|
"loss": 0.0738,
|
|
"step": 10740
|
|
},
|
|
{
|
|
"epoch": 52.51982916412447,
|
|
"grad_norm": 2.0458271503448486,
|
|
"learning_rate": 0.0001300983606557377,
|
|
"loss": 0.0716,
|
|
"step": 10760
|
|
},
|
|
{
|
|
"epoch": 52.61744966442953,
|
|
"grad_norm": 2.1657023429870605,
|
|
"learning_rate": 0.0001299672131147541,
|
|
"loss": 0.0798,
|
|
"step": 10780
|
|
},
|
|
{
|
|
"epoch": 52.71507016473459,
|
|
"grad_norm": 2.819549798965454,
|
|
"learning_rate": 0.00012983606557377052,
|
|
"loss": 0.0869,
|
|
"step": 10800
|
|
},
|
|
{
|
|
"epoch": 52.81269066503966,
|
|
"grad_norm": 2.455775737762451,
|
|
"learning_rate": 0.00012970491803278688,
|
|
"loss": 0.0784,
|
|
"step": 10820
|
|
},
|
|
{
|
|
"epoch": 52.91031116534472,
|
|
"grad_norm": 2.803501844406128,
|
|
"learning_rate": 0.0001295737704918033,
|
|
"loss": 0.0802,
|
|
"step": 10840
|
|
},
|
|
{
|
|
"epoch": 53.007931665649785,
|
|
"grad_norm": 2.2946598529815674,
|
|
"learning_rate": 0.00012944262295081968,
|
|
"loss": 0.0767,
|
|
"step": 10860
|
|
},
|
|
{
|
|
"epoch": 53.10555216595485,
|
|
"grad_norm": 1.9233254194259644,
|
|
"learning_rate": 0.00012931147540983607,
|
|
"loss": 0.0652,
|
|
"step": 10880
|
|
},
|
|
{
|
|
"epoch": 53.20317266625992,
|
|
"grad_norm": 1.8651479482650757,
|
|
"learning_rate": 0.00012918032786885246,
|
|
"loss": 0.0675,
|
|
"step": 10900
|
|
},
|
|
{
|
|
"epoch": 53.30079316656498,
|
|
"grad_norm": 2.216233253479004,
|
|
"learning_rate": 0.00012904918032786885,
|
|
"loss": 0.0716,
|
|
"step": 10920
|
|
},
|
|
{
|
|
"epoch": 53.39841366687004,
|
|
"grad_norm": 2.591519355773926,
|
|
"learning_rate": 0.00012891803278688527,
|
|
"loss": 0.073,
|
|
"step": 10940
|
|
},
|
|
{
|
|
"epoch": 53.496034167175104,
|
|
"grad_norm": 2.490187406539917,
|
|
"learning_rate": 0.00012878688524590166,
|
|
"loss": 0.0715,
|
|
"step": 10960
|
|
},
|
|
{
|
|
"epoch": 53.59365466748017,
|
|
"grad_norm": 2.2473397254943848,
|
|
"learning_rate": 0.00012865573770491802,
|
|
"loss": 0.0715,
|
|
"step": 10980
|
|
},
|
|
{
|
|
"epoch": 53.691275167785236,
|
|
"grad_norm": 1.9002201557159424,
|
|
"learning_rate": 0.00012852459016393444,
|
|
"loss": 0.0762,
|
|
"step": 11000
|
|
},
|
|
{
|
|
"epoch": 53.7888956680903,
|
|
"grad_norm": 2.4991517066955566,
|
|
"learning_rate": 0.00012839344262295083,
|
|
"loss": 0.0869,
|
|
"step": 11020
|
|
},
|
|
{
|
|
"epoch": 53.88651616839536,
|
|
"grad_norm": 2.5355913639068604,
|
|
"learning_rate": 0.00012826229508196722,
|
|
"loss": 0.0801,
|
|
"step": 11040
|
|
},
|
|
{
|
|
"epoch": 53.98413666870043,
|
|
"grad_norm": 1.895386815071106,
|
|
"learning_rate": 0.0001281311475409836,
|
|
"loss": 0.0747,
|
|
"step": 11060
|
|
},
|
|
{
|
|
"epoch": 54.08175716900549,
|
|
"grad_norm": 2.150261163711548,
|
|
"learning_rate": 0.00012800000000000002,
|
|
"loss": 0.0646,
|
|
"step": 11080
|
|
},
|
|
{
|
|
"epoch": 54.179377669310554,
|
|
"grad_norm": 4.12515926361084,
|
|
"learning_rate": 0.0001278688524590164,
|
|
"loss": 0.0693,
|
|
"step": 11100
|
|
},
|
|
{
|
|
"epoch": 54.27699816961562,
|
|
"grad_norm": 1.3180917501449585,
|
|
"learning_rate": 0.00012773770491803277,
|
|
"loss": 0.069,
|
|
"step": 11120
|
|
},
|
|
{
|
|
"epoch": 54.374618669920686,
|
|
"grad_norm": 1.714634895324707,
|
|
"learning_rate": 0.0001276065573770492,
|
|
"loss": 0.0701,
|
|
"step": 11140
|
|
},
|
|
{
|
|
"epoch": 54.47223917022575,
|
|
"grad_norm": 1.93068265914917,
|
|
"learning_rate": 0.00012747540983606558,
|
|
"loss": 0.0682,
|
|
"step": 11160
|
|
},
|
|
{
|
|
"epoch": 54.56985967053081,
|
|
"grad_norm": 1.9187488555908203,
|
|
"learning_rate": 0.000127344262295082,
|
|
"loss": 0.0778,
|
|
"step": 11180
|
|
},
|
|
{
|
|
"epoch": 54.66748017083587,
|
|
"grad_norm": 2.3723368644714355,
|
|
"learning_rate": 0.00012721311475409836,
|
|
"loss": 0.0769,
|
|
"step": 11200
|
|
},
|
|
{
|
|
"epoch": 54.76510067114094,
|
|
"grad_norm": 1.7806050777435303,
|
|
"learning_rate": 0.00012708196721311477,
|
|
"loss": 0.0729,
|
|
"step": 11220
|
|
},
|
|
{
|
|
"epoch": 54.862721171446005,
|
|
"grad_norm": 1.839896321296692,
|
|
"learning_rate": 0.00012695081967213116,
|
|
"loss": 0.0711,
|
|
"step": 11240
|
|
},
|
|
{
|
|
"epoch": 54.96034167175107,
|
|
"grad_norm": 3.055255889892578,
|
|
"learning_rate": 0.00012681967213114753,
|
|
"loss": 0.077,
|
|
"step": 11260
|
|
},
|
|
{
|
|
"epoch": 55.05796217205613,
|
|
"grad_norm": 1.504418134689331,
|
|
"learning_rate": 0.00012668852459016394,
|
|
"loss": 0.0711,
|
|
"step": 11280
|
|
},
|
|
{
|
|
"epoch": 55.1555826723612,
|
|
"grad_norm": 3.407508373260498,
|
|
"learning_rate": 0.00012655737704918033,
|
|
"loss": 0.0762,
|
|
"step": 11300
|
|
},
|
|
{
|
|
"epoch": 55.25320317266626,
|
|
"grad_norm": 1.7444705963134766,
|
|
"learning_rate": 0.00012642622950819675,
|
|
"loss": 0.0676,
|
|
"step": 11320
|
|
},
|
|
{
|
|
"epoch": 55.35082367297132,
|
|
"grad_norm": 1.7877843379974365,
|
|
"learning_rate": 0.0001262950819672131,
|
|
"loss": 0.0611,
|
|
"step": 11340
|
|
},
|
|
{
|
|
"epoch": 55.448444173276386,
|
|
"grad_norm": 2.1501216888427734,
|
|
"learning_rate": 0.0001261639344262295,
|
|
"loss": 0.0702,
|
|
"step": 11360
|
|
},
|
|
{
|
|
"epoch": 55.546064673581455,
|
|
"grad_norm": 1.4569560289382935,
|
|
"learning_rate": 0.00012603278688524592,
|
|
"loss": 0.0651,
|
|
"step": 11380
|
|
},
|
|
{
|
|
"epoch": 55.64368517388652,
|
|
"grad_norm": 1.5365070104599,
|
|
"learning_rate": 0.0001259016393442623,
|
|
"loss": 0.0708,
|
|
"step": 11400
|
|
},
|
|
{
|
|
"epoch": 55.74130567419158,
|
|
"grad_norm": 2.2428839206695557,
|
|
"learning_rate": 0.0001257704918032787,
|
|
"loss": 0.0694,
|
|
"step": 11420
|
|
},
|
|
{
|
|
"epoch": 55.83892617449664,
|
|
"grad_norm": 2.054964542388916,
|
|
"learning_rate": 0.00012563934426229508,
|
|
"loss": 0.0754,
|
|
"step": 11440
|
|
},
|
|
{
|
|
"epoch": 55.93654667480171,
|
|
"grad_norm": 3.42903733253479,
|
|
"learning_rate": 0.0001255081967213115,
|
|
"loss": 0.0752,
|
|
"step": 11460
|
|
},
|
|
{
|
|
"epoch": 56.034167175106774,
|
|
"grad_norm": 1.6482346057891846,
|
|
"learning_rate": 0.00012537704918032786,
|
|
"loss": 0.068,
|
|
"step": 11480
|
|
},
|
|
{
|
|
"epoch": 56.131787675411836,
|
|
"grad_norm": 2.0840466022491455,
|
|
"learning_rate": 0.00012524590163934425,
|
|
"loss": 0.0624,
|
|
"step": 11500
|
|
},
|
|
{
|
|
"epoch": 56.2294081757169,
|
|
"grad_norm": 2.7554879188537598,
|
|
"learning_rate": 0.00012511475409836067,
|
|
"loss": 0.0592,
|
|
"step": 11520
|
|
},
|
|
{
|
|
"epoch": 56.32702867602197,
|
|
"grad_norm": 2.8247487545013428,
|
|
"learning_rate": 0.00012498360655737706,
|
|
"loss": 0.0661,
|
|
"step": 11540
|
|
},
|
|
{
|
|
"epoch": 56.42464917632703,
|
|
"grad_norm": 2.2485480308532715,
|
|
"learning_rate": 0.00012485245901639345,
|
|
"loss": 0.0712,
|
|
"step": 11560
|
|
},
|
|
{
|
|
"epoch": 56.52226967663209,
|
|
"grad_norm": 2.1642680168151855,
|
|
"learning_rate": 0.00012472131147540984,
|
|
"loss": 0.0752,
|
|
"step": 11580
|
|
},
|
|
{
|
|
"epoch": 56.619890176937155,
|
|
"grad_norm": 1.827771544456482,
|
|
"learning_rate": 0.00012459016393442625,
|
|
"loss": 0.0707,
|
|
"step": 11600
|
|
},
|
|
{
|
|
"epoch": 56.717510677242224,
|
|
"grad_norm": 1.9134409427642822,
|
|
"learning_rate": 0.00012445901639344262,
|
|
"loss": 0.0696,
|
|
"step": 11620
|
|
},
|
|
{
|
|
"epoch": 56.815131177547286,
|
|
"grad_norm": 2.3444812297821045,
|
|
"learning_rate": 0.000124327868852459,
|
|
"loss": 0.065,
|
|
"step": 11640
|
|
},
|
|
{
|
|
"epoch": 56.91275167785235,
|
|
"grad_norm": 2.0354135036468506,
|
|
"learning_rate": 0.00012419672131147542,
|
|
"loss": 0.0746,
|
|
"step": 11660
|
|
},
|
|
{
|
|
"epoch": 57.01037217815741,
|
|
"grad_norm": 1.5275322198867798,
|
|
"learning_rate": 0.0001240655737704918,
|
|
"loss": 0.0756,
|
|
"step": 11680
|
|
},
|
|
{
|
|
"epoch": 57.10799267846248,
|
|
"grad_norm": 2.310797929763794,
|
|
"learning_rate": 0.0001239344262295082,
|
|
"loss": 0.057,
|
|
"step": 11700
|
|
},
|
|
{
|
|
"epoch": 57.20561317876754,
|
|
"grad_norm": 1.903204083442688,
|
|
"learning_rate": 0.0001238032786885246,
|
|
"loss": 0.0657,
|
|
"step": 11720
|
|
},
|
|
{
|
|
"epoch": 57.303233679072605,
|
|
"grad_norm": 2.115591049194336,
|
|
"learning_rate": 0.000123672131147541,
|
|
"loss": 0.0624,
|
|
"step": 11740
|
|
},
|
|
{
|
|
"epoch": 57.40085417937767,
|
|
"grad_norm": 2.1732656955718994,
|
|
"learning_rate": 0.0001235409836065574,
|
|
"loss": 0.0636,
|
|
"step": 11760
|
|
},
|
|
{
|
|
"epoch": 57.49847467968274,
|
|
"grad_norm": 2.2554845809936523,
|
|
"learning_rate": 0.00012340983606557376,
|
|
"loss": 0.0691,
|
|
"step": 11780
|
|
},
|
|
{
|
|
"epoch": 57.5960951799878,
|
|
"grad_norm": 2.994563341140747,
|
|
"learning_rate": 0.00012327868852459017,
|
|
"loss": 0.0724,
|
|
"step": 11800
|
|
},
|
|
{
|
|
"epoch": 57.69371568029286,
|
|
"grad_norm": 2.824934482574463,
|
|
"learning_rate": 0.00012314754098360656,
|
|
"loss": 0.07,
|
|
"step": 11820
|
|
},
|
|
{
|
|
"epoch": 57.79133618059792,
|
|
"grad_norm": 2.435310125350952,
|
|
"learning_rate": 0.00012301639344262295,
|
|
"loss": 0.0753,
|
|
"step": 11840
|
|
},
|
|
{
|
|
"epoch": 57.88895668090299,
|
|
"grad_norm": 2.140200138092041,
|
|
"learning_rate": 0.00012288524590163934,
|
|
"loss": 0.0762,
|
|
"step": 11860
|
|
},
|
|
{
|
|
"epoch": 57.986577181208055,
|
|
"grad_norm": 2.013983726501465,
|
|
"learning_rate": 0.00012275409836065573,
|
|
"loss": 0.067,
|
|
"step": 11880
|
|
},
|
|
{
|
|
"epoch": 58.08419768151312,
|
|
"grad_norm": 1.8855304718017578,
|
|
"learning_rate": 0.00012262295081967215,
|
|
"loss": 0.0606,
|
|
"step": 11900
|
|
},
|
|
{
|
|
"epoch": 58.18181818181818,
|
|
"grad_norm": 2.3649916648864746,
|
|
"learning_rate": 0.0001224918032786885,
|
|
"loss": 0.0626,
|
|
"step": 11920
|
|
},
|
|
{
|
|
"epoch": 58.27943868212325,
|
|
"grad_norm": 2.566420078277588,
|
|
"learning_rate": 0.00012236065573770493,
|
|
"loss": 0.059,
|
|
"step": 11940
|
|
},
|
|
{
|
|
"epoch": 58.37705918242831,
|
|
"grad_norm": 2.3648924827575684,
|
|
"learning_rate": 0.00012222950819672132,
|
|
"loss": 0.0644,
|
|
"step": 11960
|
|
},
|
|
{
|
|
"epoch": 58.474679682733374,
|
|
"grad_norm": 1.7778581380844116,
|
|
"learning_rate": 0.00012209836065573773,
|
|
"loss": 0.0691,
|
|
"step": 11980
|
|
},
|
|
{
|
|
"epoch": 58.572300183038436,
|
|
"grad_norm": 1.8090124130249023,
|
|
"learning_rate": 0.0001219672131147541,
|
|
"loss": 0.0685,
|
|
"step": 12000
|
|
},
|
|
{
|
|
"epoch": 58.669920683343506,
|
|
"grad_norm": 2.2762486934661865,
|
|
"learning_rate": 0.0001218360655737705,
|
|
"loss": 0.0634,
|
|
"step": 12020
|
|
},
|
|
{
|
|
"epoch": 58.76754118364857,
|
|
"grad_norm": 2.279273509979248,
|
|
"learning_rate": 0.0001217049180327869,
|
|
"loss": 0.0681,
|
|
"step": 12040
|
|
},
|
|
{
|
|
"epoch": 58.86516168395363,
|
|
"grad_norm": 1.5724430084228516,
|
|
"learning_rate": 0.00012157377049180328,
|
|
"loss": 0.0712,
|
|
"step": 12060
|
|
},
|
|
{
|
|
"epoch": 58.96278218425869,
|
|
"grad_norm": 1.7655867338180542,
|
|
"learning_rate": 0.00012144262295081968,
|
|
"loss": 0.0711,
|
|
"step": 12080
|
|
},
|
|
{
|
|
"epoch": 59.060402684563755,
|
|
"grad_norm": 1.0987945795059204,
|
|
"learning_rate": 0.00012131147540983607,
|
|
"loss": 0.0614,
|
|
"step": 12100
|
|
},
|
|
{
|
|
"epoch": 59.158023184868824,
|
|
"grad_norm": 1.433910608291626,
|
|
"learning_rate": 0.00012118032786885247,
|
|
"loss": 0.0522,
|
|
"step": 12120
|
|
},
|
|
{
|
|
"epoch": 59.25564368517389,
|
|
"grad_norm": 1.4189090728759766,
|
|
"learning_rate": 0.00012104918032786885,
|
|
"loss": 0.0603,
|
|
"step": 12140
|
|
},
|
|
{
|
|
"epoch": 59.35326418547895,
|
|
"grad_norm": 1.915209174156189,
|
|
"learning_rate": 0.00012091803278688525,
|
|
"loss": 0.0631,
|
|
"step": 12160
|
|
},
|
|
{
|
|
"epoch": 59.45088468578401,
|
|
"grad_norm": 2.0415852069854736,
|
|
"learning_rate": 0.00012078688524590165,
|
|
"loss": 0.065,
|
|
"step": 12180
|
|
},
|
|
{
|
|
"epoch": 59.54850518608908,
|
|
"grad_norm": 1.8674949407577515,
|
|
"learning_rate": 0.00012065573770491804,
|
|
"loss": 0.0636,
|
|
"step": 12200
|
|
},
|
|
{
|
|
"epoch": 59.64612568639414,
|
|
"grad_norm": 3.768343687057495,
|
|
"learning_rate": 0.00012052459016393443,
|
|
"loss": 0.0666,
|
|
"step": 12220
|
|
},
|
|
{
|
|
"epoch": 59.743746186699205,
|
|
"grad_norm": 1.8944804668426514,
|
|
"learning_rate": 0.00012039344262295082,
|
|
"loss": 0.0698,
|
|
"step": 12240
|
|
},
|
|
{
|
|
"epoch": 59.84136668700427,
|
|
"grad_norm": 2.296515464782715,
|
|
"learning_rate": 0.00012026229508196722,
|
|
"loss": 0.0715,
|
|
"step": 12260
|
|
},
|
|
{
|
|
"epoch": 59.93898718730934,
|
|
"grad_norm": 1.957593560218811,
|
|
"learning_rate": 0.0001201311475409836,
|
|
"loss": 0.0681,
|
|
"step": 12280
|
|
},
|
|
{
|
|
"epoch": 60.0366076876144,
|
|
"grad_norm": 1.8776521682739258,
|
|
"learning_rate": 0.00012,
|
|
"loss": 0.0676,
|
|
"step": 12300
|
|
},
|
|
{
|
|
"epoch": 60.13422818791946,
|
|
"grad_norm": 1.3199673891067505,
|
|
"learning_rate": 0.0001198688524590164,
|
|
"loss": 0.0515,
|
|
"step": 12320
|
|
},
|
|
{
|
|
"epoch": 60.231848688224524,
|
|
"grad_norm": 1.8994742631912231,
|
|
"learning_rate": 0.0001197377049180328,
|
|
"loss": 0.0566,
|
|
"step": 12340
|
|
},
|
|
{
|
|
"epoch": 60.32946918852959,
|
|
"grad_norm": 1.8091648817062378,
|
|
"learning_rate": 0.00011960655737704917,
|
|
"loss": 0.0628,
|
|
"step": 12360
|
|
},
|
|
{
|
|
"epoch": 60.427089688834656,
|
|
"grad_norm": 1.7513519525527954,
|
|
"learning_rate": 0.00011947540983606557,
|
|
"loss": 0.0652,
|
|
"step": 12380
|
|
},
|
|
{
|
|
"epoch": 60.52471018913972,
|
|
"grad_norm": 1.8451545238494873,
|
|
"learning_rate": 0.00011934426229508198,
|
|
"loss": 0.063,
|
|
"step": 12400
|
|
},
|
|
{
|
|
"epoch": 60.62233068944478,
|
|
"grad_norm": 1.6873937845230103,
|
|
"learning_rate": 0.00011921311475409838,
|
|
"loss": 0.0666,
|
|
"step": 12420
|
|
},
|
|
{
|
|
"epoch": 60.71995118974985,
|
|
"grad_norm": 2.2382214069366455,
|
|
"learning_rate": 0.00011908196721311476,
|
|
"loss": 0.0593,
|
|
"step": 12440
|
|
},
|
|
{
|
|
"epoch": 60.81757169005491,
|
|
"grad_norm": 1.4879833459854126,
|
|
"learning_rate": 0.00011895081967213116,
|
|
"loss": 0.0718,
|
|
"step": 12460
|
|
},
|
|
{
|
|
"epoch": 60.915192190359974,
|
|
"grad_norm": 1.8051949739456177,
|
|
"learning_rate": 0.00011881967213114755,
|
|
"loss": 0.0659,
|
|
"step": 12480
|
|
},
|
|
{
|
|
"epoch": 61.01281269066504,
|
|
"grad_norm": 2.070537567138672,
|
|
"learning_rate": 0.00011868852459016392,
|
|
"loss": 0.0647,
|
|
"step": 12500
|
|
},
|
|
{
|
|
"epoch": 61.110433190970106,
|
|
"grad_norm": 2.195227861404419,
|
|
"learning_rate": 0.00011855737704918033,
|
|
"loss": 0.0516,
|
|
"step": 12520
|
|
},
|
|
{
|
|
"epoch": 61.20805369127517,
|
|
"grad_norm": 2.3762006759643555,
|
|
"learning_rate": 0.00011842622950819673,
|
|
"loss": 0.056,
|
|
"step": 12540
|
|
},
|
|
{
|
|
"epoch": 61.30567419158023,
|
|
"grad_norm": 1.836585521697998,
|
|
"learning_rate": 0.00011829508196721313,
|
|
"loss": 0.0624,
|
|
"step": 12560
|
|
},
|
|
{
|
|
"epoch": 61.40329469188529,
|
|
"grad_norm": 1.6185029745101929,
|
|
"learning_rate": 0.00011816393442622951,
|
|
"loss": 0.0591,
|
|
"step": 12580
|
|
},
|
|
{
|
|
"epoch": 61.50091519219036,
|
|
"grad_norm": 2.116314649581909,
|
|
"learning_rate": 0.00011803278688524591,
|
|
"loss": 0.0615,
|
|
"step": 12600
|
|
},
|
|
{
|
|
"epoch": 61.598535692495425,
|
|
"grad_norm": 2.873701333999634,
|
|
"learning_rate": 0.0001179016393442623,
|
|
"loss": 0.0624,
|
|
"step": 12620
|
|
},
|
|
{
|
|
"epoch": 61.69615619280049,
|
|
"grad_norm": 1.8966227769851685,
|
|
"learning_rate": 0.0001177704918032787,
|
|
"loss": 0.0643,
|
|
"step": 12640
|
|
},
|
|
{
|
|
"epoch": 61.79377669310555,
|
|
"grad_norm": 2.343330144882202,
|
|
"learning_rate": 0.00011763934426229508,
|
|
"loss": 0.0637,
|
|
"step": 12660
|
|
},
|
|
{
|
|
"epoch": 61.89139719341062,
|
|
"grad_norm": 2.882538318634033,
|
|
"learning_rate": 0.00011750819672131148,
|
|
"loss": 0.0678,
|
|
"step": 12680
|
|
},
|
|
{
|
|
"epoch": 61.98901769371568,
|
|
"grad_norm": 3.441783905029297,
|
|
"learning_rate": 0.00011737704918032789,
|
|
"loss": 0.0667,
|
|
"step": 12700
|
|
},
|
|
{
|
|
"epoch": 62.08663819402074,
|
|
"grad_norm": 1.9193288087844849,
|
|
"learning_rate": 0.00011724590163934426,
|
|
"loss": 0.0512,
|
|
"step": 12720
|
|
},
|
|
{
|
|
"epoch": 62.184258694325806,
|
|
"grad_norm": 1.326977252960205,
|
|
"learning_rate": 0.00011711475409836066,
|
|
"loss": 0.0602,
|
|
"step": 12740
|
|
},
|
|
{
|
|
"epoch": 62.281879194630875,
|
|
"grad_norm": 1.6590323448181152,
|
|
"learning_rate": 0.00011698360655737705,
|
|
"loss": 0.0564,
|
|
"step": 12760
|
|
},
|
|
{
|
|
"epoch": 62.37949969493594,
|
|
"grad_norm": 1.5968765020370483,
|
|
"learning_rate": 0.00011685245901639346,
|
|
"loss": 0.0591,
|
|
"step": 12780
|
|
},
|
|
{
|
|
"epoch": 62.477120195241,
|
|
"grad_norm": 1.674349069595337,
|
|
"learning_rate": 0.00011672131147540983,
|
|
"loss": 0.0603,
|
|
"step": 12800
|
|
},
|
|
{
|
|
"epoch": 62.57474069554606,
|
|
"grad_norm": 2.2285475730895996,
|
|
"learning_rate": 0.00011659016393442623,
|
|
"loss": 0.0578,
|
|
"step": 12820
|
|
},
|
|
{
|
|
"epoch": 62.67236119585113,
|
|
"grad_norm": 1.4100092649459839,
|
|
"learning_rate": 0.00011645901639344264,
|
|
"loss": 0.0652,
|
|
"step": 12840
|
|
},
|
|
{
|
|
"epoch": 62.769981696156194,
|
|
"grad_norm": 2.5096209049224854,
|
|
"learning_rate": 0.00011632786885245903,
|
|
"loss": 0.0551,
|
|
"step": 12860
|
|
},
|
|
{
|
|
"epoch": 62.867602196461256,
|
|
"grad_norm": 3.1331403255462646,
|
|
"learning_rate": 0.0001161967213114754,
|
|
"loss": 0.0662,
|
|
"step": 12880
|
|
},
|
|
{
|
|
"epoch": 62.96522269676632,
|
|
"grad_norm": 1.8345986604690552,
|
|
"learning_rate": 0.0001160655737704918,
|
|
"loss": 0.0671,
|
|
"step": 12900
|
|
},
|
|
{
|
|
"epoch": 63.06284319707139,
|
|
"grad_norm": 1.7236154079437256,
|
|
"learning_rate": 0.00011593442622950821,
|
|
"loss": 0.0553,
|
|
"step": 12920
|
|
},
|
|
{
|
|
"epoch": 63.16046369737645,
|
|
"grad_norm": 1.664116621017456,
|
|
"learning_rate": 0.00011580327868852458,
|
|
"loss": 0.0554,
|
|
"step": 12940
|
|
},
|
|
{
|
|
"epoch": 63.25808419768151,
|
|
"grad_norm": 1.4789968729019165,
|
|
"learning_rate": 0.00011567213114754099,
|
|
"loss": 0.0562,
|
|
"step": 12960
|
|
},
|
|
{
|
|
"epoch": 63.355704697986575,
|
|
"grad_norm": 2.4718666076660156,
|
|
"learning_rate": 0.00011554098360655739,
|
|
"loss": 0.0625,
|
|
"step": 12980
|
|
},
|
|
{
|
|
"epoch": 63.453325198291644,
|
|
"grad_norm": 2.611966133117676,
|
|
"learning_rate": 0.00011540983606557378,
|
|
"loss": 0.0564,
|
|
"step": 13000
|
|
},
|
|
{
|
|
"epoch": 63.550945698596706,
|
|
"grad_norm": 2.4028069972991943,
|
|
"learning_rate": 0.00011527868852459016,
|
|
"loss": 0.0629,
|
|
"step": 13020
|
|
},
|
|
{
|
|
"epoch": 63.64856619890177,
|
|
"grad_norm": 1.251297950744629,
|
|
"learning_rate": 0.00011514754098360656,
|
|
"loss": 0.0608,
|
|
"step": 13040
|
|
},
|
|
{
|
|
"epoch": 63.74618669920683,
|
|
"grad_norm": 2.8238635063171387,
|
|
"learning_rate": 0.00011501639344262296,
|
|
"loss": 0.0619,
|
|
"step": 13060
|
|
},
|
|
{
|
|
"epoch": 63.8438071995119,
|
|
"grad_norm": 2.705125570297241,
|
|
"learning_rate": 0.00011488524590163936,
|
|
"loss": 0.0631,
|
|
"step": 13080
|
|
},
|
|
{
|
|
"epoch": 63.94142769981696,
|
|
"grad_norm": 1.7994593381881714,
|
|
"learning_rate": 0.00011475409836065574,
|
|
"loss": 0.0664,
|
|
"step": 13100
|
|
},
|
|
{
|
|
"epoch": 64.03904820012202,
|
|
"grad_norm": 3.055631399154663,
|
|
"learning_rate": 0.00011462295081967214,
|
|
"loss": 0.0561,
|
|
"step": 13120
|
|
},
|
|
{
|
|
"epoch": 64.1366687004271,
|
|
"grad_norm": 1.7646046876907349,
|
|
"learning_rate": 0.00011449180327868853,
|
|
"loss": 0.0542,
|
|
"step": 13140
|
|
},
|
|
{
|
|
"epoch": 64.23428920073215,
|
|
"grad_norm": 1.33475923538208,
|
|
"learning_rate": 0.00011436065573770491,
|
|
"loss": 0.0535,
|
|
"step": 13160
|
|
},
|
|
{
|
|
"epoch": 64.33190970103722,
|
|
"grad_norm": 1.4737622737884521,
|
|
"learning_rate": 0.00011422950819672131,
|
|
"loss": 0.0527,
|
|
"step": 13180
|
|
},
|
|
{
|
|
"epoch": 64.42953020134229,
|
|
"grad_norm": 2.2712624073028564,
|
|
"learning_rate": 0.00011409836065573771,
|
|
"loss": 0.0563,
|
|
"step": 13200
|
|
},
|
|
{
|
|
"epoch": 64.52715070164734,
|
|
"grad_norm": 1.3113006353378296,
|
|
"learning_rate": 0.00011396721311475412,
|
|
"loss": 0.0529,
|
|
"step": 13220
|
|
},
|
|
{
|
|
"epoch": 64.62477120195241,
|
|
"grad_norm": 1.557981014251709,
|
|
"learning_rate": 0.00011383606557377049,
|
|
"loss": 0.0625,
|
|
"step": 13240
|
|
},
|
|
{
|
|
"epoch": 64.72239170225747,
|
|
"grad_norm": 1.8564097881317139,
|
|
"learning_rate": 0.00011370491803278688,
|
|
"loss": 0.0638,
|
|
"step": 13260
|
|
},
|
|
{
|
|
"epoch": 64.82001220256254,
|
|
"grad_norm": 1.741255521774292,
|
|
"learning_rate": 0.00011357377049180329,
|
|
"loss": 0.0591,
|
|
"step": 13280
|
|
},
|
|
{
|
|
"epoch": 64.9176327028676,
|
|
"grad_norm": 1.85543692111969,
|
|
"learning_rate": 0.00011344262295081969,
|
|
"loss": 0.0628,
|
|
"step": 13300
|
|
},
|
|
{
|
|
"epoch": 65.01525320317266,
|
|
"grad_norm": 1.9299827814102173,
|
|
"learning_rate": 0.00011331147540983606,
|
|
"loss": 0.0553,
|
|
"step": 13320
|
|
},
|
|
{
|
|
"epoch": 65.11287370347773,
|
|
"grad_norm": 1.5189353227615356,
|
|
"learning_rate": 0.00011318032786885247,
|
|
"loss": 0.0502,
|
|
"step": 13340
|
|
},
|
|
{
|
|
"epoch": 65.2104942037828,
|
|
"grad_norm": 1.8111690282821655,
|
|
"learning_rate": 0.00011304918032786887,
|
|
"loss": 0.0514,
|
|
"step": 13360
|
|
},
|
|
{
|
|
"epoch": 65.30811470408786,
|
|
"grad_norm": 2.5628137588500977,
|
|
"learning_rate": 0.00011291803278688525,
|
|
"loss": 0.0479,
|
|
"step": 13380
|
|
},
|
|
{
|
|
"epoch": 65.40573520439293,
|
|
"grad_norm": 1.4838755130767822,
|
|
"learning_rate": 0.00011278688524590164,
|
|
"loss": 0.0591,
|
|
"step": 13400
|
|
},
|
|
{
|
|
"epoch": 65.50335570469798,
|
|
"grad_norm": 1.512497067451477,
|
|
"learning_rate": 0.00011265573770491804,
|
|
"loss": 0.0554,
|
|
"step": 13420
|
|
},
|
|
{
|
|
"epoch": 65.60097620500305,
|
|
"grad_norm": 2.546452283859253,
|
|
"learning_rate": 0.00011252459016393444,
|
|
"loss": 0.0567,
|
|
"step": 13440
|
|
},
|
|
{
|
|
"epoch": 65.69859670530812,
|
|
"grad_norm": 3.046600580215454,
|
|
"learning_rate": 0.00011239344262295082,
|
|
"loss": 0.0614,
|
|
"step": 13460
|
|
},
|
|
{
|
|
"epoch": 65.79621720561317,
|
|
"grad_norm": 2.1981520652770996,
|
|
"learning_rate": 0.00011226229508196722,
|
|
"loss": 0.0631,
|
|
"step": 13480
|
|
},
|
|
{
|
|
"epoch": 65.89383770591824,
|
|
"grad_norm": 2.0264618396759033,
|
|
"learning_rate": 0.00011213114754098362,
|
|
"loss": 0.0613,
|
|
"step": 13500
|
|
},
|
|
{
|
|
"epoch": 65.99145820622331,
|
|
"grad_norm": 5.12288236618042,
|
|
"learning_rate": 0.00011200000000000001,
|
|
"loss": 0.06,
|
|
"step": 13520
|
|
},
|
|
{
|
|
"epoch": 66.08907870652837,
|
|
"grad_norm": 1.6284067630767822,
|
|
"learning_rate": 0.00011186885245901639,
|
|
"loss": 0.0492,
|
|
"step": 13540
|
|
},
|
|
{
|
|
"epoch": 66.18669920683344,
|
|
"grad_norm": 1.5839916467666626,
|
|
"learning_rate": 0.00011173770491803279,
|
|
"loss": 0.0522,
|
|
"step": 13560
|
|
},
|
|
{
|
|
"epoch": 66.2843197071385,
|
|
"grad_norm": 2.381410598754883,
|
|
"learning_rate": 0.0001116065573770492,
|
|
"loss": 0.054,
|
|
"step": 13580
|
|
},
|
|
{
|
|
"epoch": 66.38194020744356,
|
|
"grad_norm": 2.1883935928344727,
|
|
"learning_rate": 0.00011147540983606557,
|
|
"loss": 0.0539,
|
|
"step": 13600
|
|
},
|
|
{
|
|
"epoch": 66.47956070774863,
|
|
"grad_norm": 1.3095104694366455,
|
|
"learning_rate": 0.00011134426229508197,
|
|
"loss": 0.0548,
|
|
"step": 13620
|
|
},
|
|
{
|
|
"epoch": 66.57718120805369,
|
|
"grad_norm": 3.0383543968200684,
|
|
"learning_rate": 0.00011121311475409838,
|
|
"loss": 0.0559,
|
|
"step": 13640
|
|
},
|
|
{
|
|
"epoch": 66.67480170835876,
|
|
"grad_norm": 1.848026990890503,
|
|
"learning_rate": 0.00011108196721311476,
|
|
"loss": 0.0582,
|
|
"step": 13660
|
|
},
|
|
{
|
|
"epoch": 66.77242220866383,
|
|
"grad_norm": 3.3856444358825684,
|
|
"learning_rate": 0.00011095081967213114,
|
|
"loss": 0.0583,
|
|
"step": 13680
|
|
},
|
|
{
|
|
"epoch": 66.87004270896888,
|
|
"grad_norm": 2.1376798152923584,
|
|
"learning_rate": 0.00011081967213114754,
|
|
"loss": 0.0542,
|
|
"step": 13700
|
|
},
|
|
{
|
|
"epoch": 66.96766320927395,
|
|
"grad_norm": 1.6410188674926758,
|
|
"learning_rate": 0.00011068852459016395,
|
|
"loss": 0.0648,
|
|
"step": 13720
|
|
},
|
|
{
|
|
"epoch": 67.065283709579,
|
|
"grad_norm": 2.1899428367614746,
|
|
"learning_rate": 0.00011055737704918035,
|
|
"loss": 0.0518,
|
|
"step": 13740
|
|
},
|
|
{
|
|
"epoch": 67.16290420988408,
|
|
"grad_norm": 1.662888526916504,
|
|
"learning_rate": 0.00011042622950819672,
|
|
"loss": 0.0481,
|
|
"step": 13760
|
|
},
|
|
{
|
|
"epoch": 67.26052471018915,
|
|
"grad_norm": 1.8363763093948364,
|
|
"learning_rate": 0.00011029508196721311,
|
|
"loss": 0.0521,
|
|
"step": 13780
|
|
},
|
|
{
|
|
"epoch": 67.3581452104942,
|
|
"grad_norm": 1.5296106338500977,
|
|
"learning_rate": 0.00011016393442622952,
|
|
"loss": 0.0569,
|
|
"step": 13800
|
|
},
|
|
{
|
|
"epoch": 67.45576571079927,
|
|
"grad_norm": 3.030101776123047,
|
|
"learning_rate": 0.00011003278688524589,
|
|
"loss": 0.0542,
|
|
"step": 13820
|
|
},
|
|
{
|
|
"epoch": 67.55338621110434,
|
|
"grad_norm": 1.4792683124542236,
|
|
"learning_rate": 0.0001099016393442623,
|
|
"loss": 0.0571,
|
|
"step": 13840
|
|
},
|
|
{
|
|
"epoch": 67.6510067114094,
|
|
"grad_norm": 1.8390675783157349,
|
|
"learning_rate": 0.0001097704918032787,
|
|
"loss": 0.0562,
|
|
"step": 13860
|
|
},
|
|
{
|
|
"epoch": 67.74862721171446,
|
|
"grad_norm": 1.6094483137130737,
|
|
"learning_rate": 0.0001096393442622951,
|
|
"loss": 0.0529,
|
|
"step": 13880
|
|
},
|
|
{
|
|
"epoch": 67.84624771201952,
|
|
"grad_norm": 2.030127763748169,
|
|
"learning_rate": 0.00010950819672131148,
|
|
"loss": 0.0572,
|
|
"step": 13900
|
|
},
|
|
{
|
|
"epoch": 67.94386821232459,
|
|
"grad_norm": 1.7216037511825562,
|
|
"learning_rate": 0.00010937704918032787,
|
|
"loss": 0.0618,
|
|
"step": 13920
|
|
},
|
|
{
|
|
"epoch": 68.04148871262966,
|
|
"grad_norm": 2.081637382507324,
|
|
"learning_rate": 0.00010924590163934427,
|
|
"loss": 0.0524,
|
|
"step": 13940
|
|
},
|
|
{
|
|
"epoch": 68.13910921293471,
|
|
"grad_norm": 1.6611350774765015,
|
|
"learning_rate": 0.00010911475409836067,
|
|
"loss": 0.0463,
|
|
"step": 13960
|
|
},
|
|
{
|
|
"epoch": 68.23672971323978,
|
|
"grad_norm": 1.8240830898284912,
|
|
"learning_rate": 0.00010898360655737705,
|
|
"loss": 0.0528,
|
|
"step": 13980
|
|
},
|
|
{
|
|
"epoch": 68.33435021354484,
|
|
"grad_norm": 2.299060344696045,
|
|
"learning_rate": 0.00010885245901639345,
|
|
"loss": 0.0525,
|
|
"step": 14000
|
|
},
|
|
{
|
|
"epoch": 68.4319707138499,
|
|
"grad_norm": 2.1000702381134033,
|
|
"learning_rate": 0.00010872131147540985,
|
|
"loss": 0.0534,
|
|
"step": 14020
|
|
},
|
|
{
|
|
"epoch": 68.52959121415498,
|
|
"grad_norm": 1.4629288911819458,
|
|
"learning_rate": 0.00010859016393442623,
|
|
"loss": 0.05,
|
|
"step": 14040
|
|
},
|
|
{
|
|
"epoch": 68.62721171446003,
|
|
"grad_norm": 1.571041464805603,
|
|
"learning_rate": 0.00010845901639344262,
|
|
"loss": 0.0535,
|
|
"step": 14060
|
|
},
|
|
{
|
|
"epoch": 68.7248322147651,
|
|
"grad_norm": 1.6722811460494995,
|
|
"learning_rate": 0.00010832786885245902,
|
|
"loss": 0.0498,
|
|
"step": 14080
|
|
},
|
|
{
|
|
"epoch": 68.82245271507017,
|
|
"grad_norm": 1.8763340711593628,
|
|
"learning_rate": 0.00010819672131147543,
|
|
"loss": 0.0547,
|
|
"step": 14100
|
|
},
|
|
{
|
|
"epoch": 68.92007321537523,
|
|
"grad_norm": 2.936068534851074,
|
|
"learning_rate": 0.0001080655737704918,
|
|
"loss": 0.055,
|
|
"step": 14120
|
|
},
|
|
{
|
|
"epoch": 69.0176937156803,
|
|
"grad_norm": 2.24238920211792,
|
|
"learning_rate": 0.0001079344262295082,
|
|
"loss": 0.059,
|
|
"step": 14140
|
|
},
|
|
{
|
|
"epoch": 69.11531421598535,
|
|
"grad_norm": 1.4389880895614624,
|
|
"learning_rate": 0.00010780327868852461,
|
|
"loss": 0.0453,
|
|
"step": 14160
|
|
},
|
|
{
|
|
"epoch": 69.21293471629042,
|
|
"grad_norm": 1.5990976095199585,
|
|
"learning_rate": 0.00010767213114754098,
|
|
"loss": 0.0483,
|
|
"step": 14180
|
|
},
|
|
{
|
|
"epoch": 69.31055521659549,
|
|
"grad_norm": 1.6288248300552368,
|
|
"learning_rate": 0.00010754098360655737,
|
|
"loss": 0.0492,
|
|
"step": 14200
|
|
},
|
|
{
|
|
"epoch": 69.40817571690054,
|
|
"grad_norm": 1.6476655006408691,
|
|
"learning_rate": 0.00010740983606557378,
|
|
"loss": 0.0485,
|
|
"step": 14220
|
|
},
|
|
{
|
|
"epoch": 69.50579621720561,
|
|
"grad_norm": 1.6670396327972412,
|
|
"learning_rate": 0.00010727868852459018,
|
|
"loss": 0.0573,
|
|
"step": 14240
|
|
},
|
|
{
|
|
"epoch": 69.60341671751068,
|
|
"grad_norm": 1.6438359022140503,
|
|
"learning_rate": 0.00010714754098360655,
|
|
"loss": 0.0511,
|
|
"step": 14260
|
|
},
|
|
{
|
|
"epoch": 69.70103721781574,
|
|
"grad_norm": 2.0703489780426025,
|
|
"learning_rate": 0.00010701639344262296,
|
|
"loss": 0.056,
|
|
"step": 14280
|
|
},
|
|
{
|
|
"epoch": 69.79865771812081,
|
|
"grad_norm": 1.2886505126953125,
|
|
"learning_rate": 0.00010688524590163935,
|
|
"loss": 0.0518,
|
|
"step": 14300
|
|
},
|
|
{
|
|
"epoch": 69.89627821842586,
|
|
"grad_norm": 1.869315505027771,
|
|
"learning_rate": 0.00010675409836065575,
|
|
"loss": 0.0613,
|
|
"step": 14320
|
|
},
|
|
{
|
|
"epoch": 69.99389871873093,
|
|
"grad_norm": 1.5631581544876099,
|
|
"learning_rate": 0.00010662295081967212,
|
|
"loss": 0.0501,
|
|
"step": 14340
|
|
},
|
|
{
|
|
"epoch": 70.091519219036,
|
|
"grad_norm": 2.5238254070281982,
|
|
"learning_rate": 0.00010649180327868853,
|
|
"loss": 0.0429,
|
|
"step": 14360
|
|
},
|
|
{
|
|
"epoch": 70.18913971934106,
|
|
"grad_norm": 1.3227728605270386,
|
|
"learning_rate": 0.00010636065573770493,
|
|
"loss": 0.0474,
|
|
"step": 14380
|
|
},
|
|
{
|
|
"epoch": 70.28676021964613,
|
|
"grad_norm": 2.284006118774414,
|
|
"learning_rate": 0.0001062295081967213,
|
|
"loss": 0.0513,
|
|
"step": 14400
|
|
},
|
|
{
|
|
"epoch": 70.3843807199512,
|
|
"grad_norm": 1.3161708116531372,
|
|
"learning_rate": 0.00010609836065573771,
|
|
"loss": 0.0517,
|
|
"step": 14420
|
|
},
|
|
{
|
|
"epoch": 70.48200122025625,
|
|
"grad_norm": 1.7371370792388916,
|
|
"learning_rate": 0.0001059672131147541,
|
|
"loss": 0.0494,
|
|
"step": 14440
|
|
},
|
|
{
|
|
"epoch": 70.57962172056132,
|
|
"grad_norm": 3.2471296787261963,
|
|
"learning_rate": 0.0001058360655737705,
|
|
"loss": 0.0501,
|
|
"step": 14460
|
|
},
|
|
{
|
|
"epoch": 70.67724222086638,
|
|
"grad_norm": 1.9515056610107422,
|
|
"learning_rate": 0.00010570491803278688,
|
|
"loss": 0.0562,
|
|
"step": 14480
|
|
},
|
|
{
|
|
"epoch": 70.77486272117144,
|
|
"grad_norm": 1.9817485809326172,
|
|
"learning_rate": 0.00010557377049180328,
|
|
"loss": 0.0555,
|
|
"step": 14500
|
|
},
|
|
{
|
|
"epoch": 70.87248322147651,
|
|
"grad_norm": 1.4870388507843018,
|
|
"learning_rate": 0.00010544262295081968,
|
|
"loss": 0.0529,
|
|
"step": 14520
|
|
},
|
|
{
|
|
"epoch": 70.97010372178157,
|
|
"grad_norm": 1.6690961122512817,
|
|
"learning_rate": 0.00010531147540983609,
|
|
"loss": 0.0525,
|
|
"step": 14540
|
|
},
|
|
{
|
|
"epoch": 71.06772422208664,
|
|
"grad_norm": 1.4471197128295898,
|
|
"learning_rate": 0.00010518032786885246,
|
|
"loss": 0.0473,
|
|
"step": 14560
|
|
},
|
|
{
|
|
"epoch": 71.16534472239171,
|
|
"grad_norm": 2.1219794750213623,
|
|
"learning_rate": 0.00010504918032786885,
|
|
"loss": 0.044,
|
|
"step": 14580
|
|
},
|
|
{
|
|
"epoch": 71.26296522269676,
|
|
"grad_norm": 2.1904103755950928,
|
|
"learning_rate": 0.00010491803278688525,
|
|
"loss": 0.0455,
|
|
"step": 14600
|
|
},
|
|
{
|
|
"epoch": 71.36058572300183,
|
|
"grad_norm": 2.576287269592285,
|
|
"learning_rate": 0.00010478688524590163,
|
|
"loss": 0.047,
|
|
"step": 14620
|
|
},
|
|
{
|
|
"epoch": 71.45820622330689,
|
|
"grad_norm": 2.6635916233062744,
|
|
"learning_rate": 0.00010465573770491803,
|
|
"loss": 0.0461,
|
|
"step": 14640
|
|
},
|
|
{
|
|
"epoch": 71.55582672361196,
|
|
"grad_norm": 1.234979510307312,
|
|
"learning_rate": 0.00010452459016393444,
|
|
"loss": 0.0464,
|
|
"step": 14660
|
|
},
|
|
{
|
|
"epoch": 71.65344722391703,
|
|
"grad_norm": 1.6037148237228394,
|
|
"learning_rate": 0.00010439344262295083,
|
|
"loss": 0.0522,
|
|
"step": 14680
|
|
},
|
|
{
|
|
"epoch": 71.75106772422208,
|
|
"grad_norm": 1.990586280822754,
|
|
"learning_rate": 0.00010426229508196721,
|
|
"loss": 0.0577,
|
|
"step": 14700
|
|
},
|
|
{
|
|
"epoch": 71.84868822452715,
|
|
"grad_norm": 1.5060235261917114,
|
|
"learning_rate": 0.0001041311475409836,
|
|
"loss": 0.0582,
|
|
"step": 14720
|
|
},
|
|
{
|
|
"epoch": 71.94630872483222,
|
|
"grad_norm": 2.0390243530273438,
|
|
"learning_rate": 0.00010400000000000001,
|
|
"loss": 0.0559,
|
|
"step": 14740
|
|
},
|
|
{
|
|
"epoch": 72.04392922513728,
|
|
"grad_norm": 1.5087133646011353,
|
|
"learning_rate": 0.00010386885245901641,
|
|
"loss": 0.0538,
|
|
"step": 14760
|
|
},
|
|
{
|
|
"epoch": 72.14154972544235,
|
|
"grad_norm": 2.0309317111968994,
|
|
"learning_rate": 0.00010373770491803279,
|
|
"loss": 0.0455,
|
|
"step": 14780
|
|
},
|
|
{
|
|
"epoch": 72.2391702257474,
|
|
"grad_norm": 1.643815279006958,
|
|
"learning_rate": 0.00010360655737704919,
|
|
"loss": 0.0449,
|
|
"step": 14800
|
|
},
|
|
{
|
|
"epoch": 72.33679072605247,
|
|
"grad_norm": 1.881631851196289,
|
|
"learning_rate": 0.00010347540983606558,
|
|
"loss": 0.0473,
|
|
"step": 14820
|
|
},
|
|
{
|
|
"epoch": 72.43441122635754,
|
|
"grad_norm": 1.6459357738494873,
|
|
"learning_rate": 0.00010334426229508197,
|
|
"loss": 0.048,
|
|
"step": 14840
|
|
},
|
|
{
|
|
"epoch": 72.5320317266626,
|
|
"grad_norm": 2.058401346206665,
|
|
"learning_rate": 0.00010321311475409836,
|
|
"loss": 0.0435,
|
|
"step": 14860
|
|
},
|
|
{
|
|
"epoch": 72.62965222696766,
|
|
"grad_norm": 1.428796410560608,
|
|
"learning_rate": 0.00010308196721311476,
|
|
"loss": 0.0501,
|
|
"step": 14880
|
|
},
|
|
{
|
|
"epoch": 72.72727272727273,
|
|
"grad_norm": 1.7262030839920044,
|
|
"learning_rate": 0.00010295081967213116,
|
|
"loss": 0.0512,
|
|
"step": 14900
|
|
},
|
|
{
|
|
"epoch": 72.82489322757779,
|
|
"grad_norm": 1.5179914236068726,
|
|
"learning_rate": 0.00010281967213114754,
|
|
"loss": 0.0524,
|
|
"step": 14920
|
|
},
|
|
{
|
|
"epoch": 72.92251372788286,
|
|
"grad_norm": 1.5609859228134155,
|
|
"learning_rate": 0.00010268852459016394,
|
|
"loss": 0.0547,
|
|
"step": 14940
|
|
},
|
|
{
|
|
"epoch": 73.02013422818791,
|
|
"grad_norm": 1.1576975584030151,
|
|
"learning_rate": 0.00010255737704918033,
|
|
"loss": 0.0506,
|
|
"step": 14960
|
|
},
|
|
{
|
|
"epoch": 73.11775472849298,
|
|
"grad_norm": 1.4885412454605103,
|
|
"learning_rate": 0.00010242622950819673,
|
|
"loss": 0.0424,
|
|
"step": 14980
|
|
},
|
|
{
|
|
"epoch": 73.21537522879805,
|
|
"grad_norm": 1.0229533910751343,
|
|
"learning_rate": 0.00010229508196721311,
|
|
"loss": 0.0448,
|
|
"step": 15000
|
|
},
|
|
{
|
|
"epoch": 73.31299572910311,
|
|
"grad_norm": 1.9551867246627808,
|
|
"learning_rate": 0.00010216393442622951,
|
|
"loss": 0.0468,
|
|
"step": 15020
|
|
},
|
|
{
|
|
"epoch": 73.41061622940818,
|
|
"grad_norm": 2.8374905586242676,
|
|
"learning_rate": 0.00010203278688524592,
|
|
"loss": 0.0473,
|
|
"step": 15040
|
|
},
|
|
{
|
|
"epoch": 73.50823672971325,
|
|
"grad_norm": 1.2752137184143066,
|
|
"learning_rate": 0.00010190163934426229,
|
|
"loss": 0.0487,
|
|
"step": 15060
|
|
},
|
|
{
|
|
"epoch": 73.6058572300183,
|
|
"grad_norm": 1.7868965864181519,
|
|
"learning_rate": 0.0001017704918032787,
|
|
"loss": 0.0479,
|
|
"step": 15080
|
|
},
|
|
{
|
|
"epoch": 73.70347773032337,
|
|
"grad_norm": 1.7490606307983398,
|
|
"learning_rate": 0.00010163934426229508,
|
|
"loss": 0.0466,
|
|
"step": 15100
|
|
},
|
|
{
|
|
"epoch": 73.80109823062843,
|
|
"grad_norm": 2.4176886081695557,
|
|
"learning_rate": 0.00010150819672131149,
|
|
"loss": 0.0539,
|
|
"step": 15120
|
|
},
|
|
{
|
|
"epoch": 73.8987187309335,
|
|
"grad_norm": 1.9388647079467773,
|
|
"learning_rate": 0.00010137704918032786,
|
|
"loss": 0.0527,
|
|
"step": 15140
|
|
},
|
|
{
|
|
"epoch": 73.99633923123857,
|
|
"grad_norm": 1.9763060808181763,
|
|
"learning_rate": 0.00010124590163934427,
|
|
"loss": 0.0512,
|
|
"step": 15160
|
|
},
|
|
{
|
|
"epoch": 74.09395973154362,
|
|
"grad_norm": 1.1881762742996216,
|
|
"learning_rate": 0.00010111475409836067,
|
|
"loss": 0.0406,
|
|
"step": 15180
|
|
},
|
|
{
|
|
"epoch": 74.19158023184869,
|
|
"grad_norm": 1.5949184894561768,
|
|
"learning_rate": 0.00010098360655737706,
|
|
"loss": 0.0446,
|
|
"step": 15200
|
|
},
|
|
{
|
|
"epoch": 74.28920073215376,
|
|
"grad_norm": 1.3817694187164307,
|
|
"learning_rate": 0.00010085245901639345,
|
|
"loss": 0.0468,
|
|
"step": 15220
|
|
},
|
|
{
|
|
"epoch": 74.38682123245881,
|
|
"grad_norm": 1.4940481185913086,
|
|
"learning_rate": 0.00010072131147540984,
|
|
"loss": 0.0528,
|
|
"step": 15240
|
|
},
|
|
{
|
|
"epoch": 74.48444173276388,
|
|
"grad_norm": 1.5146404504776,
|
|
"learning_rate": 0.00010059016393442624,
|
|
"loss": 0.0503,
|
|
"step": 15260
|
|
},
|
|
{
|
|
"epoch": 74.58206223306894,
|
|
"grad_norm": 1.4908167123794556,
|
|
"learning_rate": 0.00010045901639344261,
|
|
"loss": 0.0474,
|
|
"step": 15280
|
|
},
|
|
{
|
|
"epoch": 74.67968273337401,
|
|
"grad_norm": 1.9127382040023804,
|
|
"learning_rate": 0.00010032786885245902,
|
|
"loss": 0.0475,
|
|
"step": 15300
|
|
},
|
|
{
|
|
"epoch": 74.77730323367908,
|
|
"grad_norm": 1.9283052682876587,
|
|
"learning_rate": 0.00010019672131147542,
|
|
"loss": 0.0482,
|
|
"step": 15320
|
|
},
|
|
{
|
|
"epoch": 74.87492373398413,
|
|
"grad_norm": 1.434121012687683,
|
|
"learning_rate": 0.00010006557377049181,
|
|
"loss": 0.0484,
|
|
"step": 15340
|
|
},
|
|
{
|
|
"epoch": 74.9725442342892,
|
|
"grad_norm": 1.2398452758789062,
|
|
"learning_rate": 9.99344262295082e-05,
|
|
"loss": 0.0474,
|
|
"step": 15360
|
|
},
|
|
{
|
|
"epoch": 75.07016473459427,
|
|
"grad_norm": 1.463592529296875,
|
|
"learning_rate": 9.980327868852459e-05,
|
|
"loss": 0.0435,
|
|
"step": 15380
|
|
},
|
|
{
|
|
"epoch": 75.16778523489933,
|
|
"grad_norm": 1.5821679830551147,
|
|
"learning_rate": 9.967213114754099e-05,
|
|
"loss": 0.0447,
|
|
"step": 15400
|
|
},
|
|
{
|
|
"epoch": 75.2654057352044,
|
|
"grad_norm": 1.525728702545166,
|
|
"learning_rate": 9.954098360655738e-05,
|
|
"loss": 0.0442,
|
|
"step": 15420
|
|
},
|
|
{
|
|
"epoch": 75.36302623550945,
|
|
"grad_norm": 1.2761436700820923,
|
|
"learning_rate": 9.940983606557378e-05,
|
|
"loss": 0.0464,
|
|
"step": 15440
|
|
},
|
|
{
|
|
"epoch": 75.46064673581452,
|
|
"grad_norm": 1.4206050634384155,
|
|
"learning_rate": 9.927868852459017e-05,
|
|
"loss": 0.0436,
|
|
"step": 15460
|
|
},
|
|
{
|
|
"epoch": 75.55826723611959,
|
|
"grad_norm": 2.7865281105041504,
|
|
"learning_rate": 9.914754098360656e-05,
|
|
"loss": 0.0505,
|
|
"step": 15480
|
|
},
|
|
{
|
|
"epoch": 75.65588773642465,
|
|
"grad_norm": 1.249638557434082,
|
|
"learning_rate": 9.901639344262295e-05,
|
|
"loss": 0.0461,
|
|
"step": 15500
|
|
},
|
|
{
|
|
"epoch": 75.75350823672972,
|
|
"grad_norm": 1.9104782342910767,
|
|
"learning_rate": 9.888524590163934e-05,
|
|
"loss": 0.0465,
|
|
"step": 15520
|
|
},
|
|
{
|
|
"epoch": 75.85112873703477,
|
|
"grad_norm": 1.3011826276779175,
|
|
"learning_rate": 9.875409836065574e-05,
|
|
"loss": 0.0488,
|
|
"step": 15540
|
|
},
|
|
{
|
|
"epoch": 75.94874923733984,
|
|
"grad_norm": 2.3834664821624756,
|
|
"learning_rate": 9.862295081967213e-05,
|
|
"loss": 0.0519,
|
|
"step": 15560
|
|
},
|
|
{
|
|
"epoch": 76.04636973764491,
|
|
"grad_norm": 1.3640445470809937,
|
|
"learning_rate": 9.849180327868854e-05,
|
|
"loss": 0.0449,
|
|
"step": 15580
|
|
},
|
|
{
|
|
"epoch": 76.14399023794996,
|
|
"grad_norm": 1.3013081550598145,
|
|
"learning_rate": 9.836065573770493e-05,
|
|
"loss": 0.0399,
|
|
"step": 15600
|
|
},
|
|
{
|
|
"epoch": 76.24161073825503,
|
|
"grad_norm": 2.726545810699463,
|
|
"learning_rate": 9.822950819672132e-05,
|
|
"loss": 0.0427,
|
|
"step": 15620
|
|
},
|
|
{
|
|
"epoch": 76.3392312385601,
|
|
"grad_norm": 1.2742412090301514,
|
|
"learning_rate": 9.80983606557377e-05,
|
|
"loss": 0.0439,
|
|
"step": 15640
|
|
},
|
|
{
|
|
"epoch": 76.43685173886516,
|
|
"grad_norm": 1.67985999584198,
|
|
"learning_rate": 9.796721311475411e-05,
|
|
"loss": 0.0483,
|
|
"step": 15660
|
|
},
|
|
{
|
|
"epoch": 76.53447223917023,
|
|
"grad_norm": 1.9676116704940796,
|
|
"learning_rate": 9.78360655737705e-05,
|
|
"loss": 0.0442,
|
|
"step": 15680
|
|
},
|
|
{
|
|
"epoch": 76.63209273947528,
|
|
"grad_norm": 2.992687463760376,
|
|
"learning_rate": 9.770491803278689e-05,
|
|
"loss": 0.0429,
|
|
"step": 15700
|
|
},
|
|
{
|
|
"epoch": 76.72971323978035,
|
|
"grad_norm": 1.0530943870544434,
|
|
"learning_rate": 9.757377049180329e-05,
|
|
"loss": 0.0488,
|
|
"step": 15720
|
|
},
|
|
{
|
|
"epoch": 76.82733374008542,
|
|
"grad_norm": 1.5308395624160767,
|
|
"learning_rate": 9.744262295081968e-05,
|
|
"loss": 0.0442,
|
|
"step": 15740
|
|
},
|
|
{
|
|
"epoch": 76.92495424039048,
|
|
"grad_norm": 1.6086573600769043,
|
|
"learning_rate": 9.731147540983607e-05,
|
|
"loss": 0.0504,
|
|
"step": 15760
|
|
},
|
|
{
|
|
"epoch": 77.02257474069555,
|
|
"grad_norm": 0.7401487827301025,
|
|
"learning_rate": 9.718032786885246e-05,
|
|
"loss": 0.0469,
|
|
"step": 15780
|
|
},
|
|
{
|
|
"epoch": 77.12019524100062,
|
|
"grad_norm": 1.6960521936416626,
|
|
"learning_rate": 9.704918032786886e-05,
|
|
"loss": 0.041,
|
|
"step": 15800
|
|
},
|
|
{
|
|
"epoch": 77.21781574130567,
|
|
"grad_norm": 1.638244390487671,
|
|
"learning_rate": 9.691803278688525e-05,
|
|
"loss": 0.0381,
|
|
"step": 15820
|
|
},
|
|
{
|
|
"epoch": 77.31543624161074,
|
|
"grad_norm": 1.749157190322876,
|
|
"learning_rate": 9.678688524590165e-05,
|
|
"loss": 0.0425,
|
|
"step": 15840
|
|
},
|
|
{
|
|
"epoch": 77.4130567419158,
|
|
"grad_norm": 1.9708433151245117,
|
|
"learning_rate": 9.665573770491804e-05,
|
|
"loss": 0.046,
|
|
"step": 15860
|
|
},
|
|
{
|
|
"epoch": 77.51067724222086,
|
|
"grad_norm": 1.3981733322143555,
|
|
"learning_rate": 9.652459016393443e-05,
|
|
"loss": 0.0489,
|
|
"step": 15880
|
|
},
|
|
{
|
|
"epoch": 77.60829774252593,
|
|
"grad_norm": 2.0385096073150635,
|
|
"learning_rate": 9.639344262295082e-05,
|
|
"loss": 0.0442,
|
|
"step": 15900
|
|
},
|
|
{
|
|
"epoch": 77.70591824283099,
|
|
"grad_norm": 1.5670779943466187,
|
|
"learning_rate": 9.626229508196721e-05,
|
|
"loss": 0.0412,
|
|
"step": 15920
|
|
},
|
|
{
|
|
"epoch": 77.80353874313606,
|
|
"grad_norm": 1.3687881231307983,
|
|
"learning_rate": 9.613114754098361e-05,
|
|
"loss": 0.0469,
|
|
"step": 15940
|
|
},
|
|
{
|
|
"epoch": 77.90115924344113,
|
|
"grad_norm": 1.388791799545288,
|
|
"learning_rate": 9.6e-05,
|
|
"loss": 0.0456,
|
|
"step": 15960
|
|
},
|
|
{
|
|
"epoch": 77.99877974374618,
|
|
"grad_norm": 1.5988264083862305,
|
|
"learning_rate": 9.58688524590164e-05,
|
|
"loss": 0.048,
|
|
"step": 15980
|
|
},
|
|
{
|
|
"epoch": 78.09640024405125,
|
|
"grad_norm": 2.0128464698791504,
|
|
"learning_rate": 9.57377049180328e-05,
|
|
"loss": 0.0406,
|
|
"step": 16000
|
|
},
|
|
{
|
|
"epoch": 78.19402074435631,
|
|
"grad_norm": 1.9334806203842163,
|
|
"learning_rate": 9.560655737704918e-05,
|
|
"loss": 0.0402,
|
|
"step": 16020
|
|
},
|
|
{
|
|
"epoch": 78.29164124466138,
|
|
"grad_norm": 2.799999952316284,
|
|
"learning_rate": 9.547540983606557e-05,
|
|
"loss": 0.0415,
|
|
"step": 16040
|
|
},
|
|
{
|
|
"epoch": 78.38926174496645,
|
|
"grad_norm": 1.2322758436203003,
|
|
"learning_rate": 9.534426229508198e-05,
|
|
"loss": 0.0396,
|
|
"step": 16060
|
|
},
|
|
{
|
|
"epoch": 78.4868822452715,
|
|
"grad_norm": 2.0646209716796875,
|
|
"learning_rate": 9.521311475409837e-05,
|
|
"loss": 0.0449,
|
|
"step": 16080
|
|
},
|
|
{
|
|
"epoch": 78.58450274557657,
|
|
"grad_norm": 1.3239318132400513,
|
|
"learning_rate": 9.508196721311476e-05,
|
|
"loss": 0.0473,
|
|
"step": 16100
|
|
},
|
|
{
|
|
"epoch": 78.68212324588164,
|
|
"grad_norm": 1.0266462564468384,
|
|
"learning_rate": 9.495081967213116e-05,
|
|
"loss": 0.047,
|
|
"step": 16120
|
|
},
|
|
{
|
|
"epoch": 78.7797437461867,
|
|
"grad_norm": 1.3568265438079834,
|
|
"learning_rate": 9.481967213114755e-05,
|
|
"loss": 0.0472,
|
|
"step": 16140
|
|
},
|
|
{
|
|
"epoch": 78.87736424649177,
|
|
"grad_norm": 1.8407416343688965,
|
|
"learning_rate": 9.468852459016394e-05,
|
|
"loss": 0.0489,
|
|
"step": 16160
|
|
},
|
|
{
|
|
"epoch": 78.97498474679682,
|
|
"grad_norm": 3.1566367149353027,
|
|
"learning_rate": 9.455737704918033e-05,
|
|
"loss": 0.0447,
|
|
"step": 16180
|
|
},
|
|
{
|
|
"epoch": 79.07260524710189,
|
|
"grad_norm": 1.4140985012054443,
|
|
"learning_rate": 9.442622950819673e-05,
|
|
"loss": 0.0397,
|
|
"step": 16200
|
|
},
|
|
{
|
|
"epoch": 79.17022574740696,
|
|
"grad_norm": 1.774674654006958,
|
|
"learning_rate": 9.429508196721312e-05,
|
|
"loss": 0.0394,
|
|
"step": 16220
|
|
},
|
|
{
|
|
"epoch": 79.26784624771201,
|
|
"grad_norm": 1.8382582664489746,
|
|
"learning_rate": 9.416393442622952e-05,
|
|
"loss": 0.0391,
|
|
"step": 16240
|
|
},
|
|
{
|
|
"epoch": 79.36546674801708,
|
|
"grad_norm": 1.465022087097168,
|
|
"learning_rate": 9.403278688524591e-05,
|
|
"loss": 0.0427,
|
|
"step": 16260
|
|
},
|
|
{
|
|
"epoch": 79.46308724832215,
|
|
"grad_norm": 1.5447912216186523,
|
|
"learning_rate": 9.39016393442623e-05,
|
|
"loss": 0.0435,
|
|
"step": 16280
|
|
},
|
|
{
|
|
"epoch": 79.56070774862721,
|
|
"grad_norm": 1.5630700588226318,
|
|
"learning_rate": 9.377049180327869e-05,
|
|
"loss": 0.0426,
|
|
"step": 16300
|
|
},
|
|
{
|
|
"epoch": 79.65832824893228,
|
|
"grad_norm": 2.1536877155303955,
|
|
"learning_rate": 9.363934426229508e-05,
|
|
"loss": 0.0436,
|
|
"step": 16320
|
|
},
|
|
{
|
|
"epoch": 79.75594874923733,
|
|
"grad_norm": 1.5898195505142212,
|
|
"learning_rate": 9.350819672131148e-05,
|
|
"loss": 0.0488,
|
|
"step": 16340
|
|
},
|
|
{
|
|
"epoch": 79.8535692495424,
|
|
"grad_norm": 1.5002379417419434,
|
|
"learning_rate": 9.337704918032787e-05,
|
|
"loss": 0.0453,
|
|
"step": 16360
|
|
},
|
|
{
|
|
"epoch": 79.95118974984747,
|
|
"grad_norm": 1.7117916345596313,
|
|
"learning_rate": 9.324590163934427e-05,
|
|
"loss": 0.0483,
|
|
"step": 16380
|
|
},
|
|
{
|
|
"epoch": 80.04881025015253,
|
|
"grad_norm": 1.672136664390564,
|
|
"learning_rate": 9.311475409836066e-05,
|
|
"loss": 0.0444,
|
|
"step": 16400
|
|
},
|
|
{
|
|
"epoch": 80.1464307504576,
|
|
"grad_norm": 1.2889288663864136,
|
|
"learning_rate": 9.298360655737705e-05,
|
|
"loss": 0.0377,
|
|
"step": 16420
|
|
},
|
|
{
|
|
"epoch": 80.24405125076267,
|
|
"grad_norm": 1.602657675743103,
|
|
"learning_rate": 9.285245901639344e-05,
|
|
"loss": 0.0431,
|
|
"step": 16440
|
|
},
|
|
{
|
|
"epoch": 80.34167175106772,
|
|
"grad_norm": 1.673161506652832,
|
|
"learning_rate": 9.272131147540985e-05,
|
|
"loss": 0.0392,
|
|
"step": 16460
|
|
},
|
|
{
|
|
"epoch": 80.43929225137279,
|
|
"grad_norm": 1.5590755939483643,
|
|
"learning_rate": 9.259016393442623e-05,
|
|
"loss": 0.0401,
|
|
"step": 16480
|
|
},
|
|
{
|
|
"epoch": 80.53691275167785,
|
|
"grad_norm": 1.3792154788970947,
|
|
"learning_rate": 9.245901639344264e-05,
|
|
"loss": 0.0434,
|
|
"step": 16500
|
|
},
|
|
{
|
|
"epoch": 80.63453325198292,
|
|
"grad_norm": 1.0110834836959839,
|
|
"learning_rate": 9.232786885245903e-05,
|
|
"loss": 0.0455,
|
|
"step": 16520
|
|
},
|
|
{
|
|
"epoch": 80.73215375228799,
|
|
"grad_norm": 1.3876726627349854,
|
|
"learning_rate": 9.21967213114754e-05,
|
|
"loss": 0.0457,
|
|
"step": 16540
|
|
},
|
|
{
|
|
"epoch": 80.82977425259304,
|
|
"grad_norm": 2.110999822616577,
|
|
"learning_rate": 9.20655737704918e-05,
|
|
"loss": 0.0467,
|
|
"step": 16560
|
|
},
|
|
{
|
|
"epoch": 80.92739475289811,
|
|
"grad_norm": 1.9339549541473389,
|
|
"learning_rate": 9.19344262295082e-05,
|
|
"loss": 0.0432,
|
|
"step": 16580
|
|
},
|
|
{
|
|
"epoch": 81.02501525320318,
|
|
"grad_norm": 1.1022757291793823,
|
|
"learning_rate": 9.18032786885246e-05,
|
|
"loss": 0.0454,
|
|
"step": 16600
|
|
},
|
|
{
|
|
"epoch": 81.12263575350823,
|
|
"grad_norm": 1.7495752573013306,
|
|
"learning_rate": 9.167213114754099e-05,
|
|
"loss": 0.0375,
|
|
"step": 16620
|
|
},
|
|
{
|
|
"epoch": 81.2202562538133,
|
|
"grad_norm": 1.6110937595367432,
|
|
"learning_rate": 9.154098360655739e-05,
|
|
"loss": 0.0399,
|
|
"step": 16640
|
|
},
|
|
{
|
|
"epoch": 81.31787675411836,
|
|
"grad_norm": 1.7473942041397095,
|
|
"learning_rate": 9.140983606557378e-05,
|
|
"loss": 0.0399,
|
|
"step": 16660
|
|
},
|
|
{
|
|
"epoch": 81.41549725442343,
|
|
"grad_norm": 1.6311129331588745,
|
|
"learning_rate": 9.127868852459017e-05,
|
|
"loss": 0.0397,
|
|
"step": 16680
|
|
},
|
|
{
|
|
"epoch": 81.5131177547285,
|
|
"grad_norm": 2.0772838592529297,
|
|
"learning_rate": 9.114754098360656e-05,
|
|
"loss": 0.042,
|
|
"step": 16700
|
|
},
|
|
{
|
|
"epoch": 81.61073825503355,
|
|
"grad_norm": 1.5818514823913574,
|
|
"learning_rate": 9.101639344262296e-05,
|
|
"loss": 0.0419,
|
|
"step": 16720
|
|
},
|
|
{
|
|
"epoch": 81.70835875533862,
|
|
"grad_norm": 1.0751603841781616,
|
|
"learning_rate": 9.088524590163935e-05,
|
|
"loss": 0.0447,
|
|
"step": 16740
|
|
},
|
|
{
|
|
"epoch": 81.80597925564369,
|
|
"grad_norm": 1.2915217876434326,
|
|
"learning_rate": 9.075409836065574e-05,
|
|
"loss": 0.0425,
|
|
"step": 16760
|
|
},
|
|
{
|
|
"epoch": 81.90359975594875,
|
|
"grad_norm": 1.4814674854278564,
|
|
"learning_rate": 9.062295081967214e-05,
|
|
"loss": 0.0455,
|
|
"step": 16780
|
|
},
|
|
{
|
|
"epoch": 82.00122025625382,
|
|
"grad_norm": 1.1471761465072632,
|
|
"learning_rate": 9.049180327868852e-05,
|
|
"loss": 0.0418,
|
|
"step": 16800
|
|
},
|
|
{
|
|
"epoch": 82.09884075655887,
|
|
"grad_norm": 1.663212537765503,
|
|
"learning_rate": 9.036065573770492e-05,
|
|
"loss": 0.0348,
|
|
"step": 16820
|
|
},
|
|
{
|
|
"epoch": 82.19646125686394,
|
|
"grad_norm": 2.423266887664795,
|
|
"learning_rate": 9.022950819672131e-05,
|
|
"loss": 0.0367,
|
|
"step": 16840
|
|
},
|
|
{
|
|
"epoch": 82.29408175716901,
|
|
"grad_norm": 1.5083626508712769,
|
|
"learning_rate": 9.009836065573771e-05,
|
|
"loss": 0.038,
|
|
"step": 16860
|
|
},
|
|
{
|
|
"epoch": 82.39170225747407,
|
|
"grad_norm": 1.2111772298812866,
|
|
"learning_rate": 8.99672131147541e-05,
|
|
"loss": 0.0404,
|
|
"step": 16880
|
|
},
|
|
{
|
|
"epoch": 82.48932275777914,
|
|
"grad_norm": 2.1727235317230225,
|
|
"learning_rate": 8.98360655737705e-05,
|
|
"loss": 0.0434,
|
|
"step": 16900
|
|
},
|
|
{
|
|
"epoch": 82.5869432580842,
|
|
"grad_norm": 1.3636887073516846,
|
|
"learning_rate": 8.97049180327869e-05,
|
|
"loss": 0.0404,
|
|
"step": 16920
|
|
},
|
|
{
|
|
"epoch": 82.68456375838926,
|
|
"grad_norm": 2.5128073692321777,
|
|
"learning_rate": 8.957377049180328e-05,
|
|
"loss": 0.0442,
|
|
"step": 16940
|
|
},
|
|
{
|
|
"epoch": 82.78218425869433,
|
|
"grad_norm": 1.3435509204864502,
|
|
"learning_rate": 8.944262295081967e-05,
|
|
"loss": 0.044,
|
|
"step": 16960
|
|
},
|
|
{
|
|
"epoch": 82.87980475899938,
|
|
"grad_norm": 2.397655487060547,
|
|
"learning_rate": 8.931147540983606e-05,
|
|
"loss": 0.0447,
|
|
"step": 16980
|
|
},
|
|
{
|
|
"epoch": 82.97742525930445,
|
|
"grad_norm": 1.5072938203811646,
|
|
"learning_rate": 8.918032786885247e-05,
|
|
"loss": 0.0411,
|
|
"step": 17000
|
|
},
|
|
{
|
|
"epoch": 83.07504575960952,
|
|
"grad_norm": 1.9065088033676147,
|
|
"learning_rate": 8.904918032786886e-05,
|
|
"loss": 0.036,
|
|
"step": 17020
|
|
},
|
|
{
|
|
"epoch": 83.17266625991458,
|
|
"grad_norm": 1.4267394542694092,
|
|
"learning_rate": 8.891803278688526e-05,
|
|
"loss": 0.035,
|
|
"step": 17040
|
|
},
|
|
{
|
|
"epoch": 83.27028676021965,
|
|
"grad_norm": 1.677395224571228,
|
|
"learning_rate": 8.878688524590163e-05,
|
|
"loss": 0.0425,
|
|
"step": 17060
|
|
},
|
|
{
|
|
"epoch": 83.36790726052472,
|
|
"grad_norm": 1.5407129526138306,
|
|
"learning_rate": 8.865573770491804e-05,
|
|
"loss": 0.0362,
|
|
"step": 17080
|
|
},
|
|
{
|
|
"epoch": 83.46552776082977,
|
|
"grad_norm": 1.1154581308364868,
|
|
"learning_rate": 8.852459016393443e-05,
|
|
"loss": 0.0403,
|
|
"step": 17100
|
|
},
|
|
{
|
|
"epoch": 83.56314826113484,
|
|
"grad_norm": 1.6043624877929688,
|
|
"learning_rate": 8.839344262295083e-05,
|
|
"loss": 0.0413,
|
|
"step": 17120
|
|
},
|
|
{
|
|
"epoch": 83.6607687614399,
|
|
"grad_norm": 1.4952675104141235,
|
|
"learning_rate": 8.826229508196722e-05,
|
|
"loss": 0.0397,
|
|
"step": 17140
|
|
},
|
|
{
|
|
"epoch": 83.75838926174497,
|
|
"grad_norm": 1.4162040948867798,
|
|
"learning_rate": 8.813114754098362e-05,
|
|
"loss": 0.0435,
|
|
"step": 17160
|
|
},
|
|
{
|
|
"epoch": 83.85600976205004,
|
|
"grad_norm": 1.616909384727478,
|
|
"learning_rate": 8.800000000000001e-05,
|
|
"loss": 0.0455,
|
|
"step": 17180
|
|
},
|
|
{
|
|
"epoch": 83.95363026235509,
|
|
"grad_norm": 1.3712050914764404,
|
|
"learning_rate": 8.786885245901639e-05,
|
|
"loss": 0.0442,
|
|
"step": 17200
|
|
},
|
|
{
|
|
"epoch": 84.05125076266016,
|
|
"grad_norm": 1.0349308252334595,
|
|
"learning_rate": 8.773770491803279e-05,
|
|
"loss": 0.0368,
|
|
"step": 17220
|
|
},
|
|
{
|
|
"epoch": 84.14887126296523,
|
|
"grad_norm": 1.427959680557251,
|
|
"learning_rate": 8.760655737704918e-05,
|
|
"loss": 0.0318,
|
|
"step": 17240
|
|
},
|
|
{
|
|
"epoch": 84.24649176327028,
|
|
"grad_norm": 1.1814582347869873,
|
|
"learning_rate": 8.747540983606558e-05,
|
|
"loss": 0.0386,
|
|
"step": 17260
|
|
},
|
|
{
|
|
"epoch": 84.34411226357535,
|
|
"grad_norm": 1.0893490314483643,
|
|
"learning_rate": 8.734426229508197e-05,
|
|
"loss": 0.0398,
|
|
"step": 17280
|
|
},
|
|
{
|
|
"epoch": 84.44173276388041,
|
|
"grad_norm": 1.2510510683059692,
|
|
"learning_rate": 8.721311475409837e-05,
|
|
"loss": 0.0425,
|
|
"step": 17300
|
|
},
|
|
{
|
|
"epoch": 84.53935326418548,
|
|
"grad_norm": 0.9642720818519592,
|
|
"learning_rate": 8.708196721311475e-05,
|
|
"loss": 0.0384,
|
|
"step": 17320
|
|
},
|
|
{
|
|
"epoch": 84.63697376449055,
|
|
"grad_norm": 1.2420659065246582,
|
|
"learning_rate": 8.695081967213115e-05,
|
|
"loss": 0.038,
|
|
"step": 17340
|
|
},
|
|
{
|
|
"epoch": 84.7345942647956,
|
|
"grad_norm": 1.0976778268814087,
|
|
"learning_rate": 8.681967213114754e-05,
|
|
"loss": 0.0428,
|
|
"step": 17360
|
|
},
|
|
{
|
|
"epoch": 84.83221476510067,
|
|
"grad_norm": 1.282233476638794,
|
|
"learning_rate": 8.668852459016393e-05,
|
|
"loss": 0.0418,
|
|
"step": 17380
|
|
},
|
|
{
|
|
"epoch": 84.92983526540573,
|
|
"grad_norm": 2.0834672451019287,
|
|
"learning_rate": 8.655737704918033e-05,
|
|
"loss": 0.0441,
|
|
"step": 17400
|
|
},
|
|
{
|
|
"epoch": 85.0274557657108,
|
|
"grad_norm": 1.0747671127319336,
|
|
"learning_rate": 8.642622950819672e-05,
|
|
"loss": 0.0415,
|
|
"step": 17420
|
|
},
|
|
{
|
|
"epoch": 85.12507626601587,
|
|
"grad_norm": 1.6715941429138184,
|
|
"learning_rate": 8.629508196721313e-05,
|
|
"loss": 0.0366,
|
|
"step": 17440
|
|
},
|
|
{
|
|
"epoch": 85.22269676632092,
|
|
"grad_norm": 1.115537166595459,
|
|
"learning_rate": 8.61639344262295e-05,
|
|
"loss": 0.0326,
|
|
"step": 17460
|
|
},
|
|
{
|
|
"epoch": 85.32031726662599,
|
|
"grad_norm": 1.118455410003662,
|
|
"learning_rate": 8.60327868852459e-05,
|
|
"loss": 0.0384,
|
|
"step": 17480
|
|
},
|
|
{
|
|
"epoch": 85.41793776693106,
|
|
"grad_norm": 1.6129100322723389,
|
|
"learning_rate": 8.59016393442623e-05,
|
|
"loss": 0.0376,
|
|
"step": 17500
|
|
},
|
|
{
|
|
"epoch": 85.51555826723612,
|
|
"grad_norm": 2.3377175331115723,
|
|
"learning_rate": 8.57704918032787e-05,
|
|
"loss": 0.0388,
|
|
"step": 17520
|
|
},
|
|
{
|
|
"epoch": 85.61317876754119,
|
|
"grad_norm": 1.2189383506774902,
|
|
"learning_rate": 8.563934426229509e-05,
|
|
"loss": 0.0405,
|
|
"step": 17540
|
|
},
|
|
{
|
|
"epoch": 85.71079926784624,
|
|
"grad_norm": 1.24251389503479,
|
|
"learning_rate": 8.550819672131149e-05,
|
|
"loss": 0.0426,
|
|
"step": 17560
|
|
},
|
|
{
|
|
"epoch": 85.80841976815131,
|
|
"grad_norm": 1.1355634927749634,
|
|
"learning_rate": 8.537704918032787e-05,
|
|
"loss": 0.0407,
|
|
"step": 17580
|
|
},
|
|
{
|
|
"epoch": 85.90604026845638,
|
|
"grad_norm": 1.5692590475082397,
|
|
"learning_rate": 8.524590163934426e-05,
|
|
"loss": 0.0438,
|
|
"step": 17600
|
|
},
|
|
{
|
|
"epoch": 86.00366076876143,
|
|
"grad_norm": 0.9955014586448669,
|
|
"learning_rate": 8.511475409836066e-05,
|
|
"loss": 0.0414,
|
|
"step": 17620
|
|
},
|
|
{
|
|
"epoch": 86.1012812690665,
|
|
"grad_norm": 1.3448798656463623,
|
|
"learning_rate": 8.498360655737705e-05,
|
|
"loss": 0.0307,
|
|
"step": 17640
|
|
},
|
|
{
|
|
"epoch": 86.19890176937157,
|
|
"grad_norm": 2.263423442840576,
|
|
"learning_rate": 8.485245901639345e-05,
|
|
"loss": 0.0354,
|
|
"step": 17660
|
|
},
|
|
{
|
|
"epoch": 86.29652226967663,
|
|
"grad_norm": 2.385218620300293,
|
|
"learning_rate": 8.472131147540984e-05,
|
|
"loss": 0.0373,
|
|
"step": 17680
|
|
},
|
|
{
|
|
"epoch": 86.3941427699817,
|
|
"grad_norm": 1.5091354846954346,
|
|
"learning_rate": 8.459016393442624e-05,
|
|
"loss": 0.0366,
|
|
"step": 17700
|
|
},
|
|
{
|
|
"epoch": 86.49176327028675,
|
|
"grad_norm": 1.300026297569275,
|
|
"learning_rate": 8.445901639344262e-05,
|
|
"loss": 0.0423,
|
|
"step": 17720
|
|
},
|
|
{
|
|
"epoch": 86.58938377059182,
|
|
"grad_norm": 1.4994693994522095,
|
|
"learning_rate": 8.432786885245902e-05,
|
|
"loss": 0.0406,
|
|
"step": 17740
|
|
},
|
|
{
|
|
"epoch": 86.68700427089689,
|
|
"grad_norm": 1.3377670049667358,
|
|
"learning_rate": 8.419672131147541e-05,
|
|
"loss": 0.0414,
|
|
"step": 17760
|
|
},
|
|
{
|
|
"epoch": 86.78462477120195,
|
|
"grad_norm": 0.9744328260421753,
|
|
"learning_rate": 8.406557377049181e-05,
|
|
"loss": 0.0391,
|
|
"step": 17780
|
|
},
|
|
{
|
|
"epoch": 86.88224527150702,
|
|
"grad_norm": 1.1345022916793823,
|
|
"learning_rate": 8.39344262295082e-05,
|
|
"loss": 0.0404,
|
|
"step": 17800
|
|
},
|
|
{
|
|
"epoch": 86.97986577181209,
|
|
"grad_norm": 1.7516177892684937,
|
|
"learning_rate": 8.380327868852459e-05,
|
|
"loss": 0.0439,
|
|
"step": 17820
|
|
},
|
|
{
|
|
"epoch": 87.07748627211714,
|
|
"grad_norm": 1.2439887523651123,
|
|
"learning_rate": 8.367213114754098e-05,
|
|
"loss": 0.0335,
|
|
"step": 17840
|
|
},
|
|
{
|
|
"epoch": 87.17510677242221,
|
|
"grad_norm": 1.586146354675293,
|
|
"learning_rate": 8.354098360655737e-05,
|
|
"loss": 0.031,
|
|
"step": 17860
|
|
},
|
|
{
|
|
"epoch": 87.27272727272727,
|
|
"grad_norm": 1.3947432041168213,
|
|
"learning_rate": 8.340983606557377e-05,
|
|
"loss": 0.0385,
|
|
"step": 17880
|
|
},
|
|
{
|
|
"epoch": 87.37034777303234,
|
|
"grad_norm": 0.9418930411338806,
|
|
"learning_rate": 8.327868852459016e-05,
|
|
"loss": 0.0364,
|
|
"step": 17900
|
|
},
|
|
{
|
|
"epoch": 87.4679682733374,
|
|
"grad_norm": 0.8570541739463806,
|
|
"learning_rate": 8.314754098360657e-05,
|
|
"loss": 0.0378,
|
|
"step": 17920
|
|
},
|
|
{
|
|
"epoch": 87.56558877364246,
|
|
"grad_norm": 1.2564805746078491,
|
|
"learning_rate": 8.301639344262296e-05,
|
|
"loss": 0.0373,
|
|
"step": 17940
|
|
},
|
|
{
|
|
"epoch": 87.66320927394753,
|
|
"grad_norm": 1.062628149986267,
|
|
"learning_rate": 8.288524590163935e-05,
|
|
"loss": 0.0372,
|
|
"step": 17960
|
|
},
|
|
{
|
|
"epoch": 87.7608297742526,
|
|
"grad_norm": 1.4381693601608276,
|
|
"learning_rate": 8.275409836065573e-05,
|
|
"loss": 0.0417,
|
|
"step": 17980
|
|
},
|
|
{
|
|
"epoch": 87.85845027455765,
|
|
"grad_norm": 1.6939609050750732,
|
|
"learning_rate": 8.262295081967214e-05,
|
|
"loss": 0.0414,
|
|
"step": 18000
|
|
},
|
|
{
|
|
"epoch": 87.95607077486272,
|
|
"grad_norm": 1.5280972719192505,
|
|
"learning_rate": 8.249180327868853e-05,
|
|
"loss": 0.0411,
|
|
"step": 18020
|
|
},
|
|
{
|
|
"epoch": 88.05369127516778,
|
|
"grad_norm": 1.0062308311462402,
|
|
"learning_rate": 8.236065573770492e-05,
|
|
"loss": 0.0354,
|
|
"step": 18040
|
|
},
|
|
{
|
|
"epoch": 88.15131177547285,
|
|
"grad_norm": 2.354576826095581,
|
|
"learning_rate": 8.222950819672132e-05,
|
|
"loss": 0.0318,
|
|
"step": 18060
|
|
},
|
|
{
|
|
"epoch": 88.24893227577792,
|
|
"grad_norm": 1.3578128814697266,
|
|
"learning_rate": 8.209836065573771e-05,
|
|
"loss": 0.0355,
|
|
"step": 18080
|
|
},
|
|
{
|
|
"epoch": 88.34655277608297,
|
|
"grad_norm": 1.0361335277557373,
|
|
"learning_rate": 8.19672131147541e-05,
|
|
"loss": 0.0332,
|
|
"step": 18100
|
|
},
|
|
{
|
|
"epoch": 88.44417327638804,
|
|
"grad_norm": 1.3577088117599487,
|
|
"learning_rate": 8.183606557377049e-05,
|
|
"loss": 0.0372,
|
|
"step": 18120
|
|
},
|
|
{
|
|
"epoch": 88.54179377669311,
|
|
"grad_norm": 1.5548292398452759,
|
|
"learning_rate": 8.170491803278689e-05,
|
|
"loss": 0.0384,
|
|
"step": 18140
|
|
},
|
|
{
|
|
"epoch": 88.63941427699817,
|
|
"grad_norm": 0.9935953617095947,
|
|
"learning_rate": 8.157377049180328e-05,
|
|
"loss": 0.0364,
|
|
"step": 18160
|
|
},
|
|
{
|
|
"epoch": 88.73703477730324,
|
|
"grad_norm": 1.093231201171875,
|
|
"learning_rate": 8.144262295081968e-05,
|
|
"loss": 0.0369,
|
|
"step": 18180
|
|
},
|
|
{
|
|
"epoch": 88.83465527760829,
|
|
"grad_norm": 1.0061986446380615,
|
|
"learning_rate": 8.131147540983607e-05,
|
|
"loss": 0.0415,
|
|
"step": 18200
|
|
},
|
|
{
|
|
"epoch": 88.93227577791336,
|
|
"grad_norm": 1.2928169965744019,
|
|
"learning_rate": 8.118032786885246e-05,
|
|
"loss": 0.0405,
|
|
"step": 18220
|
|
},
|
|
{
|
|
"epoch": 89.02989627821843,
|
|
"grad_norm": 1.4209178686141968,
|
|
"learning_rate": 8.104918032786885e-05,
|
|
"loss": 0.0382,
|
|
"step": 18240
|
|
},
|
|
{
|
|
"epoch": 89.12751677852349,
|
|
"grad_norm": 1.7799944877624512,
|
|
"learning_rate": 8.091803278688524e-05,
|
|
"loss": 0.0338,
|
|
"step": 18260
|
|
},
|
|
{
|
|
"epoch": 89.22513727882856,
|
|
"grad_norm": 0.9708789587020874,
|
|
"learning_rate": 8.078688524590164e-05,
|
|
"loss": 0.0362,
|
|
"step": 18280
|
|
},
|
|
{
|
|
"epoch": 89.32275777913362,
|
|
"grad_norm": 1.9509353637695312,
|
|
"learning_rate": 8.065573770491803e-05,
|
|
"loss": 0.0339,
|
|
"step": 18300
|
|
},
|
|
{
|
|
"epoch": 89.42037827943868,
|
|
"grad_norm": 1.2407267093658447,
|
|
"learning_rate": 8.052459016393444e-05,
|
|
"loss": 0.0368,
|
|
"step": 18320
|
|
},
|
|
{
|
|
"epoch": 89.51799877974375,
|
|
"grad_norm": 1.3398857116699219,
|
|
"learning_rate": 8.039344262295082e-05,
|
|
"loss": 0.0368,
|
|
"step": 18340
|
|
},
|
|
{
|
|
"epoch": 89.6156192800488,
|
|
"grad_norm": 1.7856793403625488,
|
|
"learning_rate": 8.026229508196721e-05,
|
|
"loss": 0.0421,
|
|
"step": 18360
|
|
},
|
|
{
|
|
"epoch": 89.71323978035387,
|
|
"grad_norm": 0.8198001980781555,
|
|
"learning_rate": 8.01311475409836e-05,
|
|
"loss": 0.0343,
|
|
"step": 18380
|
|
},
|
|
{
|
|
"epoch": 89.81086028065894,
|
|
"grad_norm": 1.2887758016586304,
|
|
"learning_rate": 8e-05,
|
|
"loss": 0.037,
|
|
"step": 18400
|
|
},
|
|
{
|
|
"epoch": 89.908480780964,
|
|
"grad_norm": 1.6205062866210938,
|
|
"learning_rate": 7.98688524590164e-05,
|
|
"loss": 0.0395,
|
|
"step": 18420
|
|
},
|
|
{
|
|
"epoch": 90.00610128126907,
|
|
"grad_norm": 1.4648716449737549,
|
|
"learning_rate": 7.97377049180328e-05,
|
|
"loss": 0.0387,
|
|
"step": 18440
|
|
},
|
|
{
|
|
"epoch": 90.10372178157414,
|
|
"grad_norm": 0.8236134648323059,
|
|
"learning_rate": 7.960655737704919e-05,
|
|
"loss": 0.0317,
|
|
"step": 18460
|
|
},
|
|
{
|
|
"epoch": 90.20134228187919,
|
|
"grad_norm": 1.4608750343322754,
|
|
"learning_rate": 7.947540983606558e-05,
|
|
"loss": 0.0347,
|
|
"step": 18480
|
|
},
|
|
{
|
|
"epoch": 90.29896278218426,
|
|
"grad_norm": 0.9114894866943359,
|
|
"learning_rate": 7.934426229508197e-05,
|
|
"loss": 0.0347,
|
|
"step": 18500
|
|
},
|
|
{
|
|
"epoch": 90.39658328248932,
|
|
"grad_norm": 0.7654961347579956,
|
|
"learning_rate": 7.921311475409836e-05,
|
|
"loss": 0.0324,
|
|
"step": 18520
|
|
},
|
|
{
|
|
"epoch": 90.49420378279439,
|
|
"grad_norm": 1.1844931840896606,
|
|
"learning_rate": 7.908196721311476e-05,
|
|
"loss": 0.0352,
|
|
"step": 18540
|
|
},
|
|
{
|
|
"epoch": 90.59182428309946,
|
|
"grad_norm": 1.6406841278076172,
|
|
"learning_rate": 7.895081967213115e-05,
|
|
"loss": 0.0352,
|
|
"step": 18560
|
|
},
|
|
{
|
|
"epoch": 90.68944478340451,
|
|
"grad_norm": 1.5059739351272583,
|
|
"learning_rate": 7.881967213114755e-05,
|
|
"loss": 0.0386,
|
|
"step": 18580
|
|
},
|
|
{
|
|
"epoch": 90.78706528370958,
|
|
"grad_norm": 0.7421912550926208,
|
|
"learning_rate": 7.868852459016394e-05,
|
|
"loss": 0.0381,
|
|
"step": 18600
|
|
},
|
|
{
|
|
"epoch": 90.88468578401465,
|
|
"grad_norm": 1.2397936582565308,
|
|
"learning_rate": 7.855737704918033e-05,
|
|
"loss": 0.0365,
|
|
"step": 18620
|
|
},
|
|
{
|
|
"epoch": 90.9823062843197,
|
|
"grad_norm": 1.0268443822860718,
|
|
"learning_rate": 7.842622950819672e-05,
|
|
"loss": 0.0402,
|
|
"step": 18640
|
|
},
|
|
{
|
|
"epoch": 91.07992678462477,
|
|
"grad_norm": 0.87629234790802,
|
|
"learning_rate": 7.829508196721311e-05,
|
|
"loss": 0.0308,
|
|
"step": 18660
|
|
},
|
|
{
|
|
"epoch": 91.17754728492983,
|
|
"grad_norm": 2.2132532596588135,
|
|
"learning_rate": 7.816393442622951e-05,
|
|
"loss": 0.031,
|
|
"step": 18680
|
|
},
|
|
{
|
|
"epoch": 91.2751677852349,
|
|
"grad_norm": 1.2565170526504517,
|
|
"learning_rate": 7.80327868852459e-05,
|
|
"loss": 0.034,
|
|
"step": 18700
|
|
},
|
|
{
|
|
"epoch": 91.37278828553997,
|
|
"grad_norm": 1.191254734992981,
|
|
"learning_rate": 7.79016393442623e-05,
|
|
"loss": 0.0348,
|
|
"step": 18720
|
|
},
|
|
{
|
|
"epoch": 91.47040878584502,
|
|
"grad_norm": 0.711301326751709,
|
|
"learning_rate": 7.77704918032787e-05,
|
|
"loss": 0.0342,
|
|
"step": 18740
|
|
},
|
|
{
|
|
"epoch": 91.5680292861501,
|
|
"grad_norm": 0.8898797035217285,
|
|
"learning_rate": 7.763934426229508e-05,
|
|
"loss": 0.0348,
|
|
"step": 18760
|
|
},
|
|
{
|
|
"epoch": 91.66564978645516,
|
|
"grad_norm": 0.8842754364013672,
|
|
"learning_rate": 7.750819672131147e-05,
|
|
"loss": 0.0343,
|
|
"step": 18780
|
|
},
|
|
{
|
|
"epoch": 91.76327028676022,
|
|
"grad_norm": 1.7899420261383057,
|
|
"learning_rate": 7.737704918032788e-05,
|
|
"loss": 0.0395,
|
|
"step": 18800
|
|
},
|
|
{
|
|
"epoch": 91.86089078706529,
|
|
"grad_norm": 1.710175633430481,
|
|
"learning_rate": 7.724590163934426e-05,
|
|
"loss": 0.0387,
|
|
"step": 18820
|
|
},
|
|
{
|
|
"epoch": 91.95851128737034,
|
|
"grad_norm": 1.8949339389801025,
|
|
"learning_rate": 7.711475409836067e-05,
|
|
"loss": 0.0409,
|
|
"step": 18840
|
|
},
|
|
{
|
|
"epoch": 92.05613178767541,
|
|
"grad_norm": 1.6512871980667114,
|
|
"learning_rate": 7.698360655737706e-05,
|
|
"loss": 0.0353,
|
|
"step": 18860
|
|
},
|
|
{
|
|
"epoch": 92.15375228798048,
|
|
"grad_norm": 1.1428453922271729,
|
|
"learning_rate": 7.685245901639345e-05,
|
|
"loss": 0.031,
|
|
"step": 18880
|
|
},
|
|
{
|
|
"epoch": 92.25137278828554,
|
|
"grad_norm": 1.9489351511001587,
|
|
"learning_rate": 7.672131147540984e-05,
|
|
"loss": 0.0328,
|
|
"step": 18900
|
|
},
|
|
{
|
|
"epoch": 92.3489932885906,
|
|
"grad_norm": 1.148255467414856,
|
|
"learning_rate": 7.659016393442622e-05,
|
|
"loss": 0.0334,
|
|
"step": 18920
|
|
},
|
|
{
|
|
"epoch": 92.44661378889568,
|
|
"grad_norm": 1.2118985652923584,
|
|
"learning_rate": 7.645901639344263e-05,
|
|
"loss": 0.0319,
|
|
"step": 18940
|
|
},
|
|
{
|
|
"epoch": 92.54423428920073,
|
|
"grad_norm": 0.9434294700622559,
|
|
"learning_rate": 7.632786885245902e-05,
|
|
"loss": 0.0377,
|
|
"step": 18960
|
|
},
|
|
{
|
|
"epoch": 92.6418547895058,
|
|
"grad_norm": 1.136614441871643,
|
|
"learning_rate": 7.619672131147542e-05,
|
|
"loss": 0.0358,
|
|
"step": 18980
|
|
},
|
|
{
|
|
"epoch": 92.73947528981085,
|
|
"grad_norm": 1.6465675830841064,
|
|
"learning_rate": 7.606557377049181e-05,
|
|
"loss": 0.0335,
|
|
"step": 19000
|
|
},
|
|
{
|
|
"epoch": 92.83709579011592,
|
|
"grad_norm": 1.623631477355957,
|
|
"learning_rate": 7.59344262295082e-05,
|
|
"loss": 0.0377,
|
|
"step": 19020
|
|
},
|
|
{
|
|
"epoch": 92.934716290421,
|
|
"grad_norm": 1.2394886016845703,
|
|
"learning_rate": 7.580327868852459e-05,
|
|
"loss": 0.0378,
|
|
"step": 19040
|
|
},
|
|
{
|
|
"epoch": 93.03233679072605,
|
|
"grad_norm": 1.0836046934127808,
|
|
"learning_rate": 7.567213114754099e-05,
|
|
"loss": 0.0374,
|
|
"step": 19060
|
|
},
|
|
{
|
|
"epoch": 93.12995729103112,
|
|
"grad_norm": 1.2980749607086182,
|
|
"learning_rate": 7.554098360655738e-05,
|
|
"loss": 0.0299,
|
|
"step": 19080
|
|
},
|
|
{
|
|
"epoch": 93.22757779133617,
|
|
"grad_norm": 0.7896084785461426,
|
|
"learning_rate": 7.540983606557377e-05,
|
|
"loss": 0.0338,
|
|
"step": 19100
|
|
},
|
|
{
|
|
"epoch": 93.32519829164124,
|
|
"grad_norm": 1.4562427997589111,
|
|
"learning_rate": 7.527868852459017e-05,
|
|
"loss": 0.0325,
|
|
"step": 19120
|
|
},
|
|
{
|
|
"epoch": 93.42281879194631,
|
|
"grad_norm": 1.0484211444854736,
|
|
"learning_rate": 7.514754098360656e-05,
|
|
"loss": 0.0319,
|
|
"step": 19140
|
|
},
|
|
{
|
|
"epoch": 93.52043929225137,
|
|
"grad_norm": 1.2180018424987793,
|
|
"learning_rate": 7.501639344262295e-05,
|
|
"loss": 0.0358,
|
|
"step": 19160
|
|
},
|
|
{
|
|
"epoch": 93.61805979255644,
|
|
"grad_norm": 1.5725558996200562,
|
|
"learning_rate": 7.488524590163934e-05,
|
|
"loss": 0.0371,
|
|
"step": 19180
|
|
},
|
|
{
|
|
"epoch": 93.7156802928615,
|
|
"grad_norm": 0.962448000907898,
|
|
"learning_rate": 7.475409836065574e-05,
|
|
"loss": 0.0329,
|
|
"step": 19200
|
|
},
|
|
{
|
|
"epoch": 93.81330079316656,
|
|
"grad_norm": 1.2971309423446655,
|
|
"learning_rate": 7.462295081967213e-05,
|
|
"loss": 0.0388,
|
|
"step": 19220
|
|
},
|
|
{
|
|
"epoch": 93.91092129347163,
|
|
"grad_norm": 1.621389627456665,
|
|
"learning_rate": 7.449180327868854e-05,
|
|
"loss": 0.035,
|
|
"step": 19240
|
|
},
|
|
{
|
|
"epoch": 94.00854179377669,
|
|
"grad_norm": 1.1524312496185303,
|
|
"learning_rate": 7.436065573770493e-05,
|
|
"loss": 0.036,
|
|
"step": 19260
|
|
},
|
|
{
|
|
"epoch": 94.10616229408176,
|
|
"grad_norm": 1.0361751317977905,
|
|
"learning_rate": 7.422950819672131e-05,
|
|
"loss": 0.0311,
|
|
"step": 19280
|
|
},
|
|
{
|
|
"epoch": 94.20378279438683,
|
|
"grad_norm": 1.2280365228652954,
|
|
"learning_rate": 7.40983606557377e-05,
|
|
"loss": 0.032,
|
|
"step": 19300
|
|
},
|
|
{
|
|
"epoch": 94.30140329469188,
|
|
"grad_norm": 1.39139723777771,
|
|
"learning_rate": 7.39672131147541e-05,
|
|
"loss": 0.0312,
|
|
"step": 19320
|
|
},
|
|
{
|
|
"epoch": 94.39902379499695,
|
|
"grad_norm": 2.101762533187866,
|
|
"learning_rate": 7.38360655737705e-05,
|
|
"loss": 0.0304,
|
|
"step": 19340
|
|
},
|
|
{
|
|
"epoch": 94.49664429530202,
|
|
"grad_norm": 1.5655161142349243,
|
|
"learning_rate": 7.370491803278689e-05,
|
|
"loss": 0.0332,
|
|
"step": 19360
|
|
},
|
|
{
|
|
"epoch": 94.59426479560707,
|
|
"grad_norm": 1.1902984380722046,
|
|
"learning_rate": 7.357377049180329e-05,
|
|
"loss": 0.0308,
|
|
"step": 19380
|
|
},
|
|
{
|
|
"epoch": 94.69188529591214,
|
|
"grad_norm": 1.3503782749176025,
|
|
"learning_rate": 7.344262295081968e-05,
|
|
"loss": 0.0342,
|
|
"step": 19400
|
|
},
|
|
{
|
|
"epoch": 94.7895057962172,
|
|
"grad_norm": 1.6430237293243408,
|
|
"learning_rate": 7.331147540983607e-05,
|
|
"loss": 0.0375,
|
|
"step": 19420
|
|
},
|
|
{
|
|
"epoch": 94.88712629652227,
|
|
"grad_norm": 1.4753270149230957,
|
|
"learning_rate": 7.318032786885246e-05,
|
|
"loss": 0.0361,
|
|
"step": 19440
|
|
},
|
|
{
|
|
"epoch": 94.98474679682734,
|
|
"grad_norm": 1.6849883794784546,
|
|
"learning_rate": 7.304918032786886e-05,
|
|
"loss": 0.0377,
|
|
"step": 19460
|
|
},
|
|
{
|
|
"epoch": 95.0823672971324,
|
|
"grad_norm": 0.9980133175849915,
|
|
"learning_rate": 7.291803278688525e-05,
|
|
"loss": 0.0286,
|
|
"step": 19480
|
|
},
|
|
{
|
|
"epoch": 95.17998779743746,
|
|
"grad_norm": 0.911799430847168,
|
|
"learning_rate": 7.278688524590165e-05,
|
|
"loss": 0.0304,
|
|
"step": 19500
|
|
},
|
|
{
|
|
"epoch": 95.27760829774253,
|
|
"grad_norm": 1.0748980045318604,
|
|
"learning_rate": 7.265573770491804e-05,
|
|
"loss": 0.0312,
|
|
"step": 19520
|
|
},
|
|
{
|
|
"epoch": 95.37522879804759,
|
|
"grad_norm": 1.6856895685195923,
|
|
"learning_rate": 7.252459016393443e-05,
|
|
"loss": 0.0313,
|
|
"step": 19540
|
|
},
|
|
{
|
|
"epoch": 95.47284929835266,
|
|
"grad_norm": 1.281265139579773,
|
|
"learning_rate": 7.239344262295082e-05,
|
|
"loss": 0.0325,
|
|
"step": 19560
|
|
},
|
|
{
|
|
"epoch": 95.57046979865771,
|
|
"grad_norm": 1.3441650867462158,
|
|
"learning_rate": 7.226229508196721e-05,
|
|
"loss": 0.0369,
|
|
"step": 19580
|
|
},
|
|
{
|
|
"epoch": 95.66809029896278,
|
|
"grad_norm": 0.9338245391845703,
|
|
"learning_rate": 7.213114754098361e-05,
|
|
"loss": 0.0328,
|
|
"step": 19600
|
|
},
|
|
{
|
|
"epoch": 95.76571079926785,
|
|
"grad_norm": 1.0416558980941772,
|
|
"learning_rate": 7.2e-05,
|
|
"loss": 0.0313,
|
|
"step": 19620
|
|
},
|
|
{
|
|
"epoch": 95.8633312995729,
|
|
"grad_norm": 1.187015175819397,
|
|
"learning_rate": 7.18688524590164e-05,
|
|
"loss": 0.0389,
|
|
"step": 19640
|
|
},
|
|
{
|
|
"epoch": 95.96095179987798,
|
|
"grad_norm": 1.7248045206069946,
|
|
"learning_rate": 7.17377049180328e-05,
|
|
"loss": 0.0389,
|
|
"step": 19660
|
|
},
|
|
{
|
|
"epoch": 96.05857230018304,
|
|
"grad_norm": 1.244112253189087,
|
|
"learning_rate": 7.160655737704918e-05,
|
|
"loss": 0.0282,
|
|
"step": 19680
|
|
},
|
|
{
|
|
"epoch": 96.1561928004881,
|
|
"grad_norm": 0.868563711643219,
|
|
"learning_rate": 7.147540983606557e-05,
|
|
"loss": 0.03,
|
|
"step": 19700
|
|
},
|
|
{
|
|
"epoch": 96.25381330079317,
|
|
"grad_norm": 0.6851422190666199,
|
|
"learning_rate": 7.134426229508198e-05,
|
|
"loss": 0.0293,
|
|
"step": 19720
|
|
},
|
|
{
|
|
"epoch": 96.35143380109822,
|
|
"grad_norm": 1.0482094287872314,
|
|
"learning_rate": 7.121311475409837e-05,
|
|
"loss": 0.0348,
|
|
"step": 19740
|
|
},
|
|
{
|
|
"epoch": 96.4490543014033,
|
|
"grad_norm": 0.9821768403053284,
|
|
"learning_rate": 7.108196721311475e-05,
|
|
"loss": 0.0346,
|
|
"step": 19760
|
|
},
|
|
{
|
|
"epoch": 96.54667480170836,
|
|
"grad_norm": 1.2944159507751465,
|
|
"learning_rate": 7.095081967213116e-05,
|
|
"loss": 0.033,
|
|
"step": 19780
|
|
},
|
|
{
|
|
"epoch": 96.64429530201342,
|
|
"grad_norm": 1.797796368598938,
|
|
"learning_rate": 7.081967213114755e-05,
|
|
"loss": 0.0313,
|
|
"step": 19800
|
|
},
|
|
{
|
|
"epoch": 96.74191580231849,
|
|
"grad_norm": 2.1053693294525146,
|
|
"learning_rate": 7.068852459016394e-05,
|
|
"loss": 0.0321,
|
|
"step": 19820
|
|
},
|
|
{
|
|
"epoch": 96.83953630262356,
|
|
"grad_norm": 0.6129661202430725,
|
|
"learning_rate": 7.055737704918033e-05,
|
|
"loss": 0.0343,
|
|
"step": 19840
|
|
},
|
|
{
|
|
"epoch": 96.93715680292861,
|
|
"grad_norm": 1.184906005859375,
|
|
"learning_rate": 7.042622950819673e-05,
|
|
"loss": 0.0335,
|
|
"step": 19860
|
|
},
|
|
{
|
|
"epoch": 97.03477730323368,
|
|
"grad_norm": 1.0629398822784424,
|
|
"learning_rate": 7.029508196721312e-05,
|
|
"loss": 0.0336,
|
|
"step": 19880
|
|
},
|
|
{
|
|
"epoch": 97.13239780353874,
|
|
"grad_norm": 1.0311895608901978,
|
|
"learning_rate": 7.016393442622952e-05,
|
|
"loss": 0.028,
|
|
"step": 19900
|
|
},
|
|
{
|
|
"epoch": 97.2300183038438,
|
|
"grad_norm": 1.3785455226898193,
|
|
"learning_rate": 7.003278688524591e-05,
|
|
"loss": 0.0304,
|
|
"step": 19920
|
|
},
|
|
{
|
|
"epoch": 97.32763880414888,
|
|
"grad_norm": 1.515749216079712,
|
|
"learning_rate": 6.99016393442623e-05,
|
|
"loss": 0.0314,
|
|
"step": 19940
|
|
},
|
|
{
|
|
"epoch": 97.42525930445393,
|
|
"grad_norm": 1.5662028789520264,
|
|
"learning_rate": 6.977049180327869e-05,
|
|
"loss": 0.0302,
|
|
"step": 19960
|
|
},
|
|
{
|
|
"epoch": 97.522879804759,
|
|
"grad_norm": 1.1467646360397339,
|
|
"learning_rate": 6.963934426229508e-05,
|
|
"loss": 0.0317,
|
|
"step": 19980
|
|
},
|
|
{
|
|
"epoch": 97.62050030506407,
|
|
"grad_norm": 0.9327864646911621,
|
|
"learning_rate": 6.950819672131148e-05,
|
|
"loss": 0.0323,
|
|
"step": 20000
|
|
}
|
|
],
|
|
"logging_steps": 20,
|
|
"max_steps": 30600,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 150,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": false
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 2.1916893437732045e+17,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|
|
|