{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.653271338425772,
  "global_step": 11000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.03, "learning_rate": 1e-07, "loss": 10.8195, "step": 100 },
    { "epoch": 0.07, "learning_rate": 2e-07, "loss": 10.198, "step": 200 },
    { "epoch": 0.1, "learning_rate": 3e-07, "loss": 9.7934, "step": 300 },
    { "epoch": 0.13, "learning_rate": 4e-07, "loss": 9.6558, "step": 400 },
    { "epoch": 0.17, "learning_rate": 5e-07, "loss": 9.5031, "step": 500 },
    { "epoch": 0.2, "learning_rate": 6e-07, "loss": 9.3498, "step": 600 },
    { "epoch": 0.23, "learning_rate": 7e-07, "loss": 9.171, "step": 700 },
    { "epoch": 0.27, "learning_rate": 8e-07, "loss": 8.9623, "step": 800 },
    { "epoch": 0.3, "learning_rate": 9e-07, "loss": 8.885, "step": 900 },
    { "epoch": 0.33, "learning_rate": 1e-06, "loss": 8.6453, "step": 1000 },
    { "epoch": 0.37, "learning_rate": 9.96564754379938e-07, "loss": 8.5159, "step": 1100 },
    { "epoch": 0.4, "learning_rate": 9.931295087598764e-07, "loss": 8.3516, "step": 1200 },
    { "epoch": 0.43, "learning_rate": 9.896942631398145e-07, "loss": 8.2104, "step": 1300 },
    { "epoch": 0.46, "learning_rate": 9.862590175197526e-07, "loss": 8.0953, "step": 1400 },
    { "epoch": 0.5, "learning_rate": 9.828237718996907e-07, "loss": 7.9979, "step": 1500 },
    { "epoch": 0.53, "learning_rate": 9.79388526279629e-07, "loss": 7.8445, "step": 1600 },
    { "epoch": 0.56, "learning_rate": 9.759532806595672e-07, "loss": 7.7416, "step": 1700 },
    { "epoch": 0.6, "learning_rate": 9.725180350395053e-07, "loss": 7.649, "step": 1800 },
    { "epoch": 0.63, "learning_rate": 9.690827894194434e-07, "loss": 7.5877, "step": 1900 },
    { "epoch": 0.66, "learning_rate": 9.656475437993815e-07, "loss": 7.4996, "step": 2000 },
    { "epoch": 0.7, "learning_rate": 9.622122981793198e-07, "loss": 7.412, "step": 2100 },
    { "epoch": 0.73, "learning_rate": 9.58777052559258e-07, "loss": 7.4068, "step": 2200 },
    { "epoch": 0.76, "learning_rate": 9.55341806939196e-07, "loss": 7.3062, "step": 2300 },
    { "epoch": 0.8, "learning_rate": 9.519065613191343e-07, "loss": 7.2333, "step": 2400 },
    { "epoch": 0.83, "learning_rate": 9.484713156990724e-07, "loss": 7.09, "step": 2500 },
    { "epoch": 0.86, "learning_rate": 9.450360700790106e-07, "loss": 7.0942, "step": 2600 },
    { "epoch": 0.9, "learning_rate": 9.416008244589487e-07, "loss": 7.0845, "step": 2700 },
    { "epoch": 0.93, "learning_rate": 9.38165578838887e-07, "loss": 6.8747, "step": 2800 },
    { "epoch": 0.96, "learning_rate": 9.347303332188251e-07, "loss": 7.0027, "step": 2900 },
    { "epoch": 1.0, "learning_rate": 9.312950875987633e-07, "loss": 6.9904, "step": 3000 },
    { "epoch": 1.03, "learning_rate": 9.278598419787015e-07, "loss": 6.9223, "step": 3100 },
    { "epoch": 1.06, "learning_rate": 9.244245963586395e-07, "loss": 6.8921, "step": 3200 },
    { "epoch": 1.1, "learning_rate": 9.209893507385778e-07, "loss": 6.8751, "step": 3300 },
    { "epoch": 1.13, "learning_rate": 9.17554105118516e-07, "loss": 6.7727, "step": 3400 },
    { "epoch": 1.16, "learning_rate": 9.141188594984541e-07, "loss": 6.8281, "step": 3500 },
    { "epoch": 1.2, "learning_rate": 9.106836138783922e-07, "loss": 6.8361, "step": 3600 },
    { "epoch": 1.23, "learning_rate": 9.072483682583304e-07, "loss": 6.7184, "step": 3700 },
    { "epoch": 1.26, "learning_rate": 9.038131226382685e-07, "loss": 6.8271, "step": 3800 },
    { "epoch": 1.3, "learning_rate": 9.003778770182068e-07, "loss": 6.7768, "step": 3900 },
    { "epoch": 1.33, "learning_rate": 8.96942631398145e-07, "loss": 6.7108, "step": 4000 },
    { "epoch": 1.36, "learning_rate": 8.935073857780831e-07, "loss": 6.6765, "step": 4100 },
    { "epoch": 1.39, "learning_rate": 8.900721401580212e-07, "loss": 6.6567, "step": 4200 },
    { "epoch": 1.43, "learning_rate": 8.866368945379594e-07, "loss": 6.7092, "step": 4300 },
    { "epoch": 1.46, "learning_rate": 8.832016489178976e-07, "loss": 6.7071, "step": 4400 },
    { "epoch": 1.49, "learning_rate": 8.797664032978358e-07, "loss": 6.5984, "step": 4500 },
    { "epoch": 1.53, "learning_rate": 8.763311576777739e-07, "loss": 6.5961, "step": 4600 },
    { "epoch": 1.56, "learning_rate": 8.728959120577121e-07, "loss": 6.5279, "step": 4700 },
    { "epoch": 1.59, "learning_rate": 8.694606664376502e-07, "loss": 6.55, "step": 4800 },
    { "epoch": 1.63, "learning_rate": 8.660254208175884e-07, "loss": 6.5216, "step": 4900 },
    { "epoch": 1.66, "learning_rate": 8.625901751975267e-07, "loss": 6.5823, "step": 5000 },
    { "epoch": 1.69, "learning_rate": 8.591549295774648e-07, "loss": 6.4402, "step": 5100 },
    { "epoch": 1.73, "learning_rate": 8.557196839574029e-07, "loss": 6.4921, "step": 5200 },
    { "epoch": 1.76, "learning_rate": 8.522844383373411e-07, "loss": 6.4431, "step": 5300 },
    { "epoch": 1.79, "learning_rate": 8.488491927172793e-07, "loss": 6.4583, "step": 5400 },
    { "epoch": 1.83, "learning_rate": 8.454139470972173e-07, "loss": 6.408, "step": 5500 },
    { "epoch": 1.86, "learning_rate": 8.419787014771555e-07, "loss": 6.3809, "step": 5600 },
    { "epoch": 1.89, "learning_rate": 8.385434558570938e-07, "loss": 6.5188, "step": 5700 },
    { "epoch": 1.93, "learning_rate": 8.351082102370319e-07, "loss": 6.3411, "step": 5800 },
    { "epoch": 1.96, "learning_rate": 8.316729646169701e-07, "loss": 6.3358, "step": 5900 },
    { "epoch": 1.99, "learning_rate": 8.282377189969082e-07, "loss": 6.4091, "step": 6000 },
    { "epoch": 2.03, "learning_rate": 8.248024733768464e-07, "loss": 6.2717, "step": 6100 },
    { "epoch": 2.06, "learning_rate": 8.213672277567846e-07, "loss": 6.3028, "step": 6200 },
    { "epoch": 2.09, "learning_rate": 8.179319821367228e-07, "loss": 6.354, "step": 6300 },
    { "epoch": 2.13, "learning_rate": 8.14496736516661e-07, "loss": 6.2591, "step": 6400 },
    { "epoch": 2.16, "learning_rate": 8.11061490896599e-07, "loss": 6.1371, "step": 6500 },
    { "epoch": 2.19, "learning_rate": 8.076262452765372e-07, "loss": 6.2945, "step": 6600 },
    { "epoch": 2.23, "learning_rate": 8.041909996564754e-07, "loss": 6.3096, "step": 6700 },
    { "epoch": 2.26, "learning_rate": 8.007557540364136e-07, "loss": 6.2438, "step": 6800 },
    { "epoch": 2.29, "learning_rate": 7.973205084163517e-07, "loss": 6.2362, "step": 6900 },
    { "epoch": 2.32, "learning_rate": 7.938852627962899e-07, "loss": 6.3412, "step": 7000 },
    { "epoch": 2.36, "learning_rate": 7.90450017176228e-07, "loss": 6.1895, "step": 7100 },
    { "epoch": 2.39, "learning_rate": 7.870147715561662e-07, "loss": 6.263, "step": 7200 },
    { "epoch": 2.42, "learning_rate": 7.835795259361044e-07, "loss": 6.183, "step": 7300 },
    { "epoch": 2.46, "learning_rate": 7.801442803160426e-07, "loss": 6.1913, "step": 7400 },
    { "epoch": 2.49, "learning_rate": 7.767090346959807e-07, "loss": 6.2258, "step": 7500 },
    { "epoch": 2.52, "learning_rate": 7.732737890759189e-07, "loss": 6.1109, "step": 7600 },
    { "epoch": 2.56, "learning_rate": 7.698385434558571e-07, "loss": 6.1534, "step": 7700 },
    { "epoch": 2.59, "learning_rate": 7.664032978357952e-07, "loss": 6.2051, "step": 7800 },
    { "epoch": 2.62, "learning_rate": 7.629680522157333e-07, "loss": 6.2167, "step": 7900 },
    { "epoch": 2.66, "learning_rate": 7.595328065956716e-07, "loss": 6.1834, "step": 8000 },
    { "epoch": 2.69, "learning_rate": 7.560975609756097e-07, "loss": 6.1025, "step": 8100 },
    { "epoch": 2.72, "learning_rate": 7.526623153555479e-07, "loss": 6.1659, "step": 8200 },
    { "epoch": 2.76, "learning_rate": 7.492270697354861e-07, "loss": 6.153, "step": 8300 },
    { "epoch": 2.79, "learning_rate": 7.457918241154242e-07, "loss": 6.1415, "step": 8400 },
    { "epoch": 2.82, "learning_rate": 7.423565784953624e-07, "loss": 6.2841, "step": 8500 },
    { "epoch": 2.86, "learning_rate": 7.389213328753006e-07, "loss": 6.14, "step": 8600 },
    { "epoch": 2.89, "learning_rate": 7.354860872552388e-07, "loss": 6.1218, "step": 8700 },
    { "epoch": 2.92, "learning_rate": 7.320508416351768e-07, "loss": 6.1991, "step": 8800 },
    { "epoch": 2.96, "learning_rate": 7.28615596015115e-07, "loss": 5.9527, "step": 8900 },
    { "epoch": 2.99, "learning_rate": 7.251803503950532e-07, "loss": 6.1217, "step": 9000 },
    { "epoch": 3.02, "learning_rate": 7.217451047749914e-07, "loss": 6.0276, "step": 9100 },
    { "epoch": 3.06, "learning_rate": 7.183098591549296e-07, "loss": 6.0417, "step": 9200 },
    { "epoch": 3.09, "learning_rate": 7.148746135348677e-07, "loss": 6.1444, "step": 9300 },
    { "epoch": 3.12, "learning_rate": 7.114393679148058e-07, "loss": 5.9651, "step": 9400 },
    { "epoch": 3.16, "learning_rate": 7.08004122294744e-07, "loss": 6.1054, "step": 9500 },
    { "epoch": 3.19, "learning_rate": 7.045688766746822e-07, "loss": 5.9115, "step": 9600 },
    { "epoch": 3.22, "learning_rate": 7.011336310546205e-07, "loss": 6.0053, "step": 9700 },
    { "epoch": 3.25, "learning_rate": 6.976983854345585e-07, "loss": 5.9664, "step": 9800 },
    { "epoch": 3.29, "learning_rate": 6.942631398144967e-07, "loss": 6.0903, "step": 9900 },
    { "epoch": 3.32, "learning_rate": 6.908278941944349e-07, "loss": 6.1041, "step": 10000 },
    { "epoch": 3.35, "learning_rate": 6.87392648574373e-07, "loss": 6.151, "step": 10100 },
    { "epoch": 3.39, "learning_rate": 6.839574029543112e-07, "loss": 5.9338, "step": 10200 },
    { "epoch": 3.42, "learning_rate": 6.805221573342494e-07, "loss": 6.1562, "step": 10300 },
    { "epoch": 3.45, "learning_rate": 6.770869117141875e-07, "loss": 5.9743, "step": 10400 },
    { "epoch": 3.49, "learning_rate": 6.736516660941257e-07, "loss": 6.0068, "step": 10500 },
    { "epoch": 3.52, "learning_rate": 6.702164204740639e-07, "loss": 6.061, "step": 10600 },
    { "epoch": 3.55, "learning_rate": 6.66781174854002e-07, "loss": 6.077, "step": 10700 },
    { "epoch": 3.59, "learning_rate": 6.633459292339401e-07, "loss": 5.9978, "step": 10800 },
    { "epoch": 3.62, "learning_rate": 6.599106836138784e-07, "loss": 6.1817, "step": 10900 },
    { "epoch": 3.65, "learning_rate": 6.564754379938166e-07, "loss": 6.1392, "step": 11000 }
  ],
  "max_steps": 30110,
  "num_train_epochs": 10,
  "start_time": 1683297044.048102,
  "total_flos": 1.1772773793792e+19,
  "trial_name": null,
  "trial_params": null
}