{
  "best_metric": 1.1968414783477783,
  "best_model_checkpoint": "./results/models/checkpoint-307584",
  "epoch": 16.0,
  "eval_steps": 500,
  "global_step": 307584,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.03, "learning_rate": 0.0019989596337910945, "loss": 2.0581, "step": 500 },
    { "epoch": 0.05, "learning_rate": 0.001997919267582189, "loss": 1.5829, "step": 1000 },
    { "epoch": 0.08, "learning_rate": 0.0019968789013732834, "loss": 1.5037, "step": 1500 },
    { "epoch": 0.1, "learning_rate": 0.001995838535164378, "loss": 1.4638, "step": 2000 },
    { "epoch": 0.13, "learning_rate": 0.0019947981689554723, "loss": 1.4347, "step": 2500 },
    { "epoch": 0.16, "learning_rate": 0.001993757802746567, "loss": 1.4135, "step": 3000 },
    { "epoch": 0.18, "learning_rate": 0.0019927174365376612, "loss": 1.3959, "step": 3500 },
    { "epoch": 0.21, "learning_rate": 0.0019916770703287557, "loss": 1.3816, "step": 4000 },
    { "epoch": 0.23, "learning_rate": 0.00199063670411985, "loss": 1.3715, "step": 4500 },
    { "epoch": 0.26, "learning_rate": 0.0019895963379109446, "loss": 1.3634, "step": 5000 },
    { "epoch": 0.29, "learning_rate": 0.001988555971702039, "loss": 1.354, "step": 5500 },
    { "epoch": 0.31, "learning_rate": 0.0019875156054931335, "loss": 1.3475, "step": 6000 },
    { "epoch": 0.34, "learning_rate": 0.001986475239284228, "loss": 1.3392, "step": 6500 },
    { "epoch": 0.36, "learning_rate": 0.0019854348730753224, "loss": 1.3336, "step": 7000 },
    { "epoch": 0.39, "learning_rate": 0.001984394506866417, "loss": 1.3283, "step": 7500 },
    { "epoch": 0.42, "learning_rate": 0.0019833541406575114, "loss": 1.3234, "step": 8000 },
    { "epoch": 0.44, "learning_rate": 0.001982313774448606, "loss": 1.321, "step": 8500 },
    { "epoch": 0.47, "learning_rate": 0.0019812734082397003, "loss": 1.3154, "step": 9000 },
    { "epoch": 0.49, "learning_rate": 0.0019802330420307947, "loss": 1.3126, "step": 9500 },
    { "epoch": 0.52, "learning_rate": 0.0019791926758218896, "loss": 1.3087, "step": 10000 },
    { "epoch": 0.55, "learning_rate": 0.001978152309612984, "loss": 1.3058, "step": 10500 },
    { "epoch": 0.57, "learning_rate": 0.001977111943404078, "loss": 1.3022, "step": 11000 },
    { "epoch": 0.6, "learning_rate": 0.0019760715771951726, "loss": 1.3002, "step": 11500 },
    { "epoch": 0.62, "learning_rate": 0.001975031210986267, "loss": 1.2965, "step": 12000 },
    { "epoch": 0.65, "learning_rate": 0.0019739908447773615, "loss": 1.2948, "step": 12500 },
    { "epoch": 0.68, "learning_rate": 0.0019729504785684564, "loss": 1.2932, "step": 13000 },
    { "epoch": 0.7, "learning_rate": 0.001971910112359551, "loss": 1.289, "step": 13500 },
    { "epoch": 0.73, "learning_rate": 0.0019708697461506453, "loss": 1.2894, "step": 14000 },
    { "epoch": 0.75, "learning_rate": 0.0019698293799417393, "loss": 1.2862, "step": 14500 },
    { "epoch": 0.78, "learning_rate": 0.0019687890137328337, "loss": 1.2827, "step": 15000 },
    { "epoch": 0.81, "learning_rate": 0.0019677486475239286, "loss": 1.2818, "step": 15500 },
    { "epoch": 0.83, "learning_rate": 0.001966708281315023, "loss": 1.2813, "step": 16000 },
    { "epoch": 0.86, "learning_rate": 0.0019656679151061176, "loss": 1.279, "step": 16500 },
    { "epoch": 0.88, "learning_rate": 0.001964627548897212, "loss": 1.276, "step": 17000 },
    { "epoch": 0.91, "learning_rate": 0.0019635871826883065, "loss": 1.2761, "step": 17500 },
    { "epoch": 0.94, "learning_rate": 0.0019625468164794005, "loss": 1.2737, "step": 18000 },
    { "epoch": 0.96, "learning_rate": 0.0019615064502704954, "loss": 1.2729, "step": 18500 },
    { "epoch": 0.99, "learning_rate": 0.00196046608406159, "loss": 1.2708, "step": 19000 },
    { "epoch": 1.0, "eval_loss": 1.28058660030365, "eval_runtime": 0.6137, "eval_samples_per_second": 1629.563, "eval_steps_per_second": 3.259, "step": 19224 },
    { "epoch": 1.01, "learning_rate": 0.0019594257178526843, "loss": 1.2682, "step": 19500 },
    { "epoch": 1.04, "learning_rate": 0.0019583853516437788, "loss": 1.2641, "step": 20000 },
    { "epoch": 1.07, "learning_rate": 0.001957344985434873, "loss": 1.2635, "step": 20500 },
    { "epoch": 1.09, "learning_rate": 0.0019563046192259677, "loss": 1.265, "step": 21000 },
    { "epoch": 1.12, "learning_rate": 0.001955264253017062, "loss": 1.2636, "step": 21500 },
    { "epoch": 1.14, "learning_rate": 0.0019542238868081566, "loss": 1.2617, "step": 22000 },
    { "epoch": 1.17, "learning_rate": 0.001953183520599251, "loss": 1.2605, "step": 22500 },
    { "epoch": 1.2, "learning_rate": 0.0019521431543903455, "loss": 1.2585, "step": 23000 },
    { "epoch": 1.22, "learning_rate": 0.0019511027881814397, "loss": 1.2583, "step": 23500 },
    { "epoch": 1.25, "learning_rate": 0.0019500624219725344, "loss": 1.2565, "step": 24000 },
    { "epoch": 1.27, "learning_rate": 0.0019490220557636289, "loss": 1.2558, "step": 24500 },
    { "epoch": 1.3, "learning_rate": 0.0019479816895547233, "loss": 1.2523, "step": 25000 },
    { "epoch": 1.33, "learning_rate": 0.0019469413233458178, "loss": 1.2553, "step": 25500 },
    { "epoch": 1.35, "learning_rate": 0.0019459009571369122, "loss": 1.2539, "step": 26000 },
    { "epoch": 1.38, "learning_rate": 0.0019448605909280067, "loss": 1.2521, "step": 26500 },
    { "epoch": 1.4, "learning_rate": 0.0019438202247191011, "loss": 1.2511, "step": 27000 },
    { "epoch": 1.43, "learning_rate": 0.0019427798585101956, "loss": 1.2517, "step": 27500 },
    { "epoch": 1.46, "learning_rate": 0.00194173949230129, "loss": 1.2497, "step": 28000 },
    { "epoch": 1.48, "learning_rate": 0.0019406991260923845, "loss": 1.2495, "step": 28500 },
    { "epoch": 1.51, "learning_rate": 0.001939658759883479, "loss": 1.2488, "step": 29000 },
    { "epoch": 1.53, "learning_rate": 0.0019386183936745737, "loss": 1.248, "step": 29500 },
    { "epoch": 1.56, "learning_rate": 0.001937578027465668, "loss": 1.2471, "step": 30000 },
    { "epoch": 1.59, "learning_rate": 0.0019365376612567623, "loss": 1.248, "step": 30500 },
    { "epoch": 1.61, "learning_rate": 0.0019354972950478568, "loss": 1.2446, "step": 31000 },
    { "epoch": 1.64, "learning_rate": 0.0019344569288389513, "loss": 1.2447, "step": 31500 },
    { "epoch": 1.66, "learning_rate": 0.0019334165626300457, "loss": 1.2439, "step": 32000 },
    { "epoch": 1.69, "learning_rate": 0.0019323761964211404, "loss": 1.2442, "step": 32500 },
    { "epoch": 1.72, "learning_rate": 0.0019313358302122348, "loss": 1.2444, "step": 33000 },
    { "epoch": 1.74, "learning_rate": 0.0019302954640033293, "loss": 1.2417, "step": 33500 },
    { "epoch": 1.77, "learning_rate": 0.0019292550977944235, "loss": 1.2429, "step": 34000 },
    { "epoch": 1.79, "learning_rate": 0.001928214731585518, "loss": 1.2398, "step": 34500 },
    { "epoch": 1.82, "learning_rate": 0.0019271743653766125, "loss": 1.2419, "step": 35000 },
    { "epoch": 1.85, "learning_rate": 0.0019261339991677071, "loss": 1.2393, "step": 35500 },
    { "epoch": 1.87, "learning_rate": 0.0019250936329588016, "loss": 1.2396, "step": 36000 },
    { "epoch": 1.9, "learning_rate": 0.001924053266749896, "loss": 1.2374, "step": 36500 },
    { "epoch": 1.92, "learning_rate": 0.0019230129005409905, "loss": 1.24, "step": 37000 },
    { "epoch": 1.95, "learning_rate": 0.001921972534332085, "loss": 1.2377, "step": 37500 },
    { "epoch": 1.98, "learning_rate": 0.0019209321681231794, "loss": 1.2387, "step": 38000 },
    { "epoch": 2.0, "eval_loss": 1.2544126510620117, "eval_runtime": 0.5912, "eval_samples_per_second": 1691.485, "eval_steps_per_second": 3.383, "step": 38448 },
    { "epoch": 2.0, "learning_rate": 0.0019198918019142739, "loss": 1.2349, "step": 38500 },
    { "epoch": 2.03, "learning_rate": 0.0019188514357053683, "loss": 1.2314, "step": 39000 },
    { "epoch": 2.05, "learning_rate": 0.0019178110694964628, "loss": 1.2342, "step": 39500 },
    { "epoch": 2.08, "learning_rate": 0.0019167707032875572, "loss": 1.2318, "step": 40000 },
    { "epoch": 2.11, "learning_rate": 0.0019157303370786517, "loss": 1.231, "step": 40500 },
    { "epoch": 2.13, "learning_rate": 0.0019146899708697464, "loss": 1.2321, "step": 41000 },
    { "epoch": 2.16, "learning_rate": 0.0019136496046608406, "loss": 1.2315, "step": 41500 },
    { "epoch": 2.18, "learning_rate": 0.001912609238451935, "loss": 1.2308, "step": 42000 },
    { "epoch": 2.21, "learning_rate": 0.0019115688722430295, "loss": 1.2303, "step": 42500 },
    { "epoch": 2.24, "learning_rate": 0.001910528506034124, "loss": 1.2297, "step": 43000 },
    { "epoch": 2.26, "learning_rate": 0.0019094881398252184, "loss": 1.2295, "step": 43500 },
    { "epoch": 2.29, "learning_rate": 0.0019084477736163131, "loss": 1.2271, "step": 44000 },
    { "epoch": 2.31, "learning_rate": 0.0019074074074074076, "loss": 1.2275, "step": 44500 },
    { "epoch": 2.34, "learning_rate": 0.0019063670411985018, "loss": 1.2284, "step": 45000 },
    { "epoch": 2.37, "learning_rate": 0.0019053266749895963, "loss": 1.2327, "step": 45500 },
    { "epoch": 2.39, "learning_rate": 0.0019042863087806907, "loss": 1.2286, "step": 46000 },
    { "epoch": 2.42, "learning_rate": 0.0019032459425717854, "loss": 1.2293, "step": 46500 },
    { "epoch": 2.44, "learning_rate": 0.0019022055763628799, "loss": 1.2263, "step": 47000 },
    { "epoch": 2.47, "learning_rate": 0.0019011652101539743, "loss": 1.2297, "step": 47500 },
    { "epoch": 2.5, "learning_rate": 0.0019001248439450688, "loss": 1.227, "step": 48000 },
    { "epoch": 2.52, "learning_rate": 0.001899084477736163, "loss": 1.2263, "step": 48500 },
    { "epoch": 2.55, "learning_rate": 0.0018980441115272575, "loss": 1.2274, "step": 49000 },
    { "epoch": 2.57, "learning_rate": 0.0018970037453183521, "loss": 1.2271, "step": 49500 },
    { "epoch": 2.6, "learning_rate": 0.0018959633791094466, "loss": 1.2262, "step": 50000 },
    { "epoch": 2.63, "learning_rate": 0.001894923012900541, "loss": 1.2256, "step": 50500 },
    { "epoch": 2.65, "learning_rate": 0.0018938826466916355, "loss": 1.2256, "step": 51000 },
    { "epoch": 2.68, "learning_rate": 0.00189284228048273, "loss": 1.225, "step": 51500 },
    { "epoch": 2.7, "learning_rate": 0.0018918019142738244, "loss": 1.2254, "step": 52000 },
    { "epoch": 2.73, "learning_rate": 0.0018907615480649189, "loss": 1.2249, "step": 52500 },
    { "epoch": 2.76, "learning_rate": 0.0018897211818560133, "loss": 1.2247, "step": 53000 },
    { "epoch": 2.78, "learning_rate": 0.0018886808156471078, "loss": 1.2231, "step": 53500 },
    { "epoch": 2.81, "learning_rate": 0.0018876404494382023, "loss": 1.2226, "step": 54000 },
    { "epoch": 2.83, "learning_rate": 0.0018866000832292967, "loss": 1.2239, "step": 54500 },
    { "epoch": 2.86, "learning_rate": 0.0018855597170203914, "loss": 1.2226, "step": 55000 },
    { "epoch": 2.89, "learning_rate": 0.0018845193508114856, "loss": 1.2234, "step": 55500 },
    { "epoch": 2.91, "learning_rate": 0.00188347898460258, "loss": 1.221, "step": 56000 },
    { "epoch": 2.94, "learning_rate": 0.0018824386183936745, "loss": 1.2227, "step": 56500 },
    { "epoch": 2.97, "learning_rate": 0.001881398252184769, "loss": 1.2199, "step": 57000 },
    { "epoch": 2.99, "learning_rate": 0.0018803578859758635, "loss": 1.2195, "step": 57500 },
    { "epoch": 3.0, "eval_loss": 1.233520746231079, "eval_runtime": 0.6051, "eval_samples_per_second": 1652.629, "eval_steps_per_second": 3.305, "step": 57672 },
    { "epoch": 3.02, "learning_rate": 0.0018793175197669581, "loss": 1.2162, "step": 58000 },
    { "epoch": 3.04, "learning_rate": 0.0018782771535580526, "loss": 1.2161, "step": 58500 },
    { "epoch": 3.07, "learning_rate": 0.001877236787349147, "loss": 1.2146, "step": 59000 },
    { "epoch": 3.1, "learning_rate": 0.0018761964211402413, "loss": 1.2163, "step": 59500 },
    { "epoch": 3.12, "learning_rate": 0.0018751560549313357, "loss": 1.2163, "step": 60000 },
    { "epoch": 3.15, "learning_rate": 0.0018741156887224304, "loss": 1.2152, "step": 60500 },
    { "epoch": 3.17, "learning_rate": 0.0018730753225135249, "loss": 1.2164, "step": 61000 },
    { "epoch": 3.2, "learning_rate": 0.0018720349563046193, "loss": 1.2157, "step": 61500 },
    { "epoch": 3.23, "learning_rate": 0.0018709945900957138, "loss": 1.2176, "step": 62000 },
    { "epoch": 3.25, "learning_rate": 0.0018699542238868082, "loss": 1.2168, "step": 62500 },
    { "epoch": 3.28, "learning_rate": 0.0018689138576779025, "loss": 1.2159, "step": 63000 },
    { "epoch": 3.3, "learning_rate": 0.0018678734914689972, "loss": 1.2145, "step": 63500 },
    { "epoch": 3.33, "learning_rate": 0.0018668331252600916, "loss": 1.2148, "step": 64000 },
    { "epoch": 3.36, "learning_rate": 0.001865792759051186, "loss": 1.2151, "step": 64500 },
    { "epoch": 3.38, "learning_rate": 0.0018647523928422805, "loss": 1.2145, "step": 65000 },
    { "epoch": 3.41, "learning_rate": 0.001863712026633375, "loss": 1.216, "step": 65500 },
    { "epoch": 3.43, "learning_rate": 0.0018626716604244697, "loss": 1.2152, "step": 66000 },
    { "epoch": 3.46, "learning_rate": 0.001861631294215564, "loss": 1.2132, "step": 66500 },
    { "epoch": 3.49, "learning_rate": 0.0018605909280066584, "loss": 1.2137, "step": 67000 },
    { "epoch": 3.51, "learning_rate": 0.0018595505617977528, "loss": 1.2145, "step": 67500 },
    { "epoch": 3.54, "learning_rate": 0.0018585101955888473, "loss": 1.2141, "step": 68000 },
    { "epoch": 3.56, "learning_rate": 0.0018574698293799417, "loss": 1.2148, "step": 68500 },
    { "epoch": 3.59, "learning_rate": 0.0018564294631710364, "loss": 1.2125, "step": 69000 },
    { "epoch": 3.62, "learning_rate": 0.0018553890969621309, "loss": 1.2132, "step": 69500 },
    { "epoch": 3.64, "learning_rate": 0.001854348730753225, "loss": 1.2124, "step": 70000 },
    { "epoch": 3.67, "learning_rate": 0.0018533083645443196, "loss": 1.2148, "step": 70500 },
    { "epoch": 3.69, "learning_rate": 0.001852267998335414, "loss": 1.2135, "step": 71000 },
    { "epoch": 3.72, "learning_rate": 0.0018512276321265085, "loss": 1.2132, "step": 71500 },
    { "epoch": 3.75, "learning_rate": 0.0018501872659176031, "loss": 1.2133, "step": 72000 },
    { "epoch": 3.77, "learning_rate": 0.0018491468997086976, "loss": 1.2144, "step": 72500 },
    { "epoch": 3.8, "learning_rate": 0.001848106533499792, "loss": 1.2101, "step": 73000 },
    { "epoch": 3.82, "learning_rate": 0.0018470661672908863, "loss": 1.212, "step": 73500 },
    { "epoch": 3.85, "learning_rate": 0.0018460258010819808, "loss": 1.2119, "step": 74000 },
    { "epoch": 3.88, "learning_rate": 0.0018449854348730754, "loss": 1.2117, "step": 74500 },
    { "epoch": 3.9, "learning_rate": 0.0018439450686641699, "loss": 1.2106, "step": 75000 },
    { "epoch": 3.93, "learning_rate": 0.0018429047024552643, "loss": 1.2125, "step": 75500 },
    { "epoch": 3.95, "learning_rate": 0.0018418643362463588, "loss": 1.2106, "step": 76000 },
    { "epoch": 3.98, "learning_rate": 0.0018408239700374533, "loss": 1.2108, "step": 76500 },
    { "epoch": 4.0, "eval_loss": 1.2261288166046143, "eval_runtime": 0.8323, "eval_samples_per_second": 1201.497, "eval_steps_per_second": 2.403, "step": 76896 },
    { "epoch": 4.01, "learning_rate": 0.0018397836038285475, "loss": 1.2091, "step": 77000 },
    { "epoch": 4.03, "learning_rate": 0.0018387432376196422, "loss": 1.2055, "step": 77500 },
    { "epoch": 4.06, "learning_rate": 0.0018377028714107366, "loss": 1.2061, "step": 78000 },
    { "epoch": 4.08, "learning_rate": 0.001836662505201831, "loss": 1.2047, "step": 78500 },
    { "epoch": 4.11, "learning_rate": 0.0018356221389929255, "loss": 1.2077, "step": 79000 },
    { "epoch": 4.14, "learning_rate": 0.00183458177278402, "loss": 1.2069, "step": 79500 },
    { "epoch": 4.16, "learning_rate": 0.0018335414065751145, "loss": 1.2078, "step": 80000 },
    { "epoch": 4.19, "learning_rate": 0.001832501040366209, "loss": 1.2058, "step": 80500 },
    { "epoch": 4.21, "learning_rate": 0.0018314606741573034, "loss": 1.2075, "step": 81000 },
    { "epoch": 4.24, "learning_rate": 0.0018304203079483978, "loss": 1.2064, "step": 81500 },
    { "epoch": 4.27, "learning_rate": 0.0018293799417394923, "loss": 1.2059, "step": 82000 },
    { "epoch": 4.29, "learning_rate": 0.0018283395755305867, "loss": 1.2075, "step": 82500 },
    { "epoch": 4.32, "learning_rate": 0.0018272992093216814, "loss": 1.2042, "step": 83000 },
    { "epoch": 4.34, "learning_rate": 0.0018262588431127759, "loss": 1.2051, "step": 83500 },
    { "epoch": 4.37, "learning_rate": 0.0018252184769038703, "loss": 1.2066, "step": 84000 },
    { "epoch": 4.4, "learning_rate": 0.0018241781106949646, "loss": 1.2081, "step": 84500 },
    { "epoch": 4.42, "learning_rate": 0.001823137744486059, "loss": 1.2072, "step": 85000 },
    { "epoch": 4.45, "learning_rate": 0.0018220973782771535, "loss": 1.2052, "step": 85500 },
    { "epoch": 4.47, "learning_rate": 0.0018210570120682482, "loss": 1.2042, "step": 86000 },
    { "epoch": 4.5, "learning_rate": 0.0018200166458593426, "loss": 1.2058, "step": 86500 },
    { "epoch": 4.53, "learning_rate": 0.001818976279650437, "loss": 1.2064, "step": 87000 },
    { "epoch": 4.55, "learning_rate": 0.0018179359134415315, "loss": 1.2032, "step": 87500 },
    { "epoch": 4.58, "learning_rate": 0.0018168955472326258, "loss": 1.2059, "step": 88000 },
    { "epoch": 4.6, "learning_rate": 0.0018158551810237204, "loss": 1.2058, "step": 88500 },
    { "epoch": 4.63, "learning_rate": 0.001814814814814815, "loss": 1.2041, "step": 89000 },
    { "epoch": 4.66, "learning_rate": 0.0018137744486059093, "loss": 1.2057, "step": 89500 },
    { "epoch": 4.68, "learning_rate": 0.0018127340823970038, "loss": 1.2031, "step": 90000 },
    { "epoch": 4.71, "learning_rate": 0.0018116937161880983, "loss": 1.2062, "step": 90500 },
    { "epoch": 4.73, "learning_rate": 0.0018106533499791927, "loss": 1.2051, "step": 91000 },
    { "epoch": 4.76, "learning_rate": 0.0018096129837702872, "loss": 1.2037, "step": 91500 },
    { "epoch": 4.79, "learning_rate": 0.0018085726175613816, "loss": 1.2053, "step": 92000 },
    { "epoch": 4.81, "learning_rate": 0.001807532251352476, "loss": 1.2046, "step": 92500 },
    { "epoch": 4.84, "learning_rate": 0.0018064918851435705, "loss": 1.2023, "step": 93000 },
    { "epoch": 4.86, "learning_rate": 0.001805451518934665, "loss": 1.2045, "step": 93500 },
    { "epoch": 4.89, "learning_rate": 0.0018044111527257595, "loss": 1.204, "step": 94000 },
    { "epoch": 4.92, "learning_rate": 0.0018033707865168541, "loss": 1.2037, "step": 94500 },
    { "epoch": 4.94, "learning_rate": 0.0018023304203079484, "loss": 1.204, "step": 95000 },
    { "epoch": 4.97, "learning_rate": 0.0018012900540990428, "loss": 1.2044, "step": 95500 },
    { "epoch": 4.99, "learning_rate": 0.0018002496878901373, "loss": 1.2022, "step": 96000 },
    { "epoch": 5.0, "eval_loss": 1.2207547426223755, "eval_runtime": 0.6112, "eval_samples_per_second": 1636.066, "eval_steps_per_second": 3.272, "step": 96120 },
    { "epoch": 5.02, "learning_rate": 0.0017992093216812317, "loss": 1.2001, "step": 96500 },
    { "epoch": 5.05, "learning_rate": 0.0017981689554723264, "loss": 1.1996, "step": 97000 },
    { "epoch": 5.07, "learning_rate": 0.0017971285892634209, "loss": 1.1989, "step": 97500 },
    { "epoch": 5.1, "learning_rate": 0.0017960882230545153, "loss": 1.1998, "step": 98000 },
    { "epoch": 5.12, "learning_rate": 0.0017950478568456096, "loss": 1.1989, "step": 98500 },
    { "epoch": 5.15, "learning_rate": 0.001794007490636704, "loss": 1.1984, "step": 99000 },
    { "epoch": 5.18, "learning_rate": 0.0017929671244277985, "loss": 1.1991, "step": 99500 },
    { "epoch": 5.2, "learning_rate": 0.0017919267582188932, "loss": 1.1993, "step": 100000 },
    { "epoch": 5.23, "learning_rate": 0.0017908863920099876, "loss": 1.1996, "step": 100500 },
    { "epoch": 5.25, "learning_rate": 0.001789846025801082, "loss": 1.1995, "step": 101000 },
    { "epoch": 5.28, "learning_rate": 0.0017888056595921765, "loss": 1.1987, "step": 101500 },
    { "epoch": 5.31, "learning_rate": 0.0017877652933832708, "loss": 1.1971, "step": 102000 },
    { "epoch": 5.33, "learning_rate": 0.0017867249271743654, "loss": 1.1984, "step": 102500 },
    { "epoch": 5.36, "learning_rate": 0.00178568456096546, "loss": 1.2003, "step": 103000 },
    { "epoch": 5.38, "learning_rate": 0.0017846441947565544, "loss": 1.2011, "step": 103500 },
    { "epoch": 5.41, "learning_rate": 0.0017836038285476488, "loss": 1.1994, "step": 104000 },
    { "epoch": 5.44, "learning_rate": 0.0017825634623387433, "loss": 1.1989, "step": 104500 },
    { "epoch": 5.46, "learning_rate": 0.0017815230961298377, "loss": 1.1996, "step": 105000 },
    { "epoch": 5.49, "learning_rate": 0.0017804827299209324, "loss": 1.1982, "step": 105500 },
    { "epoch": 5.51, "learning_rate": 0.0017794423637120266, "loss": 1.1971, "step": 106000 },
    { "epoch": 5.54, "learning_rate": 0.001778401997503121, "loss": 1.1988, "step": 106500 },
    { "epoch": 5.57, "learning_rate": 0.0017773616312942156, "loss": 1.1996, "step": 107000 },
    { "epoch": 5.59, "learning_rate": 0.00177632126508531, "loss": 1.1972, "step": 107500 },
    { "epoch": 5.62, "learning_rate": 0.0017752808988764045, "loss": 1.1991, "step": 108000 },
    { "epoch": 5.64, "learning_rate": 0.0017742405326674991, "loss": 1.1987, "step": 108500 },
    { "epoch": 5.67, "learning_rate": 0.0017732001664585936, "loss": 1.1983, "step": 109000 },
    { "epoch": 5.7, "learning_rate": 0.0017721598002496878, "loss": 1.1993, "step": 109500 },
    { "epoch": 5.72, "learning_rate": 0.0017711194340407823, "loss": 1.1979, "step": 110000 },
    { "epoch": 5.75, "learning_rate": 0.0017700790678318768, "loss": 1.1989, "step": 110500 },
    { "epoch": 5.77, "learning_rate": 0.0017690387016229714, "loss": 1.2003, "step": 111000 },
    { "epoch": 5.8, "learning_rate": 0.0017679983354140659, "loss": 1.1987, "step": 111500 },
    { "epoch": 5.83, "learning_rate": 0.0017669579692051603, "loss": 1.1987, "step": 112000 },
    { "epoch": 5.85, "learning_rate": 0.0017659176029962548, "loss": 1.1976, "step": 112500 },
    { "epoch": 5.88, "learning_rate": 0.001764877236787349, "loss": 1.1983, "step": 113000 },
    { "epoch": 5.9, "learning_rate": 0.0017638368705784435, "loss": 1.1981, "step": 113500 },
    { "epoch": 5.93, "learning_rate": 0.0017627965043695382, "loss": 1.1989, "step": 114000 },
    { "epoch": 5.96, "learning_rate": 0.0017617561381606326, "loss": 1.1963, "step": 114500 },
    { "epoch": 5.98, "learning_rate": 0.001760715771951727, "loss": 1.1983, "step": 115000 },
    { "epoch": 6.0, "eval_loss": 1.216284155845642, "eval_runtime": 0.6159, "eval_samples_per_second": 1623.524, "eval_steps_per_second": 3.247, "step": 115344 },
    { "epoch": 6.01, "learning_rate": 0.0017596754057428215, "loss": 1.1979, "step": 115500 },
    { "epoch": 6.03, "learning_rate": 0.001758635039533916, "loss": 1.1933, "step": 116000 },
    { "epoch": 6.06, "learning_rate": 0.0017575946733250102, "loss": 1.1942, "step": 116500 },
    { "epoch": 6.09, "learning_rate": 0.001756554307116105, "loss": 1.1937, "step": 117000 },
    { "epoch": 6.11, "learning_rate": 0.0017555139409071994, "loss": 1.1937, "step": 117500 },
    { "epoch": 6.14, "learning_rate": 0.0017544735746982938, "loss": 1.1958, "step": 118000 },
    { "epoch": 6.16, "learning_rate": 0.0017534332084893883, "loss": 1.1936, "step": 118500 },
    { "epoch": 6.19, "learning_rate": 0.0017523928422804827, "loss": 1.1952, "step": 119000 },
    { "epoch": 6.22, "learning_rate": 0.0017513524760715774, "loss": 1.1959, "step": 119500 },
    { "epoch": 6.24, "learning_rate": 0.0017503121098626717, "loss": 1.1943, "step": 120000 },
    { "epoch": 6.27, "learning_rate": 0.0017492717436537661, "loss": 1.1945, "step": 120500 },
    { "epoch": 6.29, "learning_rate": 0.0017482313774448606, "loss": 1.1952, "step": 121000 },
    { "epoch": 6.32, "learning_rate": 0.001747191011235955, "loss": 1.1972, "step": 121500 },
    { "epoch": 6.35, "learning_rate": 0.0017461506450270495, "loss": 1.1933, "step": 122000 },
    { "epoch": 6.37, "learning_rate": 0.0017451102788181442, "loss": 1.1949, "step": 122500 },
    { "epoch": 6.4, "learning_rate": 0.0017440699126092386, "loss": 1.1947, "step": 123000 },
    { "epoch": 6.42, "learning_rate": 0.0017430295464003329, "loss": 1.1935, "step": 123500 },
    { "epoch": 6.45, "learning_rate": 0.0017419891801914273, "loss": 1.1954, "step": 124000 },
    { "epoch": 6.48, "learning_rate": 0.0017409488139825218, "loss": 1.193, "step": 124500 },
    { "epoch": 6.5, "learning_rate": 0.0017399084477736164, "loss": 1.1938, "step": 125000 },
    { "epoch": 6.53, "learning_rate": 0.001738868081564711, "loss": 1.1939, "step": 125500 },
    { "epoch": 6.55, "learning_rate": 0.0017378277153558054, "loss": 1.1948, "step": 126000 },
    { "epoch": 6.58, "learning_rate": 0.0017367873491468998, "loss": 1.1926, "step": 126500 },
    { "epoch": 6.61, "learning_rate": 0.001735746982937994, "loss": 1.1936, "step": 127000 },
    { "epoch": 6.63, "learning_rate": 0.0017347066167290885, "loss": 1.1933, "step": 127500 },
    { "epoch": 6.66, "learning_rate": 0.0017336662505201832, "loss": 1.1947, "step": 128000 },
    { "epoch": 6.68, "learning_rate": 0.0017326258843112776, "loss": 1.1931, "step": 128500 },
    { "epoch": 6.71, "learning_rate": 0.001731585518102372, "loss": 1.1931, "step": 129000 },
    { "epoch": 6.74, "learning_rate": 0.0017305451518934666, "loss": 1.1938, "step": 129500 },
    { "epoch": 6.76, "learning_rate": 0.001729504785684561, "loss": 1.1939, "step": 130000 },
    { "epoch": 6.79, "learning_rate": 0.0017284644194756553, "loss": 1.1923, "step": 130500 },
    { "epoch": 6.81, "learning_rate": 0.00172742405326675, "loss": 1.1932, "step": 131000 },
    { "epoch": 6.84, "learning_rate": 0.0017263836870578444, "loss": 1.1926, "step": 131500 },
    { "epoch": 6.87, "learning_rate": 0.0017253433208489388, "loss": 1.1929, "step": 132000 },
    { "epoch": 6.89, "learning_rate": 0.0017243029546400333, "loss": 1.1932, "step": 132500 },
    { "epoch": 6.92, "learning_rate": 0.0017232625884311278, "loss": 1.1932, "step": 133000 },
    { "epoch": 6.94, "learning_rate": 0.0017222222222222224, "loss": 1.1952, "step": 133500 },
    { "epoch": 6.97, "learning_rate": 0.0017211818560133169, "loss": 1.1924, "step": 134000 },
    { "epoch": 7.0, "learning_rate": 0.0017201414898044111, "loss": 1.1927, "step": 134500 },
    { "epoch": 7.0, "eval_loss": 1.2103557586669922, "eval_runtime": 0.6147, "eval_samples_per_second": 1626.936, "eval_steps_per_second": 3.254, "step": 134568 },
    { "epoch": 7.02, "learning_rate": 0.0017191011235955056, "loss": 1.1869, "step": 135000 },
    { "epoch": 7.05, "learning_rate": 0.0017180607573866, "loss": 1.1872, "step": 135500 },
    { "epoch": 7.07, "learning_rate": 0.0017170203911776945, "loss": 1.1898, "step": 136000 },
    { "epoch": 7.1, "learning_rate": 0.0017159800249687892, "loss": 1.1895, "step": 136500 },
    { "epoch": 7.13, "learning_rate": 0.0017149396587598836, "loss": 1.1902, "step": 137000 },
    { "epoch": 7.15, "learning_rate": 0.001713899292550978, "loss": 1.1901, "step": 137500 },
    { "epoch": 7.18, "learning_rate": 0.0017128589263420723, "loss": 1.1892, "step": 138000 },
    { "epoch": 7.2, "learning_rate": 0.0017118185601331668, "loss": 1.1902, "step": 138500 },
    { "epoch": 7.23, "learning_rate": 0.0017107781939242615, "loss": 1.1906, "step": 139000 },
    { "epoch": 7.26, "learning_rate": 0.001709737827715356, "loss": 1.1904, "step": 139500 },
    { "epoch": 7.28, "learning_rate": 0.0017086974615064504, "loss": 1.1898, "step": 140000 },
    { "epoch": 7.31, "learning_rate": 0.0017076570952975448, "loss": 1.1917, "step": 140500 },
    { "epoch": 7.33, "learning_rate": 0.0017066167290886393, "loss": 1.1914, "step": 141000 },
    { "epoch": 7.36, "learning_rate": 0.0017055763628797335, "loss": 1.1905, "step": 141500 },
    { "epoch": 7.39, "learning_rate": 0.0017045359966708282, "loss": 1.1921, "step": 142000 },
    { "epoch": 7.41, "learning_rate": 0.0017034956304619227, "loss": 1.1899, "step": 142500 },
    { "epoch": 7.44, "learning_rate": 0.001702455264253017, "loss": 1.19, "step": 143000 },
    { "epoch": 7.46, "learning_rate": 0.0017014148980441116, "loss": 1.1883, "step": 143500 },
    { "epoch": 7.49, "learning_rate": 0.001700374531835206, "loss": 1.191, "step": 144000 },
    { "epoch": 7.52, "learning_rate": 0.0016993341656263005, "loss": 1.1896, "step": 144500 },
    { "epoch": 7.54, "learning_rate": 0.001698293799417395, "loss": 1.1893, "step": 145000 },
    { "epoch": 7.57, "learning_rate": 0.0016972534332084894, "loss": 1.1892, "step": 145500 },
    { "epoch": 7.59, "learning_rate": 0.0016962130669995838, "loss": 1.1887, "step": 146000 },
    { "epoch": 7.62, "learning_rate": 0.0016951727007906783, "loss": 1.1913, "step": 146500 },
    { "epoch": 7.65, "learning_rate": 0.0016941323345817728, "loss": 1.1895, "step": 147000 },
    { "epoch": 7.67, "learning_rate": 0.0016930919683728674, "loss": 1.1891, "step": 147500 },
    { "epoch": 7.7, "learning_rate": 0.001692051602163962, "loss": 1.189, "step": 148000 },
    { "epoch": 7.72, "learning_rate": 0.0016910112359550561, "loss": 1.1907, "step": 148500 },
    { "epoch": 7.75, "learning_rate": 0.0016899708697461506, "loss": 1.1911, "step": 149000 },
    { "epoch": 7.78, "learning_rate": 0.001688930503537245, "loss": 1.1881, "step": 149500 },
    { "epoch": 7.8, "learning_rate": 0.0016878901373283395, "loss": 1.1893, "step": 150000 },
    { "epoch": 7.83, "learning_rate": 0.0016868497711194342, "loss": 1.1902, "step": 150500 },
    { "epoch": 7.85, "learning_rate": 0.0016858094049105286, "loss": 1.1912, "step": 151000 },
    { "epoch": 7.88, "learning_rate": 0.001684769038701623, "loss": 1.1907, "step": 151500 },
    { "epoch": 7.91, "learning_rate": 0.0016837286724927173, "loss": 1.1909, "step": 152000 },
    { "epoch": 7.93, "learning_rate": 0.0016826883062838118, "loss": 1.1875, "step": 152500 },
    { "epoch": 7.96, "learning_rate": 0.0016816479400749065, "loss": 1.19, "step": 153000 },
    { "epoch": 7.98, "learning_rate": 0.001680607573866001, "loss": 1.1881, "step": 153500 },
    { "epoch": 8.0, "eval_loss": 1.2096730470657349, "eval_runtime": 0.6211, "eval_samples_per_second": 1609.947, "eval_steps_per_second": 3.22, "step": 153792 },
    { "epoch": 8.01, "learning_rate": 0.0016795672076570954, "loss": 1.1874, "step": 154000 },
    { "epoch": 8.04, "learning_rate": 0.0016785268414481898, "loss": 1.1844, "step": 154500 },
    { "epoch": 8.06, "learning_rate": 0.0016774864752392843, "loss": 1.1845, "step": 155000 },
    { "epoch": 8.09, "learning_rate": 0.0016764461090303787, "loss": 1.186, "step": 155500 },
    { "epoch": 8.11, "learning_rate": 0.0016754057428214732, "loss": 1.1851, "step": 156000 },
    { "epoch": 8.14, "learning_rate": 0.0016743653766125677, "loss": 1.1872, "step": 156500 },
    { "epoch": 8.17, "learning_rate": 0.0016733250104036621, "loss": 1.1872, "step": 157000 },
    { "epoch": 8.19, "learning_rate": 0.0016722846441947566, "loss": 1.1862, "step": 157500 },
    { "epoch": 8.22, "learning_rate": 0.001671244277985851, "loss": 1.1867, "step": 158000 },
    { "epoch": 8.24, "learning_rate": 0.0016702039117769455, "loss": 1.186, "step": 158500 },
    { "epoch": 8.27, "learning_rate": 0.0016691635455680402, "loss": 1.1858, "step": 159000 },
    { "epoch": 8.3, "learning_rate": 0.0016681231793591344, "loss": 1.1843, "step": 159500 },
    { "epoch": 8.32, "learning_rate": 0.0016670828131502289, "loss": 1.1857, "step": 160000 },
    { "epoch": 8.35, "learning_rate": 0.0016660424469413233, "loss": 1.1872, "step": 160500 },
    { "epoch": 8.37, "learning_rate": 0.0016650020807324178, "loss": 1.1859, "step": 161000 },
    { "epoch": 8.4, "learning_rate": 0.0016639617145235124, "loss": 1.1865, "step": 161500 },
    { "epoch": 8.43, "learning_rate": 0.001662921348314607, "loss": 1.1871, "step": 162000 },
    { "epoch": 8.45, "learning_rate": 0.0016618809821057014, "loss": 1.1871, "step": 162500 },
    { "epoch": 8.48, "learning_rate": 0.0016608406158967956, "loss": 1.1876, "step": 163000 },
    { "epoch": 8.5, "learning_rate": 0.00165980024968789, "loss": 1.1876, "step": 163500 },
    { "epoch": 8.53, "learning_rate": 0.0016587598834789845, "loss": 1.1874, "step": 164000 },
    { "epoch": 8.56, "learning_rate": 0.0016577195172700792, "loss": 1.186, "step": 164500 },
    { "epoch": 8.58, "learning_rate": 0.0016566791510611736, "loss": 1.1869, "step": 165000 },
    { "epoch": 8.61, "learning_rate": 0.001655638784852268, "loss": 1.1865, "step": 165500 },
    { "epoch": 8.64, "learning_rate": 0.0016545984186433626, "loss": 1.1862, "step": 166000 },
    { "epoch": 8.66, "learning_rate": 0.0016535580524344568, "loss": 1.1881, "step": 166500 },
    { "epoch": 8.69, "learning_rate": 0.0016525176862255513, "loss": 1.1875, "step": 167000 },
    { "epoch": 8.71, "learning_rate": 0.001651477320016646, "loss": 1.1866, "step": 167500 },
    { "epoch": 8.74, "learning_rate": 0.0016504369538077404, "loss": 1.1868, "step": 168000 },
    { "epoch": 8.77, "learning_rate": 0.0016493965875988348, "loss": 1.1867, "step": 168500 },
    { "epoch": 8.79, "learning_rate": 0.0016483562213899293, "loss": 1.1858, "step": 169000 },
    { "epoch": 8.82, "learning_rate": 0.0016473158551810238, "loss": 1.1869, "step": 169500 },
    { "epoch": 8.84, "learning_rate": 0.0016462754889721182, "loss": 1.1861, "step": 170000 },
    { "epoch": 8.87, "learning_rate": 0.0016452351227632127, "loss": 1.1849, "step": 170500 },
    { "epoch": 8.9, "learning_rate": 0.0016441947565543071, "loss": 1.1857, "step": 171000 },
    { "epoch": 8.92, "learning_rate": 0.0016431543903454016, "loss": 1.1883, "step": 171500 },
    { "epoch": 8.95, "learning_rate": 0.001642114024136496, "loss": 1.1862, "step": 172000 },
    { "epoch": 8.97, "learning_rate": 0.0016410736579275905, "loss": 1.1859, "step": 172500 },
    { "epoch": 9.0, "learning_rate": 0.0016400332917186852, "loss": 1.1865, "step": 173000 },
    { "epoch": 9.0, "eval_loss": 1.2050005197525024, "eval_runtime": 0.6154, "eval_samples_per_second": 1624.869, "eval_steps_per_second": 3.25, "step": 173016 },
    { "epoch": 9.03, "learning_rate": 0.0016389929255097794, "loss": 1.181, "step": 173500 },
    { "epoch": 9.05, "learning_rate": 0.0016379525593008739, "loss": 1.1819, "step": 174000 },
    { "epoch": 9.08, "learning_rate": 0.0016369121930919683, "loss": 1.1832, "step": 174500 },
    { "epoch": 9.1, "learning_rate": 0.0016358718268830628, "loss": 1.1833, "step": 175000 },
    { "epoch": 9.13, "learning_rate": 0.0016348314606741575, "loss": 1.1814, "step": 175500 },
    { "epoch": 9.16, "learning_rate": 0.001633791094465252, "loss": 1.1824, "step": 176000 },
    { "epoch": 9.18, "learning_rate": 0.0016327507282563464, "loss": 1.1836, "step": 176500 },
    { "epoch": 9.21, "learning_rate": 0.0016317103620474408, "loss": 1.1824, "step": 177000 },
    { "epoch": 9.23, "learning_rate": 0.001630669995838535, "loss": 1.1837, "step": 177500 },
    { "epoch": 9.26, "learning_rate": 0.0016296296296296295, "loss": 1.1859, "step": 178000 },
    { "epoch": 9.29, "learning_rate": 0.0016285892634207242, "loss": 1.1834, "step": 178500 },
    { "epoch": 9.31, "learning_rate": 0.0016275488972118187, "loss": 1.1829, "step": 179000 },
    { "epoch": 9.34, "learning_rate": 0.0016265085310029131, "loss": 1.1827, "step": 179500 },
    { "epoch": 9.36, "learning_rate": 0.0016254681647940076, "loss": 1.1845, "step": 180000 },
    { "epoch": 9.39, "learning_rate": 0.001624427798585102, "loss": 1.1836, "step": 180500 },
    { "epoch": 9.42, "learning_rate": 0.0016233874323761963, "loss": 1.1821, "step": 181000 },
    { "epoch": 9.44, "learning_rate": 0.001622347066167291, "loss": 1.1833, "step": 181500 },
    { "epoch": 9.47, "learning_rate": 0.0016213066999583854, "loss": 1.1843, "step": 182000 },
    { "epoch": 9.49, "learning_rate": 0.0016202663337494799, "loss": 1.1845, "step": 182500 },
    { "epoch": 9.52, "learning_rate": 0.0016192259675405743, "loss": 1.1837, "step": 183000 },
    { "epoch": 9.55, "learning_rate": 0.0016181856013316688, "loss": 1.1837, "step": 183500 },
    { "epoch": 9.57, "learning_rate": 0.0016171452351227634, "loss": 1.184, "step": 184000 },
    { "epoch": 9.6, "learning_rate": 0.0016161048689138577, "loss": 1.1835, "step": 184500 },
    { "epoch": 9.62, "learning_rate": 0.0016150645027049521, "loss": 1.1826, "step": 185000 },
    { "epoch": 9.65, "learning_rate": 0.0016140241364960466, "loss": 1.1846, "step": 185500 },
    { "epoch": 9.68, "learning_rate": 0.001612983770287141, "loss": 1.1833, "step": 186000 },
    { "epoch": 9.7, "learning_rate": 0.0016119434040782355, "loss": 1.1829, "step": 186500 },
    { "epoch": 9.73, "learning_rate": 0.0016109030378693302, "loss": 1.183, "step": 187000 },
    { "epoch": 9.75, "learning_rate": 0.0016098626716604246, "loss": 1.184, "step": 187500 },
    { "epoch": 9.78, "learning_rate": 0.0016088223054515189, "loss": 1.1831, "step": 188000 },
    { "epoch": 9.81, "learning_rate": 0.0016077819392426133, "loss": 1.185, "step": 188500 },
    { "epoch": 9.83, "learning_rate": 0.0016067415730337078, "loss": 1.183, "step": 189000 },
    { "epoch": 9.86, "learning_rate": 0.0016057012068248025, "loss": 1.183, "step": 189500 },
    { "epoch": 9.88, "learning_rate": 0.001604660840615897, "loss": 1.1837, "step": 190000 },
    { "epoch": 9.91, "learning_rate": 0.0016036204744069914, "loss": 1.1809, "step": 190500 },
    { "epoch": 9.94, "learning_rate": 0.0016025801081980858, "loss": 1.1838, "step": 191000 },
    { "epoch": 9.96, "learning_rate": 0.00160153974198918, "loss": 1.1831, "step": 191500 },
    { "epoch": 9.99, "learning_rate": 0.0016004993757802745, "loss": 1.1846, "step": 192000 },
    { "epoch": 10.0, "eval_loss": 1.2037365436553955, "eval_runtime": 0.6098, "eval_samples_per_second": 1639.906, "eval_steps_per_second": 3.28, "step": 192240 },
    { "epoch": 10.01, "learning_rate": 0.0015994590095713692, "loss": 1.181, "step": 192500 },
    { "epoch": 10.04, "learning_rate": 0.0015984186433624637, "loss": 1.1792, "step": 193000 },
    { "epoch": 10.07, "learning_rate": 0.0015973782771535581, "loss": 1.1796, "step": 193500 },
    { "epoch": 10.09, "learning_rate": 0.0015963379109446526, "loss": 1.1806, "step": 194000 },
    { "epoch": 10.12, "learning_rate": 0.001595297544735747, "loss": 1.1803, "step": 194500 },
    { "epoch": 10.14, "learning_rate": 0.0015942571785268413, "loss": 1.1801, "step": 195000 },
    { "epoch": 10.17, "learning_rate": 0.001593216812317936, "loss": 1.1799, "step": 195500 },
    { "epoch": 10.2, "learning_rate": 0.0015921764461090304, "loss": 1.1812, "step": 196000 },
    { "epoch": 10.22, "learning_rate": 0.0015911360799001249, "loss": 1.1795, "step": 196500 },
    { "epoch": 10.25, "learning_rate": 0.0015900957136912193, "loss": 1.1812, "step": 197000 },
    { "epoch": 10.27, "learning_rate": 0.0015890553474823138, "loss": 1.1803, "step": 197500 },
    { "epoch": 10.3, "learning_rate": 0.0015880149812734085, "loss": 1.1818, "step": 198000 },
    { "epoch": 10.33, "learning_rate": 0.0015869746150645027, "loss": 1.1802, "step": 198500 },
    { "epoch": 10.35, "learning_rate": 0.0015859342488555972, "loss": 1.1805, "step": 199000 },
    { "epoch": 10.38, "learning_rate": 0.0015848938826466916, "loss": 1.1802, "step": 199500 },
    { "epoch": 10.4, "learning_rate": 0.001583853516437786, "loss": 1.1812, "step": 200000 },
    { "epoch": 10.43, "learning_rate": 0.0015828131502288805, "loss": 1.1817, "step": 200500 },
    { "epoch": 10.46, "learning_rate": 0.0015817727840199752, "loss": 1.1828, "step": 201000 },
    { "epoch": 10.48, "learning_rate": 0.0015807324178110697, "loss": 1.1798, "step": 201500 },
    { "epoch": 10.51, "learning_rate": 0.0015796920516021641, "loss": 1.1817, "step": 202000 },
    { "epoch": 10.53, "learning_rate": 0.0015786516853932584, "loss": 1.181, "step": 202500 },
    { "epoch": 10.56, "learning_rate": 0.0015776113191843528, "loss": 1.1814, "step": 203000 },
    { "epoch": 10.59, "learning_rate": 0.0015765709529754473, "loss": 1.1798, "step": 203500 },
    { "epoch": 10.61, "learning_rate": 0.001575530586766542, "loss": 1.1819, "step": 204000 },
    { "epoch": 10.64, "learning_rate": 0.0015744902205576364, "loss": 1.1818, "step": 204500 },
    { "epoch": 10.66, "learning_rate": 0.0015734498543487309, "loss": 1.182, "step": 205000 },
    { "epoch": 10.69, "learning_rate": 0.0015724094881398253, "loss": 1.1821, "step": 205500 },
    { "epoch": 10.72, "learning_rate": 0.0015713691219309195, "loss": 1.1819, "step": 206000 },
    { "epoch": 10.74, "learning_rate": 0.0015703287557220142, "loss": 1.1809, "step": 206500 },
    { "epoch": 10.77, "learning_rate": 0.0015692883895131087, "loss": 1.1806, "step": 207000 },
    { "epoch": 10.79, "learning_rate": 0.0015682480233042031, "loss": 1.1814, "step": 207500 },
    { "epoch": 10.82, "learning_rate": 0.0015672076570952976, "loss": 1.181, "step": 208000 },
    { "epoch": 10.85, "learning_rate": 0.001566167290886392, "loss": 1.183, "step": 208500 },
    { "epoch": 10.87, "learning_rate": 0.0015651269246774865, "loss": 1.1812, "step": 209000 },
    { "epoch": 10.9, "learning_rate": 0.001564086558468581, "loss": 1.1803, "step": 209500 },
    { "epoch": 10.92, "learning_rate": 0.0015630461922596754, "loss": 1.1817, "step": 210000 },
    { "epoch": 10.95, "learning_rate": 0.0015620058260507699, "loss": 1.1781, "step": 210500 },
    { "epoch": 10.98, "learning_rate": 0.0015609654598418643, "loss": 1.1806, "step": 211000 },
    { "epoch": 11.0, "eval_loss": 1.2047163248062134, "eval_runtime": 0.6153, "eval_samples_per_second": 1625.195, "eval_steps_per_second": 3.25, "step": 211464 },
    { "epoch": 11.0, "learning_rate": 0.0015599250936329588, "loss": 1.1819, "step": 211500 },
    { "epoch": 11.03, "learning_rate": 0.0015588847274240535, "loss": 1.1753, "step": 212000 },
    { "epoch": 11.05, "learning_rate": 0.001557844361215148, "loss": 1.1781, "step": 212500 },
    { "epoch": 11.08, "learning_rate": 0.0015568039950062422, "loss": 1.1788, "step": 213000 },
    { "epoch": 11.11, "learning_rate": 0.0015557636287973366, "loss": 1.1768, "step": 213500 },
    { "epoch": 11.13, "learning_rate": 0.001554723262588431, "loss": 1.1775, "step": 214000 },
    { "epoch": 11.16, "learning_rate": 0.0015536828963795255, "loss": 1.1782, "step": 214500 },
    { "epoch": 11.18, "learning_rate": 0.0015526425301706202, "loss": 1.1771, "step": 215000 },
    { "epoch": 11.21, "learning_rate": 0.0015516021639617147, "loss": 1.1778, "step": 215500 },
    { "epoch": 11.24, "learning_rate": 0.0015505617977528091, "loss": 1.1767, "step": 216000 },
    { "epoch": 11.26, "learning_rate": 0.0015495214315439034, "loss": 1.1781, "step": 216500 },
    { "epoch": 11.29, "learning_rate": 0.0015484810653349978, "loss": 1.1781, "step": 217000 },
    { "epoch": 11.31, "learning_rate": 0.0015474406991260923, "loss": 1.179, "step": 217500 },
    { "epoch": 11.34, "learning_rate": 0.001546400332917187, "loss": 1.1775, "step": 218000 },
    { "epoch": 11.37, "learning_rate": 0.0015453599667082814, "loss": 1.1799, "step": 218500 },
    { "epoch": 11.39, "learning_rate": 0.0015443196004993759, "loss": 1.1773, "step": 219000 },
    { "epoch": 11.42, "learning_rate": 0.0015432792342904703, "loss": 1.1786, "step": 219500 },
    { "epoch": 11.44, "learning_rate": 0.0015422388680815646, "loss": 1.1766, "step": 220000 },
    { "epoch": 11.47, "learning_rate": 0.0015411985018726592, "loss": 1.1793, "step": 220500 },
    { "epoch": 11.5, "learning_rate": 0.0015401581356637537, "loss": 1.1785, "step": 221000 },
    { "epoch": 11.52, "learning_rate": 0.0015391177694548481, "loss": 1.1787, "step": 221500 },
    { "epoch": 11.55, "learning_rate": 0.0015380774032459426, "loss": 1.1786, "step": 222000 },
    { "epoch": 11.57, "learning_rate": 0.001537037037037037, "loss": 1.1796, "step": 222500 },
    { "epoch": 11.6, "learning_rate": 0.0015359966708281315, "loss": 1.1785, "step": 223000 },
    { "epoch": 11.63, "learning_rate": 0.0015349563046192262, "loss": 1.1789, "step": 223500 },
    { "epoch": 11.65, "learning_rate": 0.0015339159384103204, "loss": 1.1775, "step": 224000 },
    { "epoch": 11.68, "learning_rate": 0.0015328755722014149, "loss": 1.181, "step": 224500 },
    { "epoch": 11.7, "learning_rate": 0.0015318352059925093, "loss": 1.1781, "step": 225000 },
    { "epoch": 11.73, "learning_rate": 0.0015307948397836038, "loss": 1.1791, "step": 225500 },
    { "epoch": 11.76, "learning_rate": 0.0015297544735746985, "loss": 1.1797, "step": 226000 },
    { "epoch": 11.78, "learning_rate": 0.001528714107365793, "loss": 1.1779, "step": 226500 },
    { "epoch": 11.81, "learning_rate": 0.0015276737411568874, "loss": 1.1773, "step": 227000 },
    { "epoch": 11.83, "learning_rate": 0.0015266333749479816, "loss": 1.178, "step": 227500 },
    { "epoch": 11.86, "learning_rate": 0.001525593008739076, "loss": 1.179, "step": 228000 },
    { "epoch": 11.89, "learning_rate": 0.0015245526425301705, "loss": 1.1782, "step": 228500 },
    { "epoch": 11.91, "learning_rate": 0.0015235122763212652, "loss": 1.1796, "step": 229000 },
    { "epoch": 11.94, "learning_rate": 0.0015224719101123597, "loss": 1.1771, "step": 229500 },
    { "epoch": 11.96, "learning_rate": 0.0015214315439034541, "loss": 1.179, "step": 230000 },
    { "epoch": 11.99, "learning_rate": 0.0015203911776945486, "loss": 1.1791, "step": 230500 },
    { "epoch": 12.0, "eval_loss": 1.1990782022476196, "eval_runtime": 0.5976, "eval_samples_per_second": 1673.372, "eval_steps_per_second": 3.347, "step": 230688 },
    { "epoch": 12.02, "learning_rate": 0.0015193508114856428, "loss": 1.1745, "step": 231000 },
    { "epoch": 12.04, "learning_rate": 0.0015183104452767373, "loss": 1.1744, "step": 231500 },
    { "epoch": 12.07, "learning_rate": 0.001517270079067832, "loss": 1.1743, "step": 232000 },
    { "epoch": 12.09, "learning_rate": 0.0015162297128589264, "loss": 1.1758, "step": 232500 },
    { "epoch": 12.12, "learning_rate": 0.0015151893466500209, "loss": 1.1738, "step": 233000 },
    { "epoch": 12.15, "learning_rate": 0.0015141489804411153, "loss": 1.1752, "step": 233500 },
    { "epoch": 12.17, "learning_rate": 0.0015131086142322098, "loss": 1.1753, "step": 234000 },
    { "epoch": 12.2, "learning_rate": 0.0015120682480233042, "loss": 1.1764, "step": 234500 },
    { "epoch": 12.22, "learning_rate": 0.0015110278818143987, "loss": 1.174, "step": 235000 },
    { "epoch": 12.25, "learning_rate": 0.0015099875156054932, "loss": 1.176, "step": 235500 },
    { "epoch": 12.28, "learning_rate": 0.0015089471493965876, "loss": 1.176, "step": 236000 },
    { "epoch": 12.3, "learning_rate": 0.001507906783187682, "loss": 1.1758, "step": 236500 },
    { "epoch": 12.33, "learning_rate": 0.0015068664169787765, "loss": 1.1747, "step": 237000 },
    { "epoch": 12.35, "learning_rate": 0.0015058260507698712, "loss": 1.1752, "step": 237500 },
    { "epoch": 12.38, "learning_rate": 0.0015047856845609654, "loss": 1.1761, "step": 238000 },
    { "epoch": 12.41, "learning_rate": 0.00150374531835206, "loss": 1.1771, "step": 238500 },
    { "epoch": 12.43, "learning_rate": 0.0015027049521431544, "loss": 1.1764, "step": 239000 },
    { "epoch": 12.46, "learning_rate": 0.0015016645859342488, "loss": 1.1754, "step": 239500 },
    { "epoch": 12.48, "learning_rate": 0.0015006242197253433, "loss": 1.1763, "step": 240000 },
    { "epoch": 12.51, "learning_rate": 0.001499583853516438, "loss": 1.1751, "step": 240500 },
    { "epoch": 12.54, "learning_rate": 0.0014985434873075324, "loss": 1.177, "step": 241000 },
    { "epoch": 12.56, "learning_rate": 0.0014975031210986266, "loss": 1.178, "step": 241500 },
    { "epoch": 12.59, "learning_rate": 0.001496462754889721, "loss": 1.178, "step": 242000 },
    { "epoch": 12.61, "learning_rate": 0.0014954223886808156, "loss": 1.1761, "step": 242500 },
    { "epoch": 12.64, "learning_rate": 0.0014943820224719102, "loss": 1.1763, "step": 243000 },
    { "epoch": 12.67, "learning_rate": 0.0014933416562630047, "loss": 1.1781, "step": 243500 },
    { "epoch": 12.69, "learning_rate": 0.0014923012900540991, "loss": 1.1773, "step": 244000 },
    { "epoch": 12.72, "learning_rate": 0.0014912609238451936, "loss": 1.176, "step": 244500 },
    { "epoch": 12.74, "learning_rate": 0.0014902205576362878, "loss": 1.1762, "step": 245000 },
    { "epoch": 12.77, "learning_rate": 0.0014891801914273823, "loss": 1.1776, "step": 245500 },
    { "epoch": 12.8, "learning_rate": 0.001488139825218477, "loss": 1.1774, "step": 246000 },
    { "epoch": 12.82, "learning_rate": 0.0014870994590095714, "loss": 1.1759, "step": 246500 },
    { "epoch": 12.85, "learning_rate": 0.0014860590928006659, "loss": 1.1783, "step": 247000 },
    { "epoch": 12.87, "learning_rate": 0.0014850187265917603, "loss": 1.1757, "step": 247500 },
    { "epoch": 12.9, "learning_rate": 0.0014839783603828548, "loss": 1.1769, "step": 248000 },
    { "epoch": 12.93, "learning_rate": 0.0014829379941739495, "loss": 1.1749, "step": 248500 },
    { "epoch": 12.95, "learning_rate": 0.0014818976279650437, "loss": 1.1751, "step": 249000 },
    { "epoch": 12.98, "learning_rate": 0.0014808572617561382, "loss": 1.1757, "step": 249500 },
    { "epoch": 13.0, "eval_loss": 1.198763132095337, "eval_runtime": 0.6114, "eval_samples_per_second": 1635.549, "eval_steps_per_second": 3.271, "step": 249912 },
    { "epoch": 13.0, "learning_rate": 0.0014798168955472326, "loss": 1.1756, "step": 250000 },
    { "epoch": 13.03, "learning_rate": 0.001478776529338327, "loss": 1.1712, "step": 250500 },
    { "epoch": 13.06, "learning_rate": 0.0014777361631294215, "loss": 1.1714, "step": 251000 },
    { "epoch": 13.08, "learning_rate": 0.0014766957969205162, "loss": 1.1724, "step": 251500 },
    { "epoch": 13.11, "learning_rate": 0.0014756554307116107, "loss": 1.1736, "step": 252000 },
    { "epoch": 13.13,
|
"learning_rate": 0.001474615064502705, |
|
"loss": 1.1739, |
|
"step": 252500 |
|
}, |
|
{ |
|
"epoch": 13.16, |
|
"learning_rate": 0.0014735746982937994, |
|
"loss": 1.1737, |
|
"step": 253000 |
|
}, |
|
{ |
|
"epoch": 13.19, |
|
"learning_rate": 0.0014725343320848938, |
|
"loss": 1.1732, |
|
"step": 253500 |
|
}, |
|
{ |
|
"epoch": 13.21, |
|
"learning_rate": 0.0014714939658759883, |
|
"loss": 1.1725, |
|
"step": 254000 |
|
}, |
|
{ |
|
"epoch": 13.24, |
|
"learning_rate": 0.001470453599667083, |
|
"loss": 1.1737, |
|
"step": 254500 |
|
}, |
|
{ |
|
"epoch": 13.26, |
|
"learning_rate": 0.0014694132334581774, |
|
"loss": 1.1737, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 13.29, |
|
"learning_rate": 0.0014683728672492719, |
|
"loss": 1.1733, |
|
"step": 255500 |
|
}, |
|
{ |
|
"epoch": 13.32, |
|
"learning_rate": 0.0014673325010403661, |
|
"loss": 1.1729, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 13.34, |
|
"learning_rate": 0.0014662921348314606, |
|
"loss": 1.1742, |
|
"step": 256500 |
|
}, |
|
{ |
|
"epoch": 13.37, |
|
"learning_rate": 0.0014652517686225552, |
|
"loss": 1.174, |
|
"step": 257000 |
|
}, |
|
{ |
|
"epoch": 13.39, |
|
"learning_rate": 0.0014642114024136497, |
|
"loss": 1.1749, |
|
"step": 257500 |
|
}, |
|
{ |
|
"epoch": 13.42, |
|
"learning_rate": 0.0014631710362047442, |
|
"loss": 1.1735, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 13.45, |
|
"learning_rate": 0.0014621306699958386, |
|
"loss": 1.1724, |
|
"step": 258500 |
|
}, |
|
{ |
|
"epoch": 13.47, |
|
"learning_rate": 0.001461090303786933, |
|
"loss": 1.1743, |
|
"step": 259000 |
|
}, |
|
{ |
|
"epoch": 13.5, |
|
"learning_rate": 0.0014600499375780273, |
|
"loss": 1.1755, |
|
"step": 259500 |
|
}, |
|
{ |
|
"epoch": 13.52, |
|
"learning_rate": 0.001459009571369122, |
|
"loss": 1.1759, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 13.55, |
|
"learning_rate": 0.0014579692051602164, |
|
"loss": 1.1749, |
|
"step": 260500 |
|
}, |
|
{ |
|
"epoch": 13.58, |
|
"learning_rate": 0.001456928838951311, |
|
"loss": 1.1756, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 13.6, |
|
"learning_rate": 0.0014558884727424054, |
|
"loss": 1.1747, |
|
"step": 261500 |
|
}, |
|
{ |
|
"epoch": 13.63, |
|
"learning_rate": 0.0014548481065334998, |
|
"loss": 1.1745, |
|
"step": 262000 |
|
}, |
|
{ |
|
"epoch": 13.65, |
|
"learning_rate": 0.0014538077403245945, |
|
"loss": 1.1736, |
|
"step": 262500 |
|
}, |
|
{ |
|
"epoch": 13.68, |
|
"learning_rate": 0.0014527673741156887, |
|
"loss": 1.1749, |
|
"step": 263000 |
|
}, |
|
{ |
|
"epoch": 13.71, |
|
"learning_rate": 0.0014517270079067832, |
|
"loss": 1.1747, |
|
"step": 263500 |
|
}, |
|
{ |
|
"epoch": 13.73, |
|
"learning_rate": 0.0014506866416978776, |
|
"loss": 1.1735, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 13.76, |
|
"learning_rate": 0.001449646275488972, |
|
"loss": 1.1736, |
|
"step": 264500 |
|
}, |
|
{ |
|
"epoch": 13.78, |
|
"learning_rate": 0.0014486059092800666, |
|
"loss": 1.1741, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 13.81, |
|
"learning_rate": 0.0014475655430711612, |
|
"loss": 1.1756, |
|
"step": 265500 |
|
}, |
|
{ |
|
"epoch": 13.84, |
|
"learning_rate": 0.0014465251768622557, |
|
"loss": 1.1746, |
|
"step": 266000 |
|
}, |
|
{ |
|
"epoch": 13.86, |
|
"learning_rate": 0.00144548481065335, |
|
"loss": 1.176, |
|
"step": 266500 |
|
}, |
|
{ |
|
"epoch": 13.89, |
|
"learning_rate": 0.0014444444444444444, |
|
"loss": 1.1746, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 13.91, |
|
"learning_rate": 0.0014434040782355388, |
|
"loss": 1.1746, |
|
"step": 267500 |
|
}, |
|
{ |
|
"epoch": 13.94, |
|
"learning_rate": 0.0014423637120266333, |
|
"loss": 1.1741, |
|
"step": 268000 |
|
}, |
|
{ |
|
"epoch": 13.97, |
|
"learning_rate": 0.001441323345817728, |
|
"loss": 1.173, |
|
"step": 268500 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"learning_rate": 0.0014402829796088224, |
|
"loss": 1.1741, |
|
"step": 269000 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 1.199006199836731, |
|
"eval_runtime": 0.8396, |
|
"eval_samples_per_second": 1191.026, |
|
"eval_steps_per_second": 2.382, |
|
"step": 269136 |
|
}, |
|
{ |
|
"epoch": 14.02, |
|
"learning_rate": 0.0014392426133999169, |
|
"loss": 1.1715, |
|
"step": 269500 |
|
}, |
|
{ |
|
"epoch": 14.04, |
|
"learning_rate": 0.0014382022471910111, |
|
"loss": 1.1707, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 14.07, |
|
"learning_rate": 0.0014371618809821056, |
|
"loss": 1.171, |
|
"step": 270500 |
|
}, |
|
{ |
|
"epoch": 14.1, |
|
"learning_rate": 0.0014361215147732003, |
|
"loss": 1.1708, |
|
"step": 271000 |
|
}, |
|
{ |
|
"epoch": 14.12, |
|
"learning_rate": 0.0014350811485642947, |
|
"loss": 1.1715, |
|
"step": 271500 |
|
}, |
|
{ |
|
"epoch": 14.15, |
|
"learning_rate": 0.0014340407823553892, |
|
"loss": 1.171, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 14.17, |
|
"learning_rate": 0.0014330004161464836, |
|
"loss": 1.1721, |
|
"step": 272500 |
|
}, |
|
{ |
|
"epoch": 14.2, |
|
"learning_rate": 0.001431960049937578, |
|
"loss": 1.1717, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 14.23, |
|
"learning_rate": 0.0014309196837286725, |
|
"loss": 1.1715, |
|
"step": 273500 |
|
}, |
|
{ |
|
"epoch": 14.25, |
|
"learning_rate": 0.001429879317519767, |
|
"loss": 1.171, |
|
"step": 274000 |
|
}, |
|
{ |
|
"epoch": 14.28, |
|
"learning_rate": 0.0014288389513108614, |
|
"loss": 1.1712, |
|
"step": 274500 |
|
}, |
|
{ |
|
"epoch": 14.31, |
|
"learning_rate": 0.001427798585101956, |
|
"loss": 1.1726, |
|
"step": 275000 |
|
}, |
|
{ |
|
"epoch": 14.33, |
|
"learning_rate": 0.0014267582188930504, |
|
"loss": 1.173, |
|
"step": 275500 |
|
}, |
|
{ |
|
"epoch": 14.36, |
|
"learning_rate": 0.0014257178526841448, |
|
"loss": 1.1709, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 14.38, |
|
"learning_rate": 0.0014246774864752393, |
|
"loss": 1.1728, |
|
"step": 276500 |
|
}, |
|
{ |
|
"epoch": 14.41, |
|
"learning_rate": 0.001423637120266334, |
|
"loss": 1.1721, |
|
"step": 277000 |
|
}, |
|
{ |
|
"epoch": 14.44, |
|
"learning_rate": 0.0014225967540574282, |
|
"loss": 1.1732, |
|
"step": 277500 |
|
}, |
|
{ |
|
"epoch": 14.46, |
|
"learning_rate": 0.0014215563878485226, |
|
"loss": 1.1718, |
|
"step": 278000 |
|
}, |
|
{ |
|
"epoch": 14.49, |
|
"learning_rate": 0.001420516021639617, |
|
"loss": 1.1716, |
|
"step": 278500 |
|
}, |
|
{ |
|
"epoch": 14.51, |
|
"learning_rate": 0.0014194756554307116, |
|
"loss": 1.1735, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 14.54, |
|
"learning_rate": 0.0014184352892218062, |
|
"loss": 1.1731, |
|
"step": 279500 |
|
}, |
|
{ |
|
"epoch": 14.57, |
|
"learning_rate": 0.0014173949230129007, |
|
"loss": 1.1724, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 14.59, |
|
"learning_rate": 0.0014163545568039951, |
|
"loss": 1.1717, |
|
"step": 280500 |
|
}, |
|
{ |
|
"epoch": 14.62, |
|
"learning_rate": 0.0014153141905950894, |
|
"loss": 1.1714, |
|
"step": 281000 |
|
}, |
|
{ |
|
"epoch": 14.64, |
|
"learning_rate": 0.0014142738243861838, |
|
"loss": 1.1725, |
|
"step": 281500 |
|
}, |
|
{ |
|
"epoch": 14.67, |
|
"learning_rate": 0.0014132334581772783, |
|
"loss": 1.1726, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 14.7, |
|
"learning_rate": 0.001412193091968373, |
|
"loss": 1.1731, |
|
"step": 282500 |
|
}, |
|
{ |
|
"epoch": 14.72, |
|
"learning_rate": 0.0014111527257594674, |
|
"loss": 1.1727, |
|
"step": 283000 |
|
}, |
|
{ |
|
"epoch": 14.75, |
|
"learning_rate": 0.0014101123595505619, |
|
"loss": 1.1724, |
|
"step": 283500 |
|
}, |
|
{ |
|
"epoch": 14.77, |
|
"learning_rate": 0.0014090719933416563, |
|
"loss": 1.1715, |
|
"step": 284000 |
|
}, |
|
{ |
|
"epoch": 14.8, |
|
"learning_rate": 0.0014080316271327506, |
|
"loss": 1.173, |
|
"step": 284500 |
|
}, |
|
{ |
|
"epoch": 14.83, |
|
"learning_rate": 0.0014069912609238453, |
|
"loss": 1.1727, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 14.85, |
|
"learning_rate": 0.0014059508947149397, |
|
"loss": 1.1744, |
|
"step": 285500 |
|
}, |
|
{ |
|
"epoch": 14.88, |
|
"learning_rate": 0.0014049105285060342, |
|
"loss": 1.1709, |
|
"step": 286000 |
|
}, |
|
{ |
|
"epoch": 14.9, |
|
"learning_rate": 0.0014038701622971286, |
|
"loss": 1.1727, |
|
"step": 286500 |
|
}, |
|
{ |
|
"epoch": 14.93, |
|
"learning_rate": 0.001402829796088223, |
|
"loss": 1.1733, |
|
"step": 287000 |
|
}, |
|
{ |
|
"epoch": 14.96, |
|
"learning_rate": 0.0014017894298793175, |
|
"loss": 1.1736, |
|
"step": 287500 |
|
}, |
|
{ |
|
"epoch": 14.98, |
|
"learning_rate": 0.001400749063670412, |
|
"loss": 1.1734, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 1.1976137161254883, |
|
"eval_runtime": 0.6131, |
|
"eval_samples_per_second": 1631.003, |
|
"eval_steps_per_second": 3.262, |
|
"step": 288360 |
|
}, |
|
{ |
|
"epoch": 15.01, |
|
"learning_rate": 0.0013997086974615065, |
|
"loss": 1.1707, |
|
"step": 288500 |
|
}, |
|
{ |
|
"epoch": 15.03, |
|
"learning_rate": 0.001398668331252601, |
|
"loss": 1.1673, |
|
"step": 289000 |
|
}, |
|
{ |
|
"epoch": 15.06, |
|
"learning_rate": 0.0013976279650436954, |
|
"loss": 1.1696, |
|
"step": 289500 |
|
}, |
|
{ |
|
"epoch": 15.09, |
|
"learning_rate": 0.0013965875988347898, |
|
"loss": 1.169, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 15.11, |
|
"learning_rate": 0.0013955472326258843, |
|
"loss": 1.1689, |
|
"step": 290500 |
|
}, |
|
{ |
|
"epoch": 15.14, |
|
"learning_rate": 0.001394506866416979, |
|
"loss": 1.1702, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 15.16, |
|
"learning_rate": 0.0013934665002080732, |
|
"loss": 1.1687, |
|
"step": 291500 |
|
}, |
|
{ |
|
"epoch": 15.19, |
|
"learning_rate": 0.0013924261339991677, |
|
"loss": 1.1688, |
|
"step": 292000 |
|
}, |
|
{ |
|
"epoch": 15.22, |
|
"learning_rate": 0.0013913857677902621, |
|
"loss": 1.1693, |
|
"step": 292500 |
|
}, |
|
{ |
|
"epoch": 15.24, |
|
"learning_rate": 0.0013903454015813566, |
|
"loss": 1.1703, |
|
"step": 293000 |
|
}, |
|
{ |
|
"epoch": 15.27, |
|
"learning_rate": 0.0013893050353724512, |
|
"loss": 1.1719, |
|
"step": 293500 |
|
}, |
|
{ |
|
"epoch": 15.29, |
|
"learning_rate": 0.0013882646691635457, |
|
"loss": 1.1701, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 15.32, |
|
"learning_rate": 0.0013872243029546402, |
|
"loss": 1.1707, |
|
"step": 294500 |
|
}, |
|
{ |
|
"epoch": 15.35, |
|
"learning_rate": 0.0013861839367457346, |
|
"loss": 1.1708, |
|
"step": 295000 |
|
}, |
|
{ |
|
"epoch": 15.37, |
|
"learning_rate": 0.0013851435705368289, |
|
"loss": 1.1716, |
|
"step": 295500 |
|
}, |
|
{ |
|
"epoch": 15.4, |
|
"learning_rate": 0.0013841032043279233, |
|
"loss": 1.1716, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 15.42, |
|
"learning_rate": 0.001383062838119018, |
|
"loss": 1.1707, |
|
"step": 296500 |
|
}, |
|
{ |
|
"epoch": 15.45, |
|
"learning_rate": 0.0013820224719101124, |
|
"loss": 1.1708, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 15.48, |
|
"learning_rate": 0.001380982105701207, |
|
"loss": 1.1691, |
|
"step": 297500 |
|
}, |
|
{ |
|
"epoch": 15.5, |
|
"learning_rate": 0.0013799417394923014, |
|
"loss": 1.1725, |
|
"step": 298000 |
|
}, |
|
{ |
|
"epoch": 15.53, |
|
"learning_rate": 0.0013789013732833958, |
|
"loss": 1.1697, |
|
"step": 298500 |
|
}, |
|
{ |
|
"epoch": 15.55, |
|
"learning_rate": 0.0013778610070744903, |
|
"loss": 1.1715, |
|
"step": 299000 |
|
}, |
|
{ |
|
"epoch": 15.58, |
|
"learning_rate": 0.0013768206408655847, |
|
"loss": 1.1713, |
|
"step": 299500 |
|
}, |
|
{ |
|
"epoch": 15.61, |
|
"learning_rate": 0.0013757802746566792, |
|
"loss": 1.1708, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 15.63, |
|
"learning_rate": 0.0013747399084477736, |
|
"loss": 1.1705, |
|
"step": 300500 |
|
}, |
|
{ |
|
"epoch": 15.66, |
|
"learning_rate": 0.001373699542238868, |
|
"loss": 1.1712, |
|
"step": 301000 |
|
}, |
|
{ |
|
"epoch": 15.68, |
|
"learning_rate": 0.0013726591760299626, |
|
"loss": 1.1712, |
|
"step": 301500 |
|
}, |
|
{ |
|
"epoch": 15.71, |
|
"learning_rate": 0.0013716188098210572, |
|
"loss": 1.1715, |
|
"step": 302000 |
|
}, |
|
{ |
|
"epoch": 15.74, |
|
"learning_rate": 0.0013705784436121515, |
|
"loss": 1.1709, |
|
"step": 302500 |
|
}, |
|
{ |
|
"epoch": 15.76, |
|
"learning_rate": 0.001369538077403246, |
|
"loss": 1.1722, |
|
"step": 303000 |
|
}, |
|
{ |
|
"epoch": 15.79, |
|
"learning_rate": 0.0013684977111943404, |
|
"loss": 1.1718, |
|
"step": 303500 |
|
}, |
|
{ |
|
"epoch": 15.81, |
|
"learning_rate": 0.0013674573449854348, |
|
"loss": 1.1708, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 15.84, |
|
"learning_rate": 0.0013664169787765293, |
|
"loss": 1.1723, |
|
"step": 304500 |
|
}, |
|
{ |
|
"epoch": 15.87, |
|
"learning_rate": 0.001365376612567624, |
|
"loss": 1.1702, |
|
"step": 305000 |
|
}, |
|
{ |
|
"epoch": 15.89, |
|
"learning_rate": 0.0013643362463587184, |
|
"loss": 1.1714, |
|
"step": 305500 |
|
}, |
|
{ |
|
"epoch": 15.92, |
|
"learning_rate": 0.0013632958801498127, |
|
"loss": 1.1701, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 15.94, |
|
"learning_rate": 0.0013622555139409071, |
|
"loss": 1.1709, |
|
"step": 306500 |
|
}, |
|
{ |
|
"epoch": 15.97, |
|
"learning_rate": 0.0013612151477320016, |
|
"loss": 1.1697, |
|
"step": 307000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 0.0013601747815230963, |
|
"loss": 1.1709, |
|
"step": 307500 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 1.1968414783477783, |
|
"eval_runtime": 0.6109, |
|
"eval_samples_per_second": 1636.861, |
|
"eval_steps_per_second": 3.274, |
|
"step": 307584 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 961200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"total_flos": 1.2378919133169423e+18, |
|
"train_batch_size": 512, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|