DAPO / trainer_state.json
kangdawei's picture
Model save
3939526 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.22857142857142856,
"eval_steps": 500,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"clip_fraction": 0.0,
"completion_length": 2216.625045776367,
"dapo/avg_reward_std": 0.23920068350331536,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3735632248993578,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 33.86904761904762,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.001142857142857143,
"grad_norm": 0.10874509066343307,
"kl": 0.0,
"learning_rate": 0.0,
"loss": 0.0468,
"reward": 0.6486758906394243,
"reward_std": 0.9342863708734512,
"step": 1
},
{
"clip_fraction": 0.0,
"completion_length": 2926.4757690429688,
"dapo/avg_reward_std": 0.24011585204040303,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3284313836518456,
"dapo/num_sampling_attempts": 4.25,
"dapo/sampling_efficiency": 26.874999999999993,
"dapo/total_prompts_processed": 25.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.002285714285714286,
"grad_norm": 0.12814132869243622,
"kl": 0.0,
"learning_rate": 1e-07,
"loss": 0.0508,
"reward": 0.2922485675662756,
"reward_std": 0.9327598959207535,
"step": 2
},
{
"clip_fraction": 0.0,
"completion_length": 2888.1527709960938,
"dapo/avg_reward_std": 0.2903491040070852,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.36111111839612325,
"dapo/num_sampling_attempts": 3.75,
"dapo/sampling_efficiency": 36.875,
"dapo/total_prompts_processed": 22.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.0034285714285714284,
"grad_norm": 0.1155443787574768,
"kl": 2.9146671295166016e-05,
"learning_rate": 2e-07,
"loss": 0.0647,
"reward": 0.3509849710389972,
"reward_std": 0.9315856546163559,
"step": 3
},
{
"clip_fraction": 0.0,
"completion_length": 2535.718734741211,
"dapo/avg_reward_std": 0.25628158891642533,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.35802469595714853,
"dapo/num_sampling_attempts": 3.375,
"dapo/sampling_efficiency": 41.56249999999999,
"dapo/total_prompts_processed": 20.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.004571428571428572,
"grad_norm": 0.14338600635528564,
"kl": 2.1044164896011353e-05,
"learning_rate": 3e-07,
"loss": 0.0536,
"reward": 0.5615630690008402,
"reward_std": 0.9670609682798386,
"step": 4
},
{
"clip_fraction": 0.0,
"completion_length": 2548.916702270508,
"dapo/avg_reward_std": 0.2889887053391029,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.40804598814454573,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 36.875,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.005714285714285714,
"grad_norm": 0.10121661424636841,
"kl": 2.7820467948913574e-05,
"learning_rate": 4e-07,
"loss": 0.0263,
"reward": 0.5986085031181574,
"reward_std": 0.9444186091423035,
"step": 5
},
{
"clip_fraction": 0.0,
"completion_length": 2357.579864501953,
"dapo/avg_reward_std": 0.30308351665735245,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.36309524306229185,
"dapo/num_sampling_attempts": 3.5,
"dapo/sampling_efficiency": 37.5,
"dapo/total_prompts_processed": 21.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.006857142857142857,
"grad_norm": 0.171969935297966,
"kl": 2.6032328605651855e-05,
"learning_rate": 5e-07,
"loss": 0.0906,
"reward": 0.4527070773765445,
"reward_std": 0.9109365493059158,
"step": 6
},
{
"clip_fraction": 0.0,
"completion_length": 2404.2534790039062,
"dapo/avg_reward_std": 0.3077041815828394,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.41975309506610586,
"dapo/num_sampling_attempts": 3.375,
"dapo/sampling_efficiency": 37.916666666666664,
"dapo/total_prompts_processed": 20.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.008,
"grad_norm": 0.12406504899263382,
"kl": 1.9066035747528076e-05,
"learning_rate": 6e-07,
"loss": 0.0645,
"reward": 0.5808906648308039,
"reward_std": 0.9664968773722649,
"step": 7
},
{
"clip_fraction": 0.0,
"completion_length": 2833.3056030273438,
"dapo/avg_reward_std": 0.2214778729023472,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.31182796435971416,
"dapo/num_sampling_attempts": 3.875,
"dapo/sampling_efficiency": 36.577380952380956,
"dapo/total_prompts_processed": 23.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.009142857142857144,
"grad_norm": 0.13480524718761444,
"kl": 3.4965574741363525e-05,
"learning_rate": 7e-07,
"loss": 0.0738,
"reward": 0.5177570842206478,
"reward_std": 0.9147621840238571,
"step": 8
},
{
"clip_fraction": 0.0,
"completion_length": 2965.6736450195312,
"dapo/avg_reward_std": 0.2788830002148946,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3888888966154169,
"dapo/num_sampling_attempts": 3.375,
"dapo/sampling_efficiency": 46.36904761904761,
"dapo/total_prompts_processed": 20.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.010285714285714285,
"grad_norm": 0.08226096630096436,
"kl": 1.4536082744598389e-05,
"learning_rate": 8e-07,
"loss": 0.0316,
"reward": 0.5644797384738922,
"reward_std": 0.9423079788684845,
"step": 9
},
{
"clip_fraction": 0.0,
"completion_length": 2574.461814880371,
"dapo/avg_reward_std": 0.3602010520065532,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.588235302883036,
"dapo/num_sampling_attempts": 2.125,
"dapo/sampling_efficiency": 61.45833333333333,
"dapo/total_prompts_processed": 12.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.011428571428571429,
"grad_norm": 0.1667146533727646,
"kl": 2.9319897294044495e-05,
"learning_rate": 9e-07,
"loss": 0.0894,
"reward": 0.6415909845381975,
"reward_std": 0.9869548827409744,
"step": 10
},
{
"clip_fraction": 0.0,
"completion_length": 2798.982666015625,
"dapo/avg_reward_std": 0.15393146287117684,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.14880952797830105,
"dapo/num_sampling_attempts": 7.0,
"dapo/sampling_efficiency": 15.882936507936506,
"dapo/total_prompts_processed": 42.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.012571428571428572,
"grad_norm": 0.1166534572839737,
"kl": 2.0567327737808228e-05,
"learning_rate": 1e-06,
"loss": 0.0207,
"reward": 0.2987014357931912,
"reward_std": 0.868266686797142,
"step": 11
},
{
"clip_fraction": 0.0,
"completion_length": 2377.555595397949,
"dapo/avg_reward_std": 0.21645361091941595,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2968750037252903,
"dapo/num_sampling_attempts": 4.0,
"dapo/sampling_efficiency": 38.125,
"dapo/total_prompts_processed": 24.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.013714285714285714,
"grad_norm": 0.23483960330486298,
"kl": 3.6854296922683716e-05,
"learning_rate": 9.997258721585931e-07,
"loss": 0.0491,
"reward": 0.6348252706229687,
"reward_std": 0.9863902181386948,
"step": 12
},
{
"clip_fraction": 0.0,
"completion_length": 2688.1111755371094,
"dapo/avg_reward_std": 0.34906478971242905,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.49166667461395264,
"dapo/num_sampling_attempts": 2.5,
"dapo/sampling_efficiency": 52.08333333333333,
"dapo/total_prompts_processed": 15.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.014857142857142857,
"grad_norm": 0.09364266693592072,
"kl": 3.152713179588318e-05,
"learning_rate": 9.989038226169207e-07,
"loss": 0.0431,
"reward": 0.5878111608326435,
"reward_std": 0.9752944633364677,
"step": 13
},
{
"clip_fraction": 0.0,
"completion_length": 2029.9132270812988,
"dapo/avg_reward_std": 0.25792322993278505,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.36666666984558105,
"dapo/num_sampling_attempts": 3.125,
"dapo/sampling_efficiency": 57.5,
"dapo/total_prompts_processed": 18.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.016,
"grad_norm": 0.13894271850585938,
"kl": 4.156678915023804e-05,
"learning_rate": 9.975348529157229e-07,
"loss": 0.0279,
"reward": 0.5834919223561883,
"reward_std": 0.9710095003247261,
"step": 14
},
{
"clip_fraction": 0.0,
"completion_length": 2817.8576583862305,
"dapo/avg_reward_std": 0.3106007158756256,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.5333333484828472,
"dapo/num_sampling_attempts": 2.5,
"dapo/sampling_efficiency": 52.08333333333333,
"dapo/total_prompts_processed": 15.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.017142857142857144,
"grad_norm": 0.08778129518032074,
"kl": 3.078579902648926e-05,
"learning_rate": 9.956206309337066e-07,
"loss": 0.0343,
"reward": 0.6716702915728092,
"reward_std": 0.99223193526268,
"step": 15
},
{
"clip_fraction": 0.0,
"completion_length": 2570.2500076293945,
"dapo/avg_reward_std": 0.244095021715531,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.35897436336829114,
"dapo/num_sampling_attempts": 3.25,
"dapo/sampling_efficiency": 44.49404761904762,
"dapo/total_prompts_processed": 19.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.018285714285714287,
"grad_norm": 0.07460447400808334,
"kl": 0.00025935471057891846,
"learning_rate": 9.931634888554935e-07,
"loss": 0.0146,
"reward": 0.7213943339884281,
"reward_std": 0.9671430364251137,
"step": 16
},
{
"clip_fraction": 0.0,
"completion_length": 2483.413215637207,
"dapo/avg_reward_std": 0.2672279636065165,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.35000000496705375,
"dapo/num_sampling_attempts": 3.75,
"dapo/sampling_efficiency": 29.166666666666664,
"dapo/total_prompts_processed": 22.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.019428571428571427,
"grad_norm": 0.12397046387195587,
"kl": 0.00022289156913757324,
"learning_rate": 9.901664203302124e-07,
"loss": 0.0624,
"reward": 0.4952134042978287,
"reward_std": 0.9074268043041229,
"step": 17
},
{
"clip_fraction": 0.0,
"completion_length": 2537.8194580078125,
"dapo/avg_reward_std": 0.34170445956681905,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.5438596567040995,
"dapo/num_sampling_attempts": 2.375,
"dapo/sampling_efficiency": 48.95833333333333,
"dapo/total_prompts_processed": 14.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.02057142857142857,
"grad_norm": 0.1614188253879547,
"kl": 0.0003694295883178711,
"learning_rate": 9.866330768241983e-07,
"loss": 0.1136,
"reward": 0.6263789646327496,
"reward_std": 0.9367138147354126,
"step": 18
},
{
"clip_fraction": 0.0,
"completion_length": 2041.2916984558105,
"dapo/avg_reward_std": 0.23441629879402393,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.31818182224577124,
"dapo/num_sampling_attempts": 4.125,
"dapo/sampling_efficiency": 38.36805555555556,
"dapo/total_prompts_processed": 24.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.021714285714285714,
"grad_norm": 0.2115960717201233,
"kl": 0.0005898326635360718,
"learning_rate": 9.825677631722435e-07,
"loss": 0.0603,
"reward": 0.6228582374751568,
"reward_std": 0.9455358982086182,
"step": 19
},
{
"clip_fraction": 0.0,
"completion_length": 2392.7882385253906,
"dapo/avg_reward_std": 0.22908216629709516,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2952381010566439,
"dapo/num_sampling_attempts": 4.375,
"dapo/sampling_efficiency": 33.541666666666664,
"dapo/total_prompts_processed": 26.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.022857142857142857,
"grad_norm": 0.20383711159229279,
"kl": 0.0008958578109741211,
"learning_rate": 9.779754323328192e-07,
"loss": 0.1313,
"reward": 0.41653589624911547,
"reward_std": 0.9027180448174477,
"step": 20
},
{
"clip_fraction": 0.0,
"completion_length": 2966.260452270508,
"dapo/avg_reward_std": 0.16204138861762152,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.25555555986033546,
"dapo/num_sampling_attempts": 5.625,
"dapo/sampling_efficiency": 22.84722222222222,
"dapo/total_prompts_processed": 33.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.024,
"grad_norm": 0.1098903939127922,
"kl": 0.0002017766237258911,
"learning_rate": 9.728616793536587e-07,
"loss": 0.0825,
"reward": 0.43902475386857986,
"reward_std": 0.9111825451254845,
"step": 21
},
{
"clip_fraction": 0.0,
"completion_length": 3016.357696533203,
"dapo/avg_reward_std": 0.28799043401427893,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.42028986371081806,
"dapo/num_sampling_attempts": 2.875,
"dapo/sampling_efficiency": 52.20238095238095,
"dapo/total_prompts_processed": 17.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.025142857142857144,
"grad_norm": 0.1315963715314865,
"kl": 0.0005468130111694336,
"learning_rate": 9.672327345550543e-07,
"loss": 0.0657,
"reward": 0.5281127206981182,
"reward_std": 0.9846171587705612,
"step": 22
},
{
"clip_fraction": 0.0,
"completion_length": 2408.8333282470703,
"dapo/avg_reward_std": 0.24506365811383282,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3703703780968984,
"dapo/num_sampling_attempts": 3.375,
"dapo/sampling_efficiency": 40.74404761904761,
"dapo/total_prompts_processed": 20.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.026285714285714287,
"grad_norm": 0.12457310408353806,
"kl": 0.001109391450881958,
"learning_rate": 9.610954559391704e-07,
"loss": 0.0304,
"reward": 0.6419337540864944,
"reward_std": 0.9689808040857315,
"step": 23
},
{
"clip_fraction": 0.0,
"completion_length": 2891.7777709960938,
"dapo/avg_reward_std": 0.2580765459848487,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4420289954413538,
"dapo/num_sampling_attempts": 2.875,
"dapo/sampling_efficiency": 51.785714285714285,
"dapo/total_prompts_processed": 17.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.027428571428571427,
"grad_norm": 0.09673310071229935,
"kl": 0.0006018728017807007,
"learning_rate": 9.54457320834625e-07,
"loss": 0.0143,
"reward": 0.4589955974370241,
"reward_std": 0.9405186697840691,
"step": 24
},
{
"clip_fraction": 0.0,
"completion_length": 2994.8159790039062,
"dapo/avg_reward_std": 0.24148962597052256,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3777777850627899,
"dapo/num_sampling_attempts": 3.75,
"dapo/sampling_efficiency": 34.99999999999999,
"dapo/total_prompts_processed": 22.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.02857142857142857,
"grad_norm": 0.12189235538244247,
"kl": 0.0021944642066955566,
"learning_rate": 9.473264167865171e-07,
"loss": 0.0869,
"reward": 0.4214114509522915,
"reward_std": 0.918621838092804,
"step": 25
},
{
"clip_fraction": 0.0,
"completion_length": 3106.2743530273438,
"dapo/avg_reward_std": 0.21211836412549018,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.23750000558793544,
"dapo/num_sampling_attempts": 5.0,
"dapo/sampling_efficiency": 25.729166666666664,
"dapo/total_prompts_processed": 30.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.029714285714285714,
"grad_norm": 0.11006143689155579,
"kl": 0.002092994749546051,
"learning_rate": 9.397114317029974e-07,
"loss": 0.0617,
"reward": 0.4296974149765447,
"reward_std": 0.9136241301894188,
"step": 26
},
{
"clip_fraction": 0.0,
"completion_length": 2601.388946533203,
"dapo/avg_reward_std": 0.24121128850513035,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.28240741416811943,
"dapo/num_sampling_attempts": 4.5,
"dapo/sampling_efficiency": 25.76388888888889,
"dapo/total_prompts_processed": 27.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.030857142857142857,
"grad_norm": 0.11345893889665604,
"kl": 0.003206908702850342,
"learning_rate": 9.316216432703916e-07,
"loss": 0.0926,
"reward": 0.5876726619899273,
"reward_std": 0.9382903277873993,
"step": 27
},
{
"clip_fraction": 0.0,
"completion_length": 2861.6180839538574,
"dapo/avg_reward_std": 0.23961352888080809,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3055555605226093,
"dapo/num_sampling_attempts": 4.5,
"dapo/sampling_efficiency": 27.94642857142857,
"dapo/total_prompts_processed": 27.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.032,
"grad_norm": 0.1445908397436142,
"kl": 0.0031346678733825684,
"learning_rate": 9.230669076497687e-07,
"loss": 0.0852,
"reward": 0.40619770623743534,
"reward_std": 0.9506878778338432,
"step": 28
},
{
"clip_fraction": 0.0,
"completion_length": 2729.1875,
"dapo/avg_reward_std": 0.24243796567122142,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.35555555919806164,
"dapo/num_sampling_attempts": 3.75,
"dapo/sampling_efficiency": 39.93055555555555,
"dapo/total_prompts_processed": 22.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.03314285714285714,
"grad_norm": 0.11093314737081528,
"kl": 0.0027089565992355347,
"learning_rate": 9.140576474687263e-07,
"loss": 0.0604,
"reward": 0.6693072468042374,
"reward_std": 0.9926005378365517,
"step": 29
},
{
"clip_fraction": 0.0,
"completion_length": 3155.7083740234375,
"dapo/avg_reward_std": 0.222336781601752,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.311827961956301,
"dapo/num_sampling_attempts": 3.875,
"dapo/sampling_efficiency": 42.93154761904761,
"dapo/total_prompts_processed": 23.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.03428571428571429,
"grad_norm": 0.08208812773227692,
"kl": 0.001552581787109375,
"learning_rate": 9.046048391230247e-07,
"loss": 0.0268,
"reward": 0.521108225453645,
"reward_std": 0.9469912871718407,
"step": 30
},
{
"clip_fraction": 0.0,
"completion_length": 2657.559036254883,
"dapo/avg_reward_std": 0.1865689324008094,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.259259263260497,
"dapo/num_sampling_attempts": 4.5,
"dapo/sampling_efficiency": 51.076388888888886,
"dapo/total_prompts_processed": 27.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.03542857142857143,
"grad_norm": 0.1316680908203125,
"kl": 0.009428024291992188,
"learning_rate": 8.9471999940354e-07,
"loss": 0.0745,
"reward": 0.6315789166837931,
"reward_std": 0.9327967762947083,
"step": 31
},
{
"clip_fraction": 0.0,
"completion_length": 3071.7535095214844,
"dapo/avg_reward_std": 0.3048748767375946,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.40000000298023225,
"dapo/num_sampling_attempts": 3.125,
"dapo/sampling_efficiency": 55.104166666666664,
"dapo/total_prompts_processed": 18.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.036571428571428574,
"grad_norm": 0.10442829132080078,
"kl": 0.0021753311157226562,
"learning_rate": 8.844151714648274e-07,
"loss": 0.0567,
"reward": 0.5447857324033976,
"reward_std": 0.921301856637001,
"step": 32
},
{
"clip_fraction": 0.0,
"completion_length": 3025.826416015625,
"dapo/avg_reward_std": 0.23097028769552708,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3385416748933494,
"dapo/num_sampling_attempts": 4.0,
"dapo/sampling_efficiency": 38.95833333333333,
"dapo/total_prompts_processed": 24.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.037714285714285714,
"grad_norm": 0.09167502820491791,
"kl": 0.003194093704223633,
"learning_rate": 8.737029101523929e-07,
"loss": 0.0612,
"reward": 0.5547973131760955,
"reward_std": 0.9730775579810143,
"step": 33
},
{
"clip_fraction": 0.0,
"completion_length": 2558.7812423706055,
"dapo/avg_reward_std": 0.2557758816650936,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3452381007373333,
"dapo/num_sampling_attempts": 3.5,
"dapo/sampling_efficiency": 42.113095238095234,
"dapo/total_prompts_processed": 21.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.038857142857142854,
"grad_norm": 0.11055821925401688,
"kl": 0.019285082817077637,
"learning_rate": 8.625962667065487e-07,
"loss": 0.0831,
"reward": 0.5826370492577553,
"reward_std": 0.9168377369642258,
"step": 34
},
{
"clip_fraction": 0.0,
"completion_length": 2909.2361602783203,
"dapo/avg_reward_std": 0.22593376713414345,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.30645161626800416,
"dapo/num_sampling_attempts": 3.875,
"dapo/sampling_efficiency": 49.598214285714285,
"dapo/total_prompts_processed": 23.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.04,
"grad_norm": 0.09941194951534271,
"kl": 0.004673004150390625,
"learning_rate": 8.511087728614862e-07,
"loss": 0.0581,
"reward": 0.5392080545425415,
"reward_std": 0.9793680757284164,
"step": 35
},
{
"clip_fraction": 0.0,
"completion_length": 2629.3333435058594,
"dapo/avg_reward_std": 0.2632370889186859,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.338541675824672,
"dapo/num_sampling_attempts": 4.0,
"dapo/sampling_efficiency": 29.513888888888886,
"dapo/total_prompts_processed": 24.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.04114285714285714,
"grad_norm": 0.17353901267051697,
"kl": 0.010207176208496094,
"learning_rate": 8.392544243589427e-07,
"loss": 0.0623,
"reward": 0.5811682712519541,
"reward_std": 0.9331383407115936,
"step": 36
},
{
"clip_fraction": 0.0,
"completion_length": 3220.9409790039062,
"dapo/avg_reward_std": 0.2187359256403787,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.29047619913305556,
"dapo/num_sampling_attempts": 4.375,
"dapo/sampling_efficiency": 36.25,
"dapo/total_prompts_processed": 26.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.04228571428571429,
"grad_norm": 0.10708803683519363,
"kl": 0.0023801326751708984,
"learning_rate": 8.270476638965461e-07,
"loss": 0.0657,
"reward": 0.48440539091825485,
"reward_std": 0.9014616012573242,
"step": 37
},
{
"clip_fraction": 0.0,
"completion_length": 3233.420135498047,
"dapo/avg_reward_std": 0.2624325007200241,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.30208333721384406,
"dapo/num_sampling_attempts": 4.0,
"dapo/sampling_efficiency": 30.119047619047617,
"dapo/total_prompts_processed": 24.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.04342857142857143,
"grad_norm": 0.0923333689570427,
"kl": 0.0031156539916992188,
"learning_rate": 8.145033635316128e-07,
"loss": 0.053,
"reward": 0.45120809972286224,
"reward_std": 0.9732232913374901,
"step": 38
},
{
"clip_fraction": 0.0,
"completion_length": 2787.031280517578,
"dapo/avg_reward_std": 0.1930955442644301,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.23412698933056422,
"dapo/num_sampling_attempts": 5.25,
"dapo/sampling_efficiency": 23.244047619047617,
"dapo/total_prompts_processed": 31.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.044571428571428574,
"grad_norm": 0.12707453966140747,
"kl": 0.006325244903564453,
"learning_rate": 8.01636806561836e-07,
"loss": 0.0905,
"reward": 0.5048832832835615,
"reward_std": 0.9330806732177734,
"step": 39
},
{
"clip_fraction": 0.0,
"completion_length": 2921.6180572509766,
"dapo/avg_reward_std": 0.25906160804960465,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3009259340663751,
"dapo/num_sampling_attempts": 4.5,
"dapo/sampling_efficiency": 31.562499999999996,
"dapo/total_prompts_processed": 27.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.045714285714285714,
"grad_norm": 0.1152920126914978,
"kl": 0.004504203796386719,
"learning_rate": 7.884636689049422e-07,
"loss": 0.0443,
"reward": 0.3671413380652666,
"reward_std": 0.9126428663730621,
"step": 40
},
{
"clip_fraction": 0.0,
"completion_length": 3100.8194732666016,
"dapo/avg_reward_std": 0.26266304695087933,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3985507280930229,
"dapo/num_sampling_attempts": 2.875,
"dapo/sampling_efficiency": 59.895833333333336,
"dapo/total_prompts_processed": 17.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.046857142857142854,
"grad_norm": 0.1462322324514389,
"kl": 0.0058536529541015625,
"learning_rate": 7.75e-07,
"loss": 0.0836,
"reward": 0.6537042334675789,
"reward_std": 0.9643120691180229,
"step": 41
},
{
"clip_fraction": 0.0,
"completion_length": 3083.8541870117188,
"dapo/avg_reward_std": 0.2028282030540354,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.27941177215646296,
"dapo/num_sampling_attempts": 4.25,
"dapo/sampling_efficiency": 34.61309523809524,
"dapo/total_prompts_processed": 25.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.048,
"grad_norm": 0.11620575189590454,
"kl": 0.005963563919067383,
"learning_rate": 7.612622032536507e-07,
"loss": 0.0756,
"reward": 0.6132493373006582,
"reward_std": 0.9271278157830238,
"step": 42
},
{
"clip_fraction": 0.0,
"completion_length": 2860.6840209960938,
"dapo/avg_reward_std": 0.2537354379892349,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.31666667262713116,
"dapo/num_sampling_attempts": 3.75,
"dapo/sampling_efficiency": 27.916666666666664,
"dapo/total_prompts_processed": 22.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.04914285714285714,
"grad_norm": 0.15706917643547058,
"kl": 0.012288570404052734,
"learning_rate": 7.472670160550848e-07,
"loss": 0.0864,
"reward": 0.4896182883530855,
"reward_std": 0.9406783953309059,
"step": 43
},
{
"clip_fraction": 0.0,
"completion_length": 3230.951416015625,
"dapo/avg_reward_std": 0.2785276919603348,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4047619104385376,
"dapo/num_sampling_attempts": 3.5,
"dapo/sampling_efficiency": 35.20833333333333,
"dapo/total_prompts_processed": 21.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.05028571428571429,
"grad_norm": 0.10281670838594437,
"kl": 0.0028905868530273438,
"learning_rate": 7.330314893841101e-07,
"loss": 0.0474,
"reward": 0.5266857808455825,
"reward_std": 0.9769049882888794,
"step": 44
},
{
"clip_fraction": 0.0,
"completion_length": 2598.888885498047,
"dapo/avg_reward_std": 0.25520460651471066,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3205128231873879,
"dapo/num_sampling_attempts": 3.25,
"dapo/sampling_efficiency": 54.61309523809524,
"dapo/total_prompts_processed": 19.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.05142857142857143,
"grad_norm": 0.20818237960338593,
"kl": 0.0046825408935546875,
"learning_rate": 7.185729670371604e-07,
"loss": 0.111,
"reward": 0.8208948634564877,
"reward_std": 0.9365335553884506,
"step": 45
},
{
"clip_fraction": 0.0,
"completion_length": 2529.66316986084,
"dapo/avg_reward_std": 0.23859836988978916,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2916666753590107,
"dapo/num_sampling_attempts": 4.5,
"dapo/sampling_efficiency": 25.535714285714285,
"dapo/total_prompts_processed": 27.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.052571428571428575,
"grad_norm": 0.12924660742282867,
"kl": 0.05440711975097656,
"learning_rate": 7.039090644965509e-07,
"loss": 0.058,
"reward": 0.5307688321918249,
"reward_std": 0.9391194358468056,
"step": 46
},
{
"clip_fraction": 0.0,
"completion_length": 2737.288230895996,
"dapo/avg_reward_std": 0.25754969901052016,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3678160998327979,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 41.14583333333333,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.053714285714285714,
"grad_norm": 0.1452113687992096,
"kl": 0.01877737045288086,
"learning_rate": 6.890576474687263e-07,
"loss": 0.0601,
"reward": 0.5596560873091221,
"reward_std": 0.9911476969718933,
"step": 47
},
{
"clip_fraction": 0.0,
"completion_length": 2543.0694885253906,
"dapo/avg_reward_std": 0.2434165603839434,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3717948794364929,
"dapo/num_sampling_attempts": 3.25,
"dapo/sampling_efficiency": 34.37499999999999,
"dapo/total_prompts_processed": 19.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.054857142857142854,
"grad_norm": 0.15664616227149963,
"kl": 0.008816719055175781,
"learning_rate": 6.740368101176495e-07,
"loss": 0.0783,
"reward": 0.7667456082999706,
"reward_std": 0.9330208897590637,
"step": 48
},
{
"clip_fraction": 0.0,
"completion_length": 3054.357666015625,
"dapo/avg_reward_std": 0.16933719928448016,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.22222222693455526,
"dapo/num_sampling_attempts": 4.875,
"dapo/sampling_efficiency": 26.5625,
"dapo/total_prompts_processed": 29.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.056,
"grad_norm": 0.13884593546390533,
"kl": 0.00569915771484375,
"learning_rate": 6.588648530198504e-07,
"loss": 0.0645,
"reward": 0.7750914767384529,
"reward_std": 0.9781928732991219,
"step": 49
},
{
"clip_fraction": 0.0,
"completion_length": 3030.9652709960938,
"dapo/avg_reward_std": 0.2089548914721518,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.28282828629016876,
"dapo/num_sampling_attempts": 4.125,
"dapo/sampling_efficiency": 33.779761904761905,
"dapo/total_prompts_processed": 24.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.05714285714285714,
"grad_norm": 0.13095000386238098,
"kl": 0.005908966064453125,
"learning_rate": 6.435602608679916e-07,
"loss": 0.0854,
"reward": 0.7626989148557186,
"reward_std": 0.9684056863188744,
"step": 50
},
{
"clip_fraction": 0.0,
"completion_length": 3176.8819274902344,
"dapo/avg_reward_std": 0.2258962235516972,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.29629630057348144,
"dapo/num_sampling_attempts": 4.5,
"dapo/sampling_efficiency": 33.25892857142857,
"dapo/total_prompts_processed": 27.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.05828571428571429,
"grad_norm": 0.11041354387998581,
"kl": 0.002262115478515625,
"learning_rate": 6.281416799501187e-07,
"loss": 0.0892,
"reward": 0.6493857521563768,
"reward_std": 0.9608959034085274,
"step": 51
},
{
"clip_fraction": 0.0,
"completion_length": 2991.208366394043,
"dapo/avg_reward_std": 0.23346692004374095,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3392857201397419,
"dapo/num_sampling_attempts": 3.5,
"dapo/sampling_efficiency": 52.70833333333333,
"dapo/total_prompts_processed": 21.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.05942857142857143,
"grad_norm": 0.13827170431613922,
"kl": 0.014558792114257812,
"learning_rate": 6.126278954320294e-07,
"loss": 0.0435,
"reward": 0.5274152141064405,
"reward_std": 0.9937505125999451,
"step": 52
},
{
"clip_fraction": 0.0,
"completion_length": 2921.013946533203,
"dapo/avg_reward_std": 0.2715419438378564,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3390804626818361,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 51.5625,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.060571428571428575,
"grad_norm": 0.09735170006752014,
"kl": 0.009172439575195312,
"learning_rate": 5.97037808470444e-07,
"loss": 0.0541,
"reward": 0.7217882052063942,
"reward_std": 0.9594404622912407,
"step": 53
},
{
"clip_fraction": 0.0,
"completion_length": 3133.46875,
"dapo/avg_reward_std": 0.2624934350068753,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.35256411077884525,
"dapo/num_sampling_attempts": 3.25,
"dapo/sampling_efficiency": 41.041666666666664,
"dapo/total_prompts_processed": 19.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.061714285714285715,
"grad_norm": 0.10414379835128784,
"kl": 0.010915756225585938,
"learning_rate": 5.813904131848564e-07,
"loss": 0.061,
"reward": 0.5302782151848078,
"reward_std": 0.9707583636045456,
"step": 54
},
{
"clip_fraction": 0.0,
"completion_length": 3010.5938110351562,
"dapo/avg_reward_std": 0.21664191484451295,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.24444444941149818,
"dapo/num_sampling_attempts": 5.625,
"dapo/sampling_efficiency": 19.791666666666664,
"dapo/total_prompts_processed": 33.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.06285714285714286,
"grad_norm": 0.11232081800699234,
"kl": 0.012262344360351562,
"learning_rate": 5.657047735161255e-07,
"loss": 0.0561,
"reward": 0.5284321270883083,
"reward_std": 0.9165859594941139,
"step": 55
},
{
"clip_fraction": 0.0,
"completion_length": 3144.951416015625,
"dapo/avg_reward_std": 0.2279102834207671,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.34523809807641165,
"dapo/num_sampling_attempts": 3.5,
"dapo/sampling_efficiency": 45.32738095238095,
"dapo/total_prompts_processed": 21.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.064,
"grad_norm": 0.13161872327327728,
"kl": 0.007735252380371094,
"learning_rate": 5.5e-07,
"loss": 0.0717,
"reward": 0.6519734226167202,
"reward_std": 0.9642440155148506,
"step": 56
},
{
"clip_fraction": 0.0,
"completion_length": 3222.6111450195312,
"dapo/avg_reward_std": 0.2675224413042483,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4492753724689069,
"dapo/num_sampling_attempts": 2.875,
"dapo/sampling_efficiency": 45.535714285714285,
"dapo/total_prompts_processed": 17.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.06514285714285714,
"grad_norm": 0.09332293272018433,
"kl": 0.0064525604248046875,
"learning_rate": 5.342952264838747e-07,
"loss": 0.0302,
"reward": 0.5501165799796581,
"reward_std": 0.9585564360022545,
"step": 57
},
{
"clip_fraction": 0.0,
"completion_length": 2679.9236907958984,
"dapo/avg_reward_std": 0.17708626160254845,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.24358974741055414,
"dapo/num_sampling_attempts": 4.875,
"dapo/sampling_efficiency": 28.91865079365079,
"dapo/total_prompts_processed": 29.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.06628571428571428,
"grad_norm": 0.16309793293476105,
"kl": 0.01690673828125,
"learning_rate": 5.186095868151436e-07,
"loss": 0.0846,
"reward": 0.8469000309705734,
"reward_std": 0.9497043192386627,
"step": 58
},
{
"clip_fraction": 0.0,
"completion_length": 2847.048629760742,
"dapo/avg_reward_std": 0.2622834824282548,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3563218476443455,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 29.999999999999993,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.06742857142857143,
"grad_norm": 0.09638360142707825,
"kl": 0.0057086944580078125,
"learning_rate": 5.02962191529556e-07,
"loss": 0.0634,
"reward": 0.6089529246091843,
"reward_std": 0.9450863003730774,
"step": 59
},
{
"clip_fraction": 0.0,
"completion_length": 3119.9132385253906,
"dapo/avg_reward_std": 0.19833819533503333,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2674418656631958,
"dapo/num_sampling_attempts": 5.375,
"dapo/sampling_efficiency": 29.563492063492063,
"dapo/total_prompts_processed": 32.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.06857142857142857,
"grad_norm": 0.1252850890159607,
"kl": 0.008715629577636719,
"learning_rate": 4.873721045679706e-07,
"loss": 0.0666,
"reward": 0.5249154977500439,
"reward_std": 0.947566568851471,
"step": 60
},
{
"clip_fraction": 0.0,
"completion_length": 2844.795181274414,
"dapo/avg_reward_std": 0.2648707001373686,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.35632184610284606,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 39.791666666666664,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.06971428571428571,
"grad_norm": 0.10366301238536835,
"kl": 0.056069374084472656,
"learning_rate": 4.7185832004988133e-07,
"loss": 0.037,
"reward": 0.5161248315125704,
"reward_std": 0.9692364558577538,
"step": 61
},
{
"clip_fraction": 0.0,
"completion_length": 3053.951446533203,
"dapo/avg_reward_std": 0.21576767837679064,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.25225225574261434,
"dapo/num_sampling_attempts": 4.625,
"dapo/sampling_efficiency": 37.013888888888886,
"dapo/total_prompts_processed": 27.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.07085714285714285,
"grad_norm": 0.14441759884357452,
"kl": 0.009164810180664062,
"learning_rate": 4.5643973913200837e-07,
"loss": 0.0609,
"reward": 0.6510533541440964,
"reward_std": 0.9361515268683434,
"step": 62
},
{
"clip_fraction": 0.0,
"completion_length": 3326.781280517578,
"dapo/avg_reward_std": 0.2158982500885472,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3181818254066236,
"dapo/num_sampling_attempts": 4.125,
"dapo/sampling_efficiency": 44.49404761904761,
"dapo/total_prompts_processed": 24.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.072,
"grad_norm": 0.12127737700939178,
"kl": 0.031108856201171875,
"learning_rate": 4.4113514698014953e-07,
"loss": 0.0463,
"reward": 0.45860649459064007,
"reward_std": 0.9209225550293922,
"step": 63
},
{
"clip_fraction": 0.0,
"completion_length": 3208.6319885253906,
"dapo/avg_reward_std": 0.28419332668699065,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3563218440475135,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 53.591269841269835,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.07314285714285715,
"grad_norm": 0.13326792418956757,
"kl": 0.0061321258544921875,
"learning_rate": 4.2596318988235037e-07,
"loss": 0.0614,
"reward": 0.5644803196191788,
"reward_std": 0.9919605851173401,
"step": 64
},
{
"clip_fraction": 0.0,
"completion_length": 2597.437530517578,
"dapo/avg_reward_std": 0.2766759342380932,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3928571529686451,
"dapo/num_sampling_attempts": 3.5,
"dapo/sampling_efficiency": 32.08333333333333,
"dapo/total_prompts_processed": 21.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.07428571428571429,
"grad_norm": 0.10434358566999435,
"kl": 0.049472808837890625,
"learning_rate": 4.1094235253127374e-07,
"loss": 0.0312,
"reward": 0.393868962302804,
"reward_std": 0.9459580257534981,
"step": 65
},
{
"clip_fraction": 0.0,
"completion_length": 2630.0833587646484,
"dapo/avg_reward_std": 0.25837596147148695,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.35802469595714853,
"dapo/num_sampling_attempts": 3.375,
"dapo/sampling_efficiency": 31.666666666666664,
"dapo/total_prompts_processed": 20.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.07542857142857143,
"grad_norm": 0.11327924579381943,
"kl": 0.23560714721679688,
"learning_rate": 3.9609093550344907e-07,
"loss": 0.0563,
"reward": 0.674448698759079,
"reward_std": 0.9591537117958069,
"step": 66
},
{
"clip_fraction": 0.0,
"completion_length": 3343.3159790039062,
"dapo/avg_reward_std": 0.2785816714167595,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.41666667101283866,
"dapo/num_sampling_attempts": 3.0,
"dapo/sampling_efficiency": 42.08333333333333,
"dapo/total_prompts_processed": 18.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.07657142857142857,
"grad_norm": 0.10341926664113998,
"kl": 0.005463600158691406,
"learning_rate": 3.8142703296283953e-07,
"loss": 0.0653,
"reward": 0.42072685062885284,
"reward_std": 0.9649706333875656,
"step": 67
},
{
"clip_fraction": 0.0,
"completion_length": 2880.0590438842773,
"dapo/avg_reward_std": 0.2447407204243872,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2916666728754838,
"dapo/num_sampling_attempts": 4.5,
"dapo/sampling_efficiency": 28.591269841269842,
"dapo/total_prompts_processed": 27.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.07771428571428571,
"grad_norm": 0.15764088928699493,
"kl": 0.011991500854492188,
"learning_rate": 3.6696851061588994e-07,
"loss": 0.1004,
"reward": 0.537701515480876,
"reward_std": 0.9107673466205597,
"step": 68
},
{
"clip_fraction": 0.0,
"completion_length": 2839.0069580078125,
"dapo/avg_reward_std": 0.21828406437849388,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.26495726865071517,
"dapo/num_sampling_attempts": 4.875,
"dapo/sampling_efficiency": 32.39583333333333,
"dapo/total_prompts_processed": 29.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.07885714285714286,
"grad_norm": 0.1426348239183426,
"kl": 0.16588592529296875,
"learning_rate": 3.5273298394491515e-07,
"loss": 0.065,
"reward": 0.5752462260425091,
"reward_std": 0.9265653118491173,
"step": 69
},
{
"clip_fraction": 0.0,
"completion_length": 3154.9479370117188,
"dapo/avg_reward_std": 0.24686445650600253,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.40476191185769583,
"dapo/num_sampling_attempts": 2.625,
"dapo/sampling_efficiency": 58.75,
"dapo/total_prompts_processed": 15.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.08,
"grad_norm": 0.09042708575725555,
"kl": 0.015224456787109375,
"learning_rate": 3.387377967463493e-07,
"loss": 0.0278,
"reward": 0.5091124139726162,
"reward_std": 0.9951601624488831,
"step": 70
},
{
"clip_fraction": 0.0,
"completion_length": 2558.7118377685547,
"dapo/avg_reward_std": 0.24922772922686168,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.35119048452803064,
"dapo/num_sampling_attempts": 3.5,
"dapo/sampling_efficiency": 41.979166666666664,
"dapo/total_prompts_processed": 21.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.08114285714285714,
"grad_norm": 0.18424691259860992,
"kl": 0.012338638305664062,
"learning_rate": 3.250000000000001e-07,
"loss": 0.135,
"reward": 0.80832345969975,
"reward_std": 0.9256910160183907,
"step": 71
},
{
"clip_fraction": 0.0,
"completion_length": 2797.5659790039062,
"dapo/avg_reward_std": 0.3421325541677929,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4603174655210404,
"dapo/num_sampling_attempts": 2.625,
"dapo/sampling_efficiency": 52.916666666666664,
"dapo/total_prompts_processed": 15.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.08228571428571428,
"grad_norm": 0.10505988448858261,
"kl": 0.027385711669921875,
"learning_rate": 3.115363310950578e-07,
"loss": 0.0435,
"reward": 0.5198174491524696,
"reward_std": 0.932801865041256,
"step": 72
},
{
"clip_fraction": 0.0,
"completion_length": 3024.5243225097656,
"dapo/avg_reward_std": 0.26287247288611626,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.33333334038334506,
"dapo/num_sampling_attempts": 3.875,
"dapo/sampling_efficiency": 40.0297619047619,
"dapo/total_prompts_processed": 23.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.08342857142857144,
"grad_norm": 0.09084703773260117,
"kl": 0.09223747253417969,
"learning_rate": 2.9836319343816397e-07,
"loss": 0.0314,
"reward": 0.3449883237481117,
"reward_std": 0.9521737843751907,
"step": 73
},
{
"clip_fraction": 0.0,
"completion_length": 2648.7257080078125,
"dapo/avg_reward_std": 0.2678213362340574,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.38271605582148943,
"dapo/num_sampling_attempts": 3.375,
"dapo/sampling_efficiency": 40.0,
"dapo/total_prompts_processed": 20.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.08457142857142858,
"grad_norm": 0.15155129134655,
"kl": 1.0743579864501953,
"learning_rate": 2.854966364683872e-07,
"loss": 0.0851,
"reward": 0.7227161657065153,
"reward_std": 0.9239719212055206,
"step": 74
},
{
"clip_fraction": 0.0,
"completion_length": 2659.388900756836,
"dapo/avg_reward_std": 0.28101804742106684,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.37037037699310865,
"dapo/num_sampling_attempts": 3.375,
"dapo/sampling_efficiency": 34.791666666666664,
"dapo/total_prompts_processed": 20.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.08571428571428572,
"grad_norm": 0.1127755343914032,
"kl": 0.02587890625,
"learning_rate": 2.729523361034538e-07,
"loss": 0.0523,
"reward": 0.7372388476505876,
"reward_std": 0.918749064207077,
"step": 75
},
{
"clip_fraction": 0.0,
"completion_length": 2402.364585876465,
"dapo/avg_reward_std": 0.26893362632164586,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.36538462111583125,
"dapo/num_sampling_attempts": 3.25,
"dapo/sampling_efficiency": 48.854166666666664,
"dapo/total_prompts_processed": 19.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.08685714285714285,
"grad_norm": 0.14693324267864227,
"kl": 0.12501144409179688,
"learning_rate": 2.6074557564105724e-07,
"loss": 0.0747,
"reward": 0.6182113699615002,
"reward_std": 0.9421844929456711,
"step": 76
},
{
"clip_fraction": 0.0,
"completion_length": 2970.1146392822266,
"dapo/avg_reward_std": 0.2118390180170536,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.25000000521540644,
"dapo/num_sampling_attempts": 5.0,
"dapo/sampling_efficiency": 30.53571428571428,
"dapo/total_prompts_processed": 30.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.088,
"grad_norm": 0.12072475999593735,
"kl": 0.05495643615722656,
"learning_rate": 2.488912271385139e-07,
"loss": 0.0498,
"reward": 0.46035338938236237,
"reward_std": 0.9146044701337814,
"step": 77
},
{
"clip_fraction": 0.0,
"completion_length": 2959.0972442626953,
"dapo/avg_reward_std": 0.13832776496807733,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.1631944477558136,
"dapo/num_sampling_attempts": 6.0,
"dapo/sampling_efficiency": 30.868055555555557,
"dapo/total_prompts_processed": 36.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.08914285714285715,
"grad_norm": 0.14289411902427673,
"kl": 0.23297691345214844,
"learning_rate": 2.374037332934512e-07,
"loss": 0.0742,
"reward": 0.49553669430315495,
"reward_std": 0.9023259580135345,
"step": 78
},
{
"clip_fraction": 0.0,
"completion_length": 2935.8159942626953,
"dapo/avg_reward_std": 0.2931290553374724,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.44696970080787485,
"dapo/num_sampling_attempts": 2.75,
"dapo/sampling_efficiency": 58.854166666666664,
"dapo/total_prompts_processed": 16.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.09028571428571429,
"grad_norm": 0.13638050854206085,
"kl": 0.03482818603515625,
"learning_rate": 2.2629708984760706e-07,
"loss": 0.0609,
"reward": 0.4563083341345191,
"reward_std": 0.9425384849309921,
"step": 79
},
{
"clip_fraction": 0.0,
"completion_length": 3111.340301513672,
"dapo/avg_reward_std": 0.22562272967518987,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3063063154349456,
"dapo/num_sampling_attempts": 4.625,
"dapo/sampling_efficiency": 28.819444444444446,
"dapo/total_prompts_processed": 27.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.09142857142857143,
"grad_norm": 0.10739335417747498,
"kl": 0.008031845092773438,
"learning_rate": 2.1558482853517253e-07,
"loss": 0.0574,
"reward": 0.6980459969490767,
"reward_std": 0.9673654958605766,
"step": 80
},
{
"clip_fraction": 0.0,
"completion_length": 2921.6111450195312,
"dapo/avg_reward_std": 0.2788313144239886,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3333333386429425,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 34.27083333333333,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.09257142857142857,
"grad_norm": 0.18038466572761536,
"kl": 0.016963958740234375,
"learning_rate": 2.0528000059645995e-07,
"loss": 0.0958,
"reward": 0.6405055914074183,
"reward_std": 0.9560460075736046,
"step": 81
},
{
"clip_fraction": 0.0,
"completion_length": 3220.687530517578,
"dapo/avg_reward_std": 0.1744266465688363,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2307692349721224,
"dapo/num_sampling_attempts": 4.875,
"dapo/sampling_efficiency": 26.666666666666664,
"dapo/total_prompts_processed": 29.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.09371428571428571,
"grad_norm": 0.12377161532640457,
"kl": 0.009552001953125,
"learning_rate": 1.9539516087697517e-07,
"loss": 0.061,
"reward": 0.5073397234082222,
"reward_std": 0.9641925543546677,
"step": 82
},
{
"clip_fraction": 0.0,
"completion_length": 2663.1597442626953,
"dapo/avg_reward_std": 0.2496542421079451,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3333333413447103,
"dapo/num_sampling_attempts": 3.875,
"dapo/sampling_efficiency": 40.451388888888886,
"dapo/total_prompts_processed": 23.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.09485714285714286,
"grad_norm": 0.1273493468761444,
"kl": 0.04001617431640625,
"learning_rate": 1.8594235253127372e-07,
"loss": 0.0521,
"reward": 0.49824655149132013,
"reward_std": 0.9464590474963188,
"step": 83
},
{
"clip_fraction": 0.0,
"completion_length": 3073.2986450195312,
"dapo/avg_reward_std": 0.27911247177557513,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4318181892687624,
"dapo/num_sampling_attempts": 2.75,
"dapo/sampling_efficiency": 60.3125,
"dapo/total_prompts_processed": 16.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.096,
"grad_norm": 0.14399568736553192,
"kl": 0.010408401489257812,
"learning_rate": 1.7693309235023127e-07,
"loss": 0.0657,
"reward": 0.624765045940876,
"reward_std": 0.954634428024292,
"step": 84
},
{
"clip_fraction": 0.0,
"completion_length": 3073.7535095214844,
"dapo/avg_reward_std": 0.17655213298024358,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.24324324888152046,
"dapo/num_sampling_attempts": 4.625,
"dapo/sampling_efficiency": 29.82142857142857,
"dapo/total_prompts_processed": 27.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.09714285714285714,
"grad_norm": 0.12462300807237625,
"kl": 0.007053375244140625,
"learning_rate": 1.6837835672960831e-07,
"loss": 0.062,
"reward": 0.6820014184340835,
"reward_std": 0.8695997595787048,
"step": 85
},
{
"clip_fraction": 0.0,
"completion_length": 2741.204849243164,
"dapo/avg_reward_std": 0.21997538357973098,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2666666731238365,
"dapo/num_sampling_attempts": 5.0,
"dapo/sampling_efficiency": 27.896825396825395,
"dapo/total_prompts_processed": 30.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.09828571428571428,
"grad_norm": 0.14978615939617157,
"kl": 0.025630950927734375,
"learning_rate": 1.6028856829700258e-07,
"loss": 0.0585,
"reward": 0.5304304007440805,
"reward_std": 0.9523463025689125,
"step": 86
},
{
"clip_fraction": 0.0,
"completion_length": 3223.7257080078125,
"dapo/avg_reward_std": 0.27104776600996655,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3666666716337204,
"dapo/num_sampling_attempts": 3.75,
"dapo/sampling_efficiency": 43.333333333333336,
"dapo/total_prompts_processed": 22.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.09942857142857142,
"grad_norm": 0.1086694523692131,
"kl": 0.009660720825195312,
"learning_rate": 1.5267358321348285e-07,
"loss": 0.058,
"reward": 0.5936380252242088,
"reward_std": 0.919317290186882,
"step": 87
},
{
"clip_fraction": 0.0,
"completion_length": 2934.5833740234375,
"dapo/avg_reward_std": 0.23462909049001232,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3333333376152762,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 52.84722222222222,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.10057142857142858,
"grad_norm": 0.14571106433868408,
"kl": 0.02588653564453125,
"learning_rate": 1.4554267916537495e-07,
"loss": 0.0741,
"reward": 0.5716092269867659,
"reward_std": 0.9475584626197815,
"step": 88
},
{
"clip_fraction": 0.0,
"completion_length": 3017.2673950195312,
"dapo/avg_reward_std": 0.22858241697152457,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3333333383003871,
"dapo/num_sampling_attempts": 3.75,
"dapo/sampling_efficiency": 45.416666666666664,
"dapo/total_prompts_processed": 22.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.10171428571428572,
"grad_norm": 0.10647116601467133,
"kl": 0.034389495849609375,
"learning_rate": 1.3890454406082956e-07,
"loss": 0.0586,
"reward": 0.5356123449746519,
"reward_std": 0.9426311627030373,
"step": 89
},
{
"clip_fraction": 0.0,
"completion_length": 2540.9548950195312,
"dapo/avg_reward_std": 0.16863613526026408,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.22592593100335862,
"dapo/num_sampling_attempts": 5.625,
"dapo/sampling_efficiency": 28.75,
"dapo/total_prompts_processed": 33.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.10285714285714286,
"grad_norm": 0.1207195371389389,
"kl": 0.7361793518066406,
"learning_rate": 1.3276726544494571e-07,
"loss": 0.0349,
"reward": 0.750616230070591,
"reward_std": 1.0088519006967545,
"step": 90
},
{
"clip_fraction": 0.0,
"completion_length": 3054.5833435058594,
"dapo/avg_reward_std": 0.2058313423767686,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.28645834047347307,
"dapo/num_sampling_attempts": 4.0,
"dapo/sampling_efficiency": 41.36904761904762,
"dapo/total_prompts_processed": 24.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.104,
"grad_norm": 0.10659411549568176,
"kl": 0.009166717529296875,
"learning_rate": 1.2713832064634125e-07,
"loss": 0.06,
"reward": 0.49192704539746046,
"reward_std": 0.8957021087408066,
"step": 91
},
{
"clip_fraction": 0.0,
"completion_length": 2958.43408203125,
"dapo/avg_reward_std": 0.317311546076899,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.47101450160793634,
"dapo/num_sampling_attempts": 2.875,
"dapo/sampling_efficiency": 44.166666666666664,
"dapo/total_prompts_processed": 17.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.10514285714285715,
"grad_norm": 0.1002211645245552,
"kl": 0.00801849365234375,
"learning_rate": 1.220245676671809e-07,
"loss": 0.0508,
"reward": 0.7598672257736325,
"reward_std": 0.9218961223959923,
"step": 92
},
{
"clip_fraction": 0.0,
"completion_length": 3257.7881774902344,
"dapo/avg_reward_std": 0.2586492033941405,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.36309524678758215,
"dapo/num_sampling_attempts": 3.5,
"dapo/sampling_efficiency": 41.5625,
"dapo/total_prompts_processed": 21.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.10628571428571429,
"grad_norm": 0.12036111950874329,
"kl": 0.01373291015625,
"learning_rate": 1.1743223682775649e-07,
"loss": 0.0459,
"reward": 0.5575436241924763,
"reward_std": 0.9431066736578941,
"step": 93
},
{
"clip_fraction": 0.0,
"completion_length": 2740.1284942626953,
"dapo/avg_reward_std": 0.2375115204241968,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.35483871688765867,
"dapo/num_sampling_attempts": 3.875,
"dapo/sampling_efficiency": 29.999999999999996,
"dapo/total_prompts_processed": 23.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.10742857142857143,
"grad_norm": 0.14863841235637665,
"kl": 0.032642364501953125,
"learning_rate": 1.1336692317580158e-07,
"loss": 0.0742,
"reward": 0.5738632343709469,
"reward_std": 0.9468542039394379,
"step": 94
},
{
"clip_fraction": 0.0,
"completion_length": 2899.937515258789,
"dapo/avg_reward_std": 0.2901096656208947,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4206349246558689,
"dapo/num_sampling_attempts": 2.625,
"dapo/sampling_efficiency": 64.58333333333333,
"dapo/total_prompts_processed": 15.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.10857142857142857,
"grad_norm": 0.13841120898723602,
"kl": 0.012683868408203125,
"learning_rate": 1.0983357966978745e-07,
"loss": 0.0653,
"reward": 0.6555321607738733,
"reward_std": 0.9674765914678574,
"step": 95
},
{
"clip_fraction": 0.0,
"completion_length": 2926.1910247802734,
"dapo/avg_reward_std": 0.18252932499436772,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2696078485425781,
"dapo/num_sampling_attempts": 4.25,
"dapo/sampling_efficiency": 37.82738095238095,
"dapo/total_prompts_processed": 25.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.10971428571428571,
"grad_norm": 0.13530230522155762,
"kl": 0.05282402038574219,
"learning_rate": 1.068365111445064e-07,
"loss": 0.0762,
"reward": 0.5449853939935565,
"reward_std": 0.952080488204956,
"step": 96
},
{
"clip_fraction": 0.0,
"completion_length": 2798.031280517578,
"dapo/avg_reward_std": 0.23633464597738707,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3461538478732109,
"dapo/num_sampling_attempts": 3.25,
"dapo/sampling_efficiency": 38.541666666666664,
"dapo/total_prompts_processed": 19.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.11085714285714286,
"grad_norm": 0.1648494303226471,
"kl": 0.025691986083984375,
"learning_rate": 1.0437936906629334e-07,
"loss": 0.0939,
"reward": 0.673285935074091,
"reward_std": 0.979133740067482,
"step": 97
},
{
"clip_fraction": 0.0,
"completion_length": 3240.7361450195312,
"dapo/avg_reward_std": 0.2805523918225215,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3653846193964665,
"dapo/num_sampling_attempts": 3.25,
"dapo/sampling_efficiency": 54.513888888888886,
"dapo/total_prompts_processed": 19.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.112,
"grad_norm": 0.12132810056209564,
"kl": 0.01453399658203125,
"learning_rate": 1.0246514708427701e-07,
"loss": 0.0557,
"reward": 0.5335402796044946,
"reward_std": 0.9456770345568657,
"step": 98
},
{
"clip_fraction": 0.0,
"completion_length": 2497.9132499694824,
"dapo/avg_reward_std": 0.2488528937101364,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3888888942698638,
"dapo/num_sampling_attempts": 3.0,
"dapo/sampling_efficiency": 58.05555555555555,
"dapo/total_prompts_processed": 18.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.11314285714285714,
"grad_norm": 0.24999241530895233,
"kl": 0.028301239013671875,
"learning_rate": 1.0109617738307911e-07,
"loss": 0.1037,
"reward": 0.785055335611105,
"reward_std": 0.9553829357028008,
"step": 99
},
{
"clip_fraction": 0.0,
"completion_length": 3039.6284790039062,
"dapo/avg_reward_std": 0.2903642791012923,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.38194445086022216,
"dapo/num_sampling_attempts": 3.0,
"dapo/sampling_efficiency": 46.24999999999999,
"dapo/total_prompts_processed": 18.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.11428571428571428,
"grad_norm": 0.14126254618167877,
"kl": 0.014410018920898438,
"learning_rate": 1.002741278414069e-07,
"loss": 0.0643,
"reward": 0.4948624651879072,
"reward_std": 0.9704382866621017,
"step": 100
},
{
"clip_fraction": 0.0,
"completion_length": 3318.513916015625,
"dapo/avg_reward_std": 0.22042016812733242,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.29523810063089645,
"dapo/num_sampling_attempts": 4.375,
"dapo/sampling_efficiency": 28.645833333333332,
"dapo/total_prompts_processed": 26.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.11542857142857142,
"grad_norm": 0.22150926291942596,
"kl": 0.011791229248046875,
"learning_rate": 1e-07,
"loss": 0.0631,
"reward": 0.46524661034345627,
"reward_std": 0.9665903598070145,
"step": 101
},
{
"clip_fraction": 0.0,
"completion_length": 3083.875,
"dapo/avg_reward_std": 0.21663353669232335,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3390804637095024,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 39.93055555555555,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.11657142857142858,
"grad_norm": 0.16289636492729187,
"kl": 0.008695602416992188,
"learning_rate": 6.203955092681039e-07,
"loss": 0.098,
"reward": 0.8642945289611816,
"reward_std": 1.031830094754696,
"step": 102
},
{
"clip_fraction": 0.0,
"completion_length": 3364.701446533203,
"dapo/avg_reward_std": 0.24887267331923207,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3172043090866458,
"dapo/num_sampling_attempts": 3.875,
"dapo/sampling_efficiency": 31.69642857142857,
"dapo/total_prompts_processed": 23.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.11771428571428572,
"grad_norm": 0.08825232833623886,
"kl": 0.009820938110351562,
"learning_rate": 6.126278954320294e-07,
"loss": 0.0178,
"reward": 0.3627179069444537,
"reward_std": 0.8941863179206848,
"step": 103
},
{
"clip_fraction": 0.0,
"completion_length": 3255.3055725097656,
"dapo/avg_reward_std": 0.24808817549988074,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.33950618074999916,
"dapo/num_sampling_attempts": 3.375,
"dapo/sampling_efficiency": 38.95833333333333,
"dapo/total_prompts_processed": 20.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.11885714285714286,
"grad_norm": 0.13638561964035034,
"kl": 0.011318206787109375,
"learning_rate": 6.048412045323164e-07,
"loss": 0.0643,
"reward": 0.5508436523377895,
"reward_std": 0.9409585371613503,
"step": 104
},
{
"clip_fraction": 0.0,
"completion_length": 3270.4930419921875,
"dapo/avg_reward_std": 0.23700118958950042,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3166666706403097,
"dapo/num_sampling_attempts": 3.75,
"dapo/sampling_efficiency": 61.07142857142857,
"dapo/total_prompts_processed": 22.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.12,
"grad_norm": 0.10357476025819778,
"kl": 0.0117034912109375,
"learning_rate": 5.97037808470444e-07,
"loss": 0.0278,
"reward": 0.4137148158624768,
"reward_std": 0.9205853268504143,
"step": 105
},
{
"clip_fraction": 0.0,
"completion_length": 3118.9584045410156,
"dapo/avg_reward_std": 0.22452521603554487,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3333333395421505,
"dapo/num_sampling_attempts": 4.0,
"dapo/sampling_efficiency": 28.869047619047613,
"dapo/total_prompts_processed": 24.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.12114285714285715,
"grad_norm": 0.11885393410921097,
"kl": 0.011783599853515625,
"learning_rate": 5.892200842364462e-07,
"loss": 0.0786,
"reward": 0.673494272865355,
"reward_std": 0.9388571679592133,
"step": 106
},
{
"clip_fraction": 0.0,
"completion_length": 3183.666717529297,
"dapo/avg_reward_std": 0.23609773551716523,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.30882353467099805,
"dapo/num_sampling_attempts": 4.25,
"dapo/sampling_efficiency": 37.74305555555556,
"dapo/total_prompts_processed": 25.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.12228571428571429,
"grad_norm": 0.13629400730133057,
"kl": 0.0092010498046875,
"learning_rate": 5.813904131848564e-07,
"loss": 0.0615,
"reward": 0.5680118557065725,
"reward_std": 0.8982010260224342,
"step": 107
},
{
"clip_fraction": 0.0,
"completion_length": 3170.263916015625,
"dapo/avg_reward_std": 0.21017570431168014,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3018018079770578,
"dapo/num_sampling_attempts": 4.625,
"dapo/sampling_efficiency": 30.625,
"dapo/total_prompts_processed": 27.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.12342857142857143,
"grad_norm": 0.1134539544582367,
"kl": 0.010692596435546875,
"learning_rate": 5.735511803093248e-07,
"loss": 0.0433,
"reward": 0.6368884779512882,
"reward_std": 0.9655679985880852,
"step": 108
},
{
"clip_fraction": 0.0,
"completion_length": 2938.5243530273438,
"dapo/avg_reward_std": 0.30796096875117374,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3974359052685591,
"dapo/num_sampling_attempts": 3.25,
"dapo/sampling_efficiency": 38.95833333333333,
"dapo/total_prompts_processed": 19.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.12457142857142857,
"grad_norm": 0.16064728796482086,
"kl": 0.014812469482421875,
"learning_rate": 5.657047735161255e-07,
"loss": 0.0874,
"reward": 0.4405923653393984,
"reward_std": 0.899710550904274,
"step": 109
},
{
"clip_fraction": 0.0,
"completion_length": 3333.5556030273438,
"dapo/avg_reward_std": 0.17683410130698105,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.28735632475080164,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 40.104166666666664,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.12571428571428572,
"grad_norm": 0.1374766230583191,
"kl": 0.00823211669921875,
"learning_rate": 5.578535828967777e-07,
"loss": 0.0525,
"reward": 0.6373127717524767,
"reward_std": 0.949370414018631,
"step": 110
},
{
"clip_fraction": 0.0,
"completion_length": 3404.166717529297,
"dapo/avg_reward_std": 0.2707539377734065,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3437500074505806,
"dapo/num_sampling_attempts": 4.0,
"dapo/sampling_efficiency": 28.124999999999996,
"dapo/total_prompts_processed": 24.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.12685714285714286,
"grad_norm": 0.09096160531044006,
"kl": 0.0152435302734375,
"learning_rate": 5.5e-07,
"loss": 0.0286,
"reward": 0.4166172882542014,
"reward_std": 0.9417606145143509,
"step": 111
},
{
"clip_fraction": 0.0,
"completion_length": 3306.263946533203,
"dapo/avg_reward_std": 0.17227381931410896,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.21481482055452134,
"dapo/num_sampling_attempts": 5.625,
"dapo/sampling_efficiency": 27.395833333333332,
"dapo/total_prompts_processed": 33.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.128,
"grad_norm": 0.11950567364692688,
"kl": 0.01320648193359375,
"learning_rate": 5.421464171032224e-07,
"loss": 0.0449,
"reward": 0.4937558462843299,
"reward_std": 0.9720155894756317,
"step": 112
},
{
"clip_fraction": 0.0,
"completion_length": 3117.1979064941406,
"dapo/avg_reward_std": 0.30339551545106447,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3846153886272357,
"dapo/num_sampling_attempts": 3.25,
"dapo/sampling_efficiency": 38.95833333333333,
"dapo/total_prompts_processed": 19.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.12914285714285714,
"grad_norm": 0.15823398530483246,
"kl": 0.01418304443359375,
"learning_rate": 5.342952264838747e-07,
"loss": 0.0743,
"reward": 0.5596551271155477,
"reward_std": 0.8979872986674309,
"step": 113
},
{
"clip_fraction": 0.0,
"completion_length": 3239.031280517578,
"dapo/avg_reward_std": 0.24120492219924927,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.34000000298023225,
"dapo/num_sampling_attempts": 3.125,
"dapo/sampling_efficiency": 56.770833333333336,
"dapo/total_prompts_processed": 18.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.13028571428571428,
"grad_norm": 0.20106364786624908,
"kl": 0.01206207275390625,
"learning_rate": 5.264488196906752e-07,
"loss": 0.0817,
"reward": 0.697497084736824,
"reward_std": 0.9489930346608162,
"step": 114
},
{
"clip_fraction": 0.0,
"completion_length": 3197.2430725097656,
"dapo/avg_reward_std": 0.20663932577157632,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.26495727056112045,
"dapo/num_sampling_attempts": 4.875,
"dapo/sampling_efficiency": 38.4375,
"dapo/total_prompts_processed": 29.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.13142857142857142,
"grad_norm": 0.15399962663650513,
"kl": 0.015567779541015625,
"learning_rate": 5.186095868151436e-07,
"loss": 0.0667,
"reward": 0.5802914081141353,
"reward_std": 0.9295158162713051,
"step": 115
},
{
"clip_fraction": 0.0,
"completion_length": 3272.6007080078125,
"dapo/avg_reward_std": 0.22710687816143035,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3166666701436043,
"dapo/num_sampling_attempts": 3.75,
"dapo/sampling_efficiency": 37.61904761904762,
"dapo/total_prompts_processed": 22.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.13257142857142856,
"grad_norm": 0.140142023563385,
"kl": 0.01934814453125,
"learning_rate": 5.107799157635538e-07,
"loss": 0.0611,
"reward": 0.6176847349852324,
"reward_std": 0.944318100810051,
"step": 116
},
{
"clip_fraction": 0.0,
"completion_length": 3268.4305725097656,
"dapo/avg_reward_std": 0.23266587586238466,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.344827591345228,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 38.125,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.1337142857142857,
"grad_norm": 0.1582440286874771,
"kl": 0.01198577880859375,
"learning_rate": 5.02962191529556e-07,
"loss": 0.0556,
"reward": 0.5785031230188906,
"reward_std": 0.954645112156868,
"step": 117
},
{
"clip_fraction": 0.0,
"completion_length": 2941.9722595214844,
"dapo/avg_reward_std": 0.24969401342027328,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3284313814604984,
"dapo/num_sampling_attempts": 4.25,
"dapo/sampling_efficiency": 27.20238095238095,
"dapo/total_prompts_processed": 25.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.13485714285714287,
"grad_norm": 0.1869765818119049,
"kl": 0.01676177978515625,
"learning_rate": 4.951587954676837e-07,
"loss": 0.1063,
"reward": 0.6486848145723343,
"reward_std": 0.9332743212580681,
"step": 118
},
{
"clip_fraction": 0.0,
"completion_length": 3206.982635498047,
"dapo/avg_reward_std": 0.20580977627209254,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.26666667333671024,
"dapo/num_sampling_attempts": 4.375,
"dapo/sampling_efficiency": 41.28472222222222,
"dapo/total_prompts_processed": 26.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.136,
"grad_norm": 0.13004696369171143,
"kl": 0.015842437744140625,
"learning_rate": 4.873721045679706e-07,
"loss": 0.0453,
"reward": 0.4798949249088764,
"reward_std": 0.9390313774347305,
"step": 119
},
{
"clip_fraction": 0.0,
"completion_length": 3015.545135498047,
"dapo/avg_reward_std": 0.22217401381461852,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3548387149649282,
"dapo/num_sampling_attempts": 3.875,
"dapo/sampling_efficiency": 28.95833333333333,
"dapo/total_prompts_processed": 23.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.13714285714285715,
"grad_norm": 0.229897141456604,
"kl": 0.02198028564453125,
"learning_rate": 4.79604490731896e-07,
"loss": 0.0749,
"reward": 0.7311479561030865,
"reward_std": 0.9607837572693825,
"step": 120
},
{
"clip_fraction": 0.0,
"completion_length": 3098.656280517578,
"dapo/avg_reward_std": 0.22588159143924713,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.32777778506278993,
"dapo/num_sampling_attempts": 3.75,
"dapo/sampling_efficiency": 44.613095238095234,
"dapo/total_prompts_processed": 22.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.1382857142857143,
"grad_norm": 0.13800247013568878,
"kl": 0.014202117919921875,
"learning_rate": 4.7185832004988133e-07,
"loss": 0.0814,
"reward": 0.8461479842662811,
"reward_std": 0.9660850539803505,
"step": 121
},
{
"clip_fraction": 0.0,
"completion_length": 3064.3924255371094,
"dapo/avg_reward_std": 0.16500467896461488,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.19666667193174361,
"dapo/num_sampling_attempts": 6.25,
"dapo/sampling_efficiency": 21.07142857142857,
"dapo/total_prompts_processed": 37.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.13942857142857143,
"grad_norm": 0.1680934727191925,
"kl": 0.01361083984375,
"learning_rate": 4.641359520805548e-07,
"loss": 0.066,
"reward": 0.7812346797436476,
"reward_std": 0.9529108256101608,
"step": 122
},
{
"clip_fraction": 0.0,
"completion_length": 3097.4861755371094,
"dapo/avg_reward_std": 0.22939075000824466,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.33333334038334506,
"dapo/num_sampling_attempts": 3.875,
"dapo/sampling_efficiency": 33.75,
"dapo/total_prompts_processed": 23.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.14057142857142857,
"grad_norm": 0.18081900477409363,
"kl": 0.014842987060546875,
"learning_rate": 4.5643973913200837e-07,
"loss": 0.0877,
"reward": 0.7531900368630886,
"reward_std": 0.9868133068084717,
"step": 123
},
{
"clip_fraction": 0.0,
"completion_length": 3203.888885498047,
"dapo/avg_reward_std": 0.24352495979379724,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.35185185737080044,
"dapo/num_sampling_attempts": 3.375,
"dapo/sampling_efficiency": 43.05555555555556,
"dapo/total_prompts_processed": 20.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.1417142857142857,
"grad_norm": 0.16807734966278076,
"kl": 0.0139007568359375,
"learning_rate": 4.4877202554526084e-07,
"loss": 0.0612,
"reward": 0.715996683575213,
"reward_std": 0.9595553278923035,
"step": 124
},
{
"clip_fraction": 0.0,
"completion_length": 2885.5625610351562,
"dapo/avg_reward_std": 0.2548297820612788,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.31770833814516664,
"dapo/num_sampling_attempts": 4.0,
"dapo/sampling_efficiency": 27.20238095238095,
"dapo/total_prompts_processed": 24.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.14285714285714285,
"grad_norm": 0.16355834901332855,
"kl": 0.02027130126953125,
"learning_rate": 4.4113514698014953e-07,
"loss": 0.0597,
"reward": 0.8311022147536278,
"reward_std": 0.9600836709141731,
"step": 125
},
{
"clip_fraction": 0.0,
"completion_length": 3250.843780517578,
"dapo/avg_reward_std": 0.2203440727858708,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.32758621152105005,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 46.770833333333336,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.144,
"grad_norm": 0.18190248310565948,
"kl": 0.0158843994140625,
"learning_rate": 4.3353142970386557e-07,
"loss": 0.068,
"reward": 0.7400151332840323,
"reward_std": 0.9569809287786484,
"step": 126
},
{
"clip_fraction": 0.0,
"completion_length": 3264.420166015625,
"dapo/avg_reward_std": 0.25137073759521755,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.41666667429464205,
"dapo/num_sampling_attempts": 3.5,
"dapo/sampling_efficiency": 40.11904761904761,
"dapo/total_prompts_processed": 21.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.14514285714285713,
"grad_norm": 0.17950685322284698,
"kl": 0.0223236083984375,
"learning_rate": 4.2596318988235037e-07,
"loss": 0.0528,
"reward": 0.5194851458072662,
"reward_std": 0.9414050430059433,
"step": 127
},
{
"clip_fraction": 0.0,
"completion_length": 2892.9132690429688,
"dapo/avg_reward_std": 0.2416491061449051,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2631579002267436,
"dapo/num_sampling_attempts": 4.75,
"dapo/sampling_efficiency": 26.9047619047619,
"dapo/total_prompts_processed": 28.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.1462857142857143,
"grad_norm": 0.25602471828460693,
"kl": 0.02016448974609375,
"learning_rate": 4.1843273287476854e-07,
"loss": 0.0933,
"reward": 0.8592288717627525,
"reward_std": 0.9212958365678787,
"step": 128
},
{
"clip_fraction": 0.0,
"completion_length": 3146.6944580078125,
"dapo/avg_reward_std": 0.22558308675371366,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3218390854268238,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 54.07738095238095,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.14742857142857144,
"grad_norm": 0.21352027356624603,
"kl": 0.0198211669921875,
"learning_rate": 4.1094235253127374e-07,
"loss": 0.0679,
"reward": 0.5732525363564491,
"reward_std": 0.9645283669233322,
"step": 129
},
{
"clip_fraction": 0.0,
"completion_length": 3248.4236450195312,
"dapo/avg_reward_std": 0.35807471639580196,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.5000000066227384,
"dapo/num_sampling_attempts": 2.25,
"dapo/sampling_efficiency": 51.041666666666664,
"dapo/total_prompts_processed": 13.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.14857142857142858,
"grad_norm": 0.1599435657262802,
"kl": 0.0216827392578125,
"learning_rate": 4.034943304942796e-07,
"loss": 0.0443,
"reward": 0.5955070666968822,
"reward_std": 0.9924386888742447,
"step": 130
},
{
"clip_fraction": 0.0,
"completion_length": 2958.5347595214844,
"dapo/avg_reward_std": 0.18185590389298228,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.23170731998071437,
"dapo/num_sampling_attempts": 5.125,
"dapo/sampling_efficiency": 24.945436507936506,
"dapo/total_prompts_processed": 30.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.14971428571428572,
"grad_norm": 0.21188445389270782,
"kl": 0.02074432373046875,
"learning_rate": 3.9609093550344907e-07,
"loss": 0.0628,
"reward": 0.8608505353331566,
"reward_std": 0.9059992283582687,
"step": 131
},
{
"clip_fraction": 0.0,
"completion_length": 3019.888931274414,
"dapo/avg_reward_std": 0.3038036392794715,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.36419753785486575,
"dapo/num_sampling_attempts": 3.375,
"dapo/sampling_efficiency": 38.33333333333333,
"dapo/total_prompts_processed": 20.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.15085714285714286,
"grad_norm": 0.19752100110054016,
"kl": 0.024078369140625,
"learning_rate": 3.8873442270461485e-07,
"loss": 0.0698,
"reward": 0.7191393785178661,
"reward_std": 0.9548436179757118,
"step": 132
},
{
"clip_fraction": 0.0,
"completion_length": 3251.6909790039062,
"dapo/avg_reward_std": 0.17617152915114448,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.22222222494227545,
"dapo/num_sampling_attempts": 5.25,
"dapo/sampling_efficiency": 31.369047619047613,
"dapo/total_prompts_processed": 31.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.152,
"grad_norm": 0.1220565065741539,
"kl": 0.01824951171875,
"learning_rate": 3.8142703296283953e-07,
"loss": 0.0249,
"reward": 0.3546891317819245,
"reward_std": 0.9377138167619705,
"step": 133
},
{
"clip_fraction": 0.0,
"completion_length": 3146.545196533203,
"dapo/avg_reward_std": 0.2565364229679108,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.32666667103767394,
"dapo/num_sampling_attempts": 3.125,
"dapo/sampling_efficiency": 47.08333333333333,
"dapo/total_prompts_processed": 18.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.15314285714285714,
"grad_norm": 0.15810362994670868,
"kl": 0.03081512451171875,
"learning_rate": 3.7417099217982686e-07,
"loss": 0.0306,
"reward": 0.5206232005730271,
"reward_std": 0.9619846642017365,
"step": 134
},
{
"clip_fraction": 0.0,
"completion_length": 3085.5972900390625,
"dapo/avg_reward_std": 0.30491976333515985,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.40476191469601225,
"dapo/num_sampling_attempts": 3.5,
"dapo/sampling_efficiency": 31.666666666666664,
"dapo/total_prompts_processed": 21.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.15428571428571428,
"grad_norm": 0.2133372277021408,
"kl": 0.0204620361328125,
"learning_rate": 3.6696851061588994e-07,
"loss": 0.0681,
"reward": 0.7713347226381302,
"reward_std": 0.9403144493699074,
"step": 135
},
{
"clip_fraction": 0.0,
"completion_length": 3326.295196533203,
"dapo/avg_reward_std": 0.22884555886953306,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.24358974817471626,
"dapo/num_sampling_attempts": 4.875,
"dapo/sampling_efficiency": 25.868055555555557,
"dapo/total_prompts_processed": 29.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.15542857142857142,
"grad_norm": 0.18792302906513214,
"kl": 0.029754638671875,
"learning_rate": 3.5982178221668533e-07,
"loss": 0.0468,
"reward": 0.5651950668543577,
"reward_std": 0.9934203922748566,
"step": 136
},
{
"clip_fraction": 0.0,
"completion_length": 3265.2882080078125,
"dapo/avg_reward_std": 0.304972759137551,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.43055556155741215,
"dapo/num_sampling_attempts": 3.0,
"dapo/sampling_efficiency": 54.375,
"dapo/total_prompts_processed": 18.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.15657142857142858,
"grad_norm": 0.13081717491149902,
"kl": 0.0223846435546875,
"learning_rate": 3.5273298394491515e-07,
"loss": 0.0443,
"reward": 0.5535581167787313,
"reward_std": 0.9467164501547813,
"step": 137
},
{
"clip_fraction": 0.0,
"completion_length": 2895.8646545410156,
"dapo/avg_reward_std": 0.2690910736719767,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3333333387970924,
"dapo/num_sampling_attempts": 3.75,
"dapo/sampling_efficiency": 32.82738095238095,
"dapo/total_prompts_processed": 22.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.15771428571428572,
"grad_norm": 0.18165208399295807,
"kl": 0.032073974609375,
"learning_rate": 3.45704275117204e-07,
"loss": 0.0288,
"reward": 0.5253790076822042,
"reward_std": 0.9247673749923706,
"step": 138
},
{
"clip_fraction": 0.0,
"completion_length": 3049.8507080078125,
"dapo/avg_reward_std": 0.2440622321196965,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.33928572067192625,
"dapo/num_sampling_attempts": 3.5,
"dapo/sampling_efficiency": 40.11904761904761,
"dapo/total_prompts_processed": 21.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.15885714285714286,
"grad_norm": 0.19676071405410767,
"kl": 0.03052520751953125,
"learning_rate": 3.387377967463493e-07,
"loss": 0.0477,
"reward": 0.6778539270162582,
"reward_std": 0.9344745948910713,
"step": 139
},
{
"clip_fraction": 0.0,
"completion_length": 3029.0486450195312,
"dapo/avg_reward_std": 0.3111469969153404,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4916666768491268,
"dapo/num_sampling_attempts": 2.5,
"dapo/sampling_efficiency": 41.666666666666664,
"dapo/total_prompts_processed": 15.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.16,
"grad_norm": 0.18594416975975037,
"kl": 0.0277557373046875,
"learning_rate": 3.3183567088914833e-07,
"loss": 0.0431,
"reward": 0.5210836753249168,
"reward_std": 0.9851464107632637,
"step": 140
},
{
"clip_fraction": 0.0,
"completion_length": 3151.5486755371094,
"dapo/avg_reward_std": 0.23511080997330802,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3095238127878734,
"dapo/num_sampling_attempts": 4.375,
"dapo/sampling_efficiency": 26.18055555555555,
"dapo/total_prompts_processed": 26.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.16114285714285714,
"grad_norm": 0.17807213962078094,
"kl": 0.0266265869140625,
"learning_rate": 3.250000000000001e-07,
"loss": 0.0498,
"reward": 0.5591800361871719,
"reward_std": 0.9730060175061226,
"step": 141
},
{
"clip_fraction": 0.0,
"completion_length": 2963.59033203125,
"dapo/avg_reward_std": 0.19928012508898973,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2812500069849193,
"dapo/num_sampling_attempts": 4.0,
"dapo/sampling_efficiency": 38.02083333333333,
"dapo/total_prompts_processed": 24.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.16228571428571428,
"grad_norm": 0.24388359487056732,
"kl": 0.0318603515625,
"learning_rate": 3.182328662904756e-07,
"loss": 0.0567,
"reward": 0.7148469444364309,
"reward_std": 0.9495278596878052,
"step": 142
},
{
"clip_fraction": 0.0,
"completion_length": 3157.791717529297,
"dapo/avg_reward_std": 0.23966079843895777,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3214285767504147,
"dapo/num_sampling_attempts": 3.5,
"dapo/sampling_efficiency": 39.166666666666664,
"dapo/total_prompts_processed": 21.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.16342857142857142,
"grad_norm": 0.20528583228588104,
"kl": 0.041290283203125,
"learning_rate": 3.115363310950578e-07,
"loss": 0.0443,
"reward": 0.5249591246247292,
"reward_std": 0.9509934857487679,
"step": 143
},
{
"clip_fraction": 0.0,
"completion_length": 3030.187530517578,
"dapo/avg_reward_std": 0.30880050485332805,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4375000099341075,
"dapo/num_sampling_attempts": 3.0,
"dapo/sampling_efficiency": 41.04166666666666,
"dapo/total_prompts_processed": 18.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.16457142857142856,
"grad_norm": 0.15082307159900665,
"kl": 0.02729034423828125,
"learning_rate": 3.0491243424323783e-07,
"loss": 0.0511,
"reward": 0.5894143544137478,
"reward_std": 0.954010546207428,
"step": 144
},
{
"clip_fraction": 0.0,
"completion_length": 2973.3993225097656,
"dapo/avg_reward_std": 0.32683228328824043,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4236111181477706,
"dapo/num_sampling_attempts": 3.0,
"dapo/sampling_efficiency": 48.66071428571428,
"dapo/total_prompts_processed": 18.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.1657142857142857,
"grad_norm": 0.2588576078414917,
"kl": 0.038238525390625,
"learning_rate": 2.9836319343816397e-07,
"loss": 0.0611,
"reward": 0.6702784113585949,
"reward_std": 0.9678368121385574,
"step": 145
},
{
"clip_fraction": 0.0,
"completion_length": 3289.8368530273438,
"dapo/avg_reward_std": 0.29686578666722335,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.34567901823255753,
"dapo/num_sampling_attempts": 3.375,
"dapo/sampling_efficiency": 51.57738095238095,
"dapo/total_prompts_processed": 20.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.16685714285714287,
"grad_norm": 0.2035798877477646,
"kl": 0.0394744873046875,
"learning_rate": 2.918906036420294e-07,
"loss": 0.0576,
"reward": 0.4602743685245514,
"reward_std": 0.9194413796067238,
"step": 146
},
{
"clip_fraction": 0.0,
"completion_length": 3068.7604064941406,
"dapo/avg_reward_std": 0.27814541943371296,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3437500069849193,
"dapo/num_sampling_attempts": 4.0,
"dapo/sampling_efficiency": 36.666666666666664,
"dapo/total_prompts_processed": 24.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.168,
"grad_norm": 0.22469140589237213,
"kl": 0.030426025390625,
"learning_rate": 2.854966364683872e-07,
"loss": 0.0696,
"reward": 0.6243265215307474,
"reward_std": 0.9174878597259521,
"step": 147
},
{
"clip_fraction": 0.0,
"completion_length": 3041.357635498047,
"dapo/avg_reward_std": 0.2907161459326744,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.458333346247673,
"dapo/num_sampling_attempts": 2.5,
"dapo/sampling_efficiency": 57.70833333333333,
"dapo/total_prompts_processed": 15.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.16914285714285715,
"grad_norm": 0.3123789429664612,
"kl": 0.0328521728515625,
"learning_rate": 2.791832395815782e-07,
"loss": 0.0819,
"reward": 0.8250775411725044,
"reward_std": 0.9233218431472778,
"step": 148
},
{
"clip_fraction": 0.0,
"completion_length": 2433.0694732666016,
"dapo/avg_reward_std": 0.22243764168686336,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2777777839865949,
"dapo/num_sampling_attempts": 4.5,
"dapo/sampling_efficiency": 35.75892857142857,
"dapo/total_prompts_processed": 27.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.1702857142857143,
"grad_norm": 0.2827485203742981,
"kl": 0.0386505126953125,
"learning_rate": 2.729523361034538e-07,
"loss": 0.0784,
"reward": 0.6995697831735015,
"reward_std": 0.9434132054448128,
"step": 149
},
{
"clip_fraction": 0.0,
"completion_length": 3096.59033203125,
"dapo/avg_reward_std": 0.347408726811409,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.541666672565043,
"dapo/num_sampling_attempts": 2.0,
"dapo/sampling_efficiency": 63.541666666666664,
"dapo/total_prompts_processed": 12.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.17142857142857143,
"grad_norm": 0.30529579520225525,
"kl": 0.03045654296875,
"learning_rate": 2.6680582402757324e-07,
"loss": 0.0868,
"reward": 0.7112221932038665,
"reward_std": 0.9602288007736206,
"step": 150
},
{
"clip_fraction": 0.0,
"completion_length": 3184.611083984375,
"dapo/avg_reward_std": 0.1674806038115887,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.20212766528129578,
"dapo/num_sampling_attempts": 5.875,
"dapo/sampling_efficiency": 23.749999999999996,
"dapo/total_prompts_processed": 35.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.17257142857142857,
"grad_norm": 0.19142813980579376,
"kl": 0.037353515625,
"learning_rate": 2.6074557564105724e-07,
"loss": 0.045,
"reward": 0.41017685225233436,
"reward_std": 0.9152907580137253,
"step": 151
},
{
"clip_fraction": 0.0,
"completion_length": 3437.3541564941406,
"dapo/avg_reward_std": 0.208841644014631,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2571428622518267,
"dapo/num_sampling_attempts": 4.375,
"dapo/sampling_efficiency": 40.416666666666664,
"dapo/total_prompts_processed": 26.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.1737142857142857,
"grad_norm": 0.15321692824363708,
"kl": 0.03997802734375,
"learning_rate": 2.547734369542718e-07,
"loss": 0.0346,
"reward": 0.34562894329428673,
"reward_std": 0.856454074382782,
"step": 152
},
{
"clip_fraction": 0.0,
"completion_length": 3008.1285095214844,
"dapo/avg_reward_std": 0.3009934023022652,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.5000000096857548,
"dapo/num_sampling_attempts": 2.5,
"dapo/sampling_efficiency": 43.75,
"dapo/total_prompts_processed": 15.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.17485714285714285,
"grad_norm": 0.20332548022270203,
"kl": 0.0509033203125,
"learning_rate": 2.488912271385139e-07,
"loss": 0.0536,
"reward": 0.7641689777374268,
"reward_std": 0.95648343116045,
"step": 153
},
{
"clip_fraction": 0.0,
"completion_length": 3165.52783203125,
"dapo/avg_reward_std": 0.2268627045246271,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.35256410905948055,
"dapo/num_sampling_attempts": 3.25,
"dapo/sampling_efficiency": 40.625,
"dapo/total_prompts_processed": 19.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.176,
"grad_norm": 0.2415708601474762,
"kl": 0.032623291015625,
"learning_rate": 2.4310073797187573e-07,
"loss": 0.0658,
"reward": 0.6375892572104931,
"reward_std": 0.9544621706008911,
"step": 154
},
{
"clip_fraction": 0.0,
"completion_length": 3226.4652709960938,
"dapo/avg_reward_std": 0.2563069482644399,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.38333334078391396,
"dapo/num_sampling_attempts": 3.75,
"dapo/sampling_efficiency": 31.249999999999996,
"dapo/total_prompts_processed": 22.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.17714285714285713,
"grad_norm": 0.2137623131275177,
"kl": 0.0427093505859375,
"learning_rate": 2.374037332934512e-07,
"loss": 0.0533,
"reward": 0.537381574511528,
"reward_std": 0.9281218275427818,
"step": 155
},
{
"clip_fraction": 0.0,
"completion_length": 2680.3090209960938,
"dapo/avg_reward_std": 0.22888225678241614,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3181818226973216,
"dapo/num_sampling_attempts": 4.125,
"dapo/sampling_efficiency": 31.29960317460317,
"dapo/total_prompts_processed": 24.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.1782857142857143,
"grad_norm": 0.3409210443496704,
"kl": 0.03851318359375,
"learning_rate": 2.3180194846605364e-07,
"loss": 0.0962,
"reward": 0.8820424377918243,
"reward_std": 0.9246840327978134,
"step": 156
},
{
"clip_fraction": 0.0,
"completion_length": 3045.3299255371094,
"dapo/avg_reward_std": 0.2491180575810946,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3653846222620744,
"dapo/num_sampling_attempts": 3.25,
"dapo/sampling_efficiency": 45.83333333333332,
"dapo/total_prompts_processed": 19.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.17942857142857144,
"grad_norm": 0.23701035976409912,
"kl": 0.0436248779296875,
"learning_rate": 2.2629708984760706e-07,
"loss": 0.0414,
"reward": 0.6551959328353405,
"reward_std": 0.9744707196950912,
"step": 157
},
{
"clip_fraction": 0.0,
"completion_length": 2918.892364501953,
"dapo/avg_reward_std": 0.22537656256130764,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.33333333730697634,
"dapo/num_sampling_attempts": 4.375,
"dapo/sampling_efficiency": 39.93055555555556,
"dapo/total_prompts_processed": 26.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.18057142857142858,
"grad_norm": 0.3551786541938782,
"kl": 0.0572357177734375,
"learning_rate": 2.2089083427137329e-07,
"loss": 0.0732,
"reward": 0.5248121619224548,
"reward_std": 0.9334831684827805,
"step": 158
},
{
"clip_fraction": 0.0,
"completion_length": 2874.0729446411133,
"dapo/avg_reward_std": 0.18832522351294756,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2812500046566129,
"dapo/num_sampling_attempts": 4.0,
"dapo/sampling_efficiency": 38.69047619047618,
"dapo/total_prompts_processed": 24.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.18171428571428572,
"grad_norm": 0.25500980019569397,
"kl": 0.03741455078125,
"learning_rate": 2.1558482853517253e-07,
"loss": 0.0537,
"reward": 0.7963100634515285,
"reward_std": 0.987776905298233,
"step": 159
},
{
"clip_fraction": 0.0,
"completion_length": 2940.701385498047,
"dapo/avg_reward_std": 0.16297742784023284,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.20000000536441803,
"dapo/num_sampling_attempts": 6.25,
"dapo/sampling_efficiency": 18.368055555555557,
"dapo/total_prompts_processed": 37.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.18285714285714286,
"grad_norm": 0.2898014187812805,
"kl": 0.058013916015625,
"learning_rate": 2.1038068889975259e-07,
"loss": 0.037,
"reward": 0.5323189618065953,
"reward_std": 0.9483579620718956,
"step": 160
},
{
"clip_fraction": 0.0,
"completion_length": 3090.7882385253906,
"dapo/avg_reward_std": 0.3046227526664734,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3733333414793015,
"dapo/num_sampling_attempts": 3.125,
"dapo/sampling_efficiency": 43.45238095238095,
"dapo/total_prompts_processed": 18.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.184,
"grad_norm": 0.28573325276374817,
"kl": 0.040771484375,
"learning_rate": 2.0528000059645995e-07,
"loss": 0.0511,
"reward": 0.6970310118049383,
"reward_std": 0.9432796016335487,
"step": 161
},
{
"clip_fraction": 0.0,
"completion_length": 3205.4270629882812,
"dapo/avg_reward_std": 0.36972329020500183,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.5438596621940011,
"dapo/num_sampling_attempts": 2.375,
"dapo/sampling_efficiency": 55.625,
"dapo/total_prompts_processed": 14.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.18514285714285714,
"grad_norm": 0.390523225069046,
"kl": 0.052459716796875,
"learning_rate": 2.0028431734436308e-07,
"loss": 0.0818,
"reward": 0.6346883065998554,
"reward_std": 0.9713371768593788,
"step": 162
},
{
"clip_fraction": 0.0,
"completion_length": 3082.107635498047,
"dapo/avg_reward_std": 0.2315557522158469,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3440860264724301,
"dapo/num_sampling_attempts": 3.875,
"dapo/sampling_efficiency": 44.513888888888886,
"dapo/total_prompts_processed": 23.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.18628571428571428,
"grad_norm": 0.31898149847984314,
"kl": 0.05328369140625,
"learning_rate": 1.9539516087697517e-07,
"loss": 0.0722,
"reward": 0.6942785531282425,
"reward_std": 0.9776681512594223,
"step": 163
},
{
"clip_fraction": 0.0,
"completion_length": 3027.0243530273438,
"dapo/avg_reward_std": 0.15836979811255997,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.22972973214613424,
"dapo/num_sampling_attempts": 4.625,
"dapo/sampling_efficiency": 41.69642857142857,
"dapo/total_prompts_processed": 27.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.18742857142857142,
"grad_norm": 0.2931766211986542,
"kl": 0.033111572265625,
"learning_rate": 1.9061402047871833e-07,
"loss": 0.0754,
"reward": 0.944303285330534,
"reward_std": 0.9451126903295517,
"step": 164
},
{
"clip_fraction": 0.0,
"completion_length": 2894.260482788086,
"dapo/avg_reward_std": 0.224585828371346,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2916666716337204,
"dapo/num_sampling_attempts": 4.0,
"dapo/sampling_efficiency": 37.5,
"dapo/total_prompts_processed": 24.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.18857142857142858,
"grad_norm": 0.24178634583950043,
"kl": 0.0533447265625,
"learning_rate": 1.8594235253127372e-07,
"loss": 0.0505,
"reward": 0.6519163623452187,
"reward_std": 0.9615699052810669,
"step": 165
},
{
"clip_fraction": 0.0,
"completion_length": 3002.7882385253906,
"dapo/avg_reward_std": 0.29886600477942105,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3160919598464308,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 35.416666666666664,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.18971428571428572,
"grad_norm": 0.31221655011177063,
"kl": 0.047943115234375,
"learning_rate": 1.8138158006995363e-07,
"loss": 0.066,
"reward": 0.6383479349315166,
"reward_std": 0.9029820337891579,
"step": 166
},
{
"clip_fraction": 0.0,
"completion_length": 2927.295150756836,
"dapo/avg_reward_std": 0.34752671499001353,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.5438596621940011,
"dapo/num_sampling_attempts": 2.375,
"dapo/sampling_efficiency": 48.95833333333333,
"dapo/total_prompts_processed": 14.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.19085714285714286,
"grad_norm": 0.2697528600692749,
"kl": 0.045745849609375,
"learning_rate": 1.7693309235023127e-07,
"loss": 0.0483,
"reward": 0.8266985702211969,
"reward_std": 0.9544429406523705,
"step": 167
},
{
"clip_fraction": 0.0,
"completion_length": 3212.857666015625,
"dapo/avg_reward_std": 0.263968757220677,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3690476247242519,
"dapo/num_sampling_attempts": 3.5,
"dapo/sampling_efficiency": 41.388888888888886,
"dapo/total_prompts_processed": 21.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.192,
"grad_norm": 0.27940821647644043,
"kl": 0.05059814453125,
"learning_rate": 1.7259824442455923e-07,
"loss": 0.0415,
"reward": 0.7715255841612816,
"reward_std": 0.95072440803051,
"step": 168
},
{
"clip_fraction": 0.0,
"completion_length": 3112.5799255371094,
"dapo/avg_reward_std": 0.22730760558231458,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3153153222960395,
"dapo/num_sampling_attempts": 4.625,
"dapo/sampling_efficiency": 26.249999999999996,
"dapo/total_prompts_processed": 27.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.19314285714285714,
"grad_norm": 0.4339730143547058,
"kl": 0.06396484375,
"learning_rate": 1.6837835672960831e-07,
"loss": 0.0777,
"reward": 0.5262689627707005,
"reward_std": 0.9779800549149513,
"step": 169
},
{
"clip_fraction": 0.0,
"completion_length": 3088.6632385253906,
"dapo/avg_reward_std": 0.2333034286275506,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.33333333721384406,
"dapo/num_sampling_attempts": 4.0,
"dapo/sampling_efficiency": 38.263888888888886,
"dapo/total_prompts_processed": 24.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.19428571428571428,
"grad_norm": 0.48384836316108704,
"kl": 0.0555419921875,
"learning_rate": 1.6427471468404952e-07,
"loss": 0.0974,
"reward": 0.7407102398574352,
"reward_std": 0.9568767622113228,
"step": 170
},
{
"clip_fraction": 0.0,
"completion_length": 3099.347198486328,
"dapo/avg_reward_std": 0.17301563743282766,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.27941177127992406,
"dapo/num_sampling_attempts": 4.25,
"dapo/sampling_efficiency": 31.874999999999996,
"dapo/total_prompts_processed": 25.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.19542857142857142,
"grad_norm": 0.42263394594192505,
"kl": 0.0595703125,
"learning_rate": 1.6028856829700258e-07,
"loss": 0.0812,
"reward": 0.4282900430262089,
"reward_std": 0.914498083293438,
"step": 171
},
{
"clip_fraction": 0.0,
"completion_length": 3111.232696533203,
"dapo/avg_reward_std": 0.2433939976617694,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.33333334093913436,
"dapo/num_sampling_attempts": 4.0,
"dapo/sampling_efficiency": 36.80555555555555,
"dapo/total_prompts_processed": 24.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.19657142857142856,
"grad_norm": 0.4814501404762268,
"kl": 0.05926513671875,
"learning_rate": 1.5642113178727193e-07,
"loss": 0.0843,
"reward": 0.6843680012971163,
"reward_std": 0.8743765726685524,
"step": 172
},
{
"clip_fraction": 0.0,
"completion_length": 3008.6563110351562,
"dapo/avg_reward_std": 0.25363275137814606,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.31818182811592566,
"dapo/num_sampling_attempts": 4.125,
"dapo/sampling_efficiency": 38.78472222222222,
"dapo/total_prompts_processed": 24.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.1977142857142857,
"grad_norm": 0.285697877407074,
"kl": 0.05755615234375,
"learning_rate": 1.5267358321348285e-07,
"loss": 0.0456,
"reward": 0.5798944532871246,
"reward_std": 0.984041191637516,
"step": 173
},
{
"clip_fraction": 0.0,
"completion_length": 3067.9791870117188,
"dapo/avg_reward_std": 0.3438388824462891,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4500000044703484,
"dapo/num_sampling_attempts": 2.5,
"dapo/sampling_efficiency": 48.33333333333333,
"dapo/total_prompts_processed": 15.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.19885714285714284,
"grad_norm": 0.43520498275756836,
"kl": 0.07098388671875,
"learning_rate": 1.4904706411523448e-07,
"loss": 0.0716,
"reward": 0.5646946905180812,
"reward_std": 0.9460153579711914,
"step": 174
},
{
"clip_fraction": 0.0,
"completion_length": 3223.2916870117188,
"dapo/avg_reward_std": 0.2690600073337555,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4133333420753479,
"dapo/num_sampling_attempts": 3.125,
"dapo/sampling_efficiency": 41.45833333333333,
"dapo/total_prompts_processed": 18.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.2,
"grad_norm": 0.35144945979118347,
"kl": 0.06170654296875,
"learning_rate": 1.4554267916537495e-07,
"loss": 0.0348,
"reward": 0.556399748660624,
"reward_std": 0.9192204177379608,
"step": 175
},
{
"clip_fraction": 0.0,
"completion_length": 2946.0799102783203,
"dapo/avg_reward_std": 0.25316954652468365,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.37500000558793545,
"dapo/num_sampling_attempts": 3.0,
"dapo/sampling_efficiency": 43.75,
"dapo/total_prompts_processed": 18.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.20114285714285715,
"grad_norm": 0.46807849407196045,
"kl": 0.063018798828125,
"learning_rate": 1.4216149583350755e-07,
"loss": 0.0796,
"reward": 0.6736351866275072,
"reward_std": 0.9649264737963676,
"step": 176
},
{
"clip_fraction": 0.0,
"completion_length": 3096.829864501953,
"dapo/avg_reward_std": 0.31567848042437907,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.482456142965116,
"dapo/num_sampling_attempts": 2.375,
"dapo/sampling_efficiency": 55.625,
"dapo/total_prompts_processed": 14.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.2022857142857143,
"grad_norm": 0.31731271743774414,
"kl": 0.055938720703125,
"learning_rate": 1.3890454406082956e-07,
"loss": 0.0386,
"reward": 0.681073285639286,
"reward_std": 0.9661536440253258,
"step": 177
},
{
"clip_fraction": 0.0,
"completion_length": 3235.8056030273438,
"dapo/avg_reward_std": 0.24198689542967697,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3448275898037286,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 47.08333333333333,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.20342857142857143,
"grad_norm": 0.4640950560569763,
"kl": 0.072052001953125,
"learning_rate": 1.3577281594640182e-07,
"loss": 0.0702,
"reward": 0.5520291309803724,
"reward_std": 0.9967257082462311,
"step": 178
},
{
"clip_fraction": 0.0,
"completion_length": 3237.77783203125,
"dapo/avg_reward_std": 0.30828417566689575,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4242424341765317,
"dapo/num_sampling_attempts": 2.75,
"dapo/sampling_efficiency": 52.82738095238095,
"dapo/total_prompts_processed": 16.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.20457142857142857,
"grad_norm": 0.4502318203449249,
"kl": 0.07550048828125,
"learning_rate": 1.3276726544494571e-07,
"loss": 0.0614,
"reward": 0.6213867999613285,
"reward_std": 0.9431608989834785,
"step": 179
},
{
"clip_fraction": 0.0,
"completion_length": 2887.9236450195312,
"dapo/avg_reward_std": 0.2488611958645008,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3518518612340645,
"dapo/num_sampling_attempts": 3.375,
"dapo/sampling_efficiency": 48.035714285714285,
"dapo/total_prompts_processed": 20.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.2057142857142857,
"grad_norm": 0.44646504521369934,
"kl": 0.073760986328125,
"learning_rate": 1.2988880807625927e-07,
"loss": 0.0683,
"reward": 0.5839751102030277,
"reward_std": 0.9090578481554985,
"step": 180
},
{
"clip_fraction": 0.0,
"completion_length": 3021.2916870117188,
"dapo/avg_reward_std": 0.20883248069069602,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2878787942004926,
"dapo/num_sampling_attempts": 4.125,
"dapo/sampling_efficiency": 39.632936507936506,
"dapo/total_prompts_processed": 24.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.20685714285714285,
"grad_norm": 0.36042678356170654,
"kl": 0.07421875,
"learning_rate": 1.2713832064634125e-07,
"loss": 0.054,
"reward": 0.5517729418352246,
"reward_std": 0.9483400657773018,
"step": 181
},
{
"clip_fraction": 0.0,
"completion_length": 3249.2118530273438,
"dapo/avg_reward_std": 0.2615335573043142,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.33333333847778185,
"dapo/num_sampling_attempts": 3.5,
"dapo/sampling_efficiency": 46.785714285714285,
"dapo/total_prompts_processed": 21.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.208,
"grad_norm": 0.4518042504787445,
"kl": 0.072021484375,
"learning_rate": 1.2451664098030743e-07,
"loss": 0.0654,
"reward": 0.686168298125267,
"reward_std": 0.9350233674049377,
"step": 182
},
{
"clip_fraction": 0.0,
"completion_length": 3221.6631774902344,
"dapo/avg_reward_std": 0.27866364789731574,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3686868738044392,
"dapo/num_sampling_attempts": 4.125,
"dapo/sampling_efficiency": 28.4375,
"dapo/total_prompts_processed": 24.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.20914285714285713,
"grad_norm": 0.32408109307289124,
"kl": 0.062255859375,
"learning_rate": 1.220245676671809e-07,
"loss": 0.0384,
"reward": 0.6384344138205051,
"reward_std": 0.9783304929733276,
"step": 183
},
{
"clip_fraction": 0.0,
"completion_length": 3199.1354370117188,
"dapo/avg_reward_std": 0.2816663732131322,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.316666671137015,
"dapo/num_sampling_attempts": 3.75,
"dapo/sampling_efficiency": 45.55555555555555,
"dapo/total_prompts_processed": 22.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.2102857142857143,
"grad_norm": 0.2197091430425644,
"kl": 0.07550048828125,
"learning_rate": 1.1966285981663407e-07,
"loss": 0.0211,
"reward": 0.45471471454948187,
"reward_std": 0.9136239141225815,
"step": 184
},
{
"clip_fraction": 0.0,
"completion_length": 3037.420166015625,
"dapo/avg_reward_std": 0.17516983683044846,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2657657728807346,
"dapo/num_sampling_attempts": 4.625,
"dapo/sampling_efficiency": 25.729166666666664,
"dapo/total_prompts_processed": 27.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.21142857142857144,
"grad_norm": 0.4012245535850525,
"kl": 0.091796875,
"learning_rate": 1.1743223682775649e-07,
"loss": 0.0442,
"reward": 0.7168623730540276,
"reward_std": 0.9515729621052742,
"step": 185
},
{
"clip_fraction": 0.0,
"completion_length": 3222.767364501953,
"dapo/avg_reward_std": 0.2550514280796051,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3444444512327512,
"dapo/num_sampling_attempts": 3.75,
"dapo/sampling_efficiency": 43.64583333333333,
"dapo/total_prompts_processed": 22.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.21257142857142858,
"grad_norm": 0.4945845305919647,
"kl": 0.083465576171875,
"learning_rate": 1.1533337816991931e-07,
"loss": 0.0667,
"reward": 0.5391142014414072,
"reward_std": 0.9342528805136681,
"step": 186
},
{
"clip_fraction": 0.0,
"completion_length": 2858.5659942626953,
"dapo/avg_reward_std": 0.23423856112264818,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3118279609949358,
"dapo/num_sampling_attempts": 3.875,
"dapo/sampling_efficiency": 40.0297619047619,
"dapo/total_prompts_processed": 23.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.21371428571428572,
"grad_norm": 0.4291866421699524,
"kl": 0.091796875,
"learning_rate": 1.1336692317580158e-07,
"loss": 0.0384,
"reward": 0.7481220848858356,
"reward_std": 0.9474795907735825,
"step": 187
},
{
"clip_fraction": 0.0,
"completion_length": 3123.170166015625,
"dapo/avg_reward_std": 0.1988734739857751,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.25675675997862946,
"dapo/num_sampling_attempts": 4.625,
"dapo/sampling_efficiency": 31.875,
"dapo/total_prompts_processed": 27.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.21485714285714286,
"grad_norm": 0.30453264713287354,
"kl": 0.080657958984375,
"learning_rate": 1.1153347084664419e-07,
"loss": 0.0273,
"reward": 0.6236942922696471,
"reward_std": 0.9715093299746513,
"step": 188
},
{
"clip_fraction": 0.0,
"completion_length": 2872.9618530273438,
"dapo/avg_reward_std": 0.21385114904372923,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3333333365378841,
"dapo/num_sampling_attempts": 3.875,
"dapo/sampling_efficiency": 39.18154761904762,
"dapo/total_prompts_processed": 23.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.216,
"grad_norm": 0.5780288577079773,
"kl": 0.08612060546875,
"learning_rate": 1.0983357966978745e-07,
"loss": 0.0607,
"reward": 0.7514887787401676,
"reward_std": 1.0098591819405556,
"step": 189
},
{
"clip_fraction": 0.0,
"completion_length": 2937.093780517578,
"dapo/avg_reward_std": 0.1677520631575117,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.21895425284610076,
"dapo/num_sampling_attempts": 6.375,
"dapo/sampling_efficiency": 20.689484126984123,
"dapo/total_prompts_processed": 38.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.21714285714285714,
"grad_norm": 0.3947860896587372,
"kl": 0.076263427734375,
"learning_rate": 1.0826776744855121e-07,
"loss": 0.0487,
"reward": 0.6180934552103281,
"reward_std": 0.9050487726926804,
"step": 190
},
{
"clip_fraction": 0.0,
"completion_length": 3252.090301513672,
"dapo/avg_reward_std": 0.24265852073828378,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3055555621782939,
"dapo/num_sampling_attempts": 3.75,
"dapo/sampling_efficiency": 38.020833333333336,
"dapo/total_prompts_processed": 22.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.21828571428571428,
"grad_norm": 0.48333072662353516,
"kl": 0.09661865234375,
"learning_rate": 1.068365111445064e-07,
"loss": 0.0584,
"reward": 0.4759152363985777,
"reward_std": 0.9479196071624756,
"step": 191
},
{
"clip_fraction": 0.0,
"completion_length": 3074.420166015625,
"dapo/avg_reward_std": 0.2189681170315578,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3333333371014431,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 46.45833333333333,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.21942857142857142,
"grad_norm": 0.5536202192306519,
"kl": 0.09814453125,
"learning_rate": 1.0554024673218806e-07,
"loss": 0.0731,
"reward": 0.48804986744653434,
"reward_std": 0.9367131069302559,
"step": 192
},
{
"clip_fraction": 0.0,
"completion_length": 3026.9097595214844,
"dapo/avg_reward_std": 0.21337791310774312,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.256756762395034,
"dapo/num_sampling_attempts": 4.625,
"dapo/sampling_efficiency": 30.3125,
"dapo/total_prompts_processed": 27.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.22057142857142858,
"grad_norm": 0.5239105224609375,
"kl": 0.0985107421875,
"learning_rate": 1.0437936906629334e-07,
"loss": 0.0561,
"reward": 0.45341441221535206,
"reward_std": 0.8912393003702164,
"step": 193
},
{
"clip_fraction": 0.0,
"completion_length": 2896.656280517578,
"dapo/avg_reward_std": 0.31374274492263793,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4333333417773247,
"dapo/num_sampling_attempts": 2.5,
"dapo/sampling_efficiency": 46.875,
"dapo/total_prompts_processed": 15.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.22171428571428572,
"grad_norm": 0.6310634016990662,
"kl": 0.108062744140625,
"learning_rate": 1.0335423176140511e-07,
"loss": 0.0809,
"reward": 0.6844924800097942,
"reward_std": 0.9649646729230881,
"step": 194
},
{
"clip_fraction": 0.0,
"completion_length": 3319.7048950195312,
"dapo/avg_reward_std": 0.21983732057340216,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.30303031251286017,
"dapo/num_sampling_attempts": 4.125,
"dapo/sampling_efficiency": 29.479166666666664,
"dapo/total_prompts_processed": 24.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.22285714285714286,
"grad_norm": 0.47936248779296875,
"kl": 0.0997314453125,
"learning_rate": 1.0246514708427701e-07,
"loss": 0.0479,
"reward": 0.3993752491660416,
"reward_std": 0.9481607303023338,
"step": 195
},
{
"clip_fraction": 0.0,
"completion_length": 3298.1736450195312,
"dapo/avg_reward_std": 0.2514548934996128,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.28333333916962145,
"dapo/num_sampling_attempts": 5.0,
"dapo/sampling_efficiency": 33.13988095238095,
"dapo/total_prompts_processed": 30.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.224,
"grad_norm": 0.36350947618484497,
"kl": 0.1043701171875,
"learning_rate": 1.017123858587145e-07,
"loss": 0.0389,
"reward": 0.31427645590156317,
"reward_std": 0.8980218172073364,
"step": 196
},
{
"clip_fraction": 0.0,
"completion_length": 3260.4861450195312,
"dapo/avg_reward_std": 0.1836753969009106,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.24786325486806723,
"dapo/num_sampling_attempts": 4.875,
"dapo/sampling_efficiency": 28.154761904761905,
"dapo/total_prompts_processed": 29.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.22514285714285714,
"grad_norm": 0.3354601562023163,
"kl": 0.0946044921875,
"learning_rate": 1.0109617738307911e-07,
"loss": 0.0301,
"reward": 0.5015182960778475,
"reward_std": 0.9334053322672844,
"step": 197
},
{
"clip_fraction": 0.0,
"completion_length": 3031.3958129882812,
"dapo/avg_reward_std": 0.3008538554696476,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.5196078463512308,
"dapo/num_sampling_attempts": 2.125,
"dapo/sampling_efficiency": 76.5625,
"dapo/total_prompts_processed": 12.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.22628571428571428,
"grad_norm": 0.48223650455474854,
"kl": 0.10247802734375,
"learning_rate": 1.0061670936044178e-07,
"loss": 0.0648,
"reward": 0.573589576408267,
"reward_std": 0.9578919112682343,
"step": 198
},
{
"clip_fraction": 0.0,
"completion_length": 2948.3854064941406,
"dapo/avg_reward_std": 0.43072181940078735,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.8333333373069763,
"dapo/num_sampling_attempts": 1.25,
"dapo/sampling_efficiency": 87.5,
"dapo/total_prompts_processed": 7.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.22742857142857142,
"grad_norm": 0.6141620874404907,
"kl": 0.09808349609375,
"learning_rate": 1.002741278414069e-07,
"loss": 0.0827,
"reward": 0.7053878791630268,
"reward_std": 0.9694960787892342,
"step": 199
},
{
"clip_fraction": 0.0,
"completion_length": 2714.482666015625,
"dapo/avg_reward_std": 0.26207208441149804,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.37096774914572317,
"dapo/num_sampling_attempts": 3.875,
"dapo/sampling_efficiency": 30.624999999999993,
"dapo/total_prompts_processed": 23.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.22857142857142856,
"grad_norm": 0.2072688341140747,
"kl": 0.1064453125,
"learning_rate": 1.0006853717962393e-07,
"loss": 0.0122,
"reward": 0.5771910101175308,
"reward_std": 0.9156405553221703,
"step": 200
},
{
"epoch": 0.22857142857142856,
"step": 200,
"total_flos": 0.0,
"train_loss": 0.02940896774176508,
"train_runtime": 83918.4654,
"train_samples_per_second": 0.114,
"train_steps_per_second": 0.002
}
],
"logging_steps": 1,
"max_steps": 200,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 10,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 6,
"trial_name": null,
"trial_params": null
}