| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.22857142857142856, | |
| "eval_steps": 500, | |
| "global_step": 200, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2216.625045776367, | |
| "dapo/avg_reward_std": 0.23920068350331536, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3735632248993578, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 33.86904761904762, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.001142857142857143, | |
| "grad_norm": 0.10874509066343307, | |
| "kl": 0.0, | |
| "learning_rate": 0.0, | |
| "loss": 0.0468, | |
| "reward": 0.6486758906394243, | |
| "reward_std": 0.9342863708734512, | |
| "step": 1 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2926.4757690429688, | |
| "dapo/avg_reward_std": 0.24011585204040303, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3284313836518456, | |
| "dapo/num_sampling_attempts": 4.25, | |
| "dapo/sampling_efficiency": 26.874999999999993, | |
| "dapo/total_prompts_processed": 25.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.002285714285714286, | |
| "grad_norm": 0.12814132869243622, | |
| "kl": 0.0, | |
| "learning_rate": 1e-07, | |
| "loss": 0.0508, | |
| "reward": 0.2922485675662756, | |
| "reward_std": 0.9327598959207535, | |
| "step": 2 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2888.1527709960938, | |
| "dapo/avg_reward_std": 0.2903491040070852, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.36111111839612325, | |
| "dapo/num_sampling_attempts": 3.75, | |
| "dapo/sampling_efficiency": 36.875, | |
| "dapo/total_prompts_processed": 22.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.0034285714285714284, | |
| "grad_norm": 0.1155443787574768, | |
| "kl": 2.9146671295166016e-05, | |
| "learning_rate": 2e-07, | |
| "loss": 0.0647, | |
| "reward": 0.3509849710389972, | |
| "reward_std": 0.9315856546163559, | |
| "step": 3 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2535.718734741211, | |
| "dapo/avg_reward_std": 0.25628158891642533, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.35802469595714853, | |
| "dapo/num_sampling_attempts": 3.375, | |
| "dapo/sampling_efficiency": 41.56249999999999, | |
| "dapo/total_prompts_processed": 20.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.004571428571428572, | |
| "grad_norm": 0.14338600635528564, | |
| "kl": 2.1044164896011353e-05, | |
| "learning_rate": 3e-07, | |
| "loss": 0.0536, | |
| "reward": 0.5615630690008402, | |
| "reward_std": 0.9670609682798386, | |
| "step": 4 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2548.916702270508, | |
| "dapo/avg_reward_std": 0.2889887053391029, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.40804598814454573, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 36.875, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.005714285714285714, | |
| "grad_norm": 0.10121661424636841, | |
| "kl": 2.7820467948913574e-05, | |
| "learning_rate": 4e-07, | |
| "loss": 0.0263, | |
| "reward": 0.5986085031181574, | |
| "reward_std": 0.9444186091423035, | |
| "step": 5 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2357.579864501953, | |
| "dapo/avg_reward_std": 0.30308351665735245, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.36309524306229185, | |
| "dapo/num_sampling_attempts": 3.5, | |
| "dapo/sampling_efficiency": 37.5, | |
| "dapo/total_prompts_processed": 21.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.006857142857142857, | |
| "grad_norm": 0.171969935297966, | |
| "kl": 2.6032328605651855e-05, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0906, | |
| "reward": 0.4527070773765445, | |
| "reward_std": 0.9109365493059158, | |
| "step": 6 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2404.2534790039062, | |
| "dapo/avg_reward_std": 0.3077041815828394, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.41975309506610586, | |
| "dapo/num_sampling_attempts": 3.375, | |
| "dapo/sampling_efficiency": 37.916666666666664, | |
| "dapo/total_prompts_processed": 20.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.008, | |
| "grad_norm": 0.12406504899263382, | |
| "kl": 1.9066035747528076e-05, | |
| "learning_rate": 6e-07, | |
| "loss": 0.0645, | |
| "reward": 0.5808906648308039, | |
| "reward_std": 0.9664968773722649, | |
| "step": 7 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2833.3056030273438, | |
| "dapo/avg_reward_std": 0.2214778729023472, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.31182796435971416, | |
| "dapo/num_sampling_attempts": 3.875, | |
| "dapo/sampling_efficiency": 36.577380952380956, | |
| "dapo/total_prompts_processed": 23.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.009142857142857144, | |
| "grad_norm": 0.13480524718761444, | |
| "kl": 3.4965574741363525e-05, | |
| "learning_rate": 7e-07, | |
| "loss": 0.0738, | |
| "reward": 0.5177570842206478, | |
| "reward_std": 0.9147621840238571, | |
| "step": 8 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2965.6736450195312, | |
| "dapo/avg_reward_std": 0.2788830002148946, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3888888966154169, | |
| "dapo/num_sampling_attempts": 3.375, | |
| "dapo/sampling_efficiency": 46.36904761904761, | |
| "dapo/total_prompts_processed": 20.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.010285714285714285, | |
| "grad_norm": 0.08226096630096436, | |
| "kl": 1.4536082744598389e-05, | |
| "learning_rate": 8e-07, | |
| "loss": 0.0316, | |
| "reward": 0.5644797384738922, | |
| "reward_std": 0.9423079788684845, | |
| "step": 9 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2574.461814880371, | |
| "dapo/avg_reward_std": 0.3602010520065532, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.588235302883036, | |
| "dapo/num_sampling_attempts": 2.125, | |
| "dapo/sampling_efficiency": 61.45833333333333, | |
| "dapo/total_prompts_processed": 12.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.011428571428571429, | |
| "grad_norm": 0.1667146533727646, | |
| "kl": 2.9319897294044495e-05, | |
| "learning_rate": 9e-07, | |
| "loss": 0.0894, | |
| "reward": 0.6415909845381975, | |
| "reward_std": 0.9869548827409744, | |
| "step": 10 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2798.982666015625, | |
| "dapo/avg_reward_std": 0.15393146287117684, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.14880952797830105, | |
| "dapo/num_sampling_attempts": 7.0, | |
| "dapo/sampling_efficiency": 15.882936507936506, | |
| "dapo/total_prompts_processed": 42.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.012571428571428572, | |
| "grad_norm": 0.1166534572839737, | |
| "kl": 2.0567327737808228e-05, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0207, | |
| "reward": 0.2987014357931912, | |
| "reward_std": 0.868266686797142, | |
| "step": 11 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2377.555595397949, | |
| "dapo/avg_reward_std": 0.21645361091941595, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2968750037252903, | |
| "dapo/num_sampling_attempts": 4.0, | |
| "dapo/sampling_efficiency": 38.125, | |
| "dapo/total_prompts_processed": 24.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.013714285714285714, | |
| "grad_norm": 0.23483960330486298, | |
| "kl": 3.6854296922683716e-05, | |
| "learning_rate": 9.997258721585931e-07, | |
| "loss": 0.0491, | |
| "reward": 0.6348252706229687, | |
| "reward_std": 0.9863902181386948, | |
| "step": 12 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2688.1111755371094, | |
| "dapo/avg_reward_std": 0.34906478971242905, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.49166667461395264, | |
| "dapo/num_sampling_attempts": 2.5, | |
| "dapo/sampling_efficiency": 52.08333333333333, | |
| "dapo/total_prompts_processed": 15.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.014857142857142857, | |
| "grad_norm": 0.09364266693592072, | |
| "kl": 3.152713179588318e-05, | |
| "learning_rate": 9.989038226169207e-07, | |
| "loss": 0.0431, | |
| "reward": 0.5878111608326435, | |
| "reward_std": 0.9752944633364677, | |
| "step": 13 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2029.9132270812988, | |
| "dapo/avg_reward_std": 0.25792322993278505, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.36666666984558105, | |
| "dapo/num_sampling_attempts": 3.125, | |
| "dapo/sampling_efficiency": 57.5, | |
| "dapo/total_prompts_processed": 18.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.016, | |
| "grad_norm": 0.13894271850585938, | |
| "kl": 4.156678915023804e-05, | |
| "learning_rate": 9.975348529157229e-07, | |
| "loss": 0.0279, | |
| "reward": 0.5834919223561883, | |
| "reward_std": 0.9710095003247261, | |
| "step": 14 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2817.8576583862305, | |
| "dapo/avg_reward_std": 0.3106007158756256, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.5333333484828472, | |
| "dapo/num_sampling_attempts": 2.5, | |
| "dapo/sampling_efficiency": 52.08333333333333, | |
| "dapo/total_prompts_processed": 15.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.017142857142857144, | |
| "grad_norm": 0.08778129518032074, | |
| "kl": 3.078579902648926e-05, | |
| "learning_rate": 9.956206309337066e-07, | |
| "loss": 0.0343, | |
| "reward": 0.6716702915728092, | |
| "reward_std": 0.99223193526268, | |
| "step": 15 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2570.2500076293945, | |
| "dapo/avg_reward_std": 0.244095021715531, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.35897436336829114, | |
| "dapo/num_sampling_attempts": 3.25, | |
| "dapo/sampling_efficiency": 44.49404761904762, | |
| "dapo/total_prompts_processed": 19.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.018285714285714287, | |
| "grad_norm": 0.07460447400808334, | |
| "kl": 0.00025935471057891846, | |
| "learning_rate": 9.931634888554935e-07, | |
| "loss": 0.0146, | |
| "reward": 0.7213943339884281, | |
| "reward_std": 0.9671430364251137, | |
| "step": 16 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2483.413215637207, | |
| "dapo/avg_reward_std": 0.2672279636065165, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.35000000496705375, | |
| "dapo/num_sampling_attempts": 3.75, | |
| "dapo/sampling_efficiency": 29.166666666666664, | |
| "dapo/total_prompts_processed": 22.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.019428571428571427, | |
| "grad_norm": 0.12397046387195587, | |
| "kl": 0.00022289156913757324, | |
| "learning_rate": 9.901664203302124e-07, | |
| "loss": 0.0624, | |
| "reward": 0.4952134042978287, | |
| "reward_std": 0.9074268043041229, | |
| "step": 17 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2537.8194580078125, | |
| "dapo/avg_reward_std": 0.34170445956681905, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.5438596567040995, | |
| "dapo/num_sampling_attempts": 2.375, | |
| "dapo/sampling_efficiency": 48.95833333333333, | |
| "dapo/total_prompts_processed": 14.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.02057142857142857, | |
| "grad_norm": 0.1614188253879547, | |
| "kl": 0.0003694295883178711, | |
| "learning_rate": 9.866330768241983e-07, | |
| "loss": 0.1136, | |
| "reward": 0.6263789646327496, | |
| "reward_std": 0.9367138147354126, | |
| "step": 18 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2041.2916984558105, | |
| "dapo/avg_reward_std": 0.23441629879402393, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.31818182224577124, | |
| "dapo/num_sampling_attempts": 4.125, | |
| "dapo/sampling_efficiency": 38.36805555555556, | |
| "dapo/total_prompts_processed": 24.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.021714285714285714, | |
| "grad_norm": 0.2115960717201233, | |
| "kl": 0.0005898326635360718, | |
| "learning_rate": 9.825677631722435e-07, | |
| "loss": 0.0603, | |
| "reward": 0.6228582374751568, | |
| "reward_std": 0.9455358982086182, | |
| "step": 19 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2392.7882385253906, | |
| "dapo/avg_reward_std": 0.22908216629709516, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2952381010566439, | |
| "dapo/num_sampling_attempts": 4.375, | |
| "dapo/sampling_efficiency": 33.541666666666664, | |
| "dapo/total_prompts_processed": 26.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.022857142857142857, | |
| "grad_norm": 0.20383711159229279, | |
| "kl": 0.0008958578109741211, | |
| "learning_rate": 9.779754323328192e-07, | |
| "loss": 0.1313, | |
| "reward": 0.41653589624911547, | |
| "reward_std": 0.9027180448174477, | |
| "step": 20 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2966.260452270508, | |
| "dapo/avg_reward_std": 0.16204138861762152, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.25555555986033546, | |
| "dapo/num_sampling_attempts": 5.625, | |
| "dapo/sampling_efficiency": 22.84722222222222, | |
| "dapo/total_prompts_processed": 33.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.024, | |
| "grad_norm": 0.1098903939127922, | |
| "kl": 0.0002017766237258911, | |
| "learning_rate": 9.728616793536587e-07, | |
| "loss": 0.0825, | |
| "reward": 0.43902475386857986, | |
| "reward_std": 0.9111825451254845, | |
| "step": 21 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3016.357696533203, | |
| "dapo/avg_reward_std": 0.28799043401427893, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.42028986371081806, | |
| "dapo/num_sampling_attempts": 2.875, | |
| "dapo/sampling_efficiency": 52.20238095238095, | |
| "dapo/total_prompts_processed": 17.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.025142857142857144, | |
| "grad_norm": 0.1315963715314865, | |
| "kl": 0.0005468130111694336, | |
| "learning_rate": 9.672327345550543e-07, | |
| "loss": 0.0657, | |
| "reward": 0.5281127206981182, | |
| "reward_std": 0.9846171587705612, | |
| "step": 22 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2408.8333282470703, | |
| "dapo/avg_reward_std": 0.24506365811383282, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3703703780968984, | |
| "dapo/num_sampling_attempts": 3.375, | |
| "dapo/sampling_efficiency": 40.74404761904761, | |
| "dapo/total_prompts_processed": 20.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.026285714285714287, | |
| "grad_norm": 0.12457310408353806, | |
| "kl": 0.001109391450881958, | |
| "learning_rate": 9.610954559391704e-07, | |
| "loss": 0.0304, | |
| "reward": 0.6419337540864944, | |
| "reward_std": 0.9689808040857315, | |
| "step": 23 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2891.7777709960938, | |
| "dapo/avg_reward_std": 0.2580765459848487, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4420289954413538, | |
| "dapo/num_sampling_attempts": 2.875, | |
| "dapo/sampling_efficiency": 51.785714285714285, | |
| "dapo/total_prompts_processed": 17.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.027428571428571427, | |
| "grad_norm": 0.09673310071229935, | |
| "kl": 0.0006018728017807007, | |
| "learning_rate": 9.54457320834625e-07, | |
| "loss": 0.0143, | |
| "reward": 0.4589955974370241, | |
| "reward_std": 0.9405186697840691, | |
| "step": 24 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2994.8159790039062, | |
| "dapo/avg_reward_std": 0.24148962597052256, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3777777850627899, | |
| "dapo/num_sampling_attempts": 3.75, | |
| "dapo/sampling_efficiency": 34.99999999999999, | |
| "dapo/total_prompts_processed": 22.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.02857142857142857, | |
| "grad_norm": 0.12189235538244247, | |
| "kl": 0.0021944642066955566, | |
| "learning_rate": 9.473264167865171e-07, | |
| "loss": 0.0869, | |
| "reward": 0.4214114509522915, | |
| "reward_std": 0.918621838092804, | |
| "step": 25 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3106.2743530273438, | |
| "dapo/avg_reward_std": 0.21211836412549018, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.23750000558793544, | |
| "dapo/num_sampling_attempts": 5.0, | |
| "dapo/sampling_efficiency": 25.729166666666664, | |
| "dapo/total_prompts_processed": 30.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.029714285714285714, | |
| "grad_norm": 0.11006143689155579, | |
| "kl": 0.002092994749546051, | |
| "learning_rate": 9.397114317029974e-07, | |
| "loss": 0.0617, | |
| "reward": 0.4296974149765447, | |
| "reward_std": 0.9136241301894188, | |
| "step": 26 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2601.388946533203, | |
| "dapo/avg_reward_std": 0.24121128850513035, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.28240741416811943, | |
| "dapo/num_sampling_attempts": 4.5, | |
| "dapo/sampling_efficiency": 25.76388888888889, | |
| "dapo/total_prompts_processed": 27.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.030857142857142857, | |
| "grad_norm": 0.11345893889665604, | |
| "kl": 0.003206908702850342, | |
| "learning_rate": 9.316216432703916e-07, | |
| "loss": 0.0926, | |
| "reward": 0.5876726619899273, | |
| "reward_std": 0.9382903277873993, | |
| "step": 27 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2861.6180839538574, | |
| "dapo/avg_reward_std": 0.23961352888080809, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3055555605226093, | |
| "dapo/num_sampling_attempts": 4.5, | |
| "dapo/sampling_efficiency": 27.94642857142857, | |
| "dapo/total_prompts_processed": 27.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.032, | |
| "grad_norm": 0.1445908397436142, | |
| "kl": 0.0031346678733825684, | |
| "learning_rate": 9.230669076497687e-07, | |
| "loss": 0.0852, | |
| "reward": 0.40619770623743534, | |
| "reward_std": 0.9506878778338432, | |
| "step": 28 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2729.1875, | |
| "dapo/avg_reward_std": 0.24243796567122142, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.35555555919806164, | |
| "dapo/num_sampling_attempts": 3.75, | |
| "dapo/sampling_efficiency": 39.93055555555555, | |
| "dapo/total_prompts_processed": 22.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.03314285714285714, | |
| "grad_norm": 0.11093314737081528, | |
| "kl": 0.0027089565992355347, | |
| "learning_rate": 9.140576474687263e-07, | |
| "loss": 0.0604, | |
| "reward": 0.6693072468042374, | |
| "reward_std": 0.9926005378365517, | |
| "step": 29 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3155.7083740234375, | |
| "dapo/avg_reward_std": 0.222336781601752, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.311827961956301, | |
| "dapo/num_sampling_attempts": 3.875, | |
| "dapo/sampling_efficiency": 42.93154761904761, | |
| "dapo/total_prompts_processed": 23.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.03428571428571429, | |
| "grad_norm": 0.08208812773227692, | |
| "kl": 0.001552581787109375, | |
| "learning_rate": 9.046048391230247e-07, | |
| "loss": 0.0268, | |
| "reward": 0.521108225453645, | |
| "reward_std": 0.9469912871718407, | |
| "step": 30 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2657.559036254883, | |
| "dapo/avg_reward_std": 0.1865689324008094, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.259259263260497, | |
| "dapo/num_sampling_attempts": 4.5, | |
| "dapo/sampling_efficiency": 51.076388888888886, | |
| "dapo/total_prompts_processed": 27.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.03542857142857143, | |
| "grad_norm": 0.1316680908203125, | |
| "kl": 0.009428024291992188, | |
| "learning_rate": 8.9471999940354e-07, | |
| "loss": 0.0745, | |
| "reward": 0.6315789166837931, | |
| "reward_std": 0.9327967762947083, | |
| "step": 31 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3071.7535095214844, | |
| "dapo/avg_reward_std": 0.3048748767375946, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.40000000298023225, | |
| "dapo/num_sampling_attempts": 3.125, | |
| "dapo/sampling_efficiency": 55.104166666666664, | |
| "dapo/total_prompts_processed": 18.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.036571428571428574, | |
| "grad_norm": 0.10442829132080078, | |
| "kl": 0.0021753311157226562, | |
| "learning_rate": 8.844151714648274e-07, | |
| "loss": 0.0567, | |
| "reward": 0.5447857324033976, | |
| "reward_std": 0.921301856637001, | |
| "step": 32 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3025.826416015625, | |
| "dapo/avg_reward_std": 0.23097028769552708, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3385416748933494, | |
| "dapo/num_sampling_attempts": 4.0, | |
| "dapo/sampling_efficiency": 38.95833333333333, | |
| "dapo/total_prompts_processed": 24.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.037714285714285714, | |
| "grad_norm": 0.09167502820491791, | |
| "kl": 0.003194093704223633, | |
| "learning_rate": 8.737029101523929e-07, | |
| "loss": 0.0612, | |
| "reward": 0.5547973131760955, | |
| "reward_std": 0.9730775579810143, | |
| "step": 33 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2558.7812423706055, | |
| "dapo/avg_reward_std": 0.2557758816650936, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3452381007373333, | |
| "dapo/num_sampling_attempts": 3.5, | |
| "dapo/sampling_efficiency": 42.113095238095234, | |
| "dapo/total_prompts_processed": 21.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.038857142857142854, | |
| "grad_norm": 0.11055821925401688, | |
| "kl": 0.019285082817077637, | |
| "learning_rate": 8.625962667065487e-07, | |
| "loss": 0.0831, | |
| "reward": 0.5826370492577553, | |
| "reward_std": 0.9168377369642258, | |
| "step": 34 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2909.2361602783203, | |
| "dapo/avg_reward_std": 0.22593376713414345, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.30645161626800416, | |
| "dapo/num_sampling_attempts": 3.875, | |
| "dapo/sampling_efficiency": 49.598214285714285, | |
| "dapo/total_prompts_processed": 23.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.04, | |
| "grad_norm": 0.09941194951534271, | |
| "kl": 0.004673004150390625, | |
| "learning_rate": 8.511087728614862e-07, | |
| "loss": 0.0581, | |
| "reward": 0.5392080545425415, | |
| "reward_std": 0.9793680757284164, | |
| "step": 35 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2629.3333435058594, | |
| "dapo/avg_reward_std": 0.2632370889186859, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.338541675824672, | |
| "dapo/num_sampling_attempts": 4.0, | |
| "dapo/sampling_efficiency": 29.513888888888886, | |
| "dapo/total_prompts_processed": 24.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.04114285714285714, | |
| "grad_norm": 0.17353901267051697, | |
| "kl": 0.010207176208496094, | |
| "learning_rate": 8.392544243589427e-07, | |
| "loss": 0.0623, | |
| "reward": 0.5811682712519541, | |
| "reward_std": 0.9331383407115936, | |
| "step": 36 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3220.9409790039062, | |
| "dapo/avg_reward_std": 0.2187359256403787, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.29047619913305556, | |
| "dapo/num_sampling_attempts": 4.375, | |
| "dapo/sampling_efficiency": 36.25, | |
| "dapo/total_prompts_processed": 26.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.04228571428571429, | |
| "grad_norm": 0.10708803683519363, | |
| "kl": 0.0023801326751708984, | |
| "learning_rate": 8.270476638965461e-07, | |
| "loss": 0.0657, | |
| "reward": 0.48440539091825485, | |
| "reward_std": 0.9014616012573242, | |
| "step": 37 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3233.420135498047, | |
| "dapo/avg_reward_std": 0.2624325007200241, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.30208333721384406, | |
| "dapo/num_sampling_attempts": 4.0, | |
| "dapo/sampling_efficiency": 30.119047619047617, | |
| "dapo/total_prompts_processed": 24.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.04342857142857143, | |
| "grad_norm": 0.0923333689570427, | |
| "kl": 0.0031156539916992188, | |
| "learning_rate": 8.145033635316128e-07, | |
| "loss": 0.053, | |
| "reward": 0.45120809972286224, | |
| "reward_std": 0.9732232913374901, | |
| "step": 38 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2787.031280517578, | |
| "dapo/avg_reward_std": 0.1930955442644301, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.23412698933056422, | |
| "dapo/num_sampling_attempts": 5.25, | |
| "dapo/sampling_efficiency": 23.244047619047617, | |
| "dapo/total_prompts_processed": 31.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.044571428571428574, | |
| "grad_norm": 0.12707453966140747, | |
| "kl": 0.006325244903564453, | |
| "learning_rate": 8.01636806561836e-07, | |
| "loss": 0.0905, | |
| "reward": 0.5048832832835615, | |
| "reward_std": 0.9330806732177734, | |
| "step": 39 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2921.6180572509766, | |
| "dapo/avg_reward_std": 0.25906160804960465, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3009259340663751, | |
| "dapo/num_sampling_attempts": 4.5, | |
| "dapo/sampling_efficiency": 31.562499999999996, | |
| "dapo/total_prompts_processed": 27.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.045714285714285714, | |
| "grad_norm": 0.1152920126914978, | |
| "kl": 0.004504203796386719, | |
| "learning_rate": 7.884636689049422e-07, | |
| "loss": 0.0443, | |
| "reward": 0.3671413380652666, | |
| "reward_std": 0.9126428663730621, | |
| "step": 40 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3100.8194732666016, | |
| "dapo/avg_reward_std": 0.26266304695087933, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3985507280930229, | |
| "dapo/num_sampling_attempts": 2.875, | |
| "dapo/sampling_efficiency": 59.895833333333336, | |
| "dapo/total_prompts_processed": 17.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.046857142857142854, | |
| "grad_norm": 0.1462322324514389, | |
| "kl": 0.0058536529541015625, | |
| "learning_rate": 7.75e-07, | |
| "loss": 0.0836, | |
| "reward": 0.6537042334675789, | |
| "reward_std": 0.9643120691180229, | |
| "step": 41 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3083.8541870117188, | |
| "dapo/avg_reward_std": 0.2028282030540354, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.27941177215646296, | |
| "dapo/num_sampling_attempts": 4.25, | |
| "dapo/sampling_efficiency": 34.61309523809524, | |
| "dapo/total_prompts_processed": 25.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.048, | |
| "grad_norm": 0.11620575189590454, | |
| "kl": 0.005963563919067383, | |
| "learning_rate": 7.612622032536507e-07, | |
| "loss": 0.0756, | |
| "reward": 0.6132493373006582, | |
| "reward_std": 0.9271278157830238, | |
| "step": 42 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2860.6840209960938, | |
| "dapo/avg_reward_std": 0.2537354379892349, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.31666667262713116, | |
| "dapo/num_sampling_attempts": 3.75, | |
| "dapo/sampling_efficiency": 27.916666666666664, | |
| "dapo/total_prompts_processed": 22.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.04914285714285714, | |
| "grad_norm": 0.15706917643547058, | |
| "kl": 0.012288570404052734, | |
| "learning_rate": 7.472670160550848e-07, | |
| "loss": 0.0864, | |
| "reward": 0.4896182883530855, | |
| "reward_std": 0.9406783953309059, | |
| "step": 43 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3230.951416015625, | |
| "dapo/avg_reward_std": 0.2785276919603348, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4047619104385376, | |
| "dapo/num_sampling_attempts": 3.5, | |
| "dapo/sampling_efficiency": 35.20833333333333, | |
| "dapo/total_prompts_processed": 21.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.05028571428571429, | |
| "grad_norm": 0.10281670838594437, | |
| "kl": 0.0028905868530273438, | |
| "learning_rate": 7.330314893841101e-07, | |
| "loss": 0.0474, | |
| "reward": 0.5266857808455825, | |
| "reward_std": 0.9769049882888794, | |
| "step": 44 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2598.888885498047, | |
| "dapo/avg_reward_std": 0.25520460651471066, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3205128231873879, | |
| "dapo/num_sampling_attempts": 3.25, | |
| "dapo/sampling_efficiency": 54.61309523809524, | |
| "dapo/total_prompts_processed": 19.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.05142857142857143, | |
| "grad_norm": 0.20818237960338593, | |
| "kl": 0.0046825408935546875, | |
| "learning_rate": 7.185729670371604e-07, | |
| "loss": 0.111, | |
| "reward": 0.8208948634564877, | |
| "reward_std": 0.9365335553884506, | |
| "step": 45 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2529.66316986084, | |
| "dapo/avg_reward_std": 0.23859836988978916, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2916666753590107, | |
| "dapo/num_sampling_attempts": 4.5, | |
| "dapo/sampling_efficiency": 25.535714285714285, | |
| "dapo/total_prompts_processed": 27.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.052571428571428575, | |
| "grad_norm": 0.12924660742282867, | |
| "kl": 0.05440711975097656, | |
| "learning_rate": 7.039090644965509e-07, | |
| "loss": 0.058, | |
| "reward": 0.5307688321918249, | |
| "reward_std": 0.9391194358468056, | |
| "step": 46 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2737.288230895996, | |
| "dapo/avg_reward_std": 0.25754969901052016, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3678160998327979, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 41.14583333333333, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.053714285714285714, | |
| "grad_norm": 0.1452113687992096, | |
| "kl": 0.01877737045288086, | |
| "learning_rate": 6.890576474687263e-07, | |
| "loss": 0.0601, | |
| "reward": 0.5596560873091221, | |
| "reward_std": 0.9911476969718933, | |
| "step": 47 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2543.0694885253906, | |
| "dapo/avg_reward_std": 0.2434165603839434, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3717948794364929, | |
| "dapo/num_sampling_attempts": 3.25, | |
| "dapo/sampling_efficiency": 34.37499999999999, | |
| "dapo/total_prompts_processed": 19.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.054857142857142854, | |
| "grad_norm": 0.15664616227149963, | |
| "kl": 0.008816719055175781, | |
| "learning_rate": 6.740368101176495e-07, | |
| "loss": 0.0783, | |
| "reward": 0.7667456082999706, | |
| "reward_std": 0.9330208897590637, | |
| "step": 48 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3054.357666015625, | |
| "dapo/avg_reward_std": 0.16933719928448016, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.22222222693455526, | |
| "dapo/num_sampling_attempts": 4.875, | |
| "dapo/sampling_efficiency": 26.5625, | |
| "dapo/total_prompts_processed": 29.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.056, | |
| "grad_norm": 0.13884593546390533, | |
| "kl": 0.00569915771484375, | |
| "learning_rate": 6.588648530198504e-07, | |
| "loss": 0.0645, | |
| "reward": 0.7750914767384529, | |
| "reward_std": 0.9781928732991219, | |
| "step": 49 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3030.9652709960938, | |
| "dapo/avg_reward_std": 0.2089548914721518, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.28282828629016876, | |
| "dapo/num_sampling_attempts": 4.125, | |
| "dapo/sampling_efficiency": 33.779761904761905, | |
| "dapo/total_prompts_processed": 24.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.05714285714285714, | |
| "grad_norm": 0.13095000386238098, | |
| "kl": 0.005908966064453125, | |
| "learning_rate": 6.435602608679916e-07, | |
| "loss": 0.0854, | |
| "reward": 0.7626989148557186, | |
| "reward_std": 0.9684056863188744, | |
| "step": 50 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3176.8819274902344, | |
| "dapo/avg_reward_std": 0.2258962235516972, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.29629630057348144, | |
| "dapo/num_sampling_attempts": 4.5, | |
| "dapo/sampling_efficiency": 33.25892857142857, | |
| "dapo/total_prompts_processed": 27.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.05828571428571429, | |
| "grad_norm": 0.11041354387998581, | |
| "kl": 0.002262115478515625, | |
| "learning_rate": 6.281416799501187e-07, | |
| "loss": 0.0892, | |
| "reward": 0.6493857521563768, | |
| "reward_std": 0.9608959034085274, | |
| "step": 51 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2991.208366394043, | |
| "dapo/avg_reward_std": 0.23346692004374095, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3392857201397419, | |
| "dapo/num_sampling_attempts": 3.5, | |
| "dapo/sampling_efficiency": 52.70833333333333, | |
| "dapo/total_prompts_processed": 21.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.05942857142857143, | |
| "grad_norm": 0.13827170431613922, | |
| "kl": 0.014558792114257812, | |
| "learning_rate": 6.126278954320294e-07, | |
| "loss": 0.0435, | |
| "reward": 0.5274152141064405, | |
| "reward_std": 0.9937505125999451, | |
| "step": 52 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2921.013946533203, | |
| "dapo/avg_reward_std": 0.2715419438378564, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3390804626818361, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 51.5625, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.060571428571428575, | |
| "grad_norm": 0.09735170006752014, | |
| "kl": 0.009172439575195312, | |
| "learning_rate": 5.97037808470444e-07, | |
| "loss": 0.0541, | |
| "reward": 0.7217882052063942, | |
| "reward_std": 0.9594404622912407, | |
| "step": 53 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3133.46875, | |
| "dapo/avg_reward_std": 0.2624934350068753, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.35256411077884525, | |
| "dapo/num_sampling_attempts": 3.25, | |
| "dapo/sampling_efficiency": 41.041666666666664, | |
| "dapo/total_prompts_processed": 19.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.061714285714285715, | |
| "grad_norm": 0.10414379835128784, | |
| "kl": 0.010915756225585938, | |
| "learning_rate": 5.813904131848564e-07, | |
| "loss": 0.061, | |
| "reward": 0.5302782151848078, | |
| "reward_std": 0.9707583636045456, | |
| "step": 54 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3010.5938110351562, | |
| "dapo/avg_reward_std": 0.21664191484451295, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.24444444941149818, | |
| "dapo/num_sampling_attempts": 5.625, | |
| "dapo/sampling_efficiency": 19.791666666666664, | |
| "dapo/total_prompts_processed": 33.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.06285714285714286, | |
| "grad_norm": 0.11232081800699234, | |
| "kl": 0.012262344360351562, | |
| "learning_rate": 5.657047735161255e-07, | |
| "loss": 0.0561, | |
| "reward": 0.5284321270883083, | |
| "reward_std": 0.9165859594941139, | |
| "step": 55 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3144.951416015625, | |
| "dapo/avg_reward_std": 0.2279102834207671, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.34523809807641165, | |
| "dapo/num_sampling_attempts": 3.5, | |
| "dapo/sampling_efficiency": 45.32738095238095, | |
| "dapo/total_prompts_processed": 21.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.064, | |
| "grad_norm": 0.13161872327327728, | |
| "kl": 0.007735252380371094, | |
| "learning_rate": 5.5e-07, | |
| "loss": 0.0717, | |
| "reward": 0.6519734226167202, | |
| "reward_std": 0.9642440155148506, | |
| "step": 56 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3222.6111450195312, | |
| "dapo/avg_reward_std": 0.2675224413042483, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4492753724689069, | |
| "dapo/num_sampling_attempts": 2.875, | |
| "dapo/sampling_efficiency": 45.535714285714285, | |
| "dapo/total_prompts_processed": 17.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.06514285714285714, | |
| "grad_norm": 0.09332293272018433, | |
| "kl": 0.0064525604248046875, | |
| "learning_rate": 5.342952264838747e-07, | |
| "loss": 0.0302, | |
| "reward": 0.5501165799796581, | |
| "reward_std": 0.9585564360022545, | |
| "step": 57 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2679.9236907958984, | |
| "dapo/avg_reward_std": 0.17708626160254845, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.24358974741055414, | |
| "dapo/num_sampling_attempts": 4.875, | |
| "dapo/sampling_efficiency": 28.91865079365079, | |
| "dapo/total_prompts_processed": 29.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.06628571428571428, | |
| "grad_norm": 0.16309793293476105, | |
| "kl": 0.01690673828125, | |
| "learning_rate": 5.186095868151436e-07, | |
| "loss": 0.0846, | |
| "reward": 0.8469000309705734, | |
| "reward_std": 0.9497043192386627, | |
| "step": 58 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2847.048629760742, | |
| "dapo/avg_reward_std": 0.2622834824282548, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3563218476443455, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 29.999999999999993, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.06742857142857143, | |
| "grad_norm": 0.09638360142707825, | |
| "kl": 0.0057086944580078125, | |
| "learning_rate": 5.02962191529556e-07, | |
| "loss": 0.0634, | |
| "reward": 0.6089529246091843, | |
| "reward_std": 0.9450863003730774, | |
| "step": 59 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3119.9132385253906, | |
| "dapo/avg_reward_std": 0.19833819533503333, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2674418656631958, | |
| "dapo/num_sampling_attempts": 5.375, | |
| "dapo/sampling_efficiency": 29.563492063492063, | |
| "dapo/total_prompts_processed": 32.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.06857142857142857, | |
| "grad_norm": 0.1252850890159607, | |
| "kl": 0.008715629577636719, | |
| "learning_rate": 4.873721045679706e-07, | |
| "loss": 0.0666, | |
| "reward": 0.5249154977500439, | |
| "reward_std": 0.947566568851471, | |
| "step": 60 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2844.795181274414, | |
| "dapo/avg_reward_std": 0.2648707001373686, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.35632184610284606, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 39.791666666666664, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.06971428571428571, | |
| "grad_norm": 0.10366301238536835, | |
| "kl": 0.056069374084472656, | |
| "learning_rate": 4.7185832004988133e-07, | |
| "loss": 0.037, | |
| "reward": 0.5161248315125704, | |
| "reward_std": 0.9692364558577538, | |
| "step": 61 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3053.951446533203, | |
| "dapo/avg_reward_std": 0.21576767837679064, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.25225225574261434, | |
| "dapo/num_sampling_attempts": 4.625, | |
| "dapo/sampling_efficiency": 37.013888888888886, | |
| "dapo/total_prompts_processed": 27.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.07085714285714285, | |
| "grad_norm": 0.14441759884357452, | |
| "kl": 0.009164810180664062, | |
| "learning_rate": 4.5643973913200837e-07, | |
| "loss": 0.0609, | |
| "reward": 0.6510533541440964, | |
| "reward_std": 0.9361515268683434, | |
| "step": 62 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3326.781280517578, | |
| "dapo/avg_reward_std": 0.2158982500885472, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3181818254066236, | |
| "dapo/num_sampling_attempts": 4.125, | |
| "dapo/sampling_efficiency": 44.49404761904761, | |
| "dapo/total_prompts_processed": 24.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.072, | |
| "grad_norm": 0.12127737700939178, | |
| "kl": 0.031108856201171875, | |
| "learning_rate": 4.4113514698014953e-07, | |
| "loss": 0.0463, | |
| "reward": 0.45860649459064007, | |
| "reward_std": 0.9209225550293922, | |
| "step": 63 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3208.6319885253906, | |
| "dapo/avg_reward_std": 0.28419332668699065, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3563218440475135, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 53.591269841269835, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.07314285714285715, | |
| "grad_norm": 0.13326792418956757, | |
| "kl": 0.0061321258544921875, | |
| "learning_rate": 4.2596318988235037e-07, | |
| "loss": 0.0614, | |
| "reward": 0.5644803196191788, | |
| "reward_std": 0.9919605851173401, | |
| "step": 64 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2597.437530517578, | |
| "dapo/avg_reward_std": 0.2766759342380932, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3928571529686451, | |
| "dapo/num_sampling_attempts": 3.5, | |
| "dapo/sampling_efficiency": 32.08333333333333, | |
| "dapo/total_prompts_processed": 21.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.07428571428571429, | |
| "grad_norm": 0.10434358566999435, | |
| "kl": 0.049472808837890625, | |
| "learning_rate": 4.1094235253127374e-07, | |
| "loss": 0.0312, | |
| "reward": 0.393868962302804, | |
| "reward_std": 0.9459580257534981, | |
| "step": 65 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2630.0833587646484, | |
| "dapo/avg_reward_std": 0.25837596147148695, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.35802469595714853, | |
| "dapo/num_sampling_attempts": 3.375, | |
| "dapo/sampling_efficiency": 31.666666666666664, | |
| "dapo/total_prompts_processed": 20.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.07542857142857143, | |
| "grad_norm": 0.11327924579381943, | |
| "kl": 0.23560714721679688, | |
| "learning_rate": 3.9609093550344907e-07, | |
| "loss": 0.0563, | |
| "reward": 0.674448698759079, | |
| "reward_std": 0.9591537117958069, | |
| "step": 66 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3343.3159790039062, | |
| "dapo/avg_reward_std": 0.2785816714167595, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.41666667101283866, | |
| "dapo/num_sampling_attempts": 3.0, | |
| "dapo/sampling_efficiency": 42.08333333333333, | |
| "dapo/total_prompts_processed": 18.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.07657142857142857, | |
| "grad_norm": 0.10341926664113998, | |
| "kl": 0.005463600158691406, | |
| "learning_rate": 3.8142703296283953e-07, | |
| "loss": 0.0653, | |
| "reward": 0.42072685062885284, | |
| "reward_std": 0.9649706333875656, | |
| "step": 67 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2880.0590438842773, | |
| "dapo/avg_reward_std": 0.2447407204243872, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2916666728754838, | |
| "dapo/num_sampling_attempts": 4.5, | |
| "dapo/sampling_efficiency": 28.591269841269842, | |
| "dapo/total_prompts_processed": 27.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.07771428571428571, | |
| "grad_norm": 0.15764088928699493, | |
| "kl": 0.011991500854492188, | |
| "learning_rate": 3.6696851061588994e-07, | |
| "loss": 0.1004, | |
| "reward": 0.537701515480876, | |
| "reward_std": 0.9107673466205597, | |
| "step": 68 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2839.0069580078125, | |
| "dapo/avg_reward_std": 0.21828406437849388, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.26495726865071517, | |
| "dapo/num_sampling_attempts": 4.875, | |
| "dapo/sampling_efficiency": 32.39583333333333, | |
| "dapo/total_prompts_processed": 29.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.07885714285714286, | |
| "grad_norm": 0.1426348239183426, | |
| "kl": 0.16588592529296875, | |
| "learning_rate": 3.5273298394491515e-07, | |
| "loss": 0.065, | |
| "reward": 0.5752462260425091, | |
| "reward_std": 0.9265653118491173, | |
| "step": 69 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3154.9479370117188, | |
| "dapo/avg_reward_std": 0.24686445650600253, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.40476191185769583, | |
| "dapo/num_sampling_attempts": 2.625, | |
| "dapo/sampling_efficiency": 58.75, | |
| "dapo/total_prompts_processed": 15.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.08, | |
| "grad_norm": 0.09042708575725555, | |
| "kl": 0.015224456787109375, | |
| "learning_rate": 3.387377967463493e-07, | |
| "loss": 0.0278, | |
| "reward": 0.5091124139726162, | |
| "reward_std": 0.9951601624488831, | |
| "step": 70 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2558.7118377685547, | |
| "dapo/avg_reward_std": 0.24922772922686168, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.35119048452803064, | |
| "dapo/num_sampling_attempts": 3.5, | |
| "dapo/sampling_efficiency": 41.979166666666664, | |
| "dapo/total_prompts_processed": 21.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.08114285714285714, | |
| "grad_norm": 0.18424691259860992, | |
| "kl": 0.012338638305664062, | |
| "learning_rate": 3.250000000000001e-07, | |
| "loss": 0.135, | |
| "reward": 0.80832345969975, | |
| "reward_std": 0.9256910160183907, | |
| "step": 71 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2797.5659790039062, | |
| "dapo/avg_reward_std": 0.3421325541677929, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4603174655210404, | |
| "dapo/num_sampling_attempts": 2.625, | |
| "dapo/sampling_efficiency": 52.916666666666664, | |
| "dapo/total_prompts_processed": 15.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.08228571428571428, | |
| "grad_norm": 0.10505988448858261, | |
| "kl": 0.027385711669921875, | |
| "learning_rate": 3.115363310950578e-07, | |
| "loss": 0.0435, | |
| "reward": 0.5198174491524696, | |
| "reward_std": 0.932801865041256, | |
| "step": 72 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3024.5243225097656, | |
| "dapo/avg_reward_std": 0.26287247288611626, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.33333334038334506, | |
| "dapo/num_sampling_attempts": 3.875, | |
| "dapo/sampling_efficiency": 40.0297619047619, | |
| "dapo/total_prompts_processed": 23.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.08342857142857144, | |
| "grad_norm": 0.09084703773260117, | |
| "kl": 0.09223747253417969, | |
| "learning_rate": 2.9836319343816397e-07, | |
| "loss": 0.0314, | |
| "reward": 0.3449883237481117, | |
| "reward_std": 0.9521737843751907, | |
| "step": 73 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2648.7257080078125, | |
| "dapo/avg_reward_std": 0.2678213362340574, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.38271605582148943, | |
| "dapo/num_sampling_attempts": 3.375, | |
| "dapo/sampling_efficiency": 40.0, | |
| "dapo/total_prompts_processed": 20.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.08457142857142858, | |
| "grad_norm": 0.15155129134655, | |
| "kl": 1.0743579864501953, | |
| "learning_rate": 2.854966364683872e-07, | |
| "loss": 0.0851, | |
| "reward": 0.7227161657065153, | |
| "reward_std": 0.9239719212055206, | |
| "step": 74 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2659.388900756836, | |
| "dapo/avg_reward_std": 0.28101804742106684, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.37037037699310865, | |
| "dapo/num_sampling_attempts": 3.375, | |
| "dapo/sampling_efficiency": 34.791666666666664, | |
| "dapo/total_prompts_processed": 20.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.08571428571428572, | |
| "grad_norm": 0.1127755343914032, | |
| "kl": 0.02587890625, | |
| "learning_rate": 2.729523361034538e-07, | |
| "loss": 0.0523, | |
| "reward": 0.7372388476505876, | |
| "reward_std": 0.918749064207077, | |
| "step": 75 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2402.364585876465, | |
| "dapo/avg_reward_std": 0.26893362632164586, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.36538462111583125, | |
| "dapo/num_sampling_attempts": 3.25, | |
| "dapo/sampling_efficiency": 48.854166666666664, | |
| "dapo/total_prompts_processed": 19.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.08685714285714285, | |
| "grad_norm": 0.14693324267864227, | |
| "kl": 0.12501144409179688, | |
| "learning_rate": 2.6074557564105724e-07, | |
| "loss": 0.0747, | |
| "reward": 0.6182113699615002, | |
| "reward_std": 0.9421844929456711, | |
| "step": 76 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2970.1146392822266, | |
| "dapo/avg_reward_std": 0.2118390180170536, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.25000000521540644, | |
| "dapo/num_sampling_attempts": 5.0, | |
| "dapo/sampling_efficiency": 30.53571428571428, | |
| "dapo/total_prompts_processed": 30.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.088, | |
| "grad_norm": 0.12072475999593735, | |
| "kl": 0.05495643615722656, | |
| "learning_rate": 2.488912271385139e-07, | |
| "loss": 0.0498, | |
| "reward": 0.46035338938236237, | |
| "reward_std": 0.9146044701337814, | |
| "step": 77 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2959.0972442626953, | |
| "dapo/avg_reward_std": 0.13832776496807733, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.1631944477558136, | |
| "dapo/num_sampling_attempts": 6.0, | |
| "dapo/sampling_efficiency": 30.868055555555557, | |
| "dapo/total_prompts_processed": 36.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.08914285714285715, | |
| "grad_norm": 0.14289411902427673, | |
| "kl": 0.23297691345214844, | |
| "learning_rate": 2.374037332934512e-07, | |
| "loss": 0.0742, | |
| "reward": 0.49553669430315495, | |
| "reward_std": 0.9023259580135345, | |
| "step": 78 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2935.8159942626953, | |
| "dapo/avg_reward_std": 0.2931290553374724, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.44696970080787485, | |
| "dapo/num_sampling_attempts": 2.75, | |
| "dapo/sampling_efficiency": 58.854166666666664, | |
| "dapo/total_prompts_processed": 16.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.09028571428571429, | |
| "grad_norm": 0.13638050854206085, | |
| "kl": 0.03482818603515625, | |
| "learning_rate": 2.2629708984760706e-07, | |
| "loss": 0.0609, | |
| "reward": 0.4563083341345191, | |
| "reward_std": 0.9425384849309921, | |
| "step": 79 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3111.340301513672, | |
| "dapo/avg_reward_std": 0.22562272967518987, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3063063154349456, | |
| "dapo/num_sampling_attempts": 4.625, | |
| "dapo/sampling_efficiency": 28.819444444444446, | |
| "dapo/total_prompts_processed": 27.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.09142857142857143, | |
| "grad_norm": 0.10739335417747498, | |
| "kl": 0.008031845092773438, | |
| "learning_rate": 2.1558482853517253e-07, | |
| "loss": 0.0574, | |
| "reward": 0.6980459969490767, | |
| "reward_std": 0.9673654958605766, | |
| "step": 80 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2921.6111450195312, | |
| "dapo/avg_reward_std": 0.2788313144239886, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3333333386429425, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 34.27083333333333, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.09257142857142857, | |
| "grad_norm": 0.18038466572761536, | |
| "kl": 0.016963958740234375, | |
| "learning_rate": 2.0528000059645995e-07, | |
| "loss": 0.0958, | |
| "reward": 0.6405055914074183, | |
| "reward_std": 0.9560460075736046, | |
| "step": 81 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3220.687530517578, | |
| "dapo/avg_reward_std": 0.1744266465688363, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2307692349721224, | |
| "dapo/num_sampling_attempts": 4.875, | |
| "dapo/sampling_efficiency": 26.666666666666664, | |
| "dapo/total_prompts_processed": 29.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.09371428571428571, | |
| "grad_norm": 0.12377161532640457, | |
| "kl": 0.009552001953125, | |
| "learning_rate": 1.9539516087697517e-07, | |
| "loss": 0.061, | |
| "reward": 0.5073397234082222, | |
| "reward_std": 0.9641925543546677, | |
| "step": 82 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2663.1597442626953, | |
| "dapo/avg_reward_std": 0.2496542421079451, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3333333413447103, | |
| "dapo/num_sampling_attempts": 3.875, | |
| "dapo/sampling_efficiency": 40.451388888888886, | |
| "dapo/total_prompts_processed": 23.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.09485714285714286, | |
| "grad_norm": 0.1273493468761444, | |
| "kl": 0.04001617431640625, | |
| "learning_rate": 1.8594235253127372e-07, | |
| "loss": 0.0521, | |
| "reward": 0.49824655149132013, | |
| "reward_std": 0.9464590474963188, | |
| "step": 83 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3073.2986450195312, | |
| "dapo/avg_reward_std": 0.27911247177557513, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4318181892687624, | |
| "dapo/num_sampling_attempts": 2.75, | |
| "dapo/sampling_efficiency": 60.3125, | |
| "dapo/total_prompts_processed": 16.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.096, | |
| "grad_norm": 0.14399568736553192, | |
| "kl": 0.010408401489257812, | |
| "learning_rate": 1.7693309235023127e-07, | |
| "loss": 0.0657, | |
| "reward": 0.624765045940876, | |
| "reward_std": 0.954634428024292, | |
| "step": 84 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3073.7535095214844, | |
| "dapo/avg_reward_std": 0.17655213298024358, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.24324324888152046, | |
| "dapo/num_sampling_attempts": 4.625, | |
| "dapo/sampling_efficiency": 29.82142857142857, | |
| "dapo/total_prompts_processed": 27.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.09714285714285714, | |
| "grad_norm": 0.12462300807237625, | |
| "kl": 0.007053375244140625, | |
| "learning_rate": 1.6837835672960831e-07, | |
| "loss": 0.062, | |
| "reward": 0.6820014184340835, | |
| "reward_std": 0.8695997595787048, | |
| "step": 85 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2741.204849243164, | |
| "dapo/avg_reward_std": 0.21997538357973098, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2666666731238365, | |
| "dapo/num_sampling_attempts": 5.0, | |
| "dapo/sampling_efficiency": 27.896825396825395, | |
| "dapo/total_prompts_processed": 30.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.09828571428571428, | |
| "grad_norm": 0.14978615939617157, | |
| "kl": 0.025630950927734375, | |
| "learning_rate": 1.6028856829700258e-07, | |
| "loss": 0.0585, | |
| "reward": 0.5304304007440805, | |
| "reward_std": 0.9523463025689125, | |
| "step": 86 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3223.7257080078125, | |
| "dapo/avg_reward_std": 0.27104776600996655, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3666666716337204, | |
| "dapo/num_sampling_attempts": 3.75, | |
| "dapo/sampling_efficiency": 43.333333333333336, | |
| "dapo/total_prompts_processed": 22.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.09942857142857142, | |
| "grad_norm": 0.1086694523692131, | |
| "kl": 0.009660720825195312, | |
| "learning_rate": 1.5267358321348285e-07, | |
| "loss": 0.058, | |
| "reward": 0.5936380252242088, | |
| "reward_std": 0.919317290186882, | |
| "step": 87 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2934.5833740234375, | |
| "dapo/avg_reward_std": 0.23462909049001232, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3333333376152762, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 52.84722222222222, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.10057142857142858, | |
| "grad_norm": 0.14571106433868408, | |
| "kl": 0.02588653564453125, | |
| "learning_rate": 1.4554267916537495e-07, | |
| "loss": 0.0741, | |
| "reward": 0.5716092269867659, | |
| "reward_std": 0.9475584626197815, | |
| "step": 88 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3017.2673950195312, | |
| "dapo/avg_reward_std": 0.22858241697152457, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3333333383003871, | |
| "dapo/num_sampling_attempts": 3.75, | |
| "dapo/sampling_efficiency": 45.416666666666664, | |
| "dapo/total_prompts_processed": 22.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.10171428571428572, | |
| "grad_norm": 0.10647116601467133, | |
| "kl": 0.034389495849609375, | |
| "learning_rate": 1.3890454406082956e-07, | |
| "loss": 0.0586, | |
| "reward": 0.5356123449746519, | |
| "reward_std": 0.9426311627030373, | |
| "step": 89 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2540.9548950195312, | |
| "dapo/avg_reward_std": 0.16863613526026408, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.22592593100335862, | |
| "dapo/num_sampling_attempts": 5.625, | |
| "dapo/sampling_efficiency": 28.75, | |
| "dapo/total_prompts_processed": 33.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.10285714285714286, | |
| "grad_norm": 0.1207195371389389, | |
| "kl": 0.7361793518066406, | |
| "learning_rate": 1.3276726544494571e-07, | |
| "loss": 0.0349, | |
| "reward": 0.750616230070591, | |
| "reward_std": 1.0088519006967545, | |
| "step": 90 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3054.5833435058594, | |
| "dapo/avg_reward_std": 0.2058313423767686, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.28645834047347307, | |
| "dapo/num_sampling_attempts": 4.0, | |
| "dapo/sampling_efficiency": 41.36904761904762, | |
| "dapo/total_prompts_processed": 24.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.104, | |
| "grad_norm": 0.10659411549568176, | |
| "kl": 0.009166717529296875, | |
| "learning_rate": 1.2713832064634125e-07, | |
| "loss": 0.06, | |
| "reward": 0.49192704539746046, | |
| "reward_std": 0.8957021087408066, | |
| "step": 91 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2958.43408203125, | |
| "dapo/avg_reward_std": 0.317311546076899, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.47101450160793634, | |
| "dapo/num_sampling_attempts": 2.875, | |
| "dapo/sampling_efficiency": 44.166666666666664, | |
| "dapo/total_prompts_processed": 17.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.10514285714285715, | |
| "grad_norm": 0.1002211645245552, | |
| "kl": 0.00801849365234375, | |
| "learning_rate": 1.220245676671809e-07, | |
| "loss": 0.0508, | |
| "reward": 0.7598672257736325, | |
| "reward_std": 0.9218961223959923, | |
| "step": 92 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3257.7881774902344, | |
| "dapo/avg_reward_std": 0.2586492033941405, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.36309524678758215, | |
| "dapo/num_sampling_attempts": 3.5, | |
| "dapo/sampling_efficiency": 41.5625, | |
| "dapo/total_prompts_processed": 21.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.10628571428571429, | |
| "grad_norm": 0.12036111950874329, | |
| "kl": 0.01373291015625, | |
| "learning_rate": 1.1743223682775649e-07, | |
| "loss": 0.0459, | |
| "reward": 0.5575436241924763, | |
| "reward_std": 0.9431066736578941, | |
| "step": 93 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2740.1284942626953, | |
| "dapo/avg_reward_std": 0.2375115204241968, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.35483871688765867, | |
| "dapo/num_sampling_attempts": 3.875, | |
| "dapo/sampling_efficiency": 29.999999999999996, | |
| "dapo/total_prompts_processed": 23.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.10742857142857143, | |
| "grad_norm": 0.14863841235637665, | |
| "kl": 0.032642364501953125, | |
| "learning_rate": 1.1336692317580158e-07, | |
| "loss": 0.0742, | |
| "reward": 0.5738632343709469, | |
| "reward_std": 0.9468542039394379, | |
| "step": 94 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2899.937515258789, | |
| "dapo/avg_reward_std": 0.2901096656208947, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4206349246558689, | |
| "dapo/num_sampling_attempts": 2.625, | |
| "dapo/sampling_efficiency": 64.58333333333333, | |
| "dapo/total_prompts_processed": 15.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.10857142857142857, | |
| "grad_norm": 0.13841120898723602, | |
| "kl": 0.012683868408203125, | |
| "learning_rate": 1.0983357966978745e-07, | |
| "loss": 0.0653, | |
| "reward": 0.6555321607738733, | |
| "reward_std": 0.9674765914678574, | |
| "step": 95 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2926.1910247802734, | |
| "dapo/avg_reward_std": 0.18252932499436772, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2696078485425781, | |
| "dapo/num_sampling_attempts": 4.25, | |
| "dapo/sampling_efficiency": 37.82738095238095, | |
| "dapo/total_prompts_processed": 25.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.10971428571428571, | |
| "grad_norm": 0.13530230522155762, | |
| "kl": 0.05282402038574219, | |
| "learning_rate": 1.068365111445064e-07, | |
| "loss": 0.0762, | |
| "reward": 0.5449853939935565, | |
| "reward_std": 0.952080488204956, | |
| "step": 96 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2798.031280517578, | |
| "dapo/avg_reward_std": 0.23633464597738707, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3461538478732109, | |
| "dapo/num_sampling_attempts": 3.25, | |
| "dapo/sampling_efficiency": 38.541666666666664, | |
| "dapo/total_prompts_processed": 19.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.11085714285714286, | |
| "grad_norm": 0.1648494303226471, | |
| "kl": 0.025691986083984375, | |
| "learning_rate": 1.0437936906629334e-07, | |
| "loss": 0.0939, | |
| "reward": 0.673285935074091, | |
| "reward_std": 0.979133740067482, | |
| "step": 97 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3240.7361450195312, | |
| "dapo/avg_reward_std": 0.2805523918225215, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3653846193964665, | |
| "dapo/num_sampling_attempts": 3.25, | |
| "dapo/sampling_efficiency": 54.513888888888886, | |
| "dapo/total_prompts_processed": 19.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.112, | |
| "grad_norm": 0.12132810056209564, | |
| "kl": 0.01453399658203125, | |
| "learning_rate": 1.0246514708427701e-07, | |
| "loss": 0.0557, | |
| "reward": 0.5335402796044946, | |
| "reward_std": 0.9456770345568657, | |
| "step": 98 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2497.9132499694824, | |
| "dapo/avg_reward_std": 0.2488528937101364, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3888888942698638, | |
| "dapo/num_sampling_attempts": 3.0, | |
| "dapo/sampling_efficiency": 58.05555555555555, | |
| "dapo/total_prompts_processed": 18.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.11314285714285714, | |
| "grad_norm": 0.24999241530895233, | |
| "kl": 0.028301239013671875, | |
| "learning_rate": 1.0109617738307911e-07, | |
| "loss": 0.1037, | |
| "reward": 0.785055335611105, | |
| "reward_std": 0.9553829357028008, | |
| "step": 99 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3039.6284790039062, | |
| "dapo/avg_reward_std": 0.2903642791012923, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.38194445086022216, | |
| "dapo/num_sampling_attempts": 3.0, | |
| "dapo/sampling_efficiency": 46.24999999999999, | |
| "dapo/total_prompts_processed": 18.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.11428571428571428, | |
| "grad_norm": 0.14126254618167877, | |
| "kl": 0.014410018920898438, | |
| "learning_rate": 1.002741278414069e-07, | |
| "loss": 0.0643, | |
| "reward": 0.4948624651879072, | |
| "reward_std": 0.9704382866621017, | |
| "step": 100 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3318.513916015625, | |
| "dapo/avg_reward_std": 0.22042016812733242, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.29523810063089645, | |
| "dapo/num_sampling_attempts": 4.375, | |
| "dapo/sampling_efficiency": 28.645833333333332, | |
| "dapo/total_prompts_processed": 26.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.11542857142857142, | |
| "grad_norm": 0.22150926291942596, | |
| "kl": 0.011791229248046875, | |
| "learning_rate": 1e-07, | |
| "loss": 0.0631, | |
| "reward": 0.46524661034345627, | |
| "reward_std": 0.9665903598070145, | |
| "step": 101 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3083.875, | |
| "dapo/avg_reward_std": 0.21663353669232335, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3390804637095024, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 39.93055555555555, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.11657142857142858, | |
| "grad_norm": 0.16289636492729187, | |
| "kl": 0.008695602416992188, | |
| "learning_rate": 6.203955092681039e-07, | |
| "loss": 0.098, | |
| "reward": 0.8642945289611816, | |
| "reward_std": 1.031830094754696, | |
| "step": 102 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3364.701446533203, | |
| "dapo/avg_reward_std": 0.24887267331923207, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3172043090866458, | |
| "dapo/num_sampling_attempts": 3.875, | |
| "dapo/sampling_efficiency": 31.69642857142857, | |
| "dapo/total_prompts_processed": 23.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.11771428571428572, | |
| "grad_norm": 0.08825232833623886, | |
| "kl": 0.009820938110351562, | |
| "learning_rate": 6.126278954320294e-07, | |
| "loss": 0.0178, | |
| "reward": 0.3627179069444537, | |
| "reward_std": 0.8941863179206848, | |
| "step": 103 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3255.3055725097656, | |
| "dapo/avg_reward_std": 0.24808817549988074, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.33950618074999916, | |
| "dapo/num_sampling_attempts": 3.375, | |
| "dapo/sampling_efficiency": 38.95833333333333, | |
| "dapo/total_prompts_processed": 20.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.11885714285714286, | |
| "grad_norm": 0.13638561964035034, | |
| "kl": 0.011318206787109375, | |
| "learning_rate": 6.048412045323164e-07, | |
| "loss": 0.0643, | |
| "reward": 0.5508436523377895, | |
| "reward_std": 0.9409585371613503, | |
| "step": 104 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3270.4930419921875, | |
| "dapo/avg_reward_std": 0.23700118958950042, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3166666706403097, | |
| "dapo/num_sampling_attempts": 3.75, | |
| "dapo/sampling_efficiency": 61.07142857142857, | |
| "dapo/total_prompts_processed": 22.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.12, | |
| "grad_norm": 0.10357476025819778, | |
| "kl": 0.0117034912109375, | |
| "learning_rate": 5.97037808470444e-07, | |
| "loss": 0.0278, | |
| "reward": 0.4137148158624768, | |
| "reward_std": 0.9205853268504143, | |
| "step": 105 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3118.9584045410156, | |
| "dapo/avg_reward_std": 0.22452521603554487, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3333333395421505, | |
| "dapo/num_sampling_attempts": 4.0, | |
| "dapo/sampling_efficiency": 28.869047619047613, | |
| "dapo/total_prompts_processed": 24.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.12114285714285715, | |
| "grad_norm": 0.11885393410921097, | |
| "kl": 0.011783599853515625, | |
| "learning_rate": 5.892200842364462e-07, | |
| "loss": 0.0786, | |
| "reward": 0.673494272865355, | |
| "reward_std": 0.9388571679592133, | |
| "step": 106 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3183.666717529297, | |
| "dapo/avg_reward_std": 0.23609773551716523, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.30882353467099805, | |
| "dapo/num_sampling_attempts": 4.25, | |
| "dapo/sampling_efficiency": 37.74305555555556, | |
| "dapo/total_prompts_processed": 25.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.12228571428571429, | |
| "grad_norm": 0.13629400730133057, | |
| "kl": 0.0092010498046875, | |
| "learning_rate": 5.813904131848564e-07, | |
| "loss": 0.0615, | |
| "reward": 0.5680118557065725, | |
| "reward_std": 0.8982010260224342, | |
| "step": 107 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3170.263916015625, | |
| "dapo/avg_reward_std": 0.21017570431168014, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3018018079770578, | |
| "dapo/num_sampling_attempts": 4.625, | |
| "dapo/sampling_efficiency": 30.625, | |
| "dapo/total_prompts_processed": 27.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.12342857142857143, | |
| "grad_norm": 0.1134539544582367, | |
| "kl": 0.010692596435546875, | |
| "learning_rate": 5.735511803093248e-07, | |
| "loss": 0.0433, | |
| "reward": 0.6368884779512882, | |
| "reward_std": 0.9655679985880852, | |
| "step": 108 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2938.5243530273438, | |
| "dapo/avg_reward_std": 0.30796096875117374, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3974359052685591, | |
| "dapo/num_sampling_attempts": 3.25, | |
| "dapo/sampling_efficiency": 38.95833333333333, | |
| "dapo/total_prompts_processed": 19.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.12457142857142857, | |
| "grad_norm": 0.16064728796482086, | |
| "kl": 0.014812469482421875, | |
| "learning_rate": 5.657047735161255e-07, | |
| "loss": 0.0874, | |
| "reward": 0.4405923653393984, | |
| "reward_std": 0.899710550904274, | |
| "step": 109 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3333.5556030273438, | |
| "dapo/avg_reward_std": 0.17683410130698105, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.28735632475080164, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 40.104166666666664, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.12571428571428572, | |
| "grad_norm": 0.1374766230583191, | |
| "kl": 0.00823211669921875, | |
| "learning_rate": 5.578535828967777e-07, | |
| "loss": 0.0525, | |
| "reward": 0.6373127717524767, | |
| "reward_std": 0.949370414018631, | |
| "step": 110 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3404.166717529297, | |
| "dapo/avg_reward_std": 0.2707539377734065, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3437500074505806, | |
| "dapo/num_sampling_attempts": 4.0, | |
| "dapo/sampling_efficiency": 28.124999999999996, | |
| "dapo/total_prompts_processed": 24.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.12685714285714286, | |
| "grad_norm": 0.09096160531044006, | |
| "kl": 0.0152435302734375, | |
| "learning_rate": 5.5e-07, | |
| "loss": 0.0286, | |
| "reward": 0.4166172882542014, | |
| "reward_std": 0.9417606145143509, | |
| "step": 111 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3306.263946533203, | |
| "dapo/avg_reward_std": 0.17227381931410896, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.21481482055452134, | |
| "dapo/num_sampling_attempts": 5.625, | |
| "dapo/sampling_efficiency": 27.395833333333332, | |
| "dapo/total_prompts_processed": 33.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.128, | |
| "grad_norm": 0.11950567364692688, | |
| "kl": 0.01320648193359375, | |
| "learning_rate": 5.421464171032224e-07, | |
| "loss": 0.0449, | |
| "reward": 0.4937558462843299, | |
| "reward_std": 0.9720155894756317, | |
| "step": 112 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3117.1979064941406, | |
| "dapo/avg_reward_std": 0.30339551545106447, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3846153886272357, | |
| "dapo/num_sampling_attempts": 3.25, | |
| "dapo/sampling_efficiency": 38.95833333333333, | |
| "dapo/total_prompts_processed": 19.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.12914285714285714, | |
| "grad_norm": 0.15823398530483246, | |
| "kl": 0.01418304443359375, | |
| "learning_rate": 5.342952264838747e-07, | |
| "loss": 0.0743, | |
| "reward": 0.5596551271155477, | |
| "reward_std": 0.8979872986674309, | |
| "step": 113 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3239.031280517578, | |
| "dapo/avg_reward_std": 0.24120492219924927, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.34000000298023225, | |
| "dapo/num_sampling_attempts": 3.125, | |
| "dapo/sampling_efficiency": 56.770833333333336, | |
| "dapo/total_prompts_processed": 18.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.13028571428571428, | |
| "grad_norm": 0.20106364786624908, | |
| "kl": 0.01206207275390625, | |
| "learning_rate": 5.264488196906752e-07, | |
| "loss": 0.0817, | |
| "reward": 0.697497084736824, | |
| "reward_std": 0.9489930346608162, | |
| "step": 114 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3197.2430725097656, | |
| "dapo/avg_reward_std": 0.20663932577157632, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.26495727056112045, | |
| "dapo/num_sampling_attempts": 4.875, | |
| "dapo/sampling_efficiency": 38.4375, | |
| "dapo/total_prompts_processed": 29.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.13142857142857142, | |
| "grad_norm": 0.15399962663650513, | |
| "kl": 0.015567779541015625, | |
| "learning_rate": 5.186095868151436e-07, | |
| "loss": 0.0667, | |
| "reward": 0.5802914081141353, | |
| "reward_std": 0.9295158162713051, | |
| "step": 115 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3272.6007080078125, | |
| "dapo/avg_reward_std": 0.22710687816143035, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3166666701436043, | |
| "dapo/num_sampling_attempts": 3.75, | |
| "dapo/sampling_efficiency": 37.61904761904762, | |
| "dapo/total_prompts_processed": 22.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.13257142857142856, | |
| "grad_norm": 0.140142023563385, | |
| "kl": 0.01934814453125, | |
| "learning_rate": 5.107799157635538e-07, | |
| "loss": 0.0611, | |
| "reward": 0.6176847349852324, | |
| "reward_std": 0.944318100810051, | |
| "step": 116 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3268.4305725097656, | |
| "dapo/avg_reward_std": 0.23266587586238466, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.344827591345228, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 38.125, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.1337142857142857, | |
| "grad_norm": 0.1582440286874771, | |
| "kl": 0.01198577880859375, | |
| "learning_rate": 5.02962191529556e-07, | |
| "loss": 0.0556, | |
| "reward": 0.5785031230188906, | |
| "reward_std": 0.954645112156868, | |
| "step": 117 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2941.9722595214844, | |
| "dapo/avg_reward_std": 0.24969401342027328, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3284313814604984, | |
| "dapo/num_sampling_attempts": 4.25, | |
| "dapo/sampling_efficiency": 27.20238095238095, | |
| "dapo/total_prompts_processed": 25.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.13485714285714287, | |
| "grad_norm": 0.1869765818119049, | |
| "kl": 0.01676177978515625, | |
| "learning_rate": 4.951587954676837e-07, | |
| "loss": 0.1063, | |
| "reward": 0.6486848145723343, | |
| "reward_std": 0.9332743212580681, | |
| "step": 118 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3206.982635498047, | |
| "dapo/avg_reward_std": 0.20580977627209254, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.26666667333671024, | |
| "dapo/num_sampling_attempts": 4.375, | |
| "dapo/sampling_efficiency": 41.28472222222222, | |
| "dapo/total_prompts_processed": 26.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.136, | |
| "grad_norm": 0.13004696369171143, | |
| "kl": 0.015842437744140625, | |
| "learning_rate": 4.873721045679706e-07, | |
| "loss": 0.0453, | |
| "reward": 0.4798949249088764, | |
| "reward_std": 0.9390313774347305, | |
| "step": 119 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3015.545135498047, | |
| "dapo/avg_reward_std": 0.22217401381461852, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3548387149649282, | |
| "dapo/num_sampling_attempts": 3.875, | |
| "dapo/sampling_efficiency": 28.95833333333333, | |
| "dapo/total_prompts_processed": 23.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.13714285714285715, | |
| "grad_norm": 0.229897141456604, | |
| "kl": 0.02198028564453125, | |
| "learning_rate": 4.79604490731896e-07, | |
| "loss": 0.0749, | |
| "reward": 0.7311479561030865, | |
| "reward_std": 0.9607837572693825, | |
| "step": 120 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3098.656280517578, | |
| "dapo/avg_reward_std": 0.22588159143924713, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.32777778506278993, | |
| "dapo/num_sampling_attempts": 3.75, | |
| "dapo/sampling_efficiency": 44.613095238095234, | |
| "dapo/total_prompts_processed": 22.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.1382857142857143, | |
| "grad_norm": 0.13800247013568878, | |
| "kl": 0.014202117919921875, | |
| "learning_rate": 4.7185832004988133e-07, | |
| "loss": 0.0814, | |
| "reward": 0.8461479842662811, | |
| "reward_std": 0.9660850539803505, | |
| "step": 121 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3064.3924255371094, | |
| "dapo/avg_reward_std": 0.16500467896461488, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.19666667193174361, | |
| "dapo/num_sampling_attempts": 6.25, | |
| "dapo/sampling_efficiency": 21.07142857142857, | |
| "dapo/total_prompts_processed": 37.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.13942857142857143, | |
| "grad_norm": 0.1680934727191925, | |
| "kl": 0.01361083984375, | |
| "learning_rate": 4.641359520805548e-07, | |
| "loss": 0.066, | |
| "reward": 0.7812346797436476, | |
| "reward_std": 0.9529108256101608, | |
| "step": 122 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3097.4861755371094, | |
| "dapo/avg_reward_std": 0.22939075000824466, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.33333334038334506, | |
| "dapo/num_sampling_attempts": 3.875, | |
| "dapo/sampling_efficiency": 33.75, | |
| "dapo/total_prompts_processed": 23.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.14057142857142857, | |
| "grad_norm": 0.18081900477409363, | |
| "kl": 0.014842987060546875, | |
| "learning_rate": 4.5643973913200837e-07, | |
| "loss": 0.0877, | |
| "reward": 0.7531900368630886, | |
| "reward_std": 0.9868133068084717, | |
| "step": 123 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3203.888885498047, | |
| "dapo/avg_reward_std": 0.24352495979379724, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.35185185737080044, | |
| "dapo/num_sampling_attempts": 3.375, | |
| "dapo/sampling_efficiency": 43.05555555555556, | |
| "dapo/total_prompts_processed": 20.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.1417142857142857, | |
| "grad_norm": 0.16807734966278076, | |
| "kl": 0.0139007568359375, | |
| "learning_rate": 4.4877202554526084e-07, | |
| "loss": 0.0612, | |
| "reward": 0.715996683575213, | |
| "reward_std": 0.9595553278923035, | |
| "step": 124 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2885.5625610351562, | |
| "dapo/avg_reward_std": 0.2548297820612788, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.31770833814516664, | |
| "dapo/num_sampling_attempts": 4.0, | |
| "dapo/sampling_efficiency": 27.20238095238095, | |
| "dapo/total_prompts_processed": 24.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.14285714285714285, | |
| "grad_norm": 0.16355834901332855, | |
| "kl": 0.02027130126953125, | |
| "learning_rate": 4.4113514698014953e-07, | |
| "loss": 0.0597, | |
| "reward": 0.8311022147536278, | |
| "reward_std": 0.9600836709141731, | |
| "step": 125 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3250.843780517578, | |
| "dapo/avg_reward_std": 0.2203440727858708, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.32758621152105005, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 46.770833333333336, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.144, | |
| "grad_norm": 0.18190248310565948, | |
| "kl": 0.0158843994140625, | |
| "learning_rate": 4.3353142970386557e-07, | |
| "loss": 0.068, | |
| "reward": 0.7400151332840323, | |
| "reward_std": 0.9569809287786484, | |
| "step": 126 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3264.420166015625, | |
| "dapo/avg_reward_std": 0.25137073759521755, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.41666667429464205, | |
| "dapo/num_sampling_attempts": 3.5, | |
| "dapo/sampling_efficiency": 40.11904761904761, | |
| "dapo/total_prompts_processed": 21.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.14514285714285713, | |
| "grad_norm": 0.17950685322284698, | |
| "kl": 0.0223236083984375, | |
| "learning_rate": 4.2596318988235037e-07, | |
| "loss": 0.0528, | |
| "reward": 0.5194851458072662, | |
| "reward_std": 0.9414050430059433, | |
| "step": 127 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2892.9132690429688, | |
| "dapo/avg_reward_std": 0.2416491061449051, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2631579002267436, | |
| "dapo/num_sampling_attempts": 4.75, | |
| "dapo/sampling_efficiency": 26.9047619047619, | |
| "dapo/total_prompts_processed": 28.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.1462857142857143, | |
| "grad_norm": 0.25602471828460693, | |
| "kl": 0.02016448974609375, | |
| "learning_rate": 4.1843273287476854e-07, | |
| "loss": 0.0933, | |
| "reward": 0.8592288717627525, | |
| "reward_std": 0.9212958365678787, | |
| "step": 128 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3146.6944580078125, | |
| "dapo/avg_reward_std": 0.22558308675371366, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3218390854268238, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 54.07738095238095, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.14742857142857144, | |
| "grad_norm": 0.21352027356624603, | |
| "kl": 0.0198211669921875, | |
| "learning_rate": 4.1094235253127374e-07, | |
| "loss": 0.0679, | |
| "reward": 0.5732525363564491, | |
| "reward_std": 0.9645283669233322, | |
| "step": 129 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3248.4236450195312, | |
| "dapo/avg_reward_std": 0.35807471639580196, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.5000000066227384, | |
| "dapo/num_sampling_attempts": 2.25, | |
| "dapo/sampling_efficiency": 51.041666666666664, | |
| "dapo/total_prompts_processed": 13.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.14857142857142858, | |
| "grad_norm": 0.1599435657262802, | |
| "kl": 0.0216827392578125, | |
| "learning_rate": 4.034943304942796e-07, | |
| "loss": 0.0443, | |
| "reward": 0.5955070666968822, | |
| "reward_std": 0.9924386888742447, | |
| "step": 130 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2958.5347595214844, | |
| "dapo/avg_reward_std": 0.18185590389298228, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.23170731998071437, | |
| "dapo/num_sampling_attempts": 5.125, | |
| "dapo/sampling_efficiency": 24.945436507936506, | |
| "dapo/total_prompts_processed": 30.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.14971428571428572, | |
| "grad_norm": 0.21188445389270782, | |
| "kl": 0.02074432373046875, | |
| "learning_rate": 3.9609093550344907e-07, | |
| "loss": 0.0628, | |
| "reward": 0.8608505353331566, | |
| "reward_std": 0.9059992283582687, | |
| "step": 131 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3019.888931274414, | |
| "dapo/avg_reward_std": 0.3038036392794715, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.36419753785486575, | |
| "dapo/num_sampling_attempts": 3.375, | |
| "dapo/sampling_efficiency": 38.33333333333333, | |
| "dapo/total_prompts_processed": 20.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.15085714285714286, | |
| "grad_norm": 0.19752100110054016, | |
| "kl": 0.024078369140625, | |
| "learning_rate": 3.8873442270461485e-07, | |
| "loss": 0.0698, | |
| "reward": 0.7191393785178661, | |
| "reward_std": 0.9548436179757118, | |
| "step": 132 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3251.6909790039062, | |
| "dapo/avg_reward_std": 0.17617152915114448, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.22222222494227545, | |
| "dapo/num_sampling_attempts": 5.25, | |
| "dapo/sampling_efficiency": 31.369047619047613, | |
| "dapo/total_prompts_processed": 31.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.152, | |
| "grad_norm": 0.1220565065741539, | |
| "kl": 0.01824951171875, | |
| "learning_rate": 3.8142703296283953e-07, | |
| "loss": 0.0249, | |
| "reward": 0.3546891317819245, | |
| "reward_std": 0.9377138167619705, | |
| "step": 133 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3146.545196533203, | |
| "dapo/avg_reward_std": 0.2565364229679108, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.32666667103767394, | |
| "dapo/num_sampling_attempts": 3.125, | |
| "dapo/sampling_efficiency": 47.08333333333333, | |
| "dapo/total_prompts_processed": 18.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.15314285714285714, | |
| "grad_norm": 0.15810362994670868, | |
| "kl": 0.03081512451171875, | |
| "learning_rate": 3.7417099217982686e-07, | |
| "loss": 0.0306, | |
| "reward": 0.5206232005730271, | |
| "reward_std": 0.9619846642017365, | |
| "step": 134 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3085.5972900390625, | |
| "dapo/avg_reward_std": 0.30491976333515985, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.40476191469601225, | |
| "dapo/num_sampling_attempts": 3.5, | |
| "dapo/sampling_efficiency": 31.666666666666664, | |
| "dapo/total_prompts_processed": 21.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.15428571428571428, | |
| "grad_norm": 0.2133372277021408, | |
| "kl": 0.0204620361328125, | |
| "learning_rate": 3.6696851061588994e-07, | |
| "loss": 0.0681, | |
| "reward": 0.7713347226381302, | |
| "reward_std": 0.9403144493699074, | |
| "step": 135 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3326.295196533203, | |
| "dapo/avg_reward_std": 0.22884555886953306, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.24358974817471626, | |
| "dapo/num_sampling_attempts": 4.875, | |
| "dapo/sampling_efficiency": 25.868055555555557, | |
| "dapo/total_prompts_processed": 29.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.15542857142857142, | |
| "grad_norm": 0.18792302906513214, | |
| "kl": 0.029754638671875, | |
| "learning_rate": 3.5982178221668533e-07, | |
| "loss": 0.0468, | |
| "reward": 0.5651950668543577, | |
| "reward_std": 0.9934203922748566, | |
| "step": 136 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3265.2882080078125, | |
| "dapo/avg_reward_std": 0.304972759137551, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.43055556155741215, | |
| "dapo/num_sampling_attempts": 3.0, | |
| "dapo/sampling_efficiency": 54.375, | |
| "dapo/total_prompts_processed": 18.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.15657142857142858, | |
| "grad_norm": 0.13081717491149902, | |
| "kl": 0.0223846435546875, | |
| "learning_rate": 3.5273298394491515e-07, | |
| "loss": 0.0443, | |
| "reward": 0.5535581167787313, | |
| "reward_std": 0.9467164501547813, | |
| "step": 137 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2895.8646545410156, | |
| "dapo/avg_reward_std": 0.2690910736719767, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3333333387970924, | |
| "dapo/num_sampling_attempts": 3.75, | |
| "dapo/sampling_efficiency": 32.82738095238095, | |
| "dapo/total_prompts_processed": 22.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.15771428571428572, | |
| "grad_norm": 0.18165208399295807, | |
| "kl": 0.032073974609375, | |
| "learning_rate": 3.45704275117204e-07, | |
| "loss": 0.0288, | |
| "reward": 0.5253790076822042, | |
| "reward_std": 0.9247673749923706, | |
| "step": 138 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3049.8507080078125, | |
| "dapo/avg_reward_std": 0.2440622321196965, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.33928572067192625, | |
| "dapo/num_sampling_attempts": 3.5, | |
| "dapo/sampling_efficiency": 40.11904761904761, | |
| "dapo/total_prompts_processed": 21.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.15885714285714286, | |
| "grad_norm": 0.19676071405410767, | |
| "kl": 0.03052520751953125, | |
| "learning_rate": 3.387377967463493e-07, | |
| "loss": 0.0477, | |
| "reward": 0.6778539270162582, | |
| "reward_std": 0.9344745948910713, | |
| "step": 139 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3029.0486450195312, | |
| "dapo/avg_reward_std": 0.3111469969153404, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4916666768491268, | |
| "dapo/num_sampling_attempts": 2.5, | |
| "dapo/sampling_efficiency": 41.666666666666664, | |
| "dapo/total_prompts_processed": 15.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.16, | |
| "grad_norm": 0.18594416975975037, | |
| "kl": 0.0277557373046875, | |
| "learning_rate": 3.3183567088914833e-07, | |
| "loss": 0.0431, | |
| "reward": 0.5210836753249168, | |
| "reward_std": 0.9851464107632637, | |
| "step": 140 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3151.5486755371094, | |
| "dapo/avg_reward_std": 0.23511080997330802, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3095238127878734, | |
| "dapo/num_sampling_attempts": 4.375, | |
| "dapo/sampling_efficiency": 26.18055555555555, | |
| "dapo/total_prompts_processed": 26.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.16114285714285714, | |
| "grad_norm": 0.17807213962078094, | |
| "kl": 0.0266265869140625, | |
| "learning_rate": 3.250000000000001e-07, | |
| "loss": 0.0498, | |
| "reward": 0.5591800361871719, | |
| "reward_std": 0.9730060175061226, | |
| "step": 141 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2963.59033203125, | |
| "dapo/avg_reward_std": 0.19928012508898973, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2812500069849193, | |
| "dapo/num_sampling_attempts": 4.0, | |
| "dapo/sampling_efficiency": 38.02083333333333, | |
| "dapo/total_prompts_processed": 24.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.16228571428571428, | |
| "grad_norm": 0.24388359487056732, | |
| "kl": 0.0318603515625, | |
| "learning_rate": 3.182328662904756e-07, | |
| "loss": 0.0567, | |
| "reward": 0.7148469444364309, | |
| "reward_std": 0.9495278596878052, | |
| "step": 142 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3157.791717529297, | |
| "dapo/avg_reward_std": 0.23966079843895777, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3214285767504147, | |
| "dapo/num_sampling_attempts": 3.5, | |
| "dapo/sampling_efficiency": 39.166666666666664, | |
| "dapo/total_prompts_processed": 21.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.16342857142857142, | |
| "grad_norm": 0.20528583228588104, | |
| "kl": 0.041290283203125, | |
| "learning_rate": 3.115363310950578e-07, | |
| "loss": 0.0443, | |
| "reward": 0.5249591246247292, | |
| "reward_std": 0.9509934857487679, | |
| "step": 143 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3030.187530517578, | |
| "dapo/avg_reward_std": 0.30880050485332805, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4375000099341075, | |
| "dapo/num_sampling_attempts": 3.0, | |
| "dapo/sampling_efficiency": 41.04166666666666, | |
| "dapo/total_prompts_processed": 18.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.16457142857142856, | |
| "grad_norm": 0.15082307159900665, | |
| "kl": 0.02729034423828125, | |
| "learning_rate": 3.0491243424323783e-07, | |
| "loss": 0.0511, | |
| "reward": 0.5894143544137478, | |
| "reward_std": 0.954010546207428, | |
| "step": 144 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2973.3993225097656, | |
| "dapo/avg_reward_std": 0.32683228328824043, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4236111181477706, | |
| "dapo/num_sampling_attempts": 3.0, | |
| "dapo/sampling_efficiency": 48.66071428571428, | |
| "dapo/total_prompts_processed": 18.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.1657142857142857, | |
| "grad_norm": 0.2588576078414917, | |
| "kl": 0.038238525390625, | |
| "learning_rate": 2.9836319343816397e-07, | |
| "loss": 0.0611, | |
| "reward": 0.6702784113585949, | |
| "reward_std": 0.9678368121385574, | |
| "step": 145 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3289.8368530273438, | |
| "dapo/avg_reward_std": 0.29686578666722335, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.34567901823255753, | |
| "dapo/num_sampling_attempts": 3.375, | |
| "dapo/sampling_efficiency": 51.57738095238095, | |
| "dapo/total_prompts_processed": 20.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.16685714285714287, | |
| "grad_norm": 0.2035798877477646, | |
| "kl": 0.0394744873046875, | |
| "learning_rate": 2.918906036420294e-07, | |
| "loss": 0.0576, | |
| "reward": 0.4602743685245514, | |
| "reward_std": 0.9194413796067238, | |
| "step": 146 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3068.7604064941406, | |
| "dapo/avg_reward_std": 0.27814541943371296, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3437500069849193, | |
| "dapo/num_sampling_attempts": 4.0, | |
| "dapo/sampling_efficiency": 36.666666666666664, | |
| "dapo/total_prompts_processed": 24.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.168, | |
| "grad_norm": 0.22469140589237213, | |
| "kl": 0.030426025390625, | |
| "learning_rate": 2.854966364683872e-07, | |
| "loss": 0.0696, | |
| "reward": 0.6243265215307474, | |
| "reward_std": 0.9174878597259521, | |
| "step": 147 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3041.357635498047, | |
| "dapo/avg_reward_std": 0.2907161459326744, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.458333346247673, | |
| "dapo/num_sampling_attempts": 2.5, | |
| "dapo/sampling_efficiency": 57.70833333333333, | |
| "dapo/total_prompts_processed": 15.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.16914285714285715, | |
| "grad_norm": 0.3123789429664612, | |
| "kl": 0.0328521728515625, | |
| "learning_rate": 2.791832395815782e-07, | |
| "loss": 0.0819, | |
| "reward": 0.8250775411725044, | |
| "reward_std": 0.9233218431472778, | |
| "step": 148 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2433.0694732666016, | |
| "dapo/avg_reward_std": 0.22243764168686336, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2777777839865949, | |
| "dapo/num_sampling_attempts": 4.5, | |
| "dapo/sampling_efficiency": 35.75892857142857, | |
| "dapo/total_prompts_processed": 27.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.1702857142857143, | |
| "grad_norm": 0.2827485203742981, | |
| "kl": 0.0386505126953125, | |
| "learning_rate": 2.729523361034538e-07, | |
| "loss": 0.0784, | |
| "reward": 0.6995697831735015, | |
| "reward_std": 0.9434132054448128, | |
| "step": 149 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3096.59033203125, | |
| "dapo/avg_reward_std": 0.347408726811409, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.541666672565043, | |
| "dapo/num_sampling_attempts": 2.0, | |
| "dapo/sampling_efficiency": 63.541666666666664, | |
| "dapo/total_prompts_processed": 12.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.17142857142857143, | |
| "grad_norm": 0.30529579520225525, | |
| "kl": 0.03045654296875, | |
| "learning_rate": 2.6680582402757324e-07, | |
| "loss": 0.0868, | |
| "reward": 0.7112221932038665, | |
| "reward_std": 0.9602288007736206, | |
| "step": 150 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3184.611083984375, | |
| "dapo/avg_reward_std": 0.1674806038115887, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.20212766528129578, | |
| "dapo/num_sampling_attempts": 5.875, | |
| "dapo/sampling_efficiency": 23.749999999999996, | |
| "dapo/total_prompts_processed": 35.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.17257142857142857, | |
| "grad_norm": 0.19142813980579376, | |
| "kl": 0.037353515625, | |
| "learning_rate": 2.6074557564105724e-07, | |
| "loss": 0.045, | |
| "reward": 0.41017685225233436, | |
| "reward_std": 0.9152907580137253, | |
| "step": 151 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3437.3541564941406, | |
| "dapo/avg_reward_std": 0.208841644014631, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2571428622518267, | |
| "dapo/num_sampling_attempts": 4.375, | |
| "dapo/sampling_efficiency": 40.416666666666664, | |
| "dapo/total_prompts_processed": 26.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.1737142857142857, | |
| "grad_norm": 0.15321692824363708, | |
| "kl": 0.03997802734375, | |
| "learning_rate": 2.547734369542718e-07, | |
| "loss": 0.0346, | |
| "reward": 0.34562894329428673, | |
| "reward_std": 0.856454074382782, | |
| "step": 152 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3008.1285095214844, | |
| "dapo/avg_reward_std": 0.3009934023022652, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.5000000096857548, | |
| "dapo/num_sampling_attempts": 2.5, | |
| "dapo/sampling_efficiency": 43.75, | |
| "dapo/total_prompts_processed": 15.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.17485714285714285, | |
| "grad_norm": 0.20332548022270203, | |
| "kl": 0.0509033203125, | |
| "learning_rate": 2.488912271385139e-07, | |
| "loss": 0.0536, | |
| "reward": 0.7641689777374268, | |
| "reward_std": 0.95648343116045, | |
| "step": 153 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3165.52783203125, | |
| "dapo/avg_reward_std": 0.2268627045246271, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.35256410905948055, | |
| "dapo/num_sampling_attempts": 3.25, | |
| "dapo/sampling_efficiency": 40.625, | |
| "dapo/total_prompts_processed": 19.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.176, | |
| "grad_norm": 0.2415708601474762, | |
| "kl": 0.032623291015625, | |
| "learning_rate": 2.4310073797187573e-07, | |
| "loss": 0.0658, | |
| "reward": 0.6375892572104931, | |
| "reward_std": 0.9544621706008911, | |
| "step": 154 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3226.4652709960938, | |
| "dapo/avg_reward_std": 0.2563069482644399, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.38333334078391396, | |
| "dapo/num_sampling_attempts": 3.75, | |
| "dapo/sampling_efficiency": 31.249999999999996, | |
| "dapo/total_prompts_processed": 22.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.17714285714285713, | |
| "grad_norm": 0.2137623131275177, | |
| "kl": 0.0427093505859375, | |
| "learning_rate": 2.374037332934512e-07, | |
| "loss": 0.0533, | |
| "reward": 0.537381574511528, | |
| "reward_std": 0.9281218275427818, | |
| "step": 155 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2680.3090209960938, | |
| "dapo/avg_reward_std": 0.22888225678241614, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3181818226973216, | |
| "dapo/num_sampling_attempts": 4.125, | |
| "dapo/sampling_efficiency": 31.29960317460317, | |
| "dapo/total_prompts_processed": 24.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.1782857142857143, | |
| "grad_norm": 0.3409210443496704, | |
| "kl": 0.03851318359375, | |
| "learning_rate": 2.3180194846605364e-07, | |
| "loss": 0.0962, | |
| "reward": 0.8820424377918243, | |
| "reward_std": 0.9246840327978134, | |
| "step": 156 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3045.3299255371094, | |
| "dapo/avg_reward_std": 0.2491180575810946, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3653846222620744, | |
| "dapo/num_sampling_attempts": 3.25, | |
| "dapo/sampling_efficiency": 45.83333333333332, | |
| "dapo/total_prompts_processed": 19.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.17942857142857144, | |
| "grad_norm": 0.23701035976409912, | |
| "kl": 0.0436248779296875, | |
| "learning_rate": 2.2629708984760706e-07, | |
| "loss": 0.0414, | |
| "reward": 0.6551959328353405, | |
| "reward_std": 0.9744707196950912, | |
| "step": 157 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2918.892364501953, | |
| "dapo/avg_reward_std": 0.22537656256130764, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.33333333730697634, | |
| "dapo/num_sampling_attempts": 4.375, | |
| "dapo/sampling_efficiency": 39.93055555555556, | |
| "dapo/total_prompts_processed": 26.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.18057142857142858, | |
| "grad_norm": 0.3551786541938782, | |
| "kl": 0.0572357177734375, | |
| "learning_rate": 2.2089083427137329e-07, | |
| "loss": 0.0732, | |
| "reward": 0.5248121619224548, | |
| "reward_std": 0.9334831684827805, | |
| "step": 158 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2874.0729446411133, | |
| "dapo/avg_reward_std": 0.18832522351294756, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2812500046566129, | |
| "dapo/num_sampling_attempts": 4.0, | |
| "dapo/sampling_efficiency": 38.69047619047618, | |
| "dapo/total_prompts_processed": 24.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.18171428571428572, | |
| "grad_norm": 0.25500980019569397, | |
| "kl": 0.03741455078125, | |
| "learning_rate": 2.1558482853517253e-07, | |
| "loss": 0.0537, | |
| "reward": 0.7963100634515285, | |
| "reward_std": 0.987776905298233, | |
| "step": 159 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2940.701385498047, | |
| "dapo/avg_reward_std": 0.16297742784023284, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.20000000536441803, | |
| "dapo/num_sampling_attempts": 6.25, | |
| "dapo/sampling_efficiency": 18.368055555555557, | |
| "dapo/total_prompts_processed": 37.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.18285714285714286, | |
| "grad_norm": 0.2898014187812805, | |
| "kl": 0.058013916015625, | |
| "learning_rate": 2.1038068889975259e-07, | |
| "loss": 0.037, | |
| "reward": 0.5323189618065953, | |
| "reward_std": 0.9483579620718956, | |
| "step": 160 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3090.7882385253906, | |
| "dapo/avg_reward_std": 0.3046227526664734, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3733333414793015, | |
| "dapo/num_sampling_attempts": 3.125, | |
| "dapo/sampling_efficiency": 43.45238095238095, | |
| "dapo/total_prompts_processed": 18.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.184, | |
| "grad_norm": 0.28573325276374817, | |
| "kl": 0.040771484375, | |
| "learning_rate": 2.0528000059645995e-07, | |
| "loss": 0.0511, | |
| "reward": 0.6970310118049383, | |
| "reward_std": 0.9432796016335487, | |
| "step": 161 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3205.4270629882812, | |
| "dapo/avg_reward_std": 0.36972329020500183, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.5438596621940011, | |
| "dapo/num_sampling_attempts": 2.375, | |
| "dapo/sampling_efficiency": 55.625, | |
| "dapo/total_prompts_processed": 14.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.18514285714285714, | |
| "grad_norm": 0.390523225069046, | |
| "kl": 0.052459716796875, | |
| "learning_rate": 2.0028431734436308e-07, | |
| "loss": 0.0818, | |
| "reward": 0.6346883065998554, | |
| "reward_std": 0.9713371768593788, | |
| "step": 162 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3082.107635498047, | |
| "dapo/avg_reward_std": 0.2315557522158469, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3440860264724301, | |
| "dapo/num_sampling_attempts": 3.875, | |
| "dapo/sampling_efficiency": 44.513888888888886, | |
| "dapo/total_prompts_processed": 23.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.18628571428571428, | |
| "grad_norm": 0.31898149847984314, | |
| "kl": 0.05328369140625, | |
| "learning_rate": 1.9539516087697517e-07, | |
| "loss": 0.0722, | |
| "reward": 0.6942785531282425, | |
| "reward_std": 0.9776681512594223, | |
| "step": 163 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3027.0243530273438, | |
| "dapo/avg_reward_std": 0.15836979811255997, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.22972973214613424, | |
| "dapo/num_sampling_attempts": 4.625, | |
| "dapo/sampling_efficiency": 41.69642857142857, | |
| "dapo/total_prompts_processed": 27.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.18742857142857142, | |
| "grad_norm": 0.2931766211986542, | |
| "kl": 0.033111572265625, | |
| "learning_rate": 1.9061402047871833e-07, | |
| "loss": 0.0754, | |
| "reward": 0.944303285330534, | |
| "reward_std": 0.9451126903295517, | |
| "step": 164 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2894.260482788086, | |
| "dapo/avg_reward_std": 0.224585828371346, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2916666716337204, | |
| "dapo/num_sampling_attempts": 4.0, | |
| "dapo/sampling_efficiency": 37.5, | |
| "dapo/total_prompts_processed": 24.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.18857142857142858, | |
| "grad_norm": 0.24178634583950043, | |
| "kl": 0.0533447265625, | |
| "learning_rate": 1.8594235253127372e-07, | |
| "loss": 0.0505, | |
| "reward": 0.6519163623452187, | |
| "reward_std": 0.9615699052810669, | |
| "step": 165 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3002.7882385253906, | |
| "dapo/avg_reward_std": 0.29886600477942105, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3160919598464308, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 35.416666666666664, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.18971428571428572, | |
| "grad_norm": 0.31221655011177063, | |
| "kl": 0.047943115234375, | |
| "learning_rate": 1.8138158006995363e-07, | |
| "loss": 0.066, | |
| "reward": 0.6383479349315166, | |
| "reward_std": 0.9029820337891579, | |
| "step": 166 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2927.295150756836, | |
| "dapo/avg_reward_std": 0.34752671499001353, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.5438596621940011, | |
| "dapo/num_sampling_attempts": 2.375, | |
| "dapo/sampling_efficiency": 48.95833333333333, | |
| "dapo/total_prompts_processed": 14.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.19085714285714286, | |
| "grad_norm": 0.2697528600692749, | |
| "kl": 0.045745849609375, | |
| "learning_rate": 1.7693309235023127e-07, | |
| "loss": 0.0483, | |
| "reward": 0.8266985702211969, | |
| "reward_std": 0.9544429406523705, | |
| "step": 167 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3212.857666015625, | |
| "dapo/avg_reward_std": 0.263968757220677, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3690476247242519, | |
| "dapo/num_sampling_attempts": 3.5, | |
| "dapo/sampling_efficiency": 41.388888888888886, | |
| "dapo/total_prompts_processed": 21.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.192, | |
| "grad_norm": 0.27940821647644043, | |
| "kl": 0.05059814453125, | |
| "learning_rate": 1.7259824442455923e-07, | |
| "loss": 0.0415, | |
| "reward": 0.7715255841612816, | |
| "reward_std": 0.95072440803051, | |
| "step": 168 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3112.5799255371094, | |
| "dapo/avg_reward_std": 0.22730760558231458, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3153153222960395, | |
| "dapo/num_sampling_attempts": 4.625, | |
| "dapo/sampling_efficiency": 26.249999999999996, | |
| "dapo/total_prompts_processed": 27.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.19314285714285714, | |
| "grad_norm": 0.4339730143547058, | |
| "kl": 0.06396484375, | |
| "learning_rate": 1.6837835672960831e-07, | |
| "loss": 0.0777, | |
| "reward": 0.5262689627707005, | |
| "reward_std": 0.9779800549149513, | |
| "step": 169 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3088.6632385253906, | |
| "dapo/avg_reward_std": 0.2333034286275506, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.33333333721384406, | |
| "dapo/num_sampling_attempts": 4.0, | |
| "dapo/sampling_efficiency": 38.263888888888886, | |
| "dapo/total_prompts_processed": 24.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.19428571428571428, | |
| "grad_norm": 0.48384836316108704, | |
| "kl": 0.0555419921875, | |
| "learning_rate": 1.6427471468404952e-07, | |
| "loss": 0.0974, | |
| "reward": 0.7407102398574352, | |
| "reward_std": 0.9568767622113228, | |
| "step": 170 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3099.347198486328, | |
| "dapo/avg_reward_std": 0.17301563743282766, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.27941177127992406, | |
| "dapo/num_sampling_attempts": 4.25, | |
| "dapo/sampling_efficiency": 31.874999999999996, | |
| "dapo/total_prompts_processed": 25.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.19542857142857142, | |
| "grad_norm": 0.42263394594192505, | |
| "kl": 0.0595703125, | |
| "learning_rate": 1.6028856829700258e-07, | |
| "loss": 0.0812, | |
| "reward": 0.4282900430262089, | |
| "reward_std": 0.914498083293438, | |
| "step": 171 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3111.232696533203, | |
| "dapo/avg_reward_std": 0.2433939976617694, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.33333334093913436, | |
| "dapo/num_sampling_attempts": 4.0, | |
| "dapo/sampling_efficiency": 36.80555555555555, | |
| "dapo/total_prompts_processed": 24.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.19657142857142856, | |
| "grad_norm": 0.4814501404762268, | |
| "kl": 0.05926513671875, | |
| "learning_rate": 1.5642113178727193e-07, | |
| "loss": 0.0843, | |
| "reward": 0.6843680012971163, | |
| "reward_std": 0.8743765726685524, | |
| "step": 172 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3008.6563110351562, | |
| "dapo/avg_reward_std": 0.25363275137814606, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.31818182811592566, | |
| "dapo/num_sampling_attempts": 4.125, | |
| "dapo/sampling_efficiency": 38.78472222222222, | |
| "dapo/total_prompts_processed": 24.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.1977142857142857, | |
| "grad_norm": 0.285697877407074, | |
| "kl": 0.05755615234375, | |
| "learning_rate": 1.5267358321348285e-07, | |
| "loss": 0.0456, | |
| "reward": 0.5798944532871246, | |
| "reward_std": 0.984041191637516, | |
| "step": 173 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3067.9791870117188, | |
| "dapo/avg_reward_std": 0.3438388824462891, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4500000044703484, | |
| "dapo/num_sampling_attempts": 2.5, | |
| "dapo/sampling_efficiency": 48.33333333333333, | |
| "dapo/total_prompts_processed": 15.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.19885714285714284, | |
| "grad_norm": 0.43520498275756836, | |
| "kl": 0.07098388671875, | |
| "learning_rate": 1.4904706411523448e-07, | |
| "loss": 0.0716, | |
| "reward": 0.5646946905180812, | |
| "reward_std": 0.9460153579711914, | |
| "step": 174 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3223.2916870117188, | |
| "dapo/avg_reward_std": 0.2690600073337555, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4133333420753479, | |
| "dapo/num_sampling_attempts": 3.125, | |
| "dapo/sampling_efficiency": 41.45833333333333, | |
| "dapo/total_prompts_processed": 18.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.2, | |
| "grad_norm": 0.35144945979118347, | |
| "kl": 0.06170654296875, | |
| "learning_rate": 1.4554267916537495e-07, | |
| "loss": 0.0348, | |
| "reward": 0.556399748660624, | |
| "reward_std": 0.9192204177379608, | |
| "step": 175 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2946.0799102783203, | |
| "dapo/avg_reward_std": 0.25316954652468365, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.37500000558793545, | |
| "dapo/num_sampling_attempts": 3.0, | |
| "dapo/sampling_efficiency": 43.75, | |
| "dapo/total_prompts_processed": 18.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.20114285714285715, | |
| "grad_norm": 0.46807849407196045, | |
| "kl": 0.063018798828125, | |
| "learning_rate": 1.4216149583350755e-07, | |
| "loss": 0.0796, | |
| "reward": 0.6736351866275072, | |
| "reward_std": 0.9649264737963676, | |
| "step": 176 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3096.829864501953, | |
| "dapo/avg_reward_std": 0.31567848042437907, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.482456142965116, | |
| "dapo/num_sampling_attempts": 2.375, | |
| "dapo/sampling_efficiency": 55.625, | |
| "dapo/total_prompts_processed": 14.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.2022857142857143, | |
| "grad_norm": 0.31731271743774414, | |
| "kl": 0.055938720703125, | |
| "learning_rate": 1.3890454406082956e-07, | |
| "loss": 0.0386, | |
| "reward": 0.681073285639286, | |
| "reward_std": 0.9661536440253258, | |
| "step": 177 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3235.8056030273438, | |
| "dapo/avg_reward_std": 0.24198689542967697, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3448275898037286, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 47.08333333333333, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.20342857142857143, | |
| "grad_norm": 0.4640950560569763, | |
| "kl": 0.072052001953125, | |
| "learning_rate": 1.3577281594640182e-07, | |
| "loss": 0.0702, | |
| "reward": 0.5520291309803724, | |
| "reward_std": 0.9967257082462311, | |
| "step": 178 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3237.77783203125, | |
| "dapo/avg_reward_std": 0.30828417566689575, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4242424341765317, | |
| "dapo/num_sampling_attempts": 2.75, | |
| "dapo/sampling_efficiency": 52.82738095238095, | |
| "dapo/total_prompts_processed": 16.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.20457142857142857, | |
| "grad_norm": 0.4502318203449249, | |
| "kl": 0.07550048828125, | |
| "learning_rate": 1.3276726544494571e-07, | |
| "loss": 0.0614, | |
| "reward": 0.6213867999613285, | |
| "reward_std": 0.9431608989834785, | |
| "step": 179 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2887.9236450195312, | |
| "dapo/avg_reward_std": 0.2488611958645008, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3518518612340645, | |
| "dapo/num_sampling_attempts": 3.375, | |
| "dapo/sampling_efficiency": 48.035714285714285, | |
| "dapo/total_prompts_processed": 20.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.2057142857142857, | |
| "grad_norm": 0.44646504521369934, | |
| "kl": 0.073760986328125, | |
| "learning_rate": 1.2988880807625927e-07, | |
| "loss": 0.0683, | |
| "reward": 0.5839751102030277, | |
| "reward_std": 0.9090578481554985, | |
| "step": 180 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3021.2916870117188, | |
| "dapo/avg_reward_std": 0.20883248069069602, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2878787942004926, | |
| "dapo/num_sampling_attempts": 4.125, | |
| "dapo/sampling_efficiency": 39.632936507936506, | |
| "dapo/total_prompts_processed": 24.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.20685714285714285, | |
| "grad_norm": 0.36042678356170654, | |
| "kl": 0.07421875, | |
| "learning_rate": 1.2713832064634125e-07, | |
| "loss": 0.054, | |
| "reward": 0.5517729418352246, | |
| "reward_std": 0.9483400657773018, | |
| "step": 181 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3249.2118530273438, | |
| "dapo/avg_reward_std": 0.2615335573043142, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.33333333847778185, | |
| "dapo/num_sampling_attempts": 3.5, | |
| "dapo/sampling_efficiency": 46.785714285714285, | |
| "dapo/total_prompts_processed": 21.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.208, | |
| "grad_norm": 0.4518042504787445, | |
| "kl": 0.072021484375, | |
| "learning_rate": 1.2451664098030743e-07, | |
| "loss": 0.0654, | |
| "reward": 0.686168298125267, | |
| "reward_std": 0.9350233674049377, | |
| "step": 182 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3221.6631774902344, | |
| "dapo/avg_reward_std": 0.27866364789731574, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3686868738044392, | |
| "dapo/num_sampling_attempts": 4.125, | |
| "dapo/sampling_efficiency": 28.4375, | |
| "dapo/total_prompts_processed": 24.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.20914285714285713, | |
| "grad_norm": 0.32408109307289124, | |
| "kl": 0.062255859375, | |
| "learning_rate": 1.220245676671809e-07, | |
| "loss": 0.0384, | |
| "reward": 0.6384344138205051, | |
| "reward_std": 0.9783304929733276, | |
| "step": 183 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3199.1354370117188, | |
| "dapo/avg_reward_std": 0.2816663732131322, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.316666671137015, | |
| "dapo/num_sampling_attempts": 3.75, | |
| "dapo/sampling_efficiency": 45.55555555555555, | |
| "dapo/total_prompts_processed": 22.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.2102857142857143, | |
| "grad_norm": 0.2197091430425644, | |
| "kl": 0.07550048828125, | |
| "learning_rate": 1.1966285981663407e-07, | |
| "loss": 0.0211, | |
| "reward": 0.45471471454948187, | |
| "reward_std": 0.9136239141225815, | |
| "step": 184 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3037.420166015625, | |
| "dapo/avg_reward_std": 0.17516983683044846, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2657657728807346, | |
| "dapo/num_sampling_attempts": 4.625, | |
| "dapo/sampling_efficiency": 25.729166666666664, | |
| "dapo/total_prompts_processed": 27.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.21142857142857144, | |
| "grad_norm": 0.4012245535850525, | |
| "kl": 0.091796875, | |
| "learning_rate": 1.1743223682775649e-07, | |
| "loss": 0.0442, | |
| "reward": 0.7168623730540276, | |
| "reward_std": 0.9515729621052742, | |
| "step": 185 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3222.767364501953, | |
| "dapo/avg_reward_std": 0.2550514280796051, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3444444512327512, | |
| "dapo/num_sampling_attempts": 3.75, | |
| "dapo/sampling_efficiency": 43.64583333333333, | |
| "dapo/total_prompts_processed": 22.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.21257142857142858, | |
| "grad_norm": 0.4945845305919647, | |
| "kl": 0.083465576171875, | |
| "learning_rate": 1.1533337816991931e-07, | |
| "loss": 0.0667, | |
| "reward": 0.5391142014414072, | |
| "reward_std": 0.9342528805136681, | |
| "step": 186 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2858.5659942626953, | |
| "dapo/avg_reward_std": 0.23423856112264818, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3118279609949358, | |
| "dapo/num_sampling_attempts": 3.875, | |
| "dapo/sampling_efficiency": 40.0297619047619, | |
| "dapo/total_prompts_processed": 23.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.21371428571428572, | |
| "grad_norm": 0.4291866421699524, | |
| "kl": 0.091796875, | |
| "learning_rate": 1.1336692317580158e-07, | |
| "loss": 0.0384, | |
| "reward": 0.7481220848858356, | |
| "reward_std": 0.9474795907735825, | |
| "step": 187 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3123.170166015625, | |
| "dapo/avg_reward_std": 0.1988734739857751, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.25675675997862946, | |
| "dapo/num_sampling_attempts": 4.625, | |
| "dapo/sampling_efficiency": 31.875, | |
| "dapo/total_prompts_processed": 27.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.21485714285714286, | |
| "grad_norm": 0.30453264713287354, | |
| "kl": 0.080657958984375, | |
| "learning_rate": 1.1153347084664419e-07, | |
| "loss": 0.0273, | |
| "reward": 0.6236942922696471, | |
| "reward_std": 0.9715093299746513, | |
| "step": 188 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2872.9618530273438, | |
| "dapo/avg_reward_std": 0.21385114904372923, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3333333365378841, | |
| "dapo/num_sampling_attempts": 3.875, | |
| "dapo/sampling_efficiency": 39.18154761904762, | |
| "dapo/total_prompts_processed": 23.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.216, | |
| "grad_norm": 0.5780288577079773, | |
| "kl": 0.08612060546875, | |
| "learning_rate": 1.0983357966978745e-07, | |
| "loss": 0.0607, | |
| "reward": 0.7514887787401676, | |
| "reward_std": 1.0098591819405556, | |
| "step": 189 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2937.093780517578, | |
| "dapo/avg_reward_std": 0.1677520631575117, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.21895425284610076, | |
| "dapo/num_sampling_attempts": 6.375, | |
| "dapo/sampling_efficiency": 20.689484126984123, | |
| "dapo/total_prompts_processed": 38.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.21714285714285714, | |
| "grad_norm": 0.3947860896587372, | |
| "kl": 0.076263427734375, | |
| "learning_rate": 1.0826776744855121e-07, | |
| "loss": 0.0487, | |
| "reward": 0.6180934552103281, | |
| "reward_std": 0.9050487726926804, | |
| "step": 190 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3252.090301513672, | |
| "dapo/avg_reward_std": 0.24265852073828378, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3055555621782939, | |
| "dapo/num_sampling_attempts": 3.75, | |
| "dapo/sampling_efficiency": 38.020833333333336, | |
| "dapo/total_prompts_processed": 22.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.21828571428571428, | |
| "grad_norm": 0.48333072662353516, | |
| "kl": 0.09661865234375, | |
| "learning_rate": 1.068365111445064e-07, | |
| "loss": 0.0584, | |
| "reward": 0.4759152363985777, | |
| "reward_std": 0.9479196071624756, | |
| "step": 191 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3074.420166015625, | |
| "dapo/avg_reward_std": 0.2189681170315578, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3333333371014431, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 46.45833333333333, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.21942857142857142, | |
| "grad_norm": 0.5536202192306519, | |
| "kl": 0.09814453125, | |
| "learning_rate": 1.0554024673218806e-07, | |
| "loss": 0.0731, | |
| "reward": 0.48804986744653434, | |
| "reward_std": 0.9367131069302559, | |
| "step": 192 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3026.9097595214844, | |
| "dapo/avg_reward_std": 0.21337791310774312, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.256756762395034, | |
| "dapo/num_sampling_attempts": 4.625, | |
| "dapo/sampling_efficiency": 30.3125, | |
| "dapo/total_prompts_processed": 27.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.22057142857142858, | |
| "grad_norm": 0.5239105224609375, | |
| "kl": 0.0985107421875, | |
| "learning_rate": 1.0437936906629334e-07, | |
| "loss": 0.0561, | |
| "reward": 0.45341441221535206, | |
| "reward_std": 0.8912393003702164, | |
| "step": 193 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2896.656280517578, | |
| "dapo/avg_reward_std": 0.31374274492263793, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4333333417773247, | |
| "dapo/num_sampling_attempts": 2.5, | |
| "dapo/sampling_efficiency": 46.875, | |
| "dapo/total_prompts_processed": 15.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.22171428571428572, | |
| "grad_norm": 0.6310634016990662, | |
| "kl": 0.108062744140625, | |
| "learning_rate": 1.0335423176140511e-07, | |
| "loss": 0.0809, | |
| "reward": 0.6844924800097942, | |
| "reward_std": 0.9649646729230881, | |
| "step": 194 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3319.7048950195312, | |
| "dapo/avg_reward_std": 0.21983732057340216, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.30303031251286017, | |
| "dapo/num_sampling_attempts": 4.125, | |
| "dapo/sampling_efficiency": 29.479166666666664, | |
| "dapo/total_prompts_processed": 24.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.22285714285714286, | |
| "grad_norm": 0.47936248779296875, | |
| "kl": 0.0997314453125, | |
| "learning_rate": 1.0246514708427701e-07, | |
| "loss": 0.0479, | |
| "reward": 0.3993752491660416, | |
| "reward_std": 0.9481607303023338, | |
| "step": 195 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3298.1736450195312, | |
| "dapo/avg_reward_std": 0.2514548934996128, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.28333333916962145, | |
| "dapo/num_sampling_attempts": 5.0, | |
| "dapo/sampling_efficiency": 33.13988095238095, | |
| "dapo/total_prompts_processed": 30.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.224, | |
| "grad_norm": 0.36350947618484497, | |
| "kl": 0.1043701171875, | |
| "learning_rate": 1.017123858587145e-07, | |
| "loss": 0.0389, | |
| "reward": 0.31427645590156317, | |
| "reward_std": 0.8980218172073364, | |
| "step": 196 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3260.4861450195312, | |
| "dapo/avg_reward_std": 0.1836753969009106, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.24786325486806723, | |
| "dapo/num_sampling_attempts": 4.875, | |
| "dapo/sampling_efficiency": 28.154761904761905, | |
| "dapo/total_prompts_processed": 29.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.22514285714285714, | |
| "grad_norm": 0.3354601562023163, | |
| "kl": 0.0946044921875, | |
| "learning_rate": 1.0109617738307911e-07, | |
| "loss": 0.0301, | |
| "reward": 0.5015182960778475, | |
| "reward_std": 0.9334053322672844, | |
| "step": 197 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3031.3958129882812, | |
| "dapo/avg_reward_std": 0.3008538554696476, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.5196078463512308, | |
| "dapo/num_sampling_attempts": 2.125, | |
| "dapo/sampling_efficiency": 76.5625, | |
| "dapo/total_prompts_processed": 12.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.22628571428571428, | |
| "grad_norm": 0.48223650455474854, | |
| "kl": 0.10247802734375, | |
| "learning_rate": 1.0061670936044178e-07, | |
| "loss": 0.0648, | |
| "reward": 0.573589576408267, | |
| "reward_std": 0.9578919112682343, | |
| "step": 198 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2948.3854064941406, | |
| "dapo/avg_reward_std": 0.43072181940078735, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.8333333373069763, | |
| "dapo/num_sampling_attempts": 1.25, | |
| "dapo/sampling_efficiency": 87.5, | |
| "dapo/total_prompts_processed": 7.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.22742857142857142, | |
| "grad_norm": 0.6141620874404907, | |
| "kl": 0.09808349609375, | |
| "learning_rate": 1.002741278414069e-07, | |
| "loss": 0.0827, | |
| "reward": 0.7053878791630268, | |
| "reward_std": 0.9694960787892342, | |
| "step": 199 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2714.482666015625, | |
| "dapo/avg_reward_std": 0.26207208441149804, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.37096774914572317, | |
| "dapo/num_sampling_attempts": 3.875, | |
| "dapo/sampling_efficiency": 30.624999999999993, | |
| "dapo/total_prompts_processed": 23.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.22857142857142856, | |
| "grad_norm": 0.2072688341140747, | |
| "kl": 0.1064453125, | |
| "learning_rate": 1.0006853717962393e-07, | |
| "loss": 0.0122, | |
| "reward": 0.5771910101175308, | |
| "reward_std": 0.9156405553221703, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.22857142857142856, | |
| "step": 200, | |
| "total_flos": 0.0, | |
| "train_loss": 0.02940896774176508, | |
| "train_runtime": 83918.4654, | |
| "train_samples_per_second": 0.114, | |
| "train_steps_per_second": 0.002 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 200, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 10, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 6, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |