{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9795918367346939, "eval_steps": 500, "global_step": 24, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "completion_length": 394.7544860839844, "epoch": 0.04081632653061224, "grad_norm": 0.5584128667460793, "kl": 0.0, "learning_rate": 1.6666666666666667e-06, "loss": 0.0001, "reward": 1.335937574505806, "reward_std": 0.6638279929757118, "rewards/format_reward": 0.09375000419095159, "rewards/instruction_follow_reward": 0.2232143022119999, "rewards/tag_count_reward": 0.572544664144516, "step": 1 }, { "completion_length": 407.3616256713867, "epoch": 0.08163265306122448, "grad_norm": 0.5809418852960418, "kl": 0.0, "learning_rate": 3.3333333333333333e-06, "loss": 0.0, "reward": 1.359151840209961, "reward_std": 0.7929508537054062, "rewards/format_reward": 0.10267857555299997, "rewards/instruction_follow_reward": 0.23392858356237411, "rewards/tag_count_reward": 0.5546875149011612, "step": 2 }, { "completion_length": 425.6607360839844, "epoch": 0.12244897959183673, "grad_norm": 0.5488272016876289, "kl": 0.0003504753112792969, "learning_rate": 5e-06, "loss": 0.0, "reward": 1.3707590103149414, "reward_std": 0.8819468468427658, "rewards/format_reward": 0.11160714738070965, "rewards/instruction_follow_reward": 0.23184525407850742, "rewards/tag_count_reward": 0.5636160969734192, "step": 3 }, { "completion_length": 394.3437728881836, "epoch": 0.16326530612244897, "grad_norm": 0.6044636536170302, "kl": 0.001239776611328125, "learning_rate": 4.97486935900654e-06, "loss": 0.0001, "reward": 1.467633992433548, "reward_std": 0.8601260483264923, "rewards/format_reward": 0.19642858393490314, "rewards/instruction_follow_reward": 0.2220982275903225, "rewards/tag_count_reward": 0.6049107313156128, "step": 4 }, { "completion_length": 416.5223388671875, "epoch": 0.20408163265306123, "grad_norm": 0.5931555061780838, "kl": 0.01104736328125, "learning_rate": 4.900038813018817e-06, "loss": 0.0004, "reward": 1.9301143288612366, "reward_std": 0.8981596827507019, "rewards/format_reward": 0.5133928880095482, "rewards/instruction_follow_reward": 0.21628808975219727, "rewards/tag_count_reward": 0.767857164144516, "step": 5 }, { "completion_length": 420.4241256713867, "epoch": 0.24489795918367346, "grad_norm": 0.6761697930052907, "kl": 0.03021240234375, "learning_rate": 4.777179952780443e-06, "loss": 0.0012, "reward": 2.2354912161827087, "reward_std": 0.8338466733694077, "rewards/format_reward": 0.7232143133878708, "rewards/instruction_follow_reward": 0.2187500111758709, "rewards/tag_count_reward": 0.856026828289032, "step": 6 }, { "completion_length": 394.3393096923828, "epoch": 0.2857142857142857, "grad_norm": 2.459773044170621, "kl": 0.13006591796875, "learning_rate": 4.609037242210989e-06, "loss": 0.0052, "reward": 2.178348273038864, "reward_std": 1.000592678785324, "rewards/format_reward": 0.7544643133878708, "rewards/instruction_follow_reward": 0.18854167126119137, "rewards/tag_count_reward": 0.8582589477300644, "step": 7 }, { "completion_length": 365.56697845458984, "epoch": 0.32653061224489793, "grad_norm": 0.6203686756559748, "kl": 0.0291900634765625, "learning_rate": 4.39936671161711e-06, "loss": 0.0012, "reward": 2.3113840222358704, "reward_std": 0.9184626936912537, "rewards/format_reward": 0.7098214477300644, "rewards/instruction_follow_reward": 0.2444196529686451, "rewards/tag_count_reward": 0.8683035969734192, "step": 8 }, { "completion_length": 381.06251525878906, "epoch": 0.3673469387755102, "grad_norm": 0.5837189440797563, "kl": 0.016021728515625, "learning_rate": 4.152852054182151e-06, "loss": 0.0006, "reward": 2.10881707072258, "reward_std": 0.9214754402637482, "rewards/format_reward": 0.7008928954601288, "rewards/instruction_follow_reward": 0.1835937611758709, "rewards/tag_count_reward": 0.8571428954601288, "step": 9 }, { "completion_length": 352.6785888671875, "epoch": 0.40816326530612246, "grad_norm": 0.6626187128387949, "kl": 0.0197906494140625, "learning_rate": 3.875e-06, "loss": 0.0008, "reward": 2.1169643998146057, "reward_std": 0.8091428875923157, "rewards/format_reward": 0.6607143133878708, "rewards/instruction_follow_reward": 0.1985863298177719, "rewards/tag_count_reward": 0.8604911118745804, "step": 10 }, { "completion_length": 369.41966247558594, "epoch": 0.4489795918367347, "grad_norm": 0.6348377712692719, "kl": 0.0179595947265625, "learning_rate": 3.5720173048243896e-06, "loss": 0.0007, "reward": 2.5970983505249023, "reward_std": 1.0680624097585678, "rewards/format_reward": 0.6026786118745804, "rewards/instruction_follow_reward": 0.388392873108387, "rewards/tag_count_reward": 0.8292410969734192, "step": 11 }, { "completion_length": 375.0268020629883, "epoch": 0.4897959183673469, "grad_norm": 0.6179040419604938, "kl": 0.0157623291015625, "learning_rate": 3.2506721014017075e-06, "loss": 0.0006, "reward": 2.2924107909202576, "reward_std": 1.0153658390045166, "rewards/format_reward": 0.6071428805589676, "rewards/instruction_follow_reward": 0.2786458507180214, "rewards/tag_count_reward": 0.84933041036129, "step": 12 }, { "completion_length": 381.9241256713867, "epoch": 0.5306122448979592, "grad_norm": 0.6671138772835987, "kl": 0.03009033203125, "learning_rate": 2.918142710569455e-06, "loss": 0.0012, "reward": 2.4388394355773926, "reward_std": 1.0967597514390945, "rewards/format_reward": 0.6205357313156128, "rewards/instruction_follow_reward": 0.32633931189775467, "rewards/tag_count_reward": 0.8392857313156128, "step": 13 }, { "completion_length": 363.4107208251953, "epoch": 0.5714285714285714, "grad_norm": 0.6026307189636135, "kl": 0.017669677734375, "learning_rate": 2.5818572894305453e-06, "loss": 0.0007, "reward": 2.404017984867096, "reward_std": 1.1934520304203033, "rewards/format_reward": 0.5714285969734192, "rewards/instruction_follow_reward": 0.3214285895228386, "rewards/tag_count_reward": 0.8683036118745804, "step": 14 }, { "completion_length": 424.05358123779297, "epoch": 0.6122448979591837, "grad_norm": 0.5671014284506704, "kl": 0.0147705078125, "learning_rate": 2.2493278985982932e-06, "loss": 0.0006, "reward": 2.2103636860847473, "reward_std": 1.0440057069063187, "rewards/format_reward": 0.651785746216774, "rewards/instruction_follow_reward": 0.2267431989312172, "rewards/tag_count_reward": 0.8783482760190964, "step": 15 }, { "completion_length": 361.37947845458984, "epoch": 0.6530612244897959, "grad_norm": 0.6208470872600845, "kl": 0.019134521484375, "learning_rate": 1.9279826951756115e-06, "loss": 0.0008, "reward": 2.465401828289032, "reward_std": 1.0677553862333298, "rewards/format_reward": 0.6964285969734192, "rewards/instruction_follow_reward": 0.2924107313156128, "rewards/tag_count_reward": 0.8917411118745804, "step": 16 }, { "completion_length": 367.0848388671875, "epoch": 0.6938775510204082, "grad_norm": 0.6513303765560159, "kl": 0.0290679931640625, "learning_rate": 1.6250000000000007e-06, "loss": 0.0012, "reward": 2.10491082072258, "reward_std": 0.9648873805999756, "rewards/format_reward": 0.6339286118745804, "rewards/instruction_follow_reward": 0.19642857927829027, "rewards/tag_count_reward": 0.8816964775323868, "step": 17 }, { "completion_length": 378.62501525878906, "epoch": 0.7346938775510204, "grad_norm": 0.5707111135110878, "kl": 0.0172271728515625, "learning_rate": 1.3471479458178499e-06, "loss": 0.0007, "reward": 2.5422155261039734, "reward_std": 0.8247152641415596, "rewards/format_reward": 0.7812500298023224, "rewards/instruction_follow_reward": 0.2793247886002064, "rewards/tag_count_reward": 0.9229911118745804, "step": 18 }, { "completion_length": 354.74108123779297, "epoch": 0.7755102040816326, "grad_norm": 0.5544484847237519, "kl": 0.023040771484375, "learning_rate": 1.1006332883828912e-06, "loss": 0.0009, "reward": 2.662946581840515, "reward_std": 1.0467701256275177, "rewards/format_reward": 0.7366071790456772, "rewards/instruction_follow_reward": 0.3437500149011612, "rewards/tag_count_reward": 0.895089328289032, "step": 19 }, { "completion_length": 383.00447845458984, "epoch": 0.8163265306122449, "grad_norm": 0.510823042158658, "kl": 0.0205535888671875, "learning_rate": 8.909627577890121e-07, "loss": 0.0008, "reward": 2.3368303775787354, "reward_std": 0.9361487179994583, "rewards/format_reward": 0.7633928954601288, "rewards/instruction_follow_reward": 0.22500000894069672, "rewards/tag_count_reward": 0.8984375596046448, "step": 20 }, { "completion_length": 357.4151916503906, "epoch": 0.8571428571428571, "grad_norm": 0.5515191742215413, "kl": 0.0199737548828125, "learning_rate": 7.228200472195574e-07, "loss": 0.0008, "reward": 2.494419753551483, "reward_std": 0.9745698273181915, "rewards/format_reward": 0.7767857611179352, "rewards/instruction_follow_reward": 0.2700892984867096, "rewards/tag_count_reward": 0.9073661118745804, "step": 21 }, { "completion_length": 352.87947845458984, "epoch": 0.8979591836734694, "grad_norm": 0.6878725311389932, "kl": 0.036224365234375, "learning_rate": 5.999611869811834e-07, "loss": 0.0014, "reward": 2.4720982909202576, "reward_std": 0.9217582643032074, "rewards/format_reward": 0.7276785969734192, "rewards/instruction_follow_reward": 0.2857142984867096, "rewards/tag_count_reward": 0.887276828289032, "step": 22 }, { "completion_length": 373.3035888671875, "epoch": 0.9387755102040817, "grad_norm": 0.5789373606479209, "kl": 0.02337646484375, "learning_rate": 5.251306409934609e-07, "loss": 0.0009, "reward": 2.023437589406967, "reward_std": 0.9313821792602539, "rewards/format_reward": 0.7500000149011612, "rewards/instruction_follow_reward": 0.12946429289877415, "rewards/tag_count_reward": 0.8850446790456772, "step": 23 }, { "completion_length": 393.03572845458984, "epoch": 0.9795918367346939, "grad_norm": 0.59441068526159, "kl": 0.03009033203125, "learning_rate": 5.000000000000001e-07, "loss": 0.0012, "reward": 2.5097524523735046, "reward_std": 0.8981894552707672, "rewards/format_reward": 0.7857143133878708, "rewards/instruction_follow_reward": 0.2729680556803942, "rewards/tag_count_reward": 0.9051339775323868, "step": 24 }, { "epoch": 0.9795918367346939, "step": 24, "total_flos": 0.0, "train_loss": 0.0009309215234963494, "train_runtime": 2265.8224, "train_samples_per_second": 0.345, "train_steps_per_second": 0.011 } ], "logging_steps": 1, "max_steps": 24, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }