Training in progress, step 39, checkpoint

fda23b8 verified 25 days ago

8.25 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 0.016586921850079744,
	"eval_steps": 13,
	"global_step": 39,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.0004253056884635832,
	"grad_norm": 0.2786005139350891,
	"learning_rate": 5e-06,
	"loss": 0.8579,
	"step": 1
	},
	{
	"epoch": 0.0004253056884635832,
	"eval_loss": 0.9026059508323669,
	"eval_runtime": 91.1092,
	"eval_samples_per_second": 10.866,
	"eval_steps_per_second": 5.433,
	"step": 1
	},
	{
	"epoch": 0.0008506113769271664,
	"grad_norm": 0.2709323465824127,
	"learning_rate": 1e-05,
	"loss": 0.8977,
	"step": 2
	},
	{
	"epoch": 0.0012759170653907496,
	"grad_norm": 0.2819572687149048,
	"learning_rate": 1.5e-05,
	"loss": 0.7939,
	"step": 3
	},
	{
	"epoch": 0.0017012227538543328,
	"grad_norm": 0.29136133193969727,
	"learning_rate": 2e-05,
	"loss": 0.9395,
	"step": 4
	},
	{
	"epoch": 0.002126528442317916,
	"grad_norm": 0.26331567764282227,
	"learning_rate": 2.5e-05,
	"loss": 0.9607,
	"step": 5
	},
	{
	"epoch": 0.002551834130781499,
	"grad_norm": 0.2830480933189392,
	"learning_rate": 3e-05,
	"loss": 0.8113,
	"step": 6
	},
	{
	"epoch": 0.0029771398192450825,
	"grad_norm": 0.26591408252716064,
	"learning_rate": 3.5e-05,
	"loss": 0.8025,
	"step": 7
	},
	{
	"epoch": 0.0034024455077086655,
	"grad_norm": 0.26501235365867615,
	"learning_rate": 4e-05,
	"loss": 0.7995,
	"step": 8
	},
	{
	"epoch": 0.003827751196172249,
	"grad_norm": 0.3157069683074951,
	"learning_rate": 4.5e-05,
	"loss": 0.8294,
	"step": 9
	},
	{
	"epoch": 0.004253056884635832,
	"grad_norm": 0.2955096960067749,
	"learning_rate": 5e-05,
	"loss": 0.8991,
	"step": 10
	},
	{
	"epoch": 0.004678362573099415,
	"grad_norm": 0.34986257553100586,
	"learning_rate": 4.99229333433282e-05,
	"loss": 0.9643,
	"step": 11
	},
	{
	"epoch": 0.005103668261562998,
	"grad_norm": 0.323536217212677,
	"learning_rate": 4.9692208514878444e-05,
	"loss": 0.8256,
	"step": 12
	},
	{
	"epoch": 0.005528973950026581,
	"grad_norm": 0.3692857623100281,
	"learning_rate": 4.9309248009941914e-05,
	"loss": 0.8779,
	"step": 13
	},
	{
	"epoch": 0.005528973950026581,
	"eval_loss": 0.8817930221557617,
	"eval_runtime": 89.8917,
	"eval_samples_per_second": 11.013,
	"eval_steps_per_second": 5.507,
	"step": 13
	},
	{
	"epoch": 0.005954279638490165,
	"grad_norm": 0.4119487404823303,
	"learning_rate": 4.877641290737884e-05,
	"loss": 0.9006,
	"step": 14
	},
	{
	"epoch": 0.006379585326953748,
	"grad_norm": 0.3977135419845581,
	"learning_rate": 4.8096988312782174e-05,
	"loss": 0.9069,
	"step": 15
	},
	{
	"epoch": 0.006804891015417331,
	"grad_norm": 0.45003339648246765,
	"learning_rate": 4.72751631047092e-05,
	"loss": 0.9576,
	"step": 16
	},
	{
	"epoch": 0.007230196703880914,
	"grad_norm": 0.40881747007369995,
	"learning_rate": 4.6316004108852305e-05,
	"loss": 0.7697,
	"step": 17
	},
	{
	"epoch": 0.007655502392344498,
	"grad_norm": 0.5239385962486267,
	"learning_rate": 4.522542485937369e-05,
	"loss": 0.8861,
	"step": 18
	},
	{
	"epoch": 0.00808080808080808,
	"grad_norm": 0.4427054822444916,
	"learning_rate": 4.401014914000078e-05,
	"loss": 0.8403,
	"step": 19
	},
	{
	"epoch": 0.008506113769271665,
	"grad_norm": 0.4336467683315277,
	"learning_rate": 4.267766952966369e-05,
	"loss": 0.8088,
	"step": 20
	},
	{
	"epoch": 0.008931419457735247,
	"grad_norm": 0.46326103806495667,
	"learning_rate": 4.123620120825459e-05,
	"loss": 0.7963,
	"step": 21
	},
	{
	"epoch": 0.00935672514619883,
	"grad_norm": 0.4596688449382782,
	"learning_rate": 3.969463130731183e-05,
	"loss": 0.7968,
	"step": 22
	},
	{
	"epoch": 0.009782030834662414,
	"grad_norm": 0.4667969346046448,
	"learning_rate": 3.8062464117898724e-05,
	"loss": 0.8305,
	"step": 23
	},
	{
	"epoch": 0.010207336523125997,
	"grad_norm": 0.3937626779079437,
	"learning_rate": 3.634976249348867e-05,
	"loss": 0.7507,
	"step": 24
	},
	{
	"epoch": 0.01063264221158958,
	"grad_norm": 0.41750970482826233,
	"learning_rate": 3.456708580912725e-05,
	"loss": 0.785,
	"step": 25
	},
	{
	"epoch": 0.011057947900053162,
	"grad_norm": 0.415423721075058,
	"learning_rate": 3.272542485937369e-05,
	"loss": 0.8375,
	"step": 26
	},
	{
	"epoch": 0.011057947900053162,
	"eval_loss": 0.7622473239898682,
	"eval_runtime": 89.8141,
	"eval_samples_per_second": 11.023,
	"eval_steps_per_second": 5.511,
	"step": 26
	},
	{
	"epoch": 0.011483253588516746,
	"grad_norm": 0.3313671350479126,
	"learning_rate": 3.083613409639764e-05,
	"loss": 0.7642,
	"step": 27
	},
	{
	"epoch": 0.01190855927698033,
	"grad_norm": 0.37808409333229065,
	"learning_rate": 2.8910861626005776e-05,
	"loss": 0.7655,
	"step": 28
	},
	{
	"epoch": 0.012333864965443912,
	"grad_norm": 0.3844619691371918,
	"learning_rate": 2.6961477393196126e-05,
	"loss": 0.8025,
	"step": 29
	},
	{
	"epoch": 0.012759170653907496,
	"grad_norm": 0.37186184525489807,
	"learning_rate": 2.5e-05,
	"loss": 0.8364,
	"step": 30
	},
	{
	"epoch": 0.01318447634237108,
	"grad_norm": 0.310320645570755,
	"learning_rate": 2.303852260680388e-05,
	"loss": 0.6388,
	"step": 31
	},
	{
	"epoch": 0.013609782030834662,
	"grad_norm": 0.3105718493461609,
	"learning_rate": 2.1089138373994223e-05,
	"loss": 0.7279,
	"step": 32
	},
	{
	"epoch": 0.014035087719298246,
	"grad_norm": 0.3364258110523224,
	"learning_rate": 1.9163865903602374e-05,
	"loss": 0.7568,
	"step": 33
	},
	{
	"epoch": 0.014460393407761828,
	"grad_norm": 0.34743359684944153,
	"learning_rate": 1.7274575140626318e-05,
	"loss": 0.7706,
	"step": 34
	},
	{
	"epoch": 0.014885699096225412,
	"grad_norm": 0.29443031549453735,
	"learning_rate": 1.5432914190872757e-05,
	"loss": 0.7154,
	"step": 35
	},
	{
	"epoch": 0.015311004784688996,
	"grad_norm": 0.3357723653316498,
	"learning_rate": 1.3650237506511331e-05,
	"loss": 0.752,
	"step": 36
	},
	{
	"epoch": 0.01573631047315258,
	"grad_norm": 0.2919500470161438,
	"learning_rate": 1.1937535882101281e-05,
	"loss": 0.7965,
	"step": 37
	},
	{
	"epoch": 0.01616161616161616,
	"grad_norm": 0.27403074502944946,
	"learning_rate": 1.0305368692688174e-05,
	"loss": 0.7265,
	"step": 38
	},
	{
	"epoch": 0.016586921850079744,
	"grad_norm": 0.27928847074508667,
	"learning_rate": 8.763798791745411e-06,
	"loss": 0.6562,
	"step": 39
	},
	{
	"epoch": 0.016586921850079744,
	"eval_loss": 0.7195438146591187,
	"eval_runtime": 90.1408,
	"eval_samples_per_second": 10.983,
	"eval_steps_per_second": 5.491,
	"step": 39
	}
	],
	"logging_steps": 1,
	"max_steps": 50,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 1,
	"save_steps": 13,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": false
	},
	"attributes": {}
	}
	},
	"total_flos": 2.366738559664128e+16,
	"train_batch_size": 2,
	"trial_name": null,
	"trial_params": null
	}