finsight-ai / trainer_state.json

Update README.md to include detailed model information for FinSight AI, a financial advisory chatbot. Added sections on model details, usage examples, training details, limitations, and future improvements. Changed license from Apache-2.0 to MIT and updated language and tags for better categorization.

da00d1b 1 day ago

raw

history blame contribute delete

8.17 kB

	{
	"best_metric": 1.5792005062103271,
	"best_model_checkpoint": "qlora_output/checkpoint-1200",
	"epoch": 1.4679393049437102,
	"eval_steps": 600,
	"global_step": 1500,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.03915810083210964,
	"grad_norm": 0.07319402694702148,
	"learning_rate": 3.555555555555556e-05,
	"loss": 2.4428,
	"step": 40
	},
	{
	"epoch": 0.07831620166421928,
	"grad_norm": 0.04330237954854965,
	"learning_rate": 7.111111111111112e-05,
	"loss": 2.268,
	"step": 80
	},
	{
	"epoch": 0.11747430249632893,
	"grad_norm": 0.05867455527186394,
	"learning_rate": 0.00010666666666666667,
	"loss": 2.1806,
	"step": 120
	},
	{
	"epoch": 0.15663240332843856,
	"grad_norm": 0.06936266273260117,
	"learning_rate": 0.00014222222222222224,
	"loss": 2.0778,
	"step": 160
	},
	{
	"epoch": 0.19579050416054822,
	"grad_norm": 0.08056484907865524,
	"learning_rate": 0.00017777777777777779,
	"loss": 2.0382,
	"step": 200
	},
	{
	"epoch": 0.23494860499265785,
	"grad_norm": 0.0779654011130333,
	"learning_rate": 0.0001999317060143023,
	"loss": 1.9227,
	"step": 240
	},
	{
	"epoch": 0.2741067058247675,
	"grad_norm": 0.11802724003791809,
	"learning_rate": 0.00019908312530915603,
	"loss": 1.9139,
	"step": 280
	},
	{
	"epoch": 0.3132648066568771,
	"grad_norm": 0.0852489247918129,
	"learning_rate": 0.00019727282722446047,
	"loss": 1.9423,
	"step": 320
	},
	{
	"epoch": 0.3524229074889868,
	"grad_norm": 0.1409972459077835,
	"learning_rate": 0.00019451838281608197,
	"loss": 1.8484,
	"step": 360
	},
	{
	"epoch": 0.39158100832109644,
	"grad_norm": 0.11129080504179001,
	"learning_rate": 0.00019084652718195238,
	"loss": 1.7694,
	"step": 400
	},
	{
	"epoch": 0.43073910915320607,
	"grad_norm": 0.10179898887872696,
	"learning_rate": 0.00018629289996673897,
	"loss": 1.8026,
	"step": 440
	},
	{
	"epoch": 0.4698972099853157,
	"grad_norm": 0.14124783873558044,
	"learning_rate": 0.00018090169943749476,
	"loss": 1.8217,
	"step": 480
	},
	{
	"epoch": 0.5090553108174254,
	"grad_norm": 0.16184218227863312,
	"learning_rate": 0.0001747252534878891,
	"loss": 1.7847,
	"step": 520
	},
	{
	"epoch": 0.548213411649535,
	"grad_norm": 0.11349498480558395,
	"learning_rate": 0.00016782351173492342,
	"loss": 1.6622,
	"step": 560
	},
	{
	"epoch": 0.5873715124816447,
	"grad_norm": 0.08884529024362564,
	"learning_rate": 0.00016026346363792567,
	"loss": 1.7633,
	"step": 600
	},
	{
	"epoch": 0.5873715124816447,
	"eval_loss": 1.6572695970535278,
	"eval_runtime": 1912.2507,
	"eval_samples_per_second": 1.425,
	"eval_steps_per_second": 0.713,
	"step": 600
	},
	{
	"epoch": 0.6265296133137542,
	"grad_norm": 0.09996389597654343,
	"learning_rate": 0.0001521184882876585,
	"loss": 1.6764,
	"step": 640
	},
	{
	"epoch": 0.6656877141458639,
	"grad_norm": 0.12769252061843872,
	"learning_rate": 0.00014346764217659653,
	"loss": 1.7871,
	"step": 680
	},
	{
	"epoch": 0.7048458149779736,
	"grad_norm": 0.13380451500415802,
	"learning_rate": 0.00013439489186339282,
	"loss": 1.7167,
	"step": 720
	},
	{
	"epoch": 0.7440039158100832,
	"grad_norm": 0.11822285503149033,
	"learning_rate": 0.0001249882989794231,
	"loss": 1.6789,
	"step": 760
	},
	{
	"epoch": 0.7831620166421929,
	"grad_norm": 0.12109290808439255,
	"learning_rate": 0.00011533916548786857,
	"loss": 1.583,
	"step": 800
	},
	{
	"epoch": 0.8223201174743024,
	"grad_norm": 0.12838001549243927,
	"learning_rate": 0.000105541147491597,
	"loss": 1.7412,
	"step": 840
	},
	{
	"epoch": 0.8614782183064121,
	"grad_norm": 0.16042716801166534,
	"learning_rate": 9.568934619137046e-05,
	"loss": 1.6519,
	"step": 880
	},
	{
	"epoch": 0.9006363191385218,
	"grad_norm": 0.1427149474620819,
	"learning_rate": 8.587938481769089e-05,
	"loss": 1.6598,
	"step": 920
	},
	{
	"epoch": 0.9397944199706314,
	"grad_norm": 0.118178591132164,
	"learning_rate": 7.620648049573815e-05,
	"loss": 1.7378,
	"step": 960
	},
	{
	"epoch": 0.9789525208027411,
	"grad_norm": 0.1253277212381363,
	"learning_rate": 6.676452005203406e-05,
	"loss": 1.6451,
	"step": 1000
	},
	{
	"epoch": 1.0176211453744493,
	"grad_norm": 0.15462452173233032,
	"learning_rate": 5.764514873320761e-05,
	"loss": 1.6475,
	"step": 1040
	},
	{
	"epoch": 1.056779246206559,
	"grad_norm": 0.106235072016716,
	"learning_rate": 4.893688068190932e-05,
	"loss": 1.6686,
	"step": 1080
	},
	{
	"epoch": 1.0959373470386686,
	"grad_norm": 0.09717393666505814,
	"learning_rate": 4.072423980374452e-05,
	"loss": 1.6824,
	"step": 1120
	},
	{
	"epoch": 1.1350954478707782,
	"grad_norm": 0.13711334764957428,
	"learning_rate": 3.308693936411421e-05,
	"loss": 1.6147,
	"step": 1160
	},
	{
	"epoch": 1.174253548702888,
	"grad_norm": 0.1265803724527359,
	"learning_rate": 2.6099108277934103e-05,
	"loss": 1.6174,
	"step": 1200
	},
	{
	"epoch": 1.174253548702888,
	"eval_loss": 1.5792005062103271,
	"eval_runtime": 1903.0333,
	"eval_samples_per_second": 1.432,
	"eval_steps_per_second": 0.716,
	"step": 1200
	},
	{
	"epoch": 1.2134116495349976,
	"grad_norm": 0.09578167647123337,
	"learning_rate": 1.982857160199334e-05,
	"loss": 1.6246,
	"step": 1240
	},
	{
	"epoch": 1.2525697503671072,
	"grad_norm": 0.14227357506752014,
	"learning_rate": 1.4336192213613742e-05,
	"loss": 1.5548,
	"step": 1280
	},
	{
	"epoch": 1.2917278511992167,
	"grad_norm": 0.1526080220937729,
	"learning_rate": 9.675280065387116e-06,
	"loss": 1.5454,
	"step": 1320
	},
	{
	"epoch": 1.3308859520313265,
	"grad_norm": 0.17356757819652557,
	"learning_rate": 5.891074749862857e-06,
	"loss": 1.5555,
	"step": 1360
	},
	{
	"epoch": 1.3700440528634361,
	"grad_norm": 0.1258653849363327,
	"learning_rate": 3.0203063964990617e-06,
	"loss": 1.5775,
	"step": 1400
	},
	{
	"epoch": 1.4092021536955457,
	"grad_norm": 0.12249883264303207,
	"learning_rate": 1.0908391628854041e-06,
	"loss": 1.5619,
	"step": 1440
	},
	{
	"epoch": 1.4483602545276555,
	"grad_norm": 0.1455027014017105,
	"learning_rate": 1.2140078057101266e-07,
	"loss": 1.5342,
	"step": 1480
	},
	{
	"epoch": 1.4679393049437102,
	"step": 1500,
	"total_flos": 1.105565365842985e+17,
	"train_loss": 1.763797264099121,
	"train_runtime": 30390.5778,
	"train_samples_per_second": 0.395,
	"train_steps_per_second": 0.049
	}
	],
	"logging_steps": 40,
	"max_steps": 1500,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 2,
	"save_steps": 600,
	"stateful_callbacks": {
	"EarlyStoppingCallback": {
	"args": {
	"early_stopping_patience": 3,
	"early_stopping_threshold": 0.05
	},
	"attributes": {
	"early_stopping_patience_counter": 0
	}
	},
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 1.105565365842985e+17,
	"train_batch_size": 2,
	"trial_name": null,
	"trial_params": null
	}