finsight-ai / trainer_state.json
zahemen9900's picture
Update README.md to include detailed model information for FinSight AI, a financial advisory chatbot. Added sections on model details, usage examples, training details, limitations, and future improvements. Changed license from Apache-2.0 to MIT and updated language and tags for better categorization.
da00d1b
{
"best_metric": 1.5792005062103271,
"best_model_checkpoint": "qlora_output/checkpoint-1200",
"epoch": 1.4679393049437102,
"eval_steps": 600,
"global_step": 1500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03915810083210964,
"grad_norm": 0.07319402694702148,
"learning_rate": 3.555555555555556e-05,
"loss": 2.4428,
"step": 40
},
{
"epoch": 0.07831620166421928,
"grad_norm": 0.04330237954854965,
"learning_rate": 7.111111111111112e-05,
"loss": 2.268,
"step": 80
},
{
"epoch": 0.11747430249632893,
"grad_norm": 0.05867455527186394,
"learning_rate": 0.00010666666666666667,
"loss": 2.1806,
"step": 120
},
{
"epoch": 0.15663240332843856,
"grad_norm": 0.06936266273260117,
"learning_rate": 0.00014222222222222224,
"loss": 2.0778,
"step": 160
},
{
"epoch": 0.19579050416054822,
"grad_norm": 0.08056484907865524,
"learning_rate": 0.00017777777777777779,
"loss": 2.0382,
"step": 200
},
{
"epoch": 0.23494860499265785,
"grad_norm": 0.0779654011130333,
"learning_rate": 0.0001999317060143023,
"loss": 1.9227,
"step": 240
},
{
"epoch": 0.2741067058247675,
"grad_norm": 0.11802724003791809,
"learning_rate": 0.00019908312530915603,
"loss": 1.9139,
"step": 280
},
{
"epoch": 0.3132648066568771,
"grad_norm": 0.0852489247918129,
"learning_rate": 0.00019727282722446047,
"loss": 1.9423,
"step": 320
},
{
"epoch": 0.3524229074889868,
"grad_norm": 0.1409972459077835,
"learning_rate": 0.00019451838281608197,
"loss": 1.8484,
"step": 360
},
{
"epoch": 0.39158100832109644,
"grad_norm": 0.11129080504179001,
"learning_rate": 0.00019084652718195238,
"loss": 1.7694,
"step": 400
},
{
"epoch": 0.43073910915320607,
"grad_norm": 0.10179898887872696,
"learning_rate": 0.00018629289996673897,
"loss": 1.8026,
"step": 440
},
{
"epoch": 0.4698972099853157,
"grad_norm": 0.14124783873558044,
"learning_rate": 0.00018090169943749476,
"loss": 1.8217,
"step": 480
},
{
"epoch": 0.5090553108174254,
"grad_norm": 0.16184218227863312,
"learning_rate": 0.0001747252534878891,
"loss": 1.7847,
"step": 520
},
{
"epoch": 0.548213411649535,
"grad_norm": 0.11349498480558395,
"learning_rate": 0.00016782351173492342,
"loss": 1.6622,
"step": 560
},
{
"epoch": 0.5873715124816447,
"grad_norm": 0.08884529024362564,
"learning_rate": 0.00016026346363792567,
"loss": 1.7633,
"step": 600
},
{
"epoch": 0.5873715124816447,
"eval_loss": 1.6572695970535278,
"eval_runtime": 1912.2507,
"eval_samples_per_second": 1.425,
"eval_steps_per_second": 0.713,
"step": 600
},
{
"epoch": 0.6265296133137542,
"grad_norm": 0.09996389597654343,
"learning_rate": 0.0001521184882876585,
"loss": 1.6764,
"step": 640
},
{
"epoch": 0.6656877141458639,
"grad_norm": 0.12769252061843872,
"learning_rate": 0.00014346764217659653,
"loss": 1.7871,
"step": 680
},
{
"epoch": 0.7048458149779736,
"grad_norm": 0.13380451500415802,
"learning_rate": 0.00013439489186339282,
"loss": 1.7167,
"step": 720
},
{
"epoch": 0.7440039158100832,
"grad_norm": 0.11822285503149033,
"learning_rate": 0.0001249882989794231,
"loss": 1.6789,
"step": 760
},
{
"epoch": 0.7831620166421929,
"grad_norm": 0.12109290808439255,
"learning_rate": 0.00011533916548786857,
"loss": 1.583,
"step": 800
},
{
"epoch": 0.8223201174743024,
"grad_norm": 0.12838001549243927,
"learning_rate": 0.000105541147491597,
"loss": 1.7412,
"step": 840
},
{
"epoch": 0.8614782183064121,
"grad_norm": 0.16042716801166534,
"learning_rate": 9.568934619137046e-05,
"loss": 1.6519,
"step": 880
},
{
"epoch": 0.9006363191385218,
"grad_norm": 0.1427149474620819,
"learning_rate": 8.587938481769089e-05,
"loss": 1.6598,
"step": 920
},
{
"epoch": 0.9397944199706314,
"grad_norm": 0.118178591132164,
"learning_rate": 7.620648049573815e-05,
"loss": 1.7378,
"step": 960
},
{
"epoch": 0.9789525208027411,
"grad_norm": 0.1253277212381363,
"learning_rate": 6.676452005203406e-05,
"loss": 1.6451,
"step": 1000
},
{
"epoch": 1.0176211453744493,
"grad_norm": 0.15462452173233032,
"learning_rate": 5.764514873320761e-05,
"loss": 1.6475,
"step": 1040
},
{
"epoch": 1.056779246206559,
"grad_norm": 0.106235072016716,
"learning_rate": 4.893688068190932e-05,
"loss": 1.6686,
"step": 1080
},
{
"epoch": 1.0959373470386686,
"grad_norm": 0.09717393666505814,
"learning_rate": 4.072423980374452e-05,
"loss": 1.6824,
"step": 1120
},
{
"epoch": 1.1350954478707782,
"grad_norm": 0.13711334764957428,
"learning_rate": 3.308693936411421e-05,
"loss": 1.6147,
"step": 1160
},
{
"epoch": 1.174253548702888,
"grad_norm": 0.1265803724527359,
"learning_rate": 2.6099108277934103e-05,
"loss": 1.6174,
"step": 1200
},
{
"epoch": 1.174253548702888,
"eval_loss": 1.5792005062103271,
"eval_runtime": 1903.0333,
"eval_samples_per_second": 1.432,
"eval_steps_per_second": 0.716,
"step": 1200
},
{
"epoch": 1.2134116495349976,
"grad_norm": 0.09578167647123337,
"learning_rate": 1.982857160199334e-05,
"loss": 1.6246,
"step": 1240
},
{
"epoch": 1.2525697503671072,
"grad_norm": 0.14227357506752014,
"learning_rate": 1.4336192213613742e-05,
"loss": 1.5548,
"step": 1280
},
{
"epoch": 1.2917278511992167,
"grad_norm": 0.1526080220937729,
"learning_rate": 9.675280065387116e-06,
"loss": 1.5454,
"step": 1320
},
{
"epoch": 1.3308859520313265,
"grad_norm": 0.17356757819652557,
"learning_rate": 5.891074749862857e-06,
"loss": 1.5555,
"step": 1360
},
{
"epoch": 1.3700440528634361,
"grad_norm": 0.1258653849363327,
"learning_rate": 3.0203063964990617e-06,
"loss": 1.5775,
"step": 1400
},
{
"epoch": 1.4092021536955457,
"grad_norm": 0.12249883264303207,
"learning_rate": 1.0908391628854041e-06,
"loss": 1.5619,
"step": 1440
},
{
"epoch": 1.4483602545276555,
"grad_norm": 0.1455027014017105,
"learning_rate": 1.2140078057101266e-07,
"loss": 1.5342,
"step": 1480
},
{
"epoch": 1.4679393049437102,
"step": 1500,
"total_flos": 1.105565365842985e+17,
"train_loss": 1.763797264099121,
"train_runtime": 30390.5778,
"train_samples_per_second": 0.395,
"train_steps_per_second": 0.049
}
],
"logging_steps": 40,
"max_steps": 1500,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 600,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.05
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.105565365842985e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}