Update README.md to include detailed model information for FinSight AI, a financial advisory chatbot. Added sections on model details, usage examples, training details, limitations, and future improvements. Changed license from Apache-2.0 to MIT and updated language and tags for better categorization.
da00d1b
{ | |
"best_metric": 1.5792005062103271, | |
"best_model_checkpoint": "qlora_output/checkpoint-1200", | |
"epoch": 1.4679393049437102, | |
"eval_steps": 600, | |
"global_step": 1500, | |
"is_hyper_param_search": false, | |
"is_local_process_zero": true, | |
"is_world_process_zero": true, | |
"log_history": [ | |
{ | |
"epoch": 0.03915810083210964, | |
"grad_norm": 0.07319402694702148, | |
"learning_rate": 3.555555555555556e-05, | |
"loss": 2.4428, | |
"step": 40 | |
}, | |
{ | |
"epoch": 0.07831620166421928, | |
"grad_norm": 0.04330237954854965, | |
"learning_rate": 7.111111111111112e-05, | |
"loss": 2.268, | |
"step": 80 | |
}, | |
{ | |
"epoch": 0.11747430249632893, | |
"grad_norm": 0.05867455527186394, | |
"learning_rate": 0.00010666666666666667, | |
"loss": 2.1806, | |
"step": 120 | |
}, | |
{ | |
"epoch": 0.15663240332843856, | |
"grad_norm": 0.06936266273260117, | |
"learning_rate": 0.00014222222222222224, | |
"loss": 2.0778, | |
"step": 160 | |
}, | |
{ | |
"epoch": 0.19579050416054822, | |
"grad_norm": 0.08056484907865524, | |
"learning_rate": 0.00017777777777777779, | |
"loss": 2.0382, | |
"step": 200 | |
}, | |
{ | |
"epoch": 0.23494860499265785, | |
"grad_norm": 0.0779654011130333, | |
"learning_rate": 0.0001999317060143023, | |
"loss": 1.9227, | |
"step": 240 | |
}, | |
{ | |
"epoch": 0.2741067058247675, | |
"grad_norm": 0.11802724003791809, | |
"learning_rate": 0.00019908312530915603, | |
"loss": 1.9139, | |
"step": 280 | |
}, | |
{ | |
"epoch": 0.3132648066568771, | |
"grad_norm": 0.0852489247918129, | |
"learning_rate": 0.00019727282722446047, | |
"loss": 1.9423, | |
"step": 320 | |
}, | |
{ | |
"epoch": 0.3524229074889868, | |
"grad_norm": 0.1409972459077835, | |
"learning_rate": 0.00019451838281608197, | |
"loss": 1.8484, | |
"step": 360 | |
}, | |
{ | |
"epoch": 0.39158100832109644, | |
"grad_norm": 0.11129080504179001, | |
"learning_rate": 0.00019084652718195238, | |
"loss": 1.7694, | |
"step": 400 | |
}, | |
{ | |
"epoch": 0.43073910915320607, | |
"grad_norm": 0.10179898887872696, | |
"learning_rate": 0.00018629289996673897, | |
"loss": 1.8026, | |
"step": 440 | |
}, | |
{ | |
"epoch": 0.4698972099853157, | |
"grad_norm": 0.14124783873558044, | |
"learning_rate": 0.00018090169943749476, | |
"loss": 1.8217, | |
"step": 480 | |
}, | |
{ | |
"epoch": 0.5090553108174254, | |
"grad_norm": 0.16184218227863312, | |
"learning_rate": 0.0001747252534878891, | |
"loss": 1.7847, | |
"step": 520 | |
}, | |
{ | |
"epoch": 0.548213411649535, | |
"grad_norm": 0.11349498480558395, | |
"learning_rate": 0.00016782351173492342, | |
"loss": 1.6622, | |
"step": 560 | |
}, | |
{ | |
"epoch": 0.5873715124816447, | |
"grad_norm": 0.08884529024362564, | |
"learning_rate": 0.00016026346363792567, | |
"loss": 1.7633, | |
"step": 600 | |
}, | |
{ | |
"epoch": 0.5873715124816447, | |
"eval_loss": 1.6572695970535278, | |
"eval_runtime": 1912.2507, | |
"eval_samples_per_second": 1.425, | |
"eval_steps_per_second": 0.713, | |
"step": 600 | |
}, | |
{ | |
"epoch": 0.6265296133137542, | |
"grad_norm": 0.09996389597654343, | |
"learning_rate": 0.0001521184882876585, | |
"loss": 1.6764, | |
"step": 640 | |
}, | |
{ | |
"epoch": 0.6656877141458639, | |
"grad_norm": 0.12769252061843872, | |
"learning_rate": 0.00014346764217659653, | |
"loss": 1.7871, | |
"step": 680 | |
}, | |
{ | |
"epoch": 0.7048458149779736, | |
"grad_norm": 0.13380451500415802, | |
"learning_rate": 0.00013439489186339282, | |
"loss": 1.7167, | |
"step": 720 | |
}, | |
{ | |
"epoch": 0.7440039158100832, | |
"grad_norm": 0.11822285503149033, | |
"learning_rate": 0.0001249882989794231, | |
"loss": 1.6789, | |
"step": 760 | |
}, | |
{ | |
"epoch": 0.7831620166421929, | |
"grad_norm": 0.12109290808439255, | |
"learning_rate": 0.00011533916548786857, | |
"loss": 1.583, | |
"step": 800 | |
}, | |
{ | |
"epoch": 0.8223201174743024, | |
"grad_norm": 0.12838001549243927, | |
"learning_rate": 0.000105541147491597, | |
"loss": 1.7412, | |
"step": 840 | |
}, | |
{ | |
"epoch": 0.8614782183064121, | |
"grad_norm": 0.16042716801166534, | |
"learning_rate": 9.568934619137046e-05, | |
"loss": 1.6519, | |
"step": 880 | |
}, | |
{ | |
"epoch": 0.9006363191385218, | |
"grad_norm": 0.1427149474620819, | |
"learning_rate": 8.587938481769089e-05, | |
"loss": 1.6598, | |
"step": 920 | |
}, | |
{ | |
"epoch": 0.9397944199706314, | |
"grad_norm": 0.118178591132164, | |
"learning_rate": 7.620648049573815e-05, | |
"loss": 1.7378, | |
"step": 960 | |
}, | |
{ | |
"epoch": 0.9789525208027411, | |
"grad_norm": 0.1253277212381363, | |
"learning_rate": 6.676452005203406e-05, | |
"loss": 1.6451, | |
"step": 1000 | |
}, | |
{ | |
"epoch": 1.0176211453744493, | |
"grad_norm": 0.15462452173233032, | |
"learning_rate": 5.764514873320761e-05, | |
"loss": 1.6475, | |
"step": 1040 | |
}, | |
{ | |
"epoch": 1.056779246206559, | |
"grad_norm": 0.106235072016716, | |
"learning_rate": 4.893688068190932e-05, | |
"loss": 1.6686, | |
"step": 1080 | |
}, | |
{ | |
"epoch": 1.0959373470386686, | |
"grad_norm": 0.09717393666505814, | |
"learning_rate": 4.072423980374452e-05, | |
"loss": 1.6824, | |
"step": 1120 | |
}, | |
{ | |
"epoch": 1.1350954478707782, | |
"grad_norm": 0.13711334764957428, | |
"learning_rate": 3.308693936411421e-05, | |
"loss": 1.6147, | |
"step": 1160 | |
}, | |
{ | |
"epoch": 1.174253548702888, | |
"grad_norm": 0.1265803724527359, | |
"learning_rate": 2.6099108277934103e-05, | |
"loss": 1.6174, | |
"step": 1200 | |
}, | |
{ | |
"epoch": 1.174253548702888, | |
"eval_loss": 1.5792005062103271, | |
"eval_runtime": 1903.0333, | |
"eval_samples_per_second": 1.432, | |
"eval_steps_per_second": 0.716, | |
"step": 1200 | |
}, | |
{ | |
"epoch": 1.2134116495349976, | |
"grad_norm": 0.09578167647123337, | |
"learning_rate": 1.982857160199334e-05, | |
"loss": 1.6246, | |
"step": 1240 | |
}, | |
{ | |
"epoch": 1.2525697503671072, | |
"grad_norm": 0.14227357506752014, | |
"learning_rate": 1.4336192213613742e-05, | |
"loss": 1.5548, | |
"step": 1280 | |
}, | |
{ | |
"epoch": 1.2917278511992167, | |
"grad_norm": 0.1526080220937729, | |
"learning_rate": 9.675280065387116e-06, | |
"loss": 1.5454, | |
"step": 1320 | |
}, | |
{ | |
"epoch": 1.3308859520313265, | |
"grad_norm": 0.17356757819652557, | |
"learning_rate": 5.891074749862857e-06, | |
"loss": 1.5555, | |
"step": 1360 | |
}, | |
{ | |
"epoch": 1.3700440528634361, | |
"grad_norm": 0.1258653849363327, | |
"learning_rate": 3.0203063964990617e-06, | |
"loss": 1.5775, | |
"step": 1400 | |
}, | |
{ | |
"epoch": 1.4092021536955457, | |
"grad_norm": 0.12249883264303207, | |
"learning_rate": 1.0908391628854041e-06, | |
"loss": 1.5619, | |
"step": 1440 | |
}, | |
{ | |
"epoch": 1.4483602545276555, | |
"grad_norm": 0.1455027014017105, | |
"learning_rate": 1.2140078057101266e-07, | |
"loss": 1.5342, | |
"step": 1480 | |
}, | |
{ | |
"epoch": 1.4679393049437102, | |
"step": 1500, | |
"total_flos": 1.105565365842985e+17, | |
"train_loss": 1.763797264099121, | |
"train_runtime": 30390.5778, | |
"train_samples_per_second": 0.395, | |
"train_steps_per_second": 0.049 | |
} | |
], | |
"logging_steps": 40, | |
"max_steps": 1500, | |
"num_input_tokens_seen": 0, | |
"num_train_epochs": 2, | |
"save_steps": 600, | |
"stateful_callbacks": { | |
"EarlyStoppingCallback": { | |
"args": { | |
"early_stopping_patience": 3, | |
"early_stopping_threshold": 0.05 | |
}, | |
"attributes": { | |
"early_stopping_patience_counter": 0 | |
} | |
}, | |
"TrainerControl": { | |
"args": { | |
"should_epoch_stop": false, | |
"should_evaluate": false, | |
"should_log": false, | |
"should_save": true, | |
"should_training_stop": true | |
}, | |
"attributes": {} | |
} | |
}, | |
"total_flos": 1.105565365842985e+17, | |
"train_batch_size": 2, | |
"trial_name": null, | |
"trial_params": null | |
} | |