diff --git a/.gitattributes b/.gitattributes index e2e81b1966c09ddfc928b93650c17dca01d1b6b8..c84e3939c7b05c687c15baf513961ce32f2272ce 100644 --- a/.gitattributes +++ b/.gitattributes @@ -44,3 +44,4 @@ LLaMA-Factory/wandb/run-20250305_224606-f4sva5ub/run-f4sva5ub.wandb filter=lfs d LLaMA-Factory/wandb/run-20250305_225658-acin29x5/run-acin29x5.wandb filter=lfs diff=lfs merge=lfs -text datasets/llamafactory-finetune-data/train.json filter=lfs diff=lfs merge=lfs -text tokenizer.json filter=lfs diff=lfs merge=lfs -text +LLaMA-Factory/wandb/run-20250305_233246-9ct1o6yk/run-9ct1o6yk.wandb filter=lfs diff=lfs merge=lfs -text diff --git a/LLaMA-Factory/src/llamafactory/__pycache__/__init__.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/__pycache__/__init__.cpython-310.pyc index e47b5fff1796b29e71423195a392d3ef57e1248f..eca6a5b982d9384bd5d051aa5221e634732af162 100644 Binary files a/LLaMA-Factory/src/llamafactory/__pycache__/__init__.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/__pycache__/__init__.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/__pycache__/cli.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/__pycache__/cli.cpython-310.pyc index b80b7fca314a58befbca75ca6e5c1f06de0ede09..38152fda1c1f4b33c4e7a0941df83265ac0baacd 100644 Binary files a/LLaMA-Factory/src/llamafactory/__pycache__/cli.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/__pycache__/cli.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/__pycache__/launcher.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/__pycache__/launcher.cpython-310.pyc index 0b9f44328d5c6dac5801bc132f502fc2f631033d..461d95b4ea26e07aee8da1996f89c93b5d30e537 100644 Binary files a/LLaMA-Factory/src/llamafactory/__pycache__/launcher.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/__pycache__/launcher.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/api/__pycache__/__init__.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/api/__pycache__/__init__.cpython-310.pyc index 0c4a43cbdcead6dc94ea71a36a3262fec68be69e..28e763aa527077c1939b3a08f4825cb5ceb6eaf3 100644 Binary files a/LLaMA-Factory/src/llamafactory/api/__pycache__/__init__.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/api/__pycache__/__init__.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/api/__pycache__/app.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/api/__pycache__/app.cpython-310.pyc index 49959f64f3c674c7e7f9f8e752ed376bf745f37b..6e71cbda8e33d84a822bfb76adf44f6785b3981c 100644 Binary files a/LLaMA-Factory/src/llamafactory/api/__pycache__/app.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/api/__pycache__/app.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/api/__pycache__/chat.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/api/__pycache__/chat.cpython-310.pyc index fb0f6e9f8f7c96df6e6f45c73a890d54f7424d4d..17b0abb361807544af4195d9fa5822c458048bc4 100644 Binary files a/LLaMA-Factory/src/llamafactory/api/__pycache__/chat.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/api/__pycache__/chat.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/api/__pycache__/common.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/api/__pycache__/common.cpython-310.pyc index fbe126c930906af9ecd46069c71c9a9247a8e437..43f428e80aa911fdf6456a4f5a1cbdb7623276e9 100644 Binary files a/LLaMA-Factory/src/llamafactory/api/__pycache__/common.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/api/__pycache__/common.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/api/__pycache__/protocol.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/api/__pycache__/protocol.cpython-310.pyc index dbbbb1e9160a512a497ce3ac1beab90aa28cc097..c36c7c401072b71c950a80fe2c9f79fd46a6c511 100644 Binary files a/LLaMA-Factory/src/llamafactory/api/__pycache__/protocol.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/api/__pycache__/protocol.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/chat/__pycache__/__init__.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/chat/__pycache__/__init__.cpython-310.pyc index ca1b02e8d60592c1617a1e0cf0c01b09525412e9..7a8d6a2dea221005b34fbe22992180be1e70f64b 100644 Binary files a/LLaMA-Factory/src/llamafactory/chat/__pycache__/__init__.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/chat/__pycache__/__init__.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/chat/__pycache__/base_engine.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/chat/__pycache__/base_engine.cpython-310.pyc index 3ab22f1daf4e6de29fc098ec01a5215c0ee0c736..772a9a096573dbf9a61657e005f23a7b366ddb38 100644 Binary files a/LLaMA-Factory/src/llamafactory/chat/__pycache__/base_engine.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/chat/__pycache__/base_engine.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/chat/__pycache__/chat_model.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/chat/__pycache__/chat_model.cpython-310.pyc index 7fb25eea936450d7f3a94f7e4942babb70e37b69..b0243495526af2717c9a82e96837d991a9fac3ab 100644 Binary files a/LLaMA-Factory/src/llamafactory/chat/__pycache__/chat_model.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/chat/__pycache__/chat_model.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/chat/__pycache__/hf_engine.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/chat/__pycache__/hf_engine.cpython-310.pyc index 34315da790eea27542d4b134543bbbb9dfb4065f..72a9686c516bef11eea61d94f40653bd46ec77eb 100644 Binary files a/LLaMA-Factory/src/llamafactory/chat/__pycache__/hf_engine.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/chat/__pycache__/hf_engine.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/chat/__pycache__/vllm_engine.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/chat/__pycache__/vllm_engine.cpython-310.pyc index 2c5678e298bb669d26f3b9ebc4ed4f7324595d60..9b107ff1bf6794bc00e1407abb2c201de7a9f6da 100644 Binary files a/LLaMA-Factory/src/llamafactory/chat/__pycache__/vllm_engine.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/chat/__pycache__/vllm_engine.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/data/__pycache__/__init__.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/data/__pycache__/__init__.cpython-310.pyc index ac9f970ab2923b43e8ff80e00e75dfeb6873c27c..a9223058e08bb85282caa23501355cdbfe89dc8b 100644 Binary files a/LLaMA-Factory/src/llamafactory/data/__pycache__/__init__.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/data/__pycache__/__init__.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/data/__pycache__/collator.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/data/__pycache__/collator.cpython-310.pyc index 5fe2ef927689137e451688bb9f0b411cc9a6aa16..8e20cc08d3728d2c50be3d3606c601a5c3190bda 100644 Binary files a/LLaMA-Factory/src/llamafactory/data/__pycache__/collator.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/data/__pycache__/collator.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/data/__pycache__/converter.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/data/__pycache__/converter.cpython-310.pyc index 82913ec6aa559405ecd9f1706ef40101dbc22cff..47702fb0556ce7ebd9684040a64267c8e029ee37 100644 Binary files a/LLaMA-Factory/src/llamafactory/data/__pycache__/converter.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/data/__pycache__/converter.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/data/__pycache__/data_utils.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/data/__pycache__/data_utils.cpython-310.pyc index 2d148dd684c121d5201a313cb77ce24b94016b8a..9608c579c803c89ff554239c8e52f0b46065a512 100644 Binary files a/LLaMA-Factory/src/llamafactory/data/__pycache__/data_utils.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/data/__pycache__/data_utils.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/data/__pycache__/formatter.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/data/__pycache__/formatter.cpython-310.pyc index 946c8d08f4de4806900f7fdbccd026dee26ba8df..9bc99fcaba4c82bd2f2a23e43776c4c1bb8a37c8 100644 Binary files a/LLaMA-Factory/src/llamafactory/data/__pycache__/formatter.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/data/__pycache__/formatter.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/data/__pycache__/loader.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/data/__pycache__/loader.cpython-310.pyc index 79737e873156632e04ff1a79884072bab8984cff..95c52c6905fa4d0cdd12e9632c3016ecaf677e87 100644 Binary files a/LLaMA-Factory/src/llamafactory/data/__pycache__/loader.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/data/__pycache__/loader.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/data/__pycache__/mm_plugin.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/data/__pycache__/mm_plugin.cpython-310.pyc index 9ff8954bb306ad034152d1028b1f08879e541c0d..219540dd754fe2b86c67579cb426634666d4f080 100644 Binary files a/LLaMA-Factory/src/llamafactory/data/__pycache__/mm_plugin.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/data/__pycache__/mm_plugin.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/data/__pycache__/parser.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/data/__pycache__/parser.cpython-310.pyc index c563ca71cb758983366a0be3e8337b78f94a0296..9b4209c40225d11fb2cd9cc9be89e267eca9e0a2 100644 Binary files a/LLaMA-Factory/src/llamafactory/data/__pycache__/parser.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/data/__pycache__/parser.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/data/__pycache__/template.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/data/__pycache__/template.cpython-310.pyc index c22da9e5537940fa3832ae3718016787adcbb431..6f7f1ba22d1a67450bd52e59ff34247c8c265e56 100644 Binary files a/LLaMA-Factory/src/llamafactory/data/__pycache__/template.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/data/__pycache__/template.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/data/__pycache__/tool_utils.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/data/__pycache__/tool_utils.cpython-310.pyc index fff745406bfea23e974239a093309aa4c5cf268a..c9a9397c1f57caa5d98e40a834c2ad35596a666d 100644 Binary files a/LLaMA-Factory/src/llamafactory/data/__pycache__/tool_utils.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/data/__pycache__/tool_utils.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/data/processor/__pycache__/__init__.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/data/processor/__pycache__/__init__.cpython-310.pyc index 83e9d2a3a677eced0f33b8787d2472b406317d1c..69609060c7fc3a441ef0f29be7e904d61a0316bb 100644 Binary files a/LLaMA-Factory/src/llamafactory/data/processor/__pycache__/__init__.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/data/processor/__pycache__/__init__.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/data/processor/__pycache__/feedback.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/data/processor/__pycache__/feedback.cpython-310.pyc index d8c67eed8a85d9b8e7cd143cffe88b864dbd207a..fecae98c30e0f30de3af81763719b57e0a112959 100644 Binary files a/LLaMA-Factory/src/llamafactory/data/processor/__pycache__/feedback.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/data/processor/__pycache__/feedback.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/data/processor/__pycache__/pairwise.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/data/processor/__pycache__/pairwise.cpython-310.pyc index 9bec73c1488c3730f72daac953b923217148e7e2..03a55d75e2a5dc19de70c209775a0414c2a18877 100644 Binary files a/LLaMA-Factory/src/llamafactory/data/processor/__pycache__/pairwise.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/data/processor/__pycache__/pairwise.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/data/processor/__pycache__/pretrain.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/data/processor/__pycache__/pretrain.cpython-310.pyc index ec8e164a601e0a7c687e5dc250fcb5ff2eb982ce..5a6e23dc23d5a07c315fffb5e5b491f3f5959c0d 100644 Binary files a/LLaMA-Factory/src/llamafactory/data/processor/__pycache__/pretrain.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/data/processor/__pycache__/pretrain.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/data/processor/__pycache__/processor_utils.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/data/processor/__pycache__/processor_utils.cpython-310.pyc index 91399af6d93b1fe5efef2944142a714a5cd82a3f..41a73897819c3d77b56f0b1ad55e9c4500a7db0e 100644 Binary files a/LLaMA-Factory/src/llamafactory/data/processor/__pycache__/processor_utils.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/data/processor/__pycache__/processor_utils.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/data/processor/__pycache__/supervised.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/data/processor/__pycache__/supervised.cpython-310.pyc index 2413b028c10f11ccf0cd05f5954ff8a970a9bf99..4556cd7652bc5844db4291ac612047889e203794 100644 Binary files a/LLaMA-Factory/src/llamafactory/data/processor/__pycache__/supervised.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/data/processor/__pycache__/supervised.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/data/processor/__pycache__/unsupervised.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/data/processor/__pycache__/unsupervised.cpython-310.pyc index c6bd2c6ef9a228cb51be7aed7a392c0719994bd0..7cc4bb7477e785a2a85732bc2af5b9aa81526ade 100644 Binary files a/LLaMA-Factory/src/llamafactory/data/processor/__pycache__/unsupervised.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/data/processor/__pycache__/unsupervised.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/eval/__pycache__/__init__.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/eval/__pycache__/__init__.cpython-310.pyc index f14e5f7845741045782f6a0edf40b40866a8a35f..4a00a8b3405a8834753049aa88628f9fd799dc8f 100644 Binary files a/LLaMA-Factory/src/llamafactory/eval/__pycache__/__init__.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/eval/__pycache__/__init__.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/eval/__pycache__/evaluator.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/eval/__pycache__/evaluator.cpython-310.pyc index 4b184d0a54ffb9651aab0ae6afab8f9df0eed7f2..bafd71d7dcd510f4ea313842aa56dbf79cc52f0e 100644 Binary files a/LLaMA-Factory/src/llamafactory/eval/__pycache__/evaluator.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/eval/__pycache__/evaluator.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/eval/__pycache__/template.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/eval/__pycache__/template.cpython-310.pyc index 01de2371dc9bfc5c5a2a0607702a15a9d61b48d4..621a42ca34cc84e2540f087f83dbad866e294ac5 100644 Binary files a/LLaMA-Factory/src/llamafactory/eval/__pycache__/template.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/eval/__pycache__/template.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/extras/__pycache__/__init__.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/extras/__pycache__/__init__.cpython-310.pyc index 38299ff72dd0f662d49238c3aad20ff189e7998b..9c11cdb321d6617e5fd498392e3fb1bb430db2f2 100644 Binary files a/LLaMA-Factory/src/llamafactory/extras/__pycache__/__init__.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/extras/__pycache__/__init__.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/extras/__pycache__/constants.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/extras/__pycache__/constants.cpython-310.pyc index 0e79cc7ee3060a6ec42a21e93f87140aaee01b93..fa2b7127e357d41168823c703cbde933e1372245 100644 Binary files a/LLaMA-Factory/src/llamafactory/extras/__pycache__/constants.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/extras/__pycache__/constants.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/extras/__pycache__/env.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/extras/__pycache__/env.cpython-310.pyc index 0c738b31448cd1f45e90de81b5da41e2a525f2f4..fd97eb7d9d52169d7a48247f7eaea327c20e8a18 100644 Binary files a/LLaMA-Factory/src/llamafactory/extras/__pycache__/env.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/extras/__pycache__/env.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/extras/__pycache__/logging.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/extras/__pycache__/logging.cpython-310.pyc index c63128203aa6f6ccf0cbf5440bedcc5b326c154c..ee4fe3aa3fc357e3c144a73570864dfb646bef05 100644 Binary files a/LLaMA-Factory/src/llamafactory/extras/__pycache__/logging.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/extras/__pycache__/logging.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/extras/__pycache__/misc.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/extras/__pycache__/misc.cpython-310.pyc index 1c34e064718f7c9628366391cff282bcf4f469d8..b3b1380ee16e9048c147515bcc12382e2bd20af9 100644 Binary files a/LLaMA-Factory/src/llamafactory/extras/__pycache__/misc.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/extras/__pycache__/misc.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/extras/__pycache__/packages.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/extras/__pycache__/packages.cpython-310.pyc index 0e72dbb0f4ec290813dac454abb64aff13a39034..0fdad13383b4d49044585a02a9d95fa966d15213 100644 Binary files a/LLaMA-Factory/src/llamafactory/extras/__pycache__/packages.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/extras/__pycache__/packages.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/extras/__pycache__/ploting.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/extras/__pycache__/ploting.cpython-310.pyc index 0e1bbf56bd2e3aae04f99d6df7d1c8946cc2e5b7..e68e693fbf0d283b30c61a2b3ef2778a93c18a86 100644 Binary files a/LLaMA-Factory/src/llamafactory/extras/__pycache__/ploting.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/extras/__pycache__/ploting.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/hparams/__pycache__/__init__.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/hparams/__pycache__/__init__.cpython-310.pyc index 4124cbba429e24bc43bdbd2a2a042b6d2a9fd75f..bf9c658a1cbfd6e3abf049ce048e493214a8ef6f 100644 Binary files a/LLaMA-Factory/src/llamafactory/hparams/__pycache__/__init__.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/hparams/__pycache__/__init__.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/hparams/__pycache__/data_args.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/hparams/__pycache__/data_args.cpython-310.pyc index 38bd0f328af2d7af1942d390ead63b33566ca88e..e3e0a9ee58a1bcd1d8e4dd227069b4e702cdabaa 100644 Binary files a/LLaMA-Factory/src/llamafactory/hparams/__pycache__/data_args.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/hparams/__pycache__/data_args.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/hparams/__pycache__/evaluation_args.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/hparams/__pycache__/evaluation_args.cpython-310.pyc index 605cd9fb7211f1952496569ccc96726023a2764e..e69d8c41e4faab81a00dda6876ea182761f465bf 100644 Binary files a/LLaMA-Factory/src/llamafactory/hparams/__pycache__/evaluation_args.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/hparams/__pycache__/evaluation_args.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/hparams/__pycache__/finetuning_args.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/hparams/__pycache__/finetuning_args.cpython-310.pyc index 0774746d270402c2e33a6c26c9f5d6afab4b2773..895a73a15c147cc9cca128678d546893e821106d 100644 Binary files a/LLaMA-Factory/src/llamafactory/hparams/__pycache__/finetuning_args.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/hparams/__pycache__/finetuning_args.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/hparams/__pycache__/generating_args.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/hparams/__pycache__/generating_args.cpython-310.pyc index dc97e115da39f0fa7effb1068f99e06d284b0e24..b7b229adb9ff6c5600dd611e86d738991e404faf 100644 Binary files a/LLaMA-Factory/src/llamafactory/hparams/__pycache__/generating_args.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/hparams/__pycache__/generating_args.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/hparams/__pycache__/model_args.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/hparams/__pycache__/model_args.cpython-310.pyc index 4c777e315d0f88f6eb7859f03094a15f85d9a555..c62e8f829864763daee69355e99d492827a32b03 100644 Binary files a/LLaMA-Factory/src/llamafactory/hparams/__pycache__/model_args.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/hparams/__pycache__/model_args.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/hparams/__pycache__/parser.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/hparams/__pycache__/parser.cpython-310.pyc index a926304e73e4d02cddc6f19c5fab943826b13725..90e20d9bc3476a2127f9521069cddc3a416ee734 100644 Binary files a/LLaMA-Factory/src/llamafactory/hparams/__pycache__/parser.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/hparams/__pycache__/parser.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/hparams/__pycache__/training_args.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/hparams/__pycache__/training_args.cpython-310.pyc index 2f063f7619bc60db09f59504214837237f741ba3..6a9d0acc54c5ac5409f16ffaf3099b02a7ec2788 100644 Binary files a/LLaMA-Factory/src/llamafactory/hparams/__pycache__/training_args.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/hparams/__pycache__/training_args.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/model/__pycache__/__init__.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/model/__pycache__/__init__.cpython-310.pyc index 0e3fb1436cb6ca901a131c1bfa95e21f1e19a67d..5ed56396ed709e3960932b4e228ea47809684c16 100644 Binary files a/LLaMA-Factory/src/llamafactory/model/__pycache__/__init__.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/model/__pycache__/__init__.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/model/__pycache__/adapter.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/model/__pycache__/adapter.cpython-310.pyc index b5c5940ce83904d076e0bb45166990ca82dc0342..d7c3bb80f2d76116f7f27b4d532fd8bd6896732f 100644 Binary files a/LLaMA-Factory/src/llamafactory/model/__pycache__/adapter.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/model/__pycache__/adapter.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/model/__pycache__/loader.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/model/__pycache__/loader.cpython-310.pyc index d39575e83ff502792292abcbb60e0d27c1aa016d..302e0277d072d3c04cb486be959e902799e86982 100644 Binary files a/LLaMA-Factory/src/llamafactory/model/__pycache__/loader.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/model/__pycache__/loader.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/model/__pycache__/patcher.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/model/__pycache__/patcher.cpython-310.pyc index 81b22e7b930a378bc1c51b1a745160fedd53b29c..afc43a68b615da58484cee1cfa9fb3e6ccfc8c94 100644 Binary files a/LLaMA-Factory/src/llamafactory/model/__pycache__/patcher.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/model/__pycache__/patcher.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/__init__.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/__init__.cpython-310.pyc index 5b401b09c047f6dfff07bf5872c9c9240ce1436d..de664183e2cbe7e8388db4266eb181a0e6de271b 100644 Binary files a/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/__init__.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/__init__.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/attention.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/attention.cpython-310.pyc index cf56cc46dd7119dbe16de4b06146b4dfa49ff058..44c6db33ec118a2b5d7885be41cdef2af7c3b897 100644 Binary files a/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/attention.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/attention.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/checkpointing.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/checkpointing.cpython-310.pyc index 3cc9236a97353682d5b41616f6fc552160e7ad48..1ddd97b427e75c4f01914fa4008cc7b07158f40d 100644 Binary files a/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/checkpointing.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/checkpointing.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/embedding.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/embedding.cpython-310.pyc index b93fc84115b7c5692a37747d2a9643162d27f43e..573985cc2ad11f33a27e1d82c9f48dd4d86de726 100644 Binary files a/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/embedding.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/embedding.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/liger_kernel.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/liger_kernel.cpython-310.pyc index e40ca69b5f26e6bbb6ab31df37dadd12361bc46b..5865a2148f0bad635820bb47864ec0b98db502b3 100644 Binary files a/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/liger_kernel.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/liger_kernel.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/longlora.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/longlora.cpython-310.pyc index 60d1fb5bece801709ef1af5985174d1cba8b6793..1aa2c58f0bdb8fce758760526e83a8ce4f785ee9 100644 Binary files a/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/longlora.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/longlora.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/misc.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/misc.cpython-310.pyc index 5f8e52802edc240cdea48d8f880436c98873e886..03dc67c86641d5c03c6c3138b6c613fd4e63517f 100644 Binary files a/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/misc.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/misc.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/mod.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/mod.cpython-310.pyc index 7efa1dec233ad0a0edbd9895498d67b7c4a9f8f9..2dd50f01df081e3779fd0f27c4639653e8b4ff11 100644 Binary files a/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/mod.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/mod.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/moe.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/moe.cpython-310.pyc index 020caf272aadc00a3781396dfd0bbad5919dd8f7..813174f2d3748804670f89f5ace48343bea79b77 100644 Binary files a/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/moe.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/moe.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/packing.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/packing.cpython-310.pyc index 39131fec2567895a1d3bea17017b2b2e2ed093dc..94f46c7d750eb95b9035e56043f3d95fd9926d3a 100644 Binary files a/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/packing.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/packing.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/quantization.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/quantization.cpython-310.pyc index 12d0c9f8b6c45fb67df582bdb29c58d5609b7b61..6d329869cc89082995c6521aa0d2af20f459d4ca 100644 Binary files a/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/quantization.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/quantization.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/rope.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/rope.cpython-310.pyc index 654b1a89f7277d607954cff9a7162597e09d2fa1..29730dc444c98ca0f816c0f050f87f4901f7f365 100644 Binary files a/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/rope.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/rope.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/unsloth.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/unsloth.cpython-310.pyc index c4550c2f2fd4c1145728d3fbf5038bc9ca81e623..978772347e3a643627559eecfd4a1943de79c1e0 100644 Binary files a/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/unsloth.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/unsloth.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/valuehead.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/valuehead.cpython-310.pyc index 3fefc32fa3c71f5f7abc5b0a8b55a082066dcb75..e6e82d6f729e3da79cc777be3039fb0569125d04 100644 Binary files a/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/valuehead.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/valuehead.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/visual.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/visual.cpython-310.pyc index 76aa37cac071cca02f03ac69898ee57ef62adc8f..b4a790bf7e0934fb5ccfba28397cef8210d821ec 100644 Binary files a/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/visual.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/model/model_utils/__pycache__/visual.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/train/__pycache__/__init__.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/train/__pycache__/__init__.cpython-310.pyc index cd89db9cf192ed3af971b23701a68cc76bda1afc..1e9f69ec53bb18f18b82037fa006b6d487a53f21 100644 Binary files a/LLaMA-Factory/src/llamafactory/train/__pycache__/__init__.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/train/__pycache__/__init__.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/train/__pycache__/callbacks.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/train/__pycache__/callbacks.cpython-310.pyc index 0f021fd90396823786df7fd626885caebc036a31..39e5aeea2c27caa165636b75bd9a06c262fd9eb1 100644 Binary files a/LLaMA-Factory/src/llamafactory/train/__pycache__/callbacks.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/train/__pycache__/callbacks.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/train/__pycache__/trainer_utils.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/train/__pycache__/trainer_utils.cpython-310.pyc index 2ce87274a4d3f43902cc2696bec4dda939e668c0..a0a095300c7f8a961a853829c82bda7bd4044aa0 100644 Binary files a/LLaMA-Factory/src/llamafactory/train/__pycache__/trainer_utils.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/train/__pycache__/trainer_utils.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/train/__pycache__/tuner.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/train/__pycache__/tuner.cpython-310.pyc index 4689801d3b8f311cc2e696e1c5b6c578068464b2..0736e02fe77c530c2465c12e2ec9c72ee6ceaea3 100644 Binary files a/LLaMA-Factory/src/llamafactory/train/__pycache__/tuner.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/train/__pycache__/tuner.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/train/dpo/__pycache__/__init__.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/train/dpo/__pycache__/__init__.cpython-310.pyc index 321c3b4a06588676666dc40212e43a1db32a3f52..08bee80809b55d675e0501d8a66f81b10598f83c 100644 Binary files a/LLaMA-Factory/src/llamafactory/train/dpo/__pycache__/__init__.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/train/dpo/__pycache__/__init__.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/train/dpo/__pycache__/trainer.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/train/dpo/__pycache__/trainer.cpython-310.pyc index fb53b5f2ecbc5eede395b74e7ca8ec74761cac1d..5779e49302c06dd66610a8e581476f9ad231bc28 100644 Binary files a/LLaMA-Factory/src/llamafactory/train/dpo/__pycache__/trainer.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/train/dpo/__pycache__/trainer.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/train/dpo/__pycache__/workflow.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/train/dpo/__pycache__/workflow.cpython-310.pyc index ac0682f9e20c62ee2fec98906e685521c4c22cd8..d463ab21375fefeb588dc321869ad07e164d7382 100644 Binary files a/LLaMA-Factory/src/llamafactory/train/dpo/__pycache__/workflow.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/train/dpo/__pycache__/workflow.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/train/kto/__pycache__/__init__.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/train/kto/__pycache__/__init__.cpython-310.pyc index 744be472bf80009bc3f3c79fad54a20ca96d0202..4ccd563fcd749ed129f6469fefa0cf453811f8f5 100644 Binary files a/LLaMA-Factory/src/llamafactory/train/kto/__pycache__/__init__.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/train/kto/__pycache__/__init__.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/train/kto/__pycache__/trainer.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/train/kto/__pycache__/trainer.cpython-310.pyc index 6d6aa8d87515204d03a15bf66af2f3ef64ac6913..6a01c34ce8ba6596a129e6d3259320db148b74a5 100644 Binary files a/LLaMA-Factory/src/llamafactory/train/kto/__pycache__/trainer.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/train/kto/__pycache__/trainer.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/train/kto/__pycache__/workflow.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/train/kto/__pycache__/workflow.cpython-310.pyc index f4128fc1bb737d17034416383619f756e98d5ca7..b2f930ed5557dde86dd1968ba4bc2fa068f05fb2 100644 Binary files a/LLaMA-Factory/src/llamafactory/train/kto/__pycache__/workflow.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/train/kto/__pycache__/workflow.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/train/ppo/__pycache__/__init__.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/train/ppo/__pycache__/__init__.cpython-310.pyc index f82f10c677590f3fc820da1b01208d8379936f83..69e74b10f3eed53d3616e959d8c2e989456f83d0 100644 Binary files a/LLaMA-Factory/src/llamafactory/train/ppo/__pycache__/__init__.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/train/ppo/__pycache__/__init__.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/train/ppo/__pycache__/ppo_utils.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/train/ppo/__pycache__/ppo_utils.cpython-310.pyc index 28a8a916e58c23750be85866275b9b2a1c97b029..e4bcedf204bed4c8f30630470b5175a829b69254 100644 Binary files a/LLaMA-Factory/src/llamafactory/train/ppo/__pycache__/ppo_utils.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/train/ppo/__pycache__/ppo_utils.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/train/ppo/__pycache__/trainer.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/train/ppo/__pycache__/trainer.cpython-310.pyc index ac6b0620c801ecda15ce1ff74ab9140e50ec9ea0..a7da33a0768cee830f7082acd42ad84dd3e3eb48 100644 Binary files a/LLaMA-Factory/src/llamafactory/train/ppo/__pycache__/trainer.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/train/ppo/__pycache__/trainer.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/train/ppo/__pycache__/workflow.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/train/ppo/__pycache__/workflow.cpython-310.pyc index 4363ca3ed9d64a59077266a7bf76bddb4b2c39e7..b4e0ddca3e402f5b4534ea685b5d45cd4d4162b0 100644 Binary files a/LLaMA-Factory/src/llamafactory/train/ppo/__pycache__/workflow.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/train/ppo/__pycache__/workflow.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/train/pt/__pycache__/__init__.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/train/pt/__pycache__/__init__.cpython-310.pyc index 3248899481428a3bef578a446eb6cb9b2a67ff00..f8bd24bc1003b1fc5031c2fd54d6f3493de0cd65 100644 Binary files a/LLaMA-Factory/src/llamafactory/train/pt/__pycache__/__init__.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/train/pt/__pycache__/__init__.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/train/pt/__pycache__/trainer.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/train/pt/__pycache__/trainer.cpython-310.pyc index 1030d15ab78c0fc6564331075fbe723b4966064b..ffb9039983dd2a83801dc41bc65a47f2384c93de 100644 Binary files a/LLaMA-Factory/src/llamafactory/train/pt/__pycache__/trainer.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/train/pt/__pycache__/trainer.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/train/pt/__pycache__/workflow.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/train/pt/__pycache__/workflow.cpython-310.pyc index 7c1b121da7835a13d4aa8796f9bbcbeb0f05bb1d..cdd1466020f83ad8b4e008c4eccfcfbc9da4956c 100644 Binary files a/LLaMA-Factory/src/llamafactory/train/pt/__pycache__/workflow.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/train/pt/__pycache__/workflow.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/train/rm/__pycache__/__init__.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/train/rm/__pycache__/__init__.cpython-310.pyc index cba8869fc2e4ee2f2f5330b098b4b641cb5a1316..4c9680f06dc81b19d0d5c6f66773725dc6cd7c8d 100644 Binary files a/LLaMA-Factory/src/llamafactory/train/rm/__pycache__/__init__.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/train/rm/__pycache__/__init__.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/train/rm/__pycache__/metric.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/train/rm/__pycache__/metric.cpython-310.pyc index d6f0c23c11e92a1c556be9a6651234635a15c439..0a52f6a696d3b90199c6f6e4fc8c417f82afd127 100644 Binary files a/LLaMA-Factory/src/llamafactory/train/rm/__pycache__/metric.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/train/rm/__pycache__/metric.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/train/rm/__pycache__/trainer.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/train/rm/__pycache__/trainer.cpython-310.pyc index 531b9f3f351e1e3161cbdcd8ec4c36e569ded676..ec44d873cb7828052483a42a42e0bc6c2465ecf8 100644 Binary files a/LLaMA-Factory/src/llamafactory/train/rm/__pycache__/trainer.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/train/rm/__pycache__/trainer.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/train/rm/__pycache__/workflow.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/train/rm/__pycache__/workflow.cpython-310.pyc index 79ef75abcf75cb9e1e8419ae0d3282d5d0ea47d4..d3c8ec3a2d2f3b959549a37c4af385a3eea467a8 100644 Binary files a/LLaMA-Factory/src/llamafactory/train/rm/__pycache__/workflow.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/train/rm/__pycache__/workflow.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/train/sft/__pycache__/__init__.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/train/sft/__pycache__/__init__.cpython-310.pyc index 1749d4c6056cf343305ceb4c3e9efecaadcb9296..0a3f414c36da6a6040aa6e0283efad00095737ba 100644 Binary files a/LLaMA-Factory/src/llamafactory/train/sft/__pycache__/__init__.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/train/sft/__pycache__/__init__.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/train/sft/__pycache__/metric.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/train/sft/__pycache__/metric.cpython-310.pyc index da8ec2ee1a89f32807be3a9df48bc7dca8068438..7844e743d73ef82e6f9fe2037517fad49eb3f3ae 100644 Binary files a/LLaMA-Factory/src/llamafactory/train/sft/__pycache__/metric.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/train/sft/__pycache__/metric.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/train/sft/__pycache__/trainer.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/train/sft/__pycache__/trainer.cpython-310.pyc index 6e94dc30304c2dd6a3f2d799a5670525195131e8..00af073d6ab5c4fa407ce796ee085f60ef9fe599 100644 Binary files a/LLaMA-Factory/src/llamafactory/train/sft/__pycache__/trainer.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/train/sft/__pycache__/trainer.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/train/sft/__pycache__/workflow.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/train/sft/__pycache__/workflow.cpython-310.pyc index 4a93618b9952c788319ac7e50438e48eb8570691..72892a339235b9d90e9d06763e1677d8b389d419 100644 Binary files a/LLaMA-Factory/src/llamafactory/train/sft/__pycache__/workflow.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/train/sft/__pycache__/workflow.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/webui/__pycache__/__init__.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/webui/__pycache__/__init__.cpython-310.pyc index cf3a9e325abc7f68ca2d8c912b6a58fa97bc833b..f3c3534d55cc565ad21ef2b915726485b4831a73 100644 Binary files a/LLaMA-Factory/src/llamafactory/webui/__pycache__/__init__.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/webui/__pycache__/__init__.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/webui/__pycache__/chatter.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/webui/__pycache__/chatter.cpython-310.pyc index 3435b39fdb248174aefa9a214d3de91b8b5d3ae4..1512aa190ca78bdefac2da666b1a6035ba0e26b9 100644 Binary files a/LLaMA-Factory/src/llamafactory/webui/__pycache__/chatter.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/webui/__pycache__/chatter.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/webui/__pycache__/common.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/webui/__pycache__/common.cpython-310.pyc index 4ff3e3fab4ab8ac049e1362e6e59b9dfe87a9f20..25c7bffe7fe3f4b1b2ec4d19f40903cf5528eeb6 100644 Binary files a/LLaMA-Factory/src/llamafactory/webui/__pycache__/common.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/webui/__pycache__/common.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/webui/__pycache__/control.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/webui/__pycache__/control.cpython-310.pyc index 1c0f913fbeb22e2a7aaa30f56d7b96348f85c29d..515acce1f840964c024159012f9e97a8edcc8121 100644 Binary files a/LLaMA-Factory/src/llamafactory/webui/__pycache__/control.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/webui/__pycache__/control.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/webui/__pycache__/css.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/webui/__pycache__/css.cpython-310.pyc index 5d8e4b6e6939e401697a851f24262ba8487500d1..bfa76fe93f21a1729f22309c8896580526e14e3f 100644 Binary files a/LLaMA-Factory/src/llamafactory/webui/__pycache__/css.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/webui/__pycache__/css.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/webui/__pycache__/engine.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/webui/__pycache__/engine.cpython-310.pyc index 1a9914041afd7679cd7e6151708c7bb3347590c4..425ff06a6ffadb9e964cb87cc7b326ebca11cb2c 100644 Binary files a/LLaMA-Factory/src/llamafactory/webui/__pycache__/engine.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/webui/__pycache__/engine.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/webui/__pycache__/interface.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/webui/__pycache__/interface.cpython-310.pyc index 15211d9f7e9a5413cb361fc223ba63f2e2e483dc..3fb7a8557a921c0c66daec79811cec81355b055b 100644 Binary files a/LLaMA-Factory/src/llamafactory/webui/__pycache__/interface.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/webui/__pycache__/interface.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/webui/__pycache__/locales.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/webui/__pycache__/locales.cpython-310.pyc index 62eb75290d1fece15819ffb1b63c204cae7f40fa..d8c0f4e78435bf6c0029f0fb23a8f59423bdd719 100644 Binary files a/LLaMA-Factory/src/llamafactory/webui/__pycache__/locales.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/webui/__pycache__/locales.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/webui/__pycache__/manager.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/webui/__pycache__/manager.cpython-310.pyc index b50431908f683183f732e062085e5222221b50e6..2a56b270dbd427854da4c6ac42357b4f448b3bd0 100644 Binary files a/LLaMA-Factory/src/llamafactory/webui/__pycache__/manager.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/webui/__pycache__/manager.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/webui/__pycache__/runner.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/webui/__pycache__/runner.cpython-310.pyc index 18385b99067e835e658c2032ee57c159eb134733..ab6177da9debffd451613110db41e2b52a97421b 100644 Binary files a/LLaMA-Factory/src/llamafactory/webui/__pycache__/runner.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/webui/__pycache__/runner.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/webui/components/__pycache__/__init__.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/webui/components/__pycache__/__init__.cpython-310.pyc index 1e9ffb4977ca55b2162906397e36d3c63fdec18c..6f8daf2a6b48970e589ef4fc7e755806944ec4b3 100644 Binary files a/LLaMA-Factory/src/llamafactory/webui/components/__pycache__/__init__.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/webui/components/__pycache__/__init__.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/webui/components/__pycache__/chatbot.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/webui/components/__pycache__/chatbot.cpython-310.pyc index 472160bff1e39d5fa15e65b1edc6ffd17f39fa45..88519af73407e95849881c41955c55d9967a7c11 100644 Binary files a/LLaMA-Factory/src/llamafactory/webui/components/__pycache__/chatbot.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/webui/components/__pycache__/chatbot.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/webui/components/__pycache__/data.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/webui/components/__pycache__/data.cpython-310.pyc index ad462cacede38b08ed1896d67466a55d9f251fb5..beed4a49ba3655b496ea2f9d186b626343a5bb67 100644 Binary files a/LLaMA-Factory/src/llamafactory/webui/components/__pycache__/data.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/webui/components/__pycache__/data.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/webui/components/__pycache__/eval.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/webui/components/__pycache__/eval.cpython-310.pyc index e2fc94131eda2a98674004a781a64938d0cabb67..ff5520ce30ba47f470b775ff7801f97a5a0df981 100644 Binary files a/LLaMA-Factory/src/llamafactory/webui/components/__pycache__/eval.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/webui/components/__pycache__/eval.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/webui/components/__pycache__/export.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/webui/components/__pycache__/export.cpython-310.pyc index 2e15af7c62267da14ea6d39b1471a049db4915da..bd768a0ab7e4327659452533f6699b9bc58ebe01 100644 Binary files a/LLaMA-Factory/src/llamafactory/webui/components/__pycache__/export.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/webui/components/__pycache__/export.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/webui/components/__pycache__/infer.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/webui/components/__pycache__/infer.cpython-310.pyc index 0685553330dbbaa43bf0fd924850e4ca509efaae..1d44bce44c3b7233fd6c15afdeb69cdda9b762a3 100644 Binary files a/LLaMA-Factory/src/llamafactory/webui/components/__pycache__/infer.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/webui/components/__pycache__/infer.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/webui/components/__pycache__/top.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/webui/components/__pycache__/top.cpython-310.pyc index 6e3c2d40bf61ab307de9e665a29a54ac8bd002d5..5faa4c86b6c18c51b8e08b40222f1567689981c3 100644 Binary files a/LLaMA-Factory/src/llamafactory/webui/components/__pycache__/top.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/webui/components/__pycache__/top.cpython-310.pyc differ diff --git a/LLaMA-Factory/src/llamafactory/webui/components/__pycache__/train.cpython-310.pyc b/LLaMA-Factory/src/llamafactory/webui/components/__pycache__/train.cpython-310.pyc index 11b744dd5f9bd70536c58a407e4b20837f4f46d7..feeac60a15aae35274fd57fabe30a761652ba6b9 100644 Binary files a/LLaMA-Factory/src/llamafactory/webui/components/__pycache__/train.cpython-310.pyc and b/LLaMA-Factory/src/llamafactory/webui/components/__pycache__/train.cpython-310.pyc differ diff --git a/LLaMA-Factory/wandb/debug-internal.log b/LLaMA-Factory/wandb/debug-internal.log index 434a75e0afb3baecece21cbf747a9dc2d5135e02..7f352addde07d876abb65995dc9b1d71b63605e0 100644 --- a/LLaMA-Factory/wandb/debug-internal.log +++ b/LLaMA-Factory/wandb/debug-internal.log @@ -1,7 +1,7 @@ -{"time":"2025-03-05T22:56:58.688455297Z","level":"INFO","msg":"stream: starting","core version":"0.19.8","symlink path":"/kaggle/working/LLaMA-Factory/wandb/run-20250305_225658-acin29x5/logs/debug-core.log"} -{"time":"2025-03-05T22:56:58.79179447Z","level":"INFO","msg":"created new stream","id":"acin29x5"} -{"time":"2025-03-05T22:56:58.791834101Z","level":"INFO","msg":"stream: started","id":"acin29x5"} -{"time":"2025-03-05T22:56:58.791875378Z","level":"INFO","msg":"writer: Do: started","stream_id":"acin29x5"} -{"time":"2025-03-05T22:56:58.791891773Z","level":"INFO","msg":"handler: started","stream_id":"acin29x5"} -{"time":"2025-03-05T22:56:58.791902233Z","level":"INFO","msg":"sender: started","stream_id":"acin29x5"} -{"time":"2025-03-05T22:56:59.000445467Z","level":"INFO","msg":"Starting system monitor"} +{"time":"2025-03-05T23:32:46.352950755Z","level":"INFO","msg":"stream: starting","core version":"0.19.8","symlink path":"/kaggle/working/LLaMA-Factory/wandb/run-20250305_233246-9ct1o6yk/logs/debug-core.log"} +{"time":"2025-03-05T23:32:46.455714274Z","level":"INFO","msg":"created new stream","id":"9ct1o6yk"} +{"time":"2025-03-05T23:32:46.455754315Z","level":"INFO","msg":"stream: started","id":"9ct1o6yk"} +{"time":"2025-03-05T23:32:46.455787298Z","level":"INFO","msg":"writer: Do: started","stream_id":"9ct1o6yk"} +{"time":"2025-03-05T23:32:46.455813658Z","level":"INFO","msg":"sender: started","stream_id":"9ct1o6yk"} +{"time":"2025-03-05T23:32:46.455830745Z","level":"INFO","msg":"handler: started","stream_id":"9ct1o6yk"} +{"time":"2025-03-05T23:32:46.78514652Z","level":"INFO","msg":"Starting system monitor"} diff --git a/LLaMA-Factory/wandb/debug.log b/LLaMA-Factory/wandb/debug.log index ac5079905c95917cdb81430a22d92e764c005fd5..dcd53a6080019a2b54764ff953d77b3a219fbab3 100644 --- a/LLaMA-Factory/wandb/debug.log +++ b/LLaMA-Factory/wandb/debug.log @@ -1,26 +1,26 @@ -2025-03-05 22:56:58,685 INFO MainThread:1229 [wandb_setup.py:_flush():67] Current SDK version is 0.19.8 -2025-03-05 22:56:58,685 INFO MainThread:1229 [wandb_setup.py:_flush():67] Configure stats pid to 1229 -2025-03-05 22:56:58,685 INFO MainThread:1229 [wandb_setup.py:_flush():67] Loading settings from /root/.config/wandb/settings -2025-03-05 22:56:58,685 INFO MainThread:1229 [wandb_setup.py:_flush():67] Loading settings from /kaggle/working/LLaMA-Factory/wandb/settings -2025-03-05 22:56:58,685 INFO MainThread:1229 [wandb_setup.py:_flush():67] Loading settings from environment variables -2025-03-05 22:56:58,685 INFO MainThread:1229 [wandb_init.py:setup_run_log_directory():647] Logging user logs to /kaggle/working/LLaMA-Factory/wandb/run-20250305_225658-acin29x5/logs/debug.log -2025-03-05 22:56:58,685 INFO MainThread:1229 [wandb_init.py:setup_run_log_directory():648] Logging internal logs to /kaggle/working/LLaMA-Factory/wandb/run-20250305_225658-acin29x5/logs/debug-internal.log -2025-03-05 22:56:58,685 INFO MainThread:1229 [wandb_init.py:init():761] calling init triggers -2025-03-05 22:56:58,685 INFO MainThread:1229 [wandb_init.py:init():766] wandb.init called with sweep_config: {} +2025-03-05 23:32:46,342 INFO MainThread:163 [wandb_setup.py:_flush():67] Current SDK version is 0.19.8 +2025-03-05 23:32:46,342 INFO MainThread:163 [wandb_setup.py:_flush():67] Configure stats pid to 163 +2025-03-05 23:32:46,342 INFO MainThread:163 [wandb_setup.py:_flush():67] Loading settings from /root/.config/wandb/settings +2025-03-05 23:32:46,342 INFO MainThread:163 [wandb_setup.py:_flush():67] Loading settings from /kaggle/working/LLaMA-Factory/wandb/settings +2025-03-05 23:32:46,342 INFO MainThread:163 [wandb_setup.py:_flush():67] Loading settings from environment variables +2025-03-05 23:32:46,342 INFO MainThread:163 [wandb_init.py:setup_run_log_directory():647] Logging user logs to /kaggle/working/LLaMA-Factory/wandb/run-20250305_233246-9ct1o6yk/logs/debug.log +2025-03-05 23:32:46,342 INFO MainThread:163 [wandb_init.py:setup_run_log_directory():648] Logging internal logs to /kaggle/working/LLaMA-Factory/wandb/run-20250305_233246-9ct1o6yk/logs/debug-internal.log +2025-03-05 23:32:46,343 INFO MainThread:163 [wandb_init.py:init():761] calling init triggers +2025-03-05 23:32:46,343 INFO MainThread:163 [wandb_init.py:init():766] wandb.init called with sweep_config: {} config: {'_wandb': {}} -2025-03-05 22:56:58,685 INFO MainThread:1229 [wandb_init.py:init():784] starting backend -2025-03-05 22:56:58,685 INFO MainThread:1229 [wandb_init.py:init():788] sending inform_init request -2025-03-05 22:56:58,688 INFO MainThread:1229 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn -2025-03-05 22:56:58,688 INFO MainThread:1229 [wandb_init.py:init():798] backend started and connected -2025-03-05 22:56:58,692 INFO MainThread:1229 [wandb_init.py:init():891] updated telemetry -2025-03-05 22:56:58,701 INFO MainThread:1229 [wandb_init.py:init():915] communicating run to backend with 90.0 second timeout -2025-03-05 22:56:58,998 INFO MainThread:1229 [wandb_init.py:init():990] starting run threads in backend -2025-03-05 22:56:59,678 INFO MainThread:1229 [wandb_run.py:_console_start():2375] atexit reg -2025-03-05 22:56:59,679 INFO MainThread:1229 [wandb_run.py:_redirect():2227] redirect: wrap_raw -2025-03-05 22:56:59,679 INFO MainThread:1229 [wandb_run.py:_redirect():2292] Wrapping output streams. -2025-03-05 22:56:59,679 INFO MainThread:1229 [wandb_run.py:_redirect():2315] Redirects installed. -2025-03-05 22:56:59,682 INFO MainThread:1229 [wandb_init.py:init():1032] run started, returning control to user process -2025-03-05 22:56:59,685 INFO MainThread:1229 [wandb_run.py:_config_callback():1261] config_cb None None {'peft_config': {'default': {'peft_type': , 'auto_mapping': None, 'base_model_name_or_path': 'Qwen/Qwen2.5-1.5B-Instruct', 'revision': None, 'task_type': , 'inference_mode': False, 'r': 64, 'target_modules': {'o_proj', 'k_proj', 'down_proj', 'gate_proj', 'q_proj', 'v_proj', 'up_proj'}, 'lora_alpha': 128, 'lora_dropout': 0.0, 'fan_in_fan_out': False, 'bias': 'none', 'use_rslora': False, 'modules_to_save': None, 'init_lora_weights': True, 'layers_to_transform': None, 'layers_pattern': None, 'rank_pattern': {}, 'alpha_pattern': {}, 'megatron_config': None, 'megatron_core': 'megatron.core', 'loftq_config': {}, 'use_dora': False, 'layer_replication': None, 'runtime_config': {'ephemeral_gpu_offload': False}}}, 'vocab_size': 151936, 'max_position_embeddings': 32768, 'hidden_size': 1536, 'intermediate_size': 8960, 'num_hidden_layers': 28, 'num_attention_heads': 12, 'use_sliding_window': False, 'sliding_window': None, 'max_window_layers': 21, 'num_key_value_heads': 2, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-06, 'use_cache': False, 'rope_theta': 1000000.0, 'rope_scaling': None, 'attention_dropout': 0.0, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['Qwen2ForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 151643, 'pad_token_id': None, 'eos_token_id': 151645, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'Qwen/Qwen2.5-1.5B-Instruct', '_attn_implementation_autoset': True, 'transformers_version': '4.48.3', 'model_type': 'qwen2', 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 0.0001, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': -1, 'lr_scheduler_type': 'cosine', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.1, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar05_22-56-30_394dbbe3a00e', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': True, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 100, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'newsx-finetune-llamafactory', 'disable_tqdm': False, 'remove_unused_columns': False, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'checkpoint', 'hub_token': '', 'hub_private_repo': True, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': None, 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 180000000, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': False, 'generation_max_length': 2048, 'generation_num_beams': None, 'generation_config': None, 'ray_run_name': None, 'ray_storage_path': './saves', 'ray_num_workers': 1, 'resources_per_worker': {'GPU': 1}, 'placement_strategy': 'PACK'} -2025-03-05 22:56:59,693 INFO MainThread:1229 [wandb_config.py:__setitem__():154] config set model/num_parameters = 1617573376 - > -2025-03-05 22:56:59,693 INFO MainThread:1229 [wandb_run.py:_config_callback():1261] config_cb model/num_parameters 1617573376 None -2025-03-05 22:56:59,695 INFO MainThread:1229 [wandb_run.py:_config_callback():1261] config_cb None None {'model_args': {'vllm_maxlen': 4096, 'vllm_gpu_util': 0.9, 'vllm_enforce_eager': False, 'vllm_max_lora_rank': 32, 'vllm_config': None, 'export_dir': None, 'export_size': 5, 'export_device': 'cpu', 'export_quantization_bit': None, 'export_quantization_dataset': None, 'export_quantization_nsamples': 128, 'export_quantization_maxlen': 1024, 'export_legacy_format': False, 'export_hub_model_id': 'OsamaMo/Arabic_Text-To-SQL', 'image_max_pixels': 589824, 'image_min_pixels': 1024, 'video_max_pixels': 65536, 'video_min_pixels': 256, 'video_fps': 2.0, 'video_maxlen': 128, 'quantization_method': 'bitsandbytes', 'quantization_bit': None, 'quantization_type': 'nf4', 'double_quantization': True, 'quantization_device_map': None, 'model_name_or_path': 'Qwen/Qwen2.5-1.5B-Instruct', 'adapter_name_or_path': None, 'adapter_folder': None, 'cache_dir': None, 'use_fast_tokenizer': True, 'resize_vocab': False, 'split_special_tokens': False, 'new_special_tokens': None, 'model_revision': 'main', 'low_cpu_mem_usage': True, 'rope_scaling': None, 'flash_attn': 'auto', 'shift_attn': False, 'mixture_of_depths': None, 'use_unsloth': False, 'use_unsloth_gc': False, 'enable_liger_kernel': False, 'moe_aux_loss_coef': None, 'disable_gradient_checkpointing': False, 'use_reentrant_gc': True, 'upcast_layernorm': False, 'upcast_lmhead_output': False, 'train_from_scratch': False, 'infer_backend': 'huggingface', 'offload_folder': 'offload', 'use_cache': True, 'infer_dtype': 'auto', 'hf_hub_token': '', 'ms_hub_token': '', 'om_hub_token': '', 'print_param_status': False, 'trust_remote_code': True, 'compute_dtype': 'torch.bfloat16', 'device_map': {'': 'cuda:0'}, 'model_max_length': 2048, 'block_diag_attn': False}, 'data_args': {'template': 'qwen', 'dataset': ['news_finetune_train'], 'eval_dataset': ['news_finetune_val'], 'dataset_dir': 'data', 'media_dir': 'data', 'cutoff_len': 2048, 'train_on_prompt': False, 'mask_history': False, 'streaming': False, 'buffer_size': 16384, 'mix_strategy': 'concat', 'interleave_probs': None, 'overwrite_cache': True, 'preprocessing_batch_size': 1000, 'preprocessing_num_workers': 16, 'max_samples': None, 'eval_num_beams': None, 'ignore_pad_token_for_loss': True, 'val_size': 0.0, 'packing': False, 'neat_packing': False, 'tool_format': None, 'tokenized_path': None}, 'finetuning_args': {'use_swanlab': False, 'swanlab_project': 'llamafactory', 'swanlab_workspace': None, 'swanlab_run_name': None, 'swanlab_mode': 'cloud', 'swanlab_api_key': '', 'use_badam': False, 'badam_mode': 'layer', 'badam_start_block': None, 'badam_switch_mode': 'ascending', 'badam_switch_interval': 50, 'badam_update_ratio': 0.05, 'badam_mask_mode': 'adjacent', 'badam_verbose': 0, 'use_apollo': False, 'apollo_target': ['all'], 'apollo_rank': 16, 'apollo_update_interval': 200, 'apollo_scale': 32.0, 'apollo_proj': 'random', 'apollo_proj_type': 'std', 'apollo_scale_type': 'channel', 'apollo_layerwise': False, 'apollo_scale_front': False, 'use_galore': False, 'galore_target': ['all'], 'galore_rank': 16, 'galore_update_interval': 200, 'galore_scale': 2.0, 'galore_proj_type': 'std', 'galore_layerwise': False, 'pref_beta': 0.1, 'pref_ftx': 0.0, 'pref_loss': 'sigmoid', 'dpo_label_smoothing': 0.0, 'kto_chosen_weight': 1.0, 'kto_rejected_weight': 1.0, 'simpo_gamma': 0.5, 'ppo_buffer_size': 1, 'ppo_epochs': 4, 'ppo_score_norm': False, 'ppo_target': 6.0, 'ppo_whiten_rewards': False, 'ref_model': None, 'ref_model_adapters': None, 'ref_model_quantization_bit': None, 'reward_model': None, 'reward_model_adapters': None, 'reward_model_quantization_bit': None, 'reward_model_type': 'lora', 'additional_target': None, 'lora_alpha': 128, 'lora_dropout': 0.0, 'lora_rank': 64, 'lora_target': ['all'], 'loraplus_lr_ratio': None, 'loraplus_lr_embedding': 1e-06, 'use_rslora': False, 'use_dora': False, 'pissa_init': False, 'pissa_iter': 16, 'pissa_convert': False, 'create_new_adapter': False, 'freeze_trainable_layers': 2, 'freeze_trainable_modules': ['all'], 'freeze_extra_modules': None, 'pure_bf16': False, 'stage': 'sft', 'finetuning_type': 'lora', 'use_llama_pro': False, 'use_adam_mini': False, 'freeze_vision_tower': True, 'freeze_multi_modal_projector': True, 'train_mm_proj_only': False, 'compute_accuracy': False, 'disable_shuffling': False, 'plot_loss': True, 'include_effective_tokens_per_second': False}, 'generating_args': {'do_sample': True, 'temperature': 0.95, 'top_p': 0.7, 'top_k': 50, 'num_beams': 1, 'max_new_tokens': 1024, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'default_system': None, 'skip_special_tokens': True}} +2025-03-05 23:32:46,343 INFO MainThread:163 [wandb_init.py:init():784] starting backend +2025-03-05 23:32:46,343 INFO MainThread:163 [wandb_init.py:init():788] sending inform_init request +2025-03-05 23:32:46,350 INFO MainThread:163 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2025-03-05 23:32:46,350 INFO MainThread:163 [wandb_init.py:init():798] backend started and connected +2025-03-05 23:32:46,355 INFO MainThread:163 [wandb_init.py:init():891] updated telemetry +2025-03-05 23:32:46,361 INFO MainThread:163 [wandb_init.py:init():915] communicating run to backend with 90.0 second timeout +2025-03-05 23:32:46,782 INFO MainThread:163 [wandb_init.py:init():990] starting run threads in backend +2025-03-05 23:32:47,457 INFO MainThread:163 [wandb_run.py:_console_start():2375] atexit reg +2025-03-05 23:32:47,457 INFO MainThread:163 [wandb_run.py:_redirect():2227] redirect: wrap_raw +2025-03-05 23:32:47,457 INFO MainThread:163 [wandb_run.py:_redirect():2292] Wrapping output streams. +2025-03-05 23:32:47,457 INFO MainThread:163 [wandb_run.py:_redirect():2315] Redirects installed. +2025-03-05 23:32:47,460 INFO MainThread:163 [wandb_init.py:init():1032] run started, returning control to user process +2025-03-05 23:32:47,463 INFO MainThread:163 [wandb_run.py:_config_callback():1261] config_cb None None {'peft_config': {'default': {'peft_type': , 'auto_mapping': None, 'base_model_name_or_path': 'Qwen/Qwen2.5-1.5B-Instruct', 'revision': None, 'task_type': , 'inference_mode': False, 'r': 64, 'target_modules': {'q_proj', 'v_proj', 'gate_proj', 'k_proj', 'down_proj', 'o_proj', 'up_proj'}, 'lora_alpha': 128, 'lora_dropout': 0.0, 'fan_in_fan_out': False, 'bias': 'none', 'use_rslora': False, 'modules_to_save': None, 'init_lora_weights': True, 'layers_to_transform': None, 'layers_pattern': None, 'rank_pattern': {}, 'alpha_pattern': {}, 'megatron_config': None, 'megatron_core': 'megatron.core', 'loftq_config': {}, 'use_dora': False, 'layer_replication': None, 'runtime_config': {'ephemeral_gpu_offload': False}}}, 'vocab_size': 151936, 'max_position_embeddings': 32768, 'hidden_size': 1536, 'intermediate_size': 8960, 'num_hidden_layers': 28, 'num_attention_heads': 12, 'use_sliding_window': False, 'sliding_window': None, 'max_window_layers': 21, 'num_key_value_heads': 2, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-06, 'use_cache': False, 'rope_theta': 1000000.0, 'rope_scaling': None, 'attention_dropout': 0.0, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['Qwen2ForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 151643, 'pad_token_id': None, 'eos_token_id': 151645, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'Qwen/Qwen2.5-1.5B-Instruct', '_attn_implementation_autoset': True, 'transformers_version': '4.48.3', 'model_type': 'qwen2', 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 0.0001, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': -1, 'lr_scheduler_type': 'cosine', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.1, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar05_23-32-16_7cf3bf98fc84', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': True, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 100, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'newsx-finetune-llamafactory', 'disable_tqdm': False, 'remove_unused_columns': False, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'checkpoint', 'hub_token': '', 'hub_private_repo': True, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': None, 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 180000000, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': False, 'generation_max_length': 2048, 'generation_num_beams': None, 'generation_config': None, 'ray_run_name': None, 'ray_storage_path': './saves', 'ray_num_workers': 1, 'resources_per_worker': {'GPU': 1}, 'placement_strategy': 'PACK'} +2025-03-05 23:32:47,471 INFO MainThread:163 [wandb_config.py:__setitem__():154] config set model/num_parameters = 1617573376 - > +2025-03-05 23:32:47,471 INFO MainThread:163 [wandb_run.py:_config_callback():1261] config_cb model/num_parameters 1617573376 None +2025-03-05 23:32:47,473 INFO MainThread:163 [wandb_run.py:_config_callback():1261] config_cb None None {'model_args': {'vllm_maxlen': 4096, 'vllm_gpu_util': 0.9, 'vllm_enforce_eager': False, 'vllm_max_lora_rank': 32, 'vllm_config': None, 'export_dir': None, 'export_size': 5, 'export_device': 'cpu', 'export_quantization_bit': None, 'export_quantization_dataset': None, 'export_quantization_nsamples': 128, 'export_quantization_maxlen': 1024, 'export_legacy_format': False, 'export_hub_model_id': 'OsamaMo/Arabic_Text-To-SQL', 'image_max_pixels': 589824, 'image_min_pixels': 1024, 'video_max_pixels': 65536, 'video_min_pixels': 256, 'video_fps': 2.0, 'video_maxlen': 128, 'quantization_method': 'bitsandbytes', 'quantization_bit': None, 'quantization_type': 'nf4', 'double_quantization': True, 'quantization_device_map': None, 'model_name_or_path': 'Qwen/Qwen2.5-1.5B-Instruct', 'adapter_name_or_path': None, 'adapter_folder': None, 'cache_dir': None, 'use_fast_tokenizer': True, 'resize_vocab': False, 'split_special_tokens': False, 'new_special_tokens': None, 'model_revision': 'main', 'low_cpu_mem_usage': True, 'rope_scaling': None, 'flash_attn': 'auto', 'shift_attn': False, 'mixture_of_depths': None, 'use_unsloth': False, 'use_unsloth_gc': False, 'enable_liger_kernel': False, 'moe_aux_loss_coef': None, 'disable_gradient_checkpointing': False, 'use_reentrant_gc': True, 'upcast_layernorm': False, 'upcast_lmhead_output': False, 'train_from_scratch': False, 'infer_backend': 'huggingface', 'offload_folder': 'offload', 'use_cache': True, 'infer_dtype': 'auto', 'hf_hub_token': '', 'ms_hub_token': '', 'om_hub_token': '', 'print_param_status': False, 'trust_remote_code': True, 'compute_dtype': 'torch.bfloat16', 'device_map': {'': 'cuda:0'}, 'model_max_length': 2048, 'block_diag_attn': False}, 'data_args': {'template': 'qwen', 'dataset': ['news_finetune_train'], 'eval_dataset': ['news_finetune_val'], 'dataset_dir': 'data', 'media_dir': 'data', 'cutoff_len': 2048, 'train_on_prompt': False, 'mask_history': False, 'streaming': False, 'buffer_size': 16384, 'mix_strategy': 'concat', 'interleave_probs': None, 'overwrite_cache': True, 'preprocessing_batch_size': 1000, 'preprocessing_num_workers': 16, 'max_samples': None, 'eval_num_beams': None, 'ignore_pad_token_for_loss': True, 'val_size': 0.0, 'packing': False, 'neat_packing': False, 'tool_format': None, 'tokenized_path': None}, 'finetuning_args': {'use_swanlab': False, 'swanlab_project': 'llamafactory', 'swanlab_workspace': None, 'swanlab_run_name': None, 'swanlab_mode': 'cloud', 'swanlab_api_key': '', 'use_badam': False, 'badam_mode': 'layer', 'badam_start_block': None, 'badam_switch_mode': 'ascending', 'badam_switch_interval': 50, 'badam_update_ratio': 0.05, 'badam_mask_mode': 'adjacent', 'badam_verbose': 0, 'use_apollo': False, 'apollo_target': ['all'], 'apollo_rank': 16, 'apollo_update_interval': 200, 'apollo_scale': 32.0, 'apollo_proj': 'random', 'apollo_proj_type': 'std', 'apollo_scale_type': 'channel', 'apollo_layerwise': False, 'apollo_scale_front': False, 'use_galore': False, 'galore_target': ['all'], 'galore_rank': 16, 'galore_update_interval': 200, 'galore_scale': 2.0, 'galore_proj_type': 'std', 'galore_layerwise': False, 'pref_beta': 0.1, 'pref_ftx': 0.0, 'pref_loss': 'sigmoid', 'dpo_label_smoothing': 0.0, 'kto_chosen_weight': 1.0, 'kto_rejected_weight': 1.0, 'simpo_gamma': 0.5, 'ppo_buffer_size': 1, 'ppo_epochs': 4, 'ppo_score_norm': False, 'ppo_target': 6.0, 'ppo_whiten_rewards': False, 'ref_model': None, 'ref_model_adapters': None, 'ref_model_quantization_bit': None, 'reward_model': None, 'reward_model_adapters': None, 'reward_model_quantization_bit': None, 'reward_model_type': 'lora', 'additional_target': None, 'lora_alpha': 128, 'lora_dropout': 0.0, 'lora_rank': 64, 'lora_target': ['all'], 'loraplus_lr_ratio': None, 'loraplus_lr_embedding': 1e-06, 'use_rslora': False, 'use_dora': False, 'pissa_init': False, 'pissa_iter': 16, 'pissa_convert': False, 'create_new_adapter': False, 'freeze_trainable_layers': 2, 'freeze_trainable_modules': ['all'], 'freeze_extra_modules': None, 'pure_bf16': False, 'stage': 'sft', 'finetuning_type': 'lora', 'use_llama_pro': False, 'use_adam_mini': False, 'freeze_vision_tower': True, 'freeze_multi_modal_projector': True, 'train_mm_proj_only': False, 'compute_accuracy': False, 'disable_shuffling': False, 'plot_loss': True, 'include_effective_tokens_per_second': False}, 'generating_args': {'do_sample': True, 'temperature': 0.95, 'top_p': 0.7, 'top_k': 50, 'num_beams': 1, 'max_new_tokens': 1024, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'default_system': None, 'skip_special_tokens': True}} diff --git a/LLaMA-Factory/wandb/run-20250305_233246-9ct1o6yk/files/output.log b/LLaMA-Factory/wandb/run-20250305_233246-9ct1o6yk/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..d16a7dc3a26c9728622fab88729d93d5b88dac90 --- /dev/null +++ b/LLaMA-Factory/wandb/run-20250305_233246-9ct1o6yk/files/output.log @@ -0,0 +1,111 @@ + 2%|▉ | 100/4197 [13:56<9:35:45, 8.43s/it][INFO|trainer.py:4226] 2025-03-05 23:46:43,564 >> +{'loss': 1.0969, 'grad_norm': 4.4086809158325195, 'learning_rate': 2.3809523809523808e-06, 'epoch': 0.01} +{'loss': 1.0795, 'grad_norm': 5.687011241912842, 'learning_rate': 4.7619047619047615e-06, 'epoch': 0.01} +{'loss': 0.7536, 'grad_norm': 1.976590633392334, 'learning_rate': 7.142857142857143e-06, 'epoch': 0.02} +{'loss': 0.5564, 'grad_norm': 3.1355409622192383, 'learning_rate': 9.523809523809523e-06, 'epoch': 0.03} +{'loss': 0.623, 'grad_norm': 2.6710309982299805, 'learning_rate': 1.1904761904761905e-05, 'epoch': 0.04} +{'loss': 0.5322, 'grad_norm': 2.8567938804626465, 'learning_rate': 1.4285714285714285e-05, 'epoch': 0.04} +{'loss': 0.5102, 'grad_norm': 3.4388861656188965, 'learning_rate': 1.6666666666666667e-05, 'epoch': 0.05} +{'loss': 0.568, 'grad_norm': 3.093275308609009, 'learning_rate': 1.9047619047619046e-05, 'epoch': 0.06} +{'loss': 0.4883, 'grad_norm': 2.3798677921295166, 'learning_rate': 2.1428571428571428e-05, 'epoch': 0.06} +{'loss': 0.417, 'grad_norm': 2.846259117126465, 'learning_rate': 2.380952380952381e-05, 'epoch': 0.07} +***** Running Evaluation ***** +[INFO|trainer.py:4228] 2025-03-05 23:46:43,564 >> Num examples = 1400 +[INFO|trainer.py:4231] 2025-03-05 23:46:43,564 >> Batch size = 1 + 5%|█▊ | 200/4197 [44:41<8:12:40, 7.40s/it][INFO|trainer.py:4226] 2025-03-06 00:17:29,351 >> +***** Running Evaluation ***** +{'eval_news_finetune_val_loss': 0.48679304122924805, 'eval_news_finetune_val_runtime': 1001.9158, 'eval_news_finetune_val_samples_per_second': 1.397, 'eval_news_finetune_val_steps_per_second': 1.397, 'epoch': 0.07} +{'loss': 0.4595, 'grad_norm': 1.9387887716293335, 'learning_rate': 2.6190476190476192e-05, 'epoch': 0.08} +{'loss': 0.4658, 'grad_norm': 2.3232853412628174, 'learning_rate': 2.857142857142857e-05, 'epoch': 0.09} +{'loss': 0.4122, 'grad_norm': 2.813093423843384, 'learning_rate': 3.095238095238095e-05, 'epoch': 0.09} +{'loss': 0.4878, 'grad_norm': 1.9588465690612793, 'learning_rate': 3.3333333333333335e-05, 'epoch': 0.1} +{'loss': 0.4168, 'grad_norm': 1.4838117361068726, 'learning_rate': 3.571428571428572e-05, 'epoch': 0.11} +{'loss': 0.4298, 'grad_norm': 3.020738124847412, 'learning_rate': 3.809523809523809e-05, 'epoch': 0.11} +{'loss': 0.4413, 'grad_norm': 2.097656011581421, 'learning_rate': 4.047619047619048e-05, 'epoch': 0.12} +{'loss': 0.3734, 'grad_norm': 1.6332950592041016, 'learning_rate': 4.2857142857142856e-05, 'epoch': 0.13} +{'loss': 0.4015, 'grad_norm': 2.1570417881011963, 'learning_rate': 4.523809523809524e-05, 'epoch': 0.14} +{'loss': 0.4411, 'grad_norm': 1.6941479444503784, 'learning_rate': 4.761904761904762e-05, 'epoch': 0.14} +[INFO|trainer.py:4228] 2025-03-06 00:17:29,351 >> Num examples = 1400 +[INFO|trainer.py:4231] 2025-03-06 00:17:29,351 >> Batch size = 1 + 7%|██▌ | 300/4197 [1:15:23<9:18:01, 8.59s/it][INFO|trainer.py:4226] 2025-03-06 00:48:10,517 >> +***** Running Evaluation ***** +{'eval_news_finetune_val_loss': 0.4338369369506836, 'eval_news_finetune_val_runtime': 1002.1695, 'eval_news_finetune_val_samples_per_second': 1.397, 'eval_news_finetune_val_steps_per_second': 1.397, 'epoch': 0.14} +{'loss': 0.3697, 'grad_norm': 2.3582301139831543, 'learning_rate': 5e-05, 'epoch': 0.15} +{'loss': 0.4076, 'grad_norm': 2.0517632961273193, 'learning_rate': 5.2380952380952384e-05, 'epoch': 0.16} +{'loss': 0.3307, 'grad_norm': 1.3338748216629028, 'learning_rate': 5.4761904761904766e-05, 'epoch': 0.16} +{'loss': 0.4227, 'grad_norm': 3.0515363216400146, 'learning_rate': 5.714285714285714e-05, 'epoch': 0.17} +{'loss': 0.4689, 'grad_norm': 2.4899113178253174, 'learning_rate': 5.9523809523809524e-05, 'epoch': 0.18} +{'loss': 0.3618, 'grad_norm': 1.6197255849838257, 'learning_rate': 6.19047619047619e-05, 'epoch': 0.19} +{'loss': 0.4668, 'grad_norm': 1.654628872871399, 'learning_rate': 6.428571428571429e-05, 'epoch': 0.19} +{'loss': 0.3525, 'grad_norm': 1.6470831632614136, 'learning_rate': 6.666666666666667e-05, 'epoch': 0.2} +{'loss': 0.3707, 'grad_norm': 2.640536308288574, 'learning_rate': 6.904761904761905e-05, 'epoch': 0.21} +{'loss': 0.4461, 'grad_norm': 2.3426971435546875, 'learning_rate': 7.142857142857143e-05, 'epoch': 0.21} +[INFO|trainer.py:4228] 2025-03-06 00:48:10,517 >> Num examples = 1400 +[INFO|trainer.py:4231] 2025-03-06 00:48:10,517 >> Batch size = 1 + 10%|███▍ | 400/4197 [1:47:18<9:20:35, 8.86s/it][INFO|trainer.py:4226] 2025-03-06 01:20:06,321 >> +***** Running Evaluation ***** +{'eval_news_finetune_val_loss': 0.40391305088996887, 'eval_news_finetune_val_runtime': 1002.5797, 'eval_news_finetune_val_samples_per_second': 1.396, 'eval_news_finetune_val_steps_per_second': 1.396, 'epoch': 0.21} +{'loss': 0.3439, 'grad_norm': 1.0351321697235107, 'learning_rate': 7.380952380952382e-05, 'epoch': 0.22} +{'loss': 0.4492, 'grad_norm': 3.062483549118042, 'learning_rate': 7.619047619047618e-05, 'epoch': 0.23} +{'loss': 0.3399, 'grad_norm': 2.095825672149658, 'learning_rate': 7.857142857142858e-05, 'epoch': 0.24} +{'loss': 0.4336, 'grad_norm': 1.700642704963684, 'learning_rate': 8.095238095238096e-05, 'epoch': 0.24} +{'loss': 0.3628, 'grad_norm': 1.6802127361297607, 'learning_rate': 8.333333333333334e-05, 'epoch': 0.25} +{'loss': 0.4113, 'grad_norm': 1.1725817918777466, 'learning_rate': 8.571428571428571e-05, 'epoch': 0.26} +{'loss': 0.4009, 'grad_norm': 1.0182325839996338, 'learning_rate': 8.80952380952381e-05, 'epoch': 0.26} +{'loss': 0.3399, 'grad_norm': 2.5762252807617188, 'learning_rate': 9.047619047619048e-05, 'epoch': 0.27} +{'loss': 0.326, 'grad_norm': 1.5393809080123901, 'learning_rate': 9.285714285714286e-05, 'epoch': 0.28} +{'loss': 0.4228, 'grad_norm': 2.3259921073913574, 'learning_rate': 9.523809523809524e-05, 'epoch': 0.29} +[INFO|trainer.py:4228] 2025-03-06 01:20:06,321 >> Num examples = 1400 +[INFO|trainer.py:4231] 2025-03-06 01:20:06,322 >> Batch size = 1 + 12%|████▎ | 500/4197 [2:18:40<9:58:38, 9.72s/it][INFO|trainer.py:4226] 2025-03-06 01:51:27,973 >> +***** Running Evaluation ***** +{'eval_news_finetune_val_loss': 0.39322975277900696, 'eval_news_finetune_val_runtime': 1002.8865, 'eval_news_finetune_val_samples_per_second': 1.396, 'eval_news_finetune_val_steps_per_second': 1.396, 'epoch': 0.29} +{'loss': 0.3184, 'grad_norm': 0.9278184771537781, 'learning_rate': 9.761904761904762e-05, 'epoch': 0.29} +{'loss': 0.473, 'grad_norm': 1.4571782350540161, 'learning_rate': 0.0001, 'epoch': 0.3} +{'loss': 0.392, 'grad_norm': 1.6199829578399658, 'learning_rate': 9.99982704095424e-05, 'epoch': 0.31} +{'loss': 0.3824, 'grad_norm': 1.302309513092041, 'learning_rate': 9.999308175782893e-05, 'epoch': 0.31} +{'loss': 0.4001, 'grad_norm': 1.438289761543274, 'learning_rate': 9.998443440382927e-05, 'epoch': 0.32} +{'loss': 0.4144, 'grad_norm': 1.7557189464569092, 'learning_rate': 9.997232894579868e-05, 'epoch': 0.33} +{'loss': 0.3094, 'grad_norm': 0.9362027645111084, 'learning_rate': 9.995676622123655e-05, 'epoch': 0.34} +{'loss': 0.2966, 'grad_norm': 1.7850221395492554, 'learning_rate': 9.993774730682845e-05, 'epoch': 0.34} +{'loss': 0.3274, 'grad_norm': 1.705842137336731, 'learning_rate': 9.991527351837174e-05, 'epoch': 0.35} +{'loss': 0.4301, 'grad_norm': 1.0722746849060059, 'learning_rate': 9.988934641068436e-05, 'epoch': 0.36} +[INFO|trainer.py:4228] 2025-03-06 01:51:27,973 >> Num examples = 1400 +[INFO|trainer.py:4231] 2025-03-06 01:51:27,973 >> Batch size = 1 + 12%|████▎ | 500/4197 [2:35:23<9:58:38, 9.72s/it][INFO|trainer.py:3910] 2025-03-06 02:08:10,835 >> Saving model checkpoint to /kaggle/working/checkpoint-500 +[INFO|configuration_utils.py:696] 2025-03-06 02:08:11,323 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/989aa7980e4cf806f80c7fef2b1adb7bc71aa306/config.json +{'eval_news_finetune_val_loss': 0.3787713646888733, 'eval_news_finetune_val_runtime': 1002.8588, 'eval_news_finetune_val_samples_per_second': 1.396, 'eval_news_finetune_val_steps_per_second': 1.396, 'epoch': 0.36} +[INFO|configuration_utils.py:768] 2025-03-06 02:08:11,324 >> Model config Qwen2Config { + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "eos_token_id": 151645, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "max_position_embeddings": 32768, + "max_window_layers": 21, + "model_type": "qwen2", + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.48.3", + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} + +[INFO|tokenization_utils_base.py:2491] 2025-03-06 02:08:11,997 >> tokenizer config file saved in /kaggle/working/checkpoint-500/tokenizer_config.json +[INFO|tokenization_utils_base.py:2500] 2025-03-06 02:08:11,998 >> Special tokens file saved in /kaggle/working/checkpoint-500/special_tokens_map.json +[INFO|tokenization_utils_base.py:2491] 2025-03-06 02:08:13,215 >> tokenizer config file saved in /kaggle/working/tokenizer_config.json +[INFO|tokenization_utils_base.py:2500] 2025-03-06 02:08:13,215 >> Special tokens file saved in /kaggle/working/special_tokens_map.json +It seems you are trying to upload a large folder at once. This might take some time and then fail if the folder is too large. For such cases, it is recommended to upload in smaller batches or to use `HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder. diff --git a/LLaMA-Factory/wandb/run-20250305_233246-9ct1o6yk/files/requirements.txt b/LLaMA-Factory/wandb/run-20250305_233246-9ct1o6yk/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..485b0da5b9528c5937d78dccd7f208f9cd887c0c --- /dev/null +++ b/LLaMA-Factory/wandb/run-20250305_233246-9ct1o6yk/files/requirements.txt @@ -0,0 +1,903 @@ +setproctitle==1.2.2 +psutil==6.1.1 +colorama==0.4.6 +airportsdata==20250224 +msgspec==0.19.0 +Faker==35.2.0 +watchfiles==1.0.4 +starlette==0.46.0 +xformers==0.0.28.post3 +MarkupSafe==2.1.5 +peft==0.12.0 +blake3==1.0.4 +openai==1.61.0 +gradio_client==1.7.2 +xgrammar==0.1.14 +datasets==3.2.0 +opencensus==0.11.4 +depyf==0.18.0 +fastapi==0.115.11 +outlines==0.1.11 +fsspec==2024.9.0 +json_repair==0.29.1 +uvloop==0.21.0 +vllm==0.7.2 +py-spy==0.4.0 +sse-starlette==2.2.1 +tomlkit==0.13.2 +tyro==0.8.14 +partial-json-parser==0.2.1.1.post5 +lark==1.2.2 +lm-format-enforcer==0.10.11 +wandb==0.19.8 +gradio==5.18.0 +colorful==0.5.6 +ruff==0.9.9 +av==14.2.0 +aiohttp-cors==0.7.0 +opencensus-context==0.1.3 +fire==0.7.0 +ffmpy==0.5.0 +compressed-tensors==0.9.1 +semantic-version==2.10.0 +virtualenv==20.29.2 +python-multipart==0.0.20 +distlib==0.3.9 +transformers==4.48.3 +anyio==4.8.0 +mistral_common==1.5.3 +astor==0.8.1 +diskcache==5.6.3 +prometheus-fastapi-instrumentator==7.0.2 +optimum==1.24.0 +pycountry==24.6.1 +httptools==0.6.4 +shtab==1.7.1 +interegular==0.3.3 +outlines_core==0.1.26 +llamafactory==0.9.2.dev0 +python-dotenv==1.0.1 +gguf==0.10.0 +uvicorn==0.34.0 +safehttpx==0.1.6 +trl==0.9.6 +protobuf==3.20.3 +bq_helper==0.4.1 +lightgbm==4.5.0 +pytools==2025.1.1 +pycuda==2025.1 +shapely==2.0.7 +tbb==2022.0.0 +mkl==2025.0.1 +tbb4py==2022.0.0 +libpysal==4.9.2 +intel-cmplr-lib-ur==2024.2.0 +intel-cmplr-lib-rt==2024.2.0 +mkl-umath==0.1.1 +mkl-service==2.4.1 +mkl-random==1.2.4 +numpy==1.26.4 +intel-openmp==2024.2.0 +mkl-fft==1.3.8 +tensorflow_decision_forests==1.10.0 +ydf==0.9.0 +wurlitzer==3.1.1 +learntools==0.3.4 +pycparser==2.22 +annotated-types==0.7.0 +pydantic_core==2.29.0 +pydantic==2.11.0a2 +dnspython==2.7.0 +in-toto-attestation==0.9.3 +attrs==25.1.0 +id==1.5.0 +PyJWT==2.10.1 +urllib3==2.3.0 +pyOpenSSL==25.0.0 +idna==3.10 +email_validator==2.2.0 +filelock==3.17.0 +cffi==1.17.1 +multiprocess==0.70.16 +charset-normalizer==3.4.1 +tuf==5.1.0 +propcache==0.2.1 +model-signing==0.2.0 +pyasn1==0.6.1 +aiohappyeyeballs==2.4.6 +rfc3161-client==0.1.2 +importlib-resources==5.13.0 +aiohttp==3.11.12 +huggingface-hub==0.29.0 +python-dateutil==2.9.0.post0 +certifi==2025.1.31 +markdown-it-py==3.0.0 +mdurl==0.1.2 +pytz==2025.1 +hpack==4.1.0 +xxhash==3.5.0 +six==1.17.0 +packaging==24.2 +rich==13.9.4 +sigstore-protobuf-specs==0.3.2 +grpclib==0.4.8rc2 +pandas==2.2.3 +requests==2.32.3 +multidict==6.1.0 +aiosignal==1.3.2 +typing_extensions==4.12.2 +sigstore==3.6.1 +Pygments==2.19.1 +betterproto==2.0.0b6 +tqdm==4.67.1 +PyYAML==6.0.2 +kagglehub==0.3.9 +cryptography==44.0.1 +async-timeout==5.0.1 +securesystemslib==1.2.0 +yarl==1.18.3 +hyperframe==6.1.0 +rfc8785==0.1.4 +sigstore-rekor-types==0.0.18 +platformdirs==4.3.6 +tzdata==2025.1 +h2==4.2.0 +dill==0.3.8 +frozenlist==1.5.0 +pyarrow==19.0.1 +ppft==1.7.6.9 +fiona==1.10.1 +urwid_readline==0.15.1 +keras-nlp==0.18.1 +Wand==0.6.13 +raft-dask-cu12==25.2.0 +qgrid==1.3.1 +jupyter_client==8.6.3 +woodwork==0.31.0 +overrides==7.7.0 +y-py==0.6.2 +docstring-to-markdown==0.15 +ipywidgets==8.1.5 +treelite==4.4.1 +hep_ml==0.7.3 +scikit-multilearn==0.2.0 +cytoolz==1.0.1 +pytesseract==0.3.13 +click-plugins==1.1.1 +onnx==1.17.0 +dask==2024.12.1 +odfpy==1.4.1 +mpld3==0.5.10 +Boruta==0.4.3 +pytorch-ignite==0.5.1 +fqdn==1.5.1 +torchinfo==1.8.0 +clint==0.5.1 +pybind11==2.13.6 +torchtune==0.5.0 +alembic==1.14.1 +pynvml==12.0.0 +nvidia-ml-py==12.570.86 +PyWavelets==1.8.0 +python-lsp-server==1.12.2 +jupyter_server_terminals==0.5.3 +pox==0.3.5 +keras-core==0.1.7 +libcudf-cu12==25.2.0 +pygltflib==1.16.3 +ucx-py-cu12==0.42.0 +cuvs-cu12==25.2.0 +pandas-profiling==3.6.6 +asttokens==3.0.0 +botocore==1.36.23 +scikit-surprise==1.1.4 +vtk==9.3.1 +jupyter-ydoc==0.2.5 +aiofiles==22.1.0 +deap==1.4.2 +isoduration==20.11.0 +featuretools==1.31.0 +plotly-express==0.4.1 +types-python-dateutil==2.9.0.20241206 +ipympl==0.9.6 +easyocr==1.7.2 +kornia==0.8.0 +slicer==0.0.7 +ImageHash==4.3.1 +numba-cuda==0.2.0 +pyemd==1.0.0 +fuzzywuzzy==0.18.0 +xgboost==2.0.3 +pandasql==0.7.3 +update-checker==0.18.0 +catboost==1.2.7 +pathos==0.3.2 +widgetsnbextension==4.0.13 +jupyter_server_fileid==0.9.3 +libcuvs-cu12==25.2.0 +nbdev==2.3.34 +ninja==1.11.1.3 +google-cloud-vision==3.10.0 +xvfbwrapper==0.2.9 +urwid==2.6.16 +google-cloud-storage==2.14.0 +fasttext==0.9.3 +daal==2025.2.0 +stopit==1.1.2 +ydata-profiling==4.12.2 +haversine==2.9.0 +colorlog==6.9.0 +jupyter_server==2.12.5 +dask-cudf-cu12==25.2.0 +geojson==3.2.0 +uri-template==1.3.0 +notebook==6.5.4 +fury==0.12.0 +igraph==0.11.8 +libucx-cu12==1.18.0 +distributed==2024.12.1 +google-cloud-automl==1.0.1 +nltk==3.2.4 +grpcio-status==1.48.2 +olefile==0.47 +semver==3.0.4 +gymnasium==0.29.0 +TPOT==0.12.1 +dask-expr==1.1.21 +google-cloud-translate==3.12.1 +tensorflow-cloud==0.1.5 +shap==0.44.1 +ghapi==1.0.6 +Cartopy==0.24.1 +jupyter-lsp==1.5.1 +dask-cuda==25.2.0 +gpxpy==1.6.2 +lightning-utilities==0.12.0 +zict==3.0.0 +tsfresh==0.20.2 +mlcrate==0.2.0 +papermill==2.6.0 +visions==0.7.6 +jupyterlab==3.6.8 +args==0.1.0 +typing-inspect==0.9.0 +omegaconf==2.3.0 +google-cloud-videointelligence==2.16.0 +dacite==1.9.2 +qtconsole==5.6.1 +trx-python==0.3 +Chessnut==0.4.1 +jmespath==1.0.1 +jupyterlab_server==2.27.3 +ypy-websocket==0.8.4 +ansicolors==1.1.8 +leven==1.0.4 +path.py==12.5.0 +blobfile==3.0.0 +tensorflow-io==0.37.1 +pymc3==3.11.4 +wavio==0.0.9 +cligj==0.7.2 +h2o==3.46.0.6 +pdf2image==1.17.0 +lml==0.1.0 +pyaml==25.1.0 +line_profiler==4.2.0 +pydub==0.25.1 +orderly-set==5.3.0 +pyLDAvis==3.4.1 +antlr4-python3-runtime==4.9.3 +Janome==0.5.0 +langid==1.1.6 +pyclipper==1.3.0.post6 +setuptools-scm==8.1.0 +scikit-learn-intelex==2025.2.0 +scikit-plot==0.3.7 +python-json-logger==3.2.1 +pydegensac==0.1.2 +jupyter_server_ydoc==0.8.0 +phik==0.12.4 +nose==1.3.7 +pycryptodomex==3.21.0 +keras-tuner==1.4.7 +execnb==0.1.11 +colorama==0.4.6 +PyArabic==0.6.15 +cesium==0.12.1 +kornia_rs==0.1.8 +optuna==4.2.1 +ujson==5.10.0 +Theano==1.0.5 +annoy==1.17.3 +Theano-PyMC==1.1.2 +Pympler==1.1 +dipy==1.10.0 +s3fs==0.4.2 +tblib==3.0.0 +geopandas==0.14.4 +nbconvert==6.4.5 +scikit-learn==1.2.2 +emoji==2.14.1 +SimpleITK==2.4.1 +watchdog==6.0.0 +pylibraft-cu12==25.2.0 +funcy==2.0 +rapids-dask-dependency==25.2.0 +testpath==0.6.0 +coverage==7.6.12 +s3transfer==0.11.2 +openslide-python==1.4.1 +rfc3986-validator==0.1.1 +PyUpSet==0.1.1.post7 +nbclient==0.5.13 +torchmetrics==1.6.1 +cuda-bindings==12.8.0 +python-bidi==0.6.6 +deepdiff==8.2.0 +squarify==0.4.4 +comm==0.2.2 +dataclasses-json==0.6.7 +jupyter-events==0.12.0 +pettingzoo==1.24.0 +matplotlib==3.7.5 +nilearn==0.10.4 +segment_anything==1.0 +datashader==0.17.0 +pypdf==5.3.0 +kaggle-environments==1.16.11 +marshmallow==3.26.1 +eli5==0.13.0 +rgf-python==3.12.0 +tiktoken==0.9.0 +Farama-Notifications==0.0.4 +stable-baselines3==2.1.0 +jedi==0.19.2 +google-api-core==1.34.1 +jupyterlab-lsp==3.10.2 +python-lsp-jsonrpc==1.1.2 +aiosqlite==0.21.0 +rmm-cu12==25.2.0 +QtPy==2.4.3 +pydicom==3.0.1 +multimethod==1.12 +docker==7.1.0 +mypy-extensions==1.0.0 +cuda-python==12.8.0 +libucxx-cu12==0.42.0 +libcuml-cu12==25.2.0 +arrow==1.3.0 +libkvikio-cu12==25.2.0 +isoweek==1.3.3 +texttable==1.7.0 +sphinx-rtd-theme==0.2.4 +kt-legacy==1.0.5 +pyct==0.5.0 +seaborn==0.12.2 +pyexcel-io==0.6.7 +Shimmy==1.3.0 +rfc3339-validator==0.1.4 +category_encoders==2.7.0 +stumpy==1.13.0 +mamba==0.11.3 +Rtree==1.3.0 +pytorch-lightning==2.5.0.post0 +keras-hub==0.18.1 +path==17.1.0 +Mako==1.3.9 +pyexcel-ods==0.6.0 +ray==2.42.1 +preprocessing==0.1.13 +lime==0.2.0.1 +htmlmin==0.1.12 +boto3==1.36.23 +pymongo==4.11.1 +bayesian-optimization==2.0.3 +pylibcudf-cu12==25.2.0 +pycryptodome==3.21.0 +cudf-cu12==25.2.0 +nvidia-nvcomp-cu12==4.1.0.6 +libraft-cu12==25.2.0 +ucxx-cu12==0.42.0 +keras-cv==0.9.0 +pudb==2024.1.3 +gatspy==0.3 +hf_transfer==0.1.9 +scikit-optimize==0.10.2 +mne==1.9.0 +sortedcontainers==2.4.0 +openslide-bin==4.0.0.6 +tensorflow-text==2.17.0 +json5==0.10.0 +mistune==0.8.4 +distributed-ucxx-cu12==0.42.0 +cuml-cu12==25.2.0 +google-colab==1.0.0 +psutil==5.9.5 +astunparse==1.6.3 +scs==3.2.7 +db-dtypes==1.3.1 +ipython==7.34.0 +cmake==3.31.2 +safetensors==0.4.5 +librosa==0.10.2.post1 +soxr==0.5.0.post1 +pydot==3.0.3 +python-box==7.3.0 +libclang==18.1.1 +gensim==4.3.3 +h11==0.14.0 +imagesize==1.4.1 +py-cpuinfo==9.0.0 +geemap==0.35.1 +debugpy==1.8.0 +jupyterlab_pygments==0.3.0 +backcall==0.2.0 +tensorflow-hub==0.16.1 +scooby==0.10.0 +en-core-web-sm==3.7.1 +qdldl==0.1.7.post4 +ipython-genutils==0.2.0 +catalogue==2.0.10 +proto-plus==1.25.0 +sphinxcontrib-devhelp==2.0.0 +partd==1.4.2 +Deprecated==1.2.15 +sklearn-pandas==2.2.0 +Markdown==3.7 +sphinxcontrib-qthelp==2.0.0 +google-auth-httplib2==0.2.0 +Flask==3.1.0 +preshed==3.0.9 +marisa-trie==1.2.1 +ipyleaflet==0.19.2 +tensorflow-probability==0.24.0 +chardet==5.2.0 +jupyter_core==5.7.2 +google-genai==0.2.2 +gspread==6.0.2 +albucore==0.0.19 +gin-config==0.5.0 +osqp==0.6.7.post3 +ipython-sql==0.5.0 +toml==0.10.2 +polars==1.9.0 +jsonpointer==3.0.0 +ndindex==1.9.2 +kaggle==1.6.17 +gspread-dataframe==3.3.1 +h5py==3.12.1 +tensorflow-io-gcs-filesystem==0.37.1 +datascience==0.17.6 +nvidia-cuda-nvcc-cu12==12.6.85 +alabaster==1.0.0 +pyshp==2.3.1 +pylibcugraph-cu12==24.10.0 +Jinja2==3.1.4 +nvidia-cublas-cu12==12.6.4.1 +spacy-legacy==3.0.12 +imageio==2.36.1 +requests-toolbelt==1.0.0 +thinc==8.2.5 +eval_type_backport==0.2.0 +PyDrive==1.3.1 +pytest==8.3.4 +imutils==0.5.4 +opt_einsum==3.4.0 +moviepy==1.0.3 +xarray-einstats==0.8.0 +lazy_loader==0.4 +imbalanced-learn==0.12.4 +ipyevents==2.0.2 +immutabledict==4.2.1 +google-cloud-functions==1.19.0 +music21==9.3.0 +narwhals==1.18.4 +linkify-it-py==2.0.3 +pytensor==2.26.4 +holidays==0.63 +language_data==1.3.0 +torchsummary==1.5.1 +webencodings==0.5.1 +webcolors==24.11.1 +google-cloud-aiplatform==1.74.0 +jellyfish==1.1.0 +gym==0.25.2 +tcmlib==1.2.0 +gdown==5.2.0 +fastcore==1.7.27 +pymystem3==0.2.0 +parso==0.8.4 +py4j==0.10.9.7 +entrypoints==0.4 +fastprogress==1.0.3 +pyogrio==0.10.0 +GitPython==3.1.43 +prettytable==3.12.0 +grpc-google-iam-v1==0.13.1 +msgpack==1.1.0 +oauthlib==3.2.2 +fastjsonschema==2.21.1 +sentence-transformers==3.3.1 +psycopg2==2.9.10 +missingno==0.5.2 +pandas-datareader==0.10.0 +pooch==1.8.2 +cycler==0.12.1 +cloudpickle==3.1.0 +cvxpy==1.6.0 +stringzilla==3.11.1 +nvidia-cusolver-cu12==11.7.1.2 +einops==0.8.0 +typeguard==4.4.1 +smmap==5.0.1 +argon2-cffi-bindings==21.2.0 +namex==0.0.8 +rpds-py==0.22.3 +dopamine_rl==4.1.0 +google-cloud-pubsub==2.27.1 +arviz==0.20.0 +google-cloud-resource-manager==1.14.0 +plotnine==0.14.4 +argon2-cffi==23.1.0 +diffusers==0.31.0 +pygit2==1.16.0 +sphinxcontrib-applehelp==2.0.0 +timm==1.0.12 +jax-cuda12-pjrt==0.4.33 +google-generativeai==0.8.3 +langchain==0.3.12 +clarabel==0.9.0 +google-api-python-client==2.155.0 +nvidia-nccl-cu12==2.23.4 +regex==2024.11.6 +tzlocal==5.2 +bleach==6.2.0 +tensorflow-metadata==1.13.1 +nvtx==0.2.10 +defusedxml==0.7.1 +sphinxcontrib-serializinghtml==2.0.0 +opencv-python-headless==4.10.0.84 +pynvjitlink-cu12==0.4.0 +python-utils==3.9.1 +Pyomo==6.8.2 +pydotplus==2.0.2 +ml-dtypes==0.4.1 +google-pasta==0.2.0 +pyzmq==24.0.1 +sqlglot==25.1.0 +cmdstanpy==1.2.5 +ipyparallel==8.8.0 +parsy==2.1 +spacy-loggers==1.0.5 +pandas-gbq==0.25.0 +prophet==1.1.6 +absl-py==1.4.0 +openpyxl==3.1.5 +vega-datasets==0.9.0 +mpmath==1.3.0 +scikit-image==0.25.0 +tomli==2.2.1 +frozendict==2.4.6 +gcsfs==2024.10.0 +google-cloud-bigquery==3.25.0 +mdit-py-plugins==0.4.2 +tornado==6.3.3 +astropy==6.1.7 +PyOpenGL==3.1.7 +keras==3.5.0 +langcodes==3.5.0 +cupy-cuda12x==12.2.0 +srsly==2.5.0 +astropy-iers-data==0.2024.12.16.0.35.48 +blinker==1.9.0 +tensorflow==2.17.1 +termcolor==2.5.0 +et_xmlfile==2.0.0 +jieba==0.42.1 +pluggy==1.5.0 +hyperopt==0.2.7 +python-louvain==0.16 +orbax-checkpoint==0.6.4 +google-auth-oauthlib==1.2.1 +soupsieve==2.6 +PyDrive2==1.21.3 +pyproj==3.7.0 +tf_keras==2.17.0 +wcwidth==0.2.13 +googleapis-common-protos==1.66.0 +google-cloud-language==2.16.0 +xyzservices==2024.9.0 +lxml==5.3.0 +google-cloud-core==2.4.1 +progressbar2==4.5.0 +pexpect==4.9.0 +imageio-ffmpeg==0.5.1 +ptyprocess==0.7.0 +pygame==2.6.1 +docker-pycreds==0.4.0 +exceptiongroup==1.2.2 +shellingham==1.5.4 +setproctitle==1.3.4 +prometheus_client==0.21.1 +nbformat==5.10.4 +tweepy==4.14.0 +nest-asyncio==1.6.0 +chex==0.1.88 +nvidia-cufft-cu12==11.3.0.4 +nibabel==5.3.2 +folium==0.19.2 +iniconfig==2.0.0 +simple-parsing==0.1.6 +multipledispatch==1.0.0 +networkx==3.4.2 +locket==1.0.0 +types-pytz==2024.2.0.20241003 +sentencepiece==0.2.0 +plotly==5.24.1 +rpy2==3.4.2 +ipytree==0.2.2 +graphviz==0.20.3 +opencv-python==4.10.0.84 +bokeh==3.6.2 +nvidia-cuda-runtime-cu12==12.6.77 +pymc==5.19.1 +pydata-google-auth==1.9.0 +decorator==4.4.2 +google-cloud-datastore==2.20.2 +zipp==3.21.0 +docstring_parser==0.16 +pickleshare==0.7.5 +peewee==3.17.8 +GDAL==3.6.4 +CacheControl==0.14.1 +ibis-framework==9.2.0 +holoviews==1.20.0 +googledrivedownloader==0.4 +wasabi==1.1.3 +bqplot==0.12.43 +optax==0.2.4 +gast==0.6.0 +Werkzeug==3.1.3 +colorcet==3.1.0 +tensorstore==0.1.71 +atpublic==4.1.0 +blis==0.7.11 +bigquery-magics==0.4.0 +humanize==4.11.0 +toolz==0.12.1 +python-slugify==8.0.4 +babel==2.16.0 +miniKanren==1.0.3 +traitlets==5.7.1 +sqlparse==0.5.3 +terminado==0.18.1 +tables==3.10.1 +tensorflow-datasets==4.9.7 +google-cloud-firestore==2.19.0 +sphinxcontrib-htmlhelp==2.1.0 +wrapt==1.17.0 +geocoder==1.38.1 +proglog==0.1.10 +ply==3.11 +textblob==0.17.1 +opentelemetry-api==1.29.0 +audioread==3.0.1 +docutils==0.21.2 +prompt_toolkit==3.0.48 +distro==1.9.0 +tf-slim==1.1.0 +google-auth==2.27.0 +cons==0.4.6 +h5netcdf==1.4.1 +blosc2==2.7.1 +fonttools==4.55.3 +array_record==0.5.1 +geographiclib==2.0 +requests-oauthlib==1.3.1 +matplotlib-inline==0.1.7 +editdistance==0.8.1 +httpcore==1.0.7 +spacy==3.7.5 +beautifulsoup4==4.12.3 +pillow==11.0.0 +tabulate==0.9.0 +statsmodels==0.14.4 +tensorboard==2.17.1 +community==1.0.0b1 +fastrlock==0.8.2 +gym-notices==0.0.8 +notebook_shim==0.2.4 +itsdangerous==2.2.0 +jsonpatch==1.33 +threadpoolctl==3.5.0 +google-ai-generativelanguage==0.6.10 +StrEnum==0.4.15 +traittypes==0.2.1 +autograd==1.7.0 +text-unidecode==1.3 +pycocotools==2.0.8 +weasel==0.4.1 +langsmith==0.2.3 +wordcloud==1.9.4 +scipy==1.13.1 +patsy==1.0.1 +gitdb==4.0.11 +dlib==19.24.2 +tenacity==9.0.0 +nvidia-cudnn-cu12==9.6.0.74 +umf==0.9.1 +more-itertools==10.5.0 +cloudpathlib==0.20.0 +altair==5.5.0 +torch==2.5.1+cu121 +cufflinks==0.17.3 +cvxopt==1.3.2 +ipykernel==5.5.6 +tokenizers==0.21.0 +importlib_metadata==8.5.0 +PySocks==1.7.1 +uc-micro-py==1.0.3 +joblib==1.4.2 +numba==0.60.0 +grpcio==1.68.1 +yfinance==0.2.50 +xlrd==2.0.1 +numexpr==2.10.2 +mlxtend==0.23.3 +promise==2.3 +soundfile==0.12.1 +flax==0.8.5 +smart-open==7.0.5 +Send2Trash==1.8.3 +google-cloud-iam==2.17.0 +sniffio==1.3.1 +httplib2==0.22.0 +referencing==0.35.1 +jupyterlab_widgets==3.0.13 +httpimport==1.4.0 +confection==0.1.5 +uritemplate==4.1.1 +stanio==0.5.1 +opentelemetry-sdk==1.29.0 +easydict==1.13 +flatbuffers==24.3.25 +llvmlite==0.43.0 +nvidia-nvjitlink-cu12==12.6.85 +Sphinx==8.1.3 +future==1.0.0 +websocket-client==1.8.0 +optree==0.13.1 +firebase-admin==6.6.0 +imgaug==0.4.0 +Bottleneck==1.4.2 +fastai==2.7.18 +sentry-sdk==2.19.2 +snowballstemmer==2.2.0 +orjson==3.10.12 +typer==0.15.1 +colour==0.1.5 +pyspark==3.5.3 +google-crc32c==1.6.0 +sphinxcontrib-jsmath==1.0.1 +pyviz_comms==3.0.3 +google-resumable-media==2.7.2 +portpicker==1.5.2 +nvidia-cusparse-cu12==12.5.4.2 +earthengine-api==1.4.3 +pyparsing==3.2.0 +sympy==1.13.1 +pathlib==1.0.1 +panel==1.5.4 +nvidia-curand-cu12==10.3.7.77 +jaxlib==0.4.33 +pandas-stubs==2.2.2.240909 +ratelim==0.1.6 +greenlet==3.1.1 +multitasking==0.0.11 +kiwisolver==1.4.7 +pyperclip==1.9.0 +jsonschema-specifications==2024.10.1 +jax-cuda12-plugin==0.4.33 +websockets==14.1 +tifffile==2024.12.12 +tinycss2==1.4.0 +torchaudio==2.5.1+cu121 +nbclassic==1.1.0 +jiter==0.8.2 +nx-cugraph-cu12==24.10.0 +jsonpickle==4.0.1 +opencv-contrib-python==4.10.0.84 +cachetools==5.5.0 +oauth2client==4.1.3 +jax==0.4.33 +etuples==0.3.9 +SQLAlchemy==2.0.36 +albumentations==1.4.20 +geopy==2.4.1 +murmurhash==1.0.11 +logical-unification==0.4.6 +natsort==8.4.0 +eerepr==0.0.4 +pyerfa==2.0.1.5 +param==2.2.0 +click==8.1.7 +cymem==2.0.10 +langchain-core==0.3.25 +langchain-text-splitters==0.3.3 +mizani==0.13.1 +etils==1.11.0 +bigframes==1.29.0 +accelerate==1.2.1 +google-cloud-bigtable==2.27.0 +httpx==0.28.1 +google-cloud-bigquery-connection==1.17.0 +jsonschema==4.23.0 +fastdownload==0.0.7 +matplotlib-venn==1.1.1 +jupyter-console==6.1.0 +duckdb==1.1.3 +branca==0.8.1 +pyasn1_modules==0.4.1 +Cython==3.0.11 +pandocfilters==1.5.1 +yellowbrick==1.5 +torchvision==0.20.1+cu121 +contourpy==1.3.1 +tensorboard-data-server==0.7.2 +google==2.0.3 +jupyter-leaflet==0.19.2 +xarray==2024.11.0 +dm-tree==0.1.8 +wheel==0.45.1 +nvidia-cuda-cupti-cu12==12.6.80 +opentelemetry-semantic-conventions==0.50b0 +glob2==0.7 +colorlover==0.3.0 +ipyfilechooser==0.6.0 +rsa==4.9 +inflect==7.4.0 +html5lib==1.1 +python-apt==0.0.0 +setuptools==75.1.0 +types-setuptools==75.6.0.20241126 +requirements-parser==0.9.0 +pip==24.1.2 +llamafactory==0.9.2.dev0 +PyGObject==3.42.1 +blinker==1.4 +jeepney==0.7.1 +six==1.16.0 +oauthlib==3.2.0 +wadllib==1.3.6 +launchpadlib==1.10.16 +dbus-python==1.2.18 +PyJWT==2.3.0 +importlib-metadata==4.6.4 +httplib2==0.20.2 +zipp==1.0.0 +pyparsing==2.4.7 +python-apt==2.4.0+ubuntu4 +lazr.restfulclient==0.14.4 +SecretStorage==3.3.1 +distro==1.7.0 +lazr.uri==1.0.6 +more-itertools==8.10.0 +cryptography==3.4.8 +keyring==23.5.0 +packaging==24.1 +inflect==7.3.1 +autocommand==2.2.2 +typeguard==4.3.0 +jaraco.text==3.12.1 +importlib_resources==6.4.0 +wheel==0.43.0 +zipp==3.19.2 +platformdirs==4.2.2 +importlib_metadata==8.0.0 +tomli==2.0.1 +jaraco.collections==5.1.0 +more-itertools==10.3.0 +typing_extensions==4.12.2 +backports.tarfile==1.2.0 +jaraco.functools==4.0.1 +jaraco.context==5.3.0 diff --git a/LLaMA-Factory/wandb/run-20250305_233246-9ct1o6yk/files/wandb-metadata.json b/LLaMA-Factory/wandb/run-20250305_233246-9ct1o6yk/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..0e004180641b1d05ae0615363952e8f2670fc414 --- /dev/null +++ b/LLaMA-Factory/wandb/run-20250305_233246-9ct1o6yk/files/wandb-metadata.json @@ -0,0 +1,43 @@ +{ + "os": "Linux-6.6.56+-x86_64-with-glibc2.35", + "python": "CPython 3.10.12", + "startedAt": "2025-03-05T23:32:46.351190Z", + "args": [ + "/kaggle/working/LLaMA-Factory/examples/train_loranews_finetune.yaml" + ], + "program": "/usr/local/bin/llamafactory-cli", + "git": { + "remote": "https://github.com/hiyouga/LLaMA-Factory.git", + "commit": "8ad03258e16309158368384e2a0a707845536133" + }, + "email": "osamamohamedmohamed30@gmail.com", + "root": "/kaggle/working/LLaMA-Factory", + "host": "7cf3bf98fc84", + "executable": "/usr/bin/python3", + "cpu_count": 2, + "cpu_count_logical": 4, + "gpu": "Tesla P100-PCIE-16GB", + "gpu_count": 1, + "disk": { + "/": { + "total": "8656922775552", + "used": "6630373683200" + } + }, + "memory": { + "total": "33662353408" + }, + "cpu": { + "count": 2, + "countLogical": 4 + }, + "gpu_nvidia": [ + { + "name": "Tesla P100-PCIE-16GB", + "memoryTotal": "17179869184", + "cudaCores": 3584, + "architecture": "Pascal" + } + ], + "cudaVersion": "12.6" +} \ No newline at end of file diff --git a/LLaMA-Factory/wandb/run-20250305_233246-9ct1o6yk/logs/debug-core.log b/LLaMA-Factory/wandb/run-20250305_233246-9ct1o6yk/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..071214043679534686809d55f9887cd40051bc36 --- /dev/null +++ b/LLaMA-Factory/wandb/run-20250305_233246-9ct1o6yk/logs/debug-core.log @@ -0,0 +1,7 @@ +{"time":"2025-03-05T23:31:05.998906481Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp8gaa3gds/port-18.txt","pid":18,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false} +{"time":"2025-03-05T23:31:06.013820115Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":18} +{"time":"2025-03-05T23:31:06.013800198Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":43967,"Zone":""}} +{"time":"2025-03-05T23:31:06.184506865Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:47328"} +{"time":"2025-03-05T23:32:45.932837658Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:41314"} +{"time":"2025-03-05T23:32:46.352812267Z","level":"INFO","msg":"handleInformInit: received","streamId":"9ct1o6yk","id":"127.0.0.1:41314"} +{"time":"2025-03-05T23:32:46.455759901Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"9ct1o6yk","id":"127.0.0.1:41314"} diff --git a/LLaMA-Factory/wandb/run-20250305_233246-9ct1o6yk/logs/debug-internal.log b/LLaMA-Factory/wandb/run-20250305_233246-9ct1o6yk/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..7f352addde07d876abb65995dc9b1d71b63605e0 --- /dev/null +++ b/LLaMA-Factory/wandb/run-20250305_233246-9ct1o6yk/logs/debug-internal.log @@ -0,0 +1,7 @@ +{"time":"2025-03-05T23:32:46.352950755Z","level":"INFO","msg":"stream: starting","core version":"0.19.8","symlink path":"/kaggle/working/LLaMA-Factory/wandb/run-20250305_233246-9ct1o6yk/logs/debug-core.log"} +{"time":"2025-03-05T23:32:46.455714274Z","level":"INFO","msg":"created new stream","id":"9ct1o6yk"} +{"time":"2025-03-05T23:32:46.455754315Z","level":"INFO","msg":"stream: started","id":"9ct1o6yk"} +{"time":"2025-03-05T23:32:46.455787298Z","level":"INFO","msg":"writer: Do: started","stream_id":"9ct1o6yk"} +{"time":"2025-03-05T23:32:46.455813658Z","level":"INFO","msg":"sender: started","stream_id":"9ct1o6yk"} +{"time":"2025-03-05T23:32:46.455830745Z","level":"INFO","msg":"handler: started","stream_id":"9ct1o6yk"} +{"time":"2025-03-05T23:32:46.78514652Z","level":"INFO","msg":"Starting system monitor"} diff --git a/LLaMA-Factory/wandb/run-20250305_233246-9ct1o6yk/logs/debug.log b/LLaMA-Factory/wandb/run-20250305_233246-9ct1o6yk/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..dcd53a6080019a2b54764ff953d77b3a219fbab3 --- /dev/null +++ b/LLaMA-Factory/wandb/run-20250305_233246-9ct1o6yk/logs/debug.log @@ -0,0 +1,26 @@ +2025-03-05 23:32:46,342 INFO MainThread:163 [wandb_setup.py:_flush():67] Current SDK version is 0.19.8 +2025-03-05 23:32:46,342 INFO MainThread:163 [wandb_setup.py:_flush():67] Configure stats pid to 163 +2025-03-05 23:32:46,342 INFO MainThread:163 [wandb_setup.py:_flush():67] Loading settings from /root/.config/wandb/settings +2025-03-05 23:32:46,342 INFO MainThread:163 [wandb_setup.py:_flush():67] Loading settings from /kaggle/working/LLaMA-Factory/wandb/settings +2025-03-05 23:32:46,342 INFO MainThread:163 [wandb_setup.py:_flush():67] Loading settings from environment variables +2025-03-05 23:32:46,342 INFO MainThread:163 [wandb_init.py:setup_run_log_directory():647] Logging user logs to /kaggle/working/LLaMA-Factory/wandb/run-20250305_233246-9ct1o6yk/logs/debug.log +2025-03-05 23:32:46,342 INFO MainThread:163 [wandb_init.py:setup_run_log_directory():648] Logging internal logs to /kaggle/working/LLaMA-Factory/wandb/run-20250305_233246-9ct1o6yk/logs/debug-internal.log +2025-03-05 23:32:46,343 INFO MainThread:163 [wandb_init.py:init():761] calling init triggers +2025-03-05 23:32:46,343 INFO MainThread:163 [wandb_init.py:init():766] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2025-03-05 23:32:46,343 INFO MainThread:163 [wandb_init.py:init():784] starting backend +2025-03-05 23:32:46,343 INFO MainThread:163 [wandb_init.py:init():788] sending inform_init request +2025-03-05 23:32:46,350 INFO MainThread:163 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2025-03-05 23:32:46,350 INFO MainThread:163 [wandb_init.py:init():798] backend started and connected +2025-03-05 23:32:46,355 INFO MainThread:163 [wandb_init.py:init():891] updated telemetry +2025-03-05 23:32:46,361 INFO MainThread:163 [wandb_init.py:init():915] communicating run to backend with 90.0 second timeout +2025-03-05 23:32:46,782 INFO MainThread:163 [wandb_init.py:init():990] starting run threads in backend +2025-03-05 23:32:47,457 INFO MainThread:163 [wandb_run.py:_console_start():2375] atexit reg +2025-03-05 23:32:47,457 INFO MainThread:163 [wandb_run.py:_redirect():2227] redirect: wrap_raw +2025-03-05 23:32:47,457 INFO MainThread:163 [wandb_run.py:_redirect():2292] Wrapping output streams. +2025-03-05 23:32:47,457 INFO MainThread:163 [wandb_run.py:_redirect():2315] Redirects installed. +2025-03-05 23:32:47,460 INFO MainThread:163 [wandb_init.py:init():1032] run started, returning control to user process +2025-03-05 23:32:47,463 INFO MainThread:163 [wandb_run.py:_config_callback():1261] config_cb None None {'peft_config': {'default': {'peft_type': , 'auto_mapping': None, 'base_model_name_or_path': 'Qwen/Qwen2.5-1.5B-Instruct', 'revision': None, 'task_type': , 'inference_mode': False, 'r': 64, 'target_modules': {'q_proj', 'v_proj', 'gate_proj', 'k_proj', 'down_proj', 'o_proj', 'up_proj'}, 'lora_alpha': 128, 'lora_dropout': 0.0, 'fan_in_fan_out': False, 'bias': 'none', 'use_rslora': False, 'modules_to_save': None, 'init_lora_weights': True, 'layers_to_transform': None, 'layers_pattern': None, 'rank_pattern': {}, 'alpha_pattern': {}, 'megatron_config': None, 'megatron_core': 'megatron.core', 'loftq_config': {}, 'use_dora': False, 'layer_replication': None, 'runtime_config': {'ephemeral_gpu_offload': False}}}, 'vocab_size': 151936, 'max_position_embeddings': 32768, 'hidden_size': 1536, 'intermediate_size': 8960, 'num_hidden_layers': 28, 'num_attention_heads': 12, 'use_sliding_window': False, 'sliding_window': None, 'max_window_layers': 21, 'num_key_value_heads': 2, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-06, 'use_cache': False, 'rope_theta': 1000000.0, 'rope_scaling': None, 'attention_dropout': 0.0, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['Qwen2ForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 151643, 'pad_token_id': None, 'eos_token_id': 151645, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'Qwen/Qwen2.5-1.5B-Instruct', '_attn_implementation_autoset': True, 'transformers_version': '4.48.3', 'model_type': 'qwen2', 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 0.0001, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': -1, 'lr_scheduler_type': 'cosine', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.1, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar05_23-32-16_7cf3bf98fc84', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': True, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 100, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'newsx-finetune-llamafactory', 'disable_tqdm': False, 'remove_unused_columns': False, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'checkpoint', 'hub_token': '', 'hub_private_repo': True, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': None, 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 180000000, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': False, 'generation_max_length': 2048, 'generation_num_beams': None, 'generation_config': None, 'ray_run_name': None, 'ray_storage_path': './saves', 'ray_num_workers': 1, 'resources_per_worker': {'GPU': 1}, 'placement_strategy': 'PACK'} +2025-03-05 23:32:47,471 INFO MainThread:163 [wandb_config.py:__setitem__():154] config set model/num_parameters = 1617573376 - > +2025-03-05 23:32:47,471 INFO MainThread:163 [wandb_run.py:_config_callback():1261] config_cb model/num_parameters 1617573376 None +2025-03-05 23:32:47,473 INFO MainThread:163 [wandb_run.py:_config_callback():1261] config_cb None None {'model_args': {'vllm_maxlen': 4096, 'vllm_gpu_util': 0.9, 'vllm_enforce_eager': False, 'vllm_max_lora_rank': 32, 'vllm_config': None, 'export_dir': None, 'export_size': 5, 'export_device': 'cpu', 'export_quantization_bit': None, 'export_quantization_dataset': None, 'export_quantization_nsamples': 128, 'export_quantization_maxlen': 1024, 'export_legacy_format': False, 'export_hub_model_id': 'OsamaMo/Arabic_Text-To-SQL', 'image_max_pixels': 589824, 'image_min_pixels': 1024, 'video_max_pixels': 65536, 'video_min_pixels': 256, 'video_fps': 2.0, 'video_maxlen': 128, 'quantization_method': 'bitsandbytes', 'quantization_bit': None, 'quantization_type': 'nf4', 'double_quantization': True, 'quantization_device_map': None, 'model_name_or_path': 'Qwen/Qwen2.5-1.5B-Instruct', 'adapter_name_or_path': None, 'adapter_folder': None, 'cache_dir': None, 'use_fast_tokenizer': True, 'resize_vocab': False, 'split_special_tokens': False, 'new_special_tokens': None, 'model_revision': 'main', 'low_cpu_mem_usage': True, 'rope_scaling': None, 'flash_attn': 'auto', 'shift_attn': False, 'mixture_of_depths': None, 'use_unsloth': False, 'use_unsloth_gc': False, 'enable_liger_kernel': False, 'moe_aux_loss_coef': None, 'disable_gradient_checkpointing': False, 'use_reentrant_gc': True, 'upcast_layernorm': False, 'upcast_lmhead_output': False, 'train_from_scratch': False, 'infer_backend': 'huggingface', 'offload_folder': 'offload', 'use_cache': True, 'infer_dtype': 'auto', 'hf_hub_token': '', 'ms_hub_token': '', 'om_hub_token': '', 'print_param_status': False, 'trust_remote_code': True, 'compute_dtype': 'torch.bfloat16', 'device_map': {'': 'cuda:0'}, 'model_max_length': 2048, 'block_diag_attn': False}, 'data_args': {'template': 'qwen', 'dataset': ['news_finetune_train'], 'eval_dataset': ['news_finetune_val'], 'dataset_dir': 'data', 'media_dir': 'data', 'cutoff_len': 2048, 'train_on_prompt': False, 'mask_history': False, 'streaming': False, 'buffer_size': 16384, 'mix_strategy': 'concat', 'interleave_probs': None, 'overwrite_cache': True, 'preprocessing_batch_size': 1000, 'preprocessing_num_workers': 16, 'max_samples': None, 'eval_num_beams': None, 'ignore_pad_token_for_loss': True, 'val_size': 0.0, 'packing': False, 'neat_packing': False, 'tool_format': None, 'tokenized_path': None}, 'finetuning_args': {'use_swanlab': False, 'swanlab_project': 'llamafactory', 'swanlab_workspace': None, 'swanlab_run_name': None, 'swanlab_mode': 'cloud', 'swanlab_api_key': '', 'use_badam': False, 'badam_mode': 'layer', 'badam_start_block': None, 'badam_switch_mode': 'ascending', 'badam_switch_interval': 50, 'badam_update_ratio': 0.05, 'badam_mask_mode': 'adjacent', 'badam_verbose': 0, 'use_apollo': False, 'apollo_target': ['all'], 'apollo_rank': 16, 'apollo_update_interval': 200, 'apollo_scale': 32.0, 'apollo_proj': 'random', 'apollo_proj_type': 'std', 'apollo_scale_type': 'channel', 'apollo_layerwise': False, 'apollo_scale_front': False, 'use_galore': False, 'galore_target': ['all'], 'galore_rank': 16, 'galore_update_interval': 200, 'galore_scale': 2.0, 'galore_proj_type': 'std', 'galore_layerwise': False, 'pref_beta': 0.1, 'pref_ftx': 0.0, 'pref_loss': 'sigmoid', 'dpo_label_smoothing': 0.0, 'kto_chosen_weight': 1.0, 'kto_rejected_weight': 1.0, 'simpo_gamma': 0.5, 'ppo_buffer_size': 1, 'ppo_epochs': 4, 'ppo_score_norm': False, 'ppo_target': 6.0, 'ppo_whiten_rewards': False, 'ref_model': None, 'ref_model_adapters': None, 'ref_model_quantization_bit': None, 'reward_model': None, 'reward_model_adapters': None, 'reward_model_quantization_bit': None, 'reward_model_type': 'lora', 'additional_target': None, 'lora_alpha': 128, 'lora_dropout': 0.0, 'lora_rank': 64, 'lora_target': ['all'], 'loraplus_lr_ratio': None, 'loraplus_lr_embedding': 1e-06, 'use_rslora': False, 'use_dora': False, 'pissa_init': False, 'pissa_iter': 16, 'pissa_convert': False, 'create_new_adapter': False, 'freeze_trainable_layers': 2, 'freeze_trainable_modules': ['all'], 'freeze_extra_modules': None, 'pure_bf16': False, 'stage': 'sft', 'finetuning_type': 'lora', 'use_llama_pro': False, 'use_adam_mini': False, 'freeze_vision_tower': True, 'freeze_multi_modal_projector': True, 'train_mm_proj_only': False, 'compute_accuracy': False, 'disable_shuffling': False, 'plot_loss': True, 'include_effective_tokens_per_second': False}, 'generating_args': {'do_sample': True, 'temperature': 0.95, 'top_p': 0.7, 'top_k': 50, 'num_beams': 1, 'max_new_tokens': 1024, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'default_system': None, 'skip_special_tokens': True}} diff --git a/LLaMA-Factory/wandb/run-20250305_233246-9ct1o6yk/run-9ct1o6yk.wandb b/LLaMA-Factory/wandb/run-20250305_233246-9ct1o6yk/run-9ct1o6yk.wandb new file mode 100644 index 0000000000000000000000000000000000000000..d8b7a9102559510301849524337c0cfceec8c674 --- /dev/null +++ b/LLaMA-Factory/wandb/run-20250305_233246-9ct1o6yk/run-9ct1o6yk.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d15e4d751db20edfc626a3e0507eb5fa31644b527dcba46b8fc0e27df3f6c60 +size 2818048 diff --git a/adapter_config.json b/adapter_config.json index 8bb2ca5a8247f01c1e2665dec0a7c3e554821937..82d459588f1ef8214ae9529c8f21202ae72f09ad 100644 --- a/adapter_config.json +++ b/adapter_config.json @@ -20,12 +20,12 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "o_proj", - "k_proj", - "down_proj", - "gate_proj", "q_proj", "v_proj", + "gate_proj", + "k_proj", + "down_proj", + "o_proj", "up_proj" ], "task_type": "CAUSAL_LM", diff --git a/trainer_log.jsonl b/trainer_log.jsonl index 397b00d758764abd7d09210b865908ec197cb15e..56a708a78156929be6454302f32426e34c9dd9bc 100644 --- a/trainer_log.jsonl +++ b/trainer_log.jsonl @@ -1,57 +1,55 @@ -{"current_steps": 10, "total_steps": 4197, "loss": 1.0705, "lr": 2.3809523809523808e-06, "epoch": 0.007145409074669525, "percentage": 0.24, "elapsed_time": "0:01:27", "remaining_time": "10:13:15"} -{"current_steps": 20, "total_steps": 4197, "loss": 1.0799, "lr": 4.7619047619047615e-06, "epoch": 0.01429081814933905, "percentage": 0.48, "elapsed_time": "0:02:51", "remaining_time": "9:58:03"} -{"current_steps": 10, "total_steps": 4197, "loss": 1.0969, "lr": 2.3809523809523808e-06, "epoch": 0.007145409074669525, "percentage": 0.24, "elapsed_time": "0:01:27", "remaining_time": "10:09:38"} -{"current_steps": 20, "total_steps": 4197, "loss": 1.0795, "lr": 4.7619047619047615e-06, "epoch": 0.01429081814933905, "percentage": 0.48, "elapsed_time": "0:02:51", "remaining_time": "9:56:18"} -{"current_steps": 30, "total_steps": 4197, "loss": 0.7536, "lr": 7.142857142857143e-06, "epoch": 0.021436227224008574, "percentage": 0.71, "elapsed_time": "0:04:13", "remaining_time": "9:47:33"} -{"current_steps": 40, "total_steps": 4197, "loss": 0.5564, "lr": 9.523809523809523e-06, "epoch": 0.0285816362986781, "percentage": 0.95, "elapsed_time": "0:05:43", "remaining_time": "9:54:10"} +{"current_steps": 10, "total_steps": 4197, "loss": 1.0969, "lr": 2.3809523809523808e-06, "epoch": 0.007145409074669525, "percentage": 0.24, "elapsed_time": "0:01:27", "remaining_time": "10:10:31"} +{"current_steps": 20, "total_steps": 4197, "loss": 1.0795, "lr": 4.7619047619047615e-06, "epoch": 0.01429081814933905, "percentage": 0.48, "elapsed_time": "0:02:51", "remaining_time": "9:56:35"} +{"current_steps": 30, "total_steps": 4197, "loss": 0.7536, "lr": 7.142857142857143e-06, "epoch": 0.021436227224008574, "percentage": 0.71, "elapsed_time": "0:04:13", "remaining_time": "9:47:42"} +{"current_steps": 40, "total_steps": 4197, "loss": 0.5564, "lr": 9.523809523809523e-06, "epoch": 0.0285816362986781, "percentage": 0.95, "elapsed_time": "0:05:43", "remaining_time": "9:54:12"} {"current_steps": 50, "total_steps": 4197, "loss": 0.623, "lr": 1.1904761904761905e-05, "epoch": 0.03572704537334762, "percentage": 1.19, "elapsed_time": "0:07:04", "remaining_time": "9:47:13"} {"current_steps": 60, "total_steps": 4197, "loss": 0.5322, "lr": 1.4285714285714285e-05, "epoch": 0.04287245444801715, "percentage": 1.43, "elapsed_time": "0:08:33", "remaining_time": "9:50:11"} -{"current_steps": 70, "total_steps": 4197, "loss": 0.5102, "lr": 1.6666666666666667e-05, "epoch": 0.050017863522686674, "percentage": 1.67, "elapsed_time": "0:09:47", "remaining_time": "9:37:05"} -{"current_steps": 80, "total_steps": 4197, "loss": 0.568, "lr": 1.9047619047619046e-05, "epoch": 0.0571632725973562, "percentage": 1.91, "elapsed_time": "0:11:03", "remaining_time": "9:29:24"} -{"current_steps": 90, "total_steps": 4197, "loss": 0.4883, "lr": 2.1428571428571428e-05, "epoch": 0.06430868167202572, "percentage": 2.14, "elapsed_time": "0:12:37", "remaining_time": "9:36:06"} -{"current_steps": 100, "total_steps": 4197, "loss": 0.417, "lr": 2.380952380952381e-05, "epoch": 0.07145409074669525, "percentage": 2.38, "elapsed_time": "0:13:56", "remaining_time": "9:30:57"} -{"current_steps": 100, "total_steps": 4197, "epoch": 0.07145409074669525, "percentage": 2.38, "elapsed_time": "0:30:38", "remaining_time": "20:55:39"} -{"current_steps": 110, "total_steps": 4197, "loss": 0.4595, "lr": 2.6190476190476192e-05, "epoch": 0.07859949982136477, "percentage": 2.62, "elapsed_time": "0:32:02", "remaining_time": "19:50:22"} -{"current_steps": 120, "total_steps": 4197, "loss": 0.4658, "lr": 2.857142857142857e-05, "epoch": 0.0857449088960343, "percentage": 2.86, "elapsed_time": "0:33:27", "remaining_time": "18:56:28"} -{"current_steps": 130, "total_steps": 4197, "loss": 0.4122, "lr": 3.095238095238095e-05, "epoch": 0.09289031797070382, "percentage": 3.1, "elapsed_time": "0:34:49", "remaining_time": "18:09:30"} -{"current_steps": 140, "total_steps": 4197, "loss": 0.4878, "lr": 3.3333333333333335e-05, "epoch": 0.10003572704537335, "percentage": 3.34, "elapsed_time": "0:36:23", "remaining_time": "17:34:43"} -{"current_steps": 150, "total_steps": 4197, "loss": 0.4168, "lr": 3.571428571428572e-05, "epoch": 0.10718113612004287, "percentage": 3.57, "elapsed_time": "0:37:52", "remaining_time": "17:01:54"} -{"current_steps": 160, "total_steps": 4197, "loss": 0.4298, "lr": 3.809523809523809e-05, "epoch": 0.1143265451947124, "percentage": 3.81, "elapsed_time": "0:39:13", "remaining_time": "16:29:38"} -{"current_steps": 170, "total_steps": 4197, "loss": 0.4413, "lr": 4.047619047619048e-05, "epoch": 0.12147195426938193, "percentage": 4.05, "elapsed_time": "0:40:31", "remaining_time": "16:00:05"} -{"current_steps": 180, "total_steps": 4197, "loss": 0.3734, "lr": 4.2857142857142856e-05, "epoch": 0.12861736334405144, "percentage": 4.29, "elapsed_time": "0:41:54", "remaining_time": "15:35:14"} -{"current_steps": 190, "total_steps": 4197, "loss": 0.4015, "lr": 4.523809523809524e-05, "epoch": 0.13576277241872098, "percentage": 4.53, "elapsed_time": "0:43:22", "remaining_time": "15:14:40"} -{"current_steps": 200, "total_steps": 4197, "loss": 0.4411, "lr": 4.761904761904762e-05, "epoch": 0.1429081814933905, "percentage": 4.77, "elapsed_time": "0:44:43", "remaining_time": "14:53:42"} -{"current_steps": 200, "total_steps": 4197, "epoch": 0.1429081814933905, "percentage": 4.77, "elapsed_time": "1:01:26", "remaining_time": "20:27:53"} -{"current_steps": 210, "total_steps": 4197, "loss": 0.3697, "lr": 5e-05, "epoch": 0.15005359056806003, "percentage": 5.0, "elapsed_time": "1:02:58", "remaining_time": "19:55:36"} -{"current_steps": 220, "total_steps": 4197, "loss": 0.4076, "lr": 5.2380952380952384e-05, "epoch": 0.15719899964272954, "percentage": 5.24, "elapsed_time": "1:04:20", "remaining_time": "19:23:08"} -{"current_steps": 230, "total_steps": 4197, "loss": 0.3307, "lr": 5.4761904761904766e-05, "epoch": 0.16434440871739908, "percentage": 5.48, "elapsed_time": "1:05:48", "remaining_time": "18:54:57"} -{"current_steps": 240, "total_steps": 4197, "loss": 0.4227, "lr": 5.714285714285714e-05, "epoch": 0.1714898177920686, "percentage": 5.72, "elapsed_time": "1:07:07", "remaining_time": "18:26:37"} -{"current_steps": 250, "total_steps": 4197, "loss": 0.4689, "lr": 5.9523809523809524e-05, "epoch": 0.17863522686673813, "percentage": 5.96, "elapsed_time": "1:08:29", "remaining_time": "18:01:26"} -{"current_steps": 260, "total_steps": 4197, "loss": 0.3618, "lr": 6.19047619047619e-05, "epoch": 0.18578063594140765, "percentage": 6.19, "elapsed_time": "1:09:54", "remaining_time": "17:38:32"} -{"current_steps": 270, "total_steps": 4197, "loss": 0.4668, "lr": 6.428571428571429e-05, "epoch": 0.19292604501607716, "percentage": 6.43, "elapsed_time": "1:11:19", "remaining_time": "17:17:23"} -{"current_steps": 280, "total_steps": 4197, "loss": 0.3525, "lr": 6.666666666666667e-05, "epoch": 0.2000714540907467, "percentage": 6.67, "elapsed_time": "1:12:36", "remaining_time": "16:55:42"} -{"current_steps": 290, "total_steps": 4197, "loss": 0.3707, "lr": 6.904761904761905e-05, "epoch": 0.2072168631654162, "percentage": 6.91, "elapsed_time": "1:13:59", "remaining_time": "16:36:56"} -{"current_steps": 300, "total_steps": 4197, "loss": 0.4461, "lr": 7.142857142857143e-05, "epoch": 0.21436227224008575, "percentage": 7.15, "elapsed_time": "1:15:25", "remaining_time": "16:19:47"} -{"current_steps": 300, "total_steps": 4197, "epoch": 0.21436227224008575, "percentage": 7.15, "elapsed_time": "1:32:08", "remaining_time": "19:57:00"} -{"current_steps": 310, "total_steps": 4197, "loss": 0.3439, "lr": 7.380952380952382e-05, "epoch": 0.22150768131475526, "percentage": 7.39, "elapsed_time": "1:33:42", "remaining_time": "19:35:03"} -{"current_steps": 320, "total_steps": 4197, "loss": 0.4492, "lr": 7.619047619047618e-05, "epoch": 0.2286530903894248, "percentage": 7.62, "elapsed_time": "1:35:13", "remaining_time": "19:13:38"} -{"current_steps": 330, "total_steps": 4197, "loss": 0.3399, "lr": 7.857142857142858e-05, "epoch": 0.2357984994640943, "percentage": 7.86, "elapsed_time": "1:36:31", "remaining_time": "18:51:03"} -{"current_steps": 340, "total_steps": 4197, "loss": 0.4336, "lr": 8.095238095238096e-05, "epoch": 0.24294390853876385, "percentage": 8.1, "elapsed_time": "1:38:12", "remaining_time": "18:34:04"} -{"current_steps": 350, "total_steps": 4197, "loss": 0.3628, "lr": 8.333333333333334e-05, "epoch": 0.2500893176134334, "percentage": 8.34, "elapsed_time": "1:39:47", "remaining_time": "18:16:50"} -{"current_steps": 360, "total_steps": 4197, "loss": 0.4113, "lr": 8.571428571428571e-05, "epoch": 0.2572347266881029, "percentage": 8.58, "elapsed_time": "1:41:20", "remaining_time": "18:00:10"} -{"current_steps": 370, "total_steps": 4197, "loss": 0.4009, "lr": 8.80952380952381e-05, "epoch": 0.2643801357627724, "percentage": 8.82, "elapsed_time": "1:43:02", "remaining_time": "17:45:51"} -{"current_steps": 380, "total_steps": 4197, "loss": 0.3399, "lr": 9.047619047619048e-05, "epoch": 0.27152554483744196, "percentage": 9.05, "elapsed_time": "1:44:30", "remaining_time": "17:29:44"} -{"current_steps": 390, "total_steps": 4197, "loss": 0.326, "lr": 9.285714285714286e-05, "epoch": 0.27867095391211144, "percentage": 9.29, "elapsed_time": "1:45:58", "remaining_time": "17:14:26"} -{"current_steps": 400, "total_steps": 4197, "loss": 0.4228, "lr": 9.523809523809524e-05, "epoch": 0.285816362986781, "percentage": 9.53, "elapsed_time": "1:47:22", "remaining_time": "16:59:16"} -{"current_steps": 400, "total_steps": 4197, "epoch": 0.285816362986781, "percentage": 9.53, "elapsed_time": "2:04:06", "remaining_time": "19:38:01"} -{"current_steps": 410, "total_steps": 4197, "loss": 0.3184, "lr": 9.761904761904762e-05, "epoch": 0.2929617720614505, "percentage": 9.77, "elapsed_time": "2:05:32", "remaining_time": "19:19:34"} -{"current_steps": 420, "total_steps": 4197, "loss": 0.473, "lr": 0.0001, "epoch": 0.30010718113612006, "percentage": 10.01, "elapsed_time": "2:07:12", "remaining_time": "19:03:55"} -{"current_steps": 430, "total_steps": 4197, "loss": 0.392, "lr": 9.99982704095424e-05, "epoch": 0.30725259021078954, "percentage": 10.25, "elapsed_time": "2:08:47", "remaining_time": "18:48:20"} -{"current_steps": 440, "total_steps": 4197, "loss": 0.3824, "lr": 9.999308175782893e-05, "epoch": 0.3143979992854591, "percentage": 10.48, "elapsed_time": "2:10:04", "remaining_time": "18:30:40"} -{"current_steps": 450, "total_steps": 4197, "loss": 0.4001, "lr": 9.998443440382927e-05, "epoch": 0.3215434083601286, "percentage": 10.72, "elapsed_time": "2:11:22", "remaining_time": "18:13:54"} -{"current_steps": 460, "total_steps": 4197, "loss": 0.4144, "lr": 9.997232894579868e-05, "epoch": 0.32868881743479816, "percentage": 10.96, "elapsed_time": "2:12:55", "remaining_time": "17:59:52"} -{"current_steps": 470, "total_steps": 4197, "loss": 0.3094, "lr": 9.995676622123655e-05, "epoch": 0.33583422650946765, "percentage": 11.2, "elapsed_time": "2:14:23", "remaining_time": "17:45:42"} -{"current_steps": 480, "total_steps": 4197, "loss": 0.2966, "lr": 9.993774730682845e-05, "epoch": 0.3429796355841372, "percentage": 11.44, "elapsed_time": "2:15:43", "remaining_time": "17:31:03"} -{"current_steps": 490, "total_steps": 4197, "loss": 0.3274, "lr": 9.991527351837174e-05, "epoch": 0.35012504465880673, "percentage": 11.68, "elapsed_time": "2:17:07", "remaining_time": "17:17:26"} -{"current_steps": 500, "total_steps": 4197, "loss": 0.4301, "lr": 9.988934641068436e-05, "epoch": 0.35727045373347627, "percentage": 11.91, "elapsed_time": "2:18:45", "remaining_time": "17:05:56"} -{"current_steps": 500, "total_steps": 4197, "epoch": 0.35727045373347627, "percentage": 11.91, "elapsed_time": "2:35:28", "remaining_time": "19:09:36"} +{"current_steps": 70, "total_steps": 4197, "loss": 0.5102, "lr": 1.6666666666666667e-05, "epoch": 0.050017863522686674, "percentage": 1.67, "elapsed_time": "0:09:47", "remaining_time": "9:37:03"} +{"current_steps": 80, "total_steps": 4197, "loss": 0.568, "lr": 1.9047619047619046e-05, "epoch": 0.0571632725973562, "percentage": 1.91, "elapsed_time": "0:11:03", "remaining_time": "9:29:20"} +{"current_steps": 90, "total_steps": 4197, "loss": 0.4883, "lr": 2.1428571428571428e-05, "epoch": 0.06430868167202572, "percentage": 2.14, "elapsed_time": "0:12:37", "remaining_time": "9:36:03"} +{"current_steps": 100, "total_steps": 4197, "loss": 0.417, "lr": 2.380952380952381e-05, "epoch": 0.07145409074669525, "percentage": 2.38, "elapsed_time": "0:13:56", "remaining_time": "9:30:54"} +{"current_steps": 100, "total_steps": 4197, "epoch": 0.07145409074669525, "percentage": 2.38, "elapsed_time": "0:30:38", "remaining_time": "20:55:03"} +{"current_steps": 110, "total_steps": 4197, "loss": 0.4595, "lr": 2.6190476190476192e-05, "epoch": 0.07859949982136477, "percentage": 2.62, "elapsed_time": "0:32:01", "remaining_time": "19:49:48"} +{"current_steps": 120, "total_steps": 4197, "loss": 0.4658, "lr": 2.857142857142857e-05, "epoch": 0.0857449088960343, "percentage": 2.86, "elapsed_time": "0:33:26", "remaining_time": "18:55:57"} +{"current_steps": 130, "total_steps": 4197, "loss": 0.4122, "lr": 3.095238095238095e-05, "epoch": 0.09289031797070382, "percentage": 3.1, "elapsed_time": "0:34:48", "remaining_time": "18:09:01"} +{"current_steps": 140, "total_steps": 4197, "loss": 0.4878, "lr": 3.3333333333333335e-05, "epoch": 0.10003572704537335, "percentage": 3.34, "elapsed_time": "0:36:22", "remaining_time": "17:34:15"} +{"current_steps": 150, "total_steps": 4197, "loss": 0.4168, "lr": 3.571428571428572e-05, "epoch": 0.10718113612004287, "percentage": 3.57, "elapsed_time": "0:37:51", "remaining_time": "17:01:28"} +{"current_steps": 160, "total_steps": 4197, "loss": 0.4298, "lr": 3.809523809523809e-05, "epoch": 0.1143265451947124, "percentage": 3.81, "elapsed_time": "0:39:12", "remaining_time": "16:29:12"} +{"current_steps": 170, "total_steps": 4197, "loss": 0.4413, "lr": 4.047619047619048e-05, "epoch": 0.12147195426938193, "percentage": 4.05, "elapsed_time": "0:40:30", "remaining_time": "15:59:39"} +{"current_steps": 180, "total_steps": 4197, "loss": 0.3734, "lr": 4.2857142857142856e-05, "epoch": 0.12861736334405144, "percentage": 4.29, "elapsed_time": "0:41:53", "remaining_time": "15:34:49"} +{"current_steps": 190, "total_steps": 4197, "loss": 0.4015, "lr": 4.523809523809524e-05, "epoch": 0.13576277241872098, "percentage": 4.53, "elapsed_time": "0:43:21", "remaining_time": "15:14:15"} +{"current_steps": 200, "total_steps": 4197, "loss": 0.4411, "lr": 4.761904761904762e-05, "epoch": 0.1429081814933905, "percentage": 4.77, "elapsed_time": "0:44:41", "remaining_time": "14:53:17"} +{"current_steps": 200, "total_steps": 4197, "epoch": 0.1429081814933905, "percentage": 4.77, "elapsed_time": "1:01:24", "remaining_time": "20:27:05"} +{"current_steps": 210, "total_steps": 4197, "loss": 0.3697, "lr": 5e-05, "epoch": 0.15005359056806003, "percentage": 5.0, "elapsed_time": "1:02:56", "remaining_time": "19:54:50"} +{"current_steps": 220, "total_steps": 4197, "loss": 0.4076, "lr": 5.2380952380952384e-05, "epoch": 0.15719899964272954, "percentage": 5.24, "elapsed_time": "1:04:18", "remaining_time": "19:22:24"} +{"current_steps": 230, "total_steps": 4197, "loss": 0.3307, "lr": 5.4761904761904766e-05, "epoch": 0.16434440871739908, "percentage": 5.48, "elapsed_time": "1:05:45", "remaining_time": "18:54:14"} +{"current_steps": 240, "total_steps": 4197, "loss": 0.4227, "lr": 5.714285714285714e-05, "epoch": 0.1714898177920686, "percentage": 5.72, "elapsed_time": "1:07:04", "remaining_time": "18:25:56"} +{"current_steps": 250, "total_steps": 4197, "loss": 0.4689, "lr": 5.9523809523809524e-05, "epoch": 0.17863522686673813, "percentage": 5.96, "elapsed_time": "1:08:27", "remaining_time": "18:00:46"} +{"current_steps": 260, "total_steps": 4197, "loss": 0.3618, "lr": 6.19047619047619e-05, "epoch": 0.18578063594140765, "percentage": 6.19, "elapsed_time": "1:09:51", "remaining_time": "17:37:54"} +{"current_steps": 270, "total_steps": 4197, "loss": 0.4668, "lr": 6.428571428571429e-05, "epoch": 0.19292604501607716, "percentage": 6.43, "elapsed_time": "1:11:17", "remaining_time": "17:16:47"} +{"current_steps": 280, "total_steps": 4197, "loss": 0.3525, "lr": 6.666666666666667e-05, "epoch": 0.2000714540907467, "percentage": 6.67, "elapsed_time": "1:12:33", "remaining_time": "16:55:07"} +{"current_steps": 290, "total_steps": 4197, "loss": 0.3707, "lr": 6.904761904761905e-05, "epoch": 0.2072168631654162, "percentage": 6.91, "elapsed_time": "1:13:57", "remaining_time": "16:36:22"} +{"current_steps": 300, "total_steps": 4197, "loss": 0.4461, "lr": 7.142857142857143e-05, "epoch": 0.21436227224008575, "percentage": 7.15, "elapsed_time": "1:15:23", "remaining_time": "16:19:14"} +{"current_steps": 300, "total_steps": 4197, "epoch": 0.21436227224008575, "percentage": 7.15, "elapsed_time": "1:32:05", "remaining_time": "19:56:17"} +{"current_steps": 310, "total_steps": 4197, "loss": 0.3439, "lr": 7.380952380952382e-05, "epoch": 0.22150768131475526, "percentage": 7.39, "elapsed_time": "1:33:39", "remaining_time": "19:34:22"} +{"current_steps": 320, "total_steps": 4197, "loss": 0.4492, "lr": 7.619047619047618e-05, "epoch": 0.2286530903894248, "percentage": 7.62, "elapsed_time": "1:35:09", "remaining_time": "19:12:56"} +{"current_steps": 330, "total_steps": 4197, "loss": 0.3399, "lr": 7.857142857142858e-05, "epoch": 0.2357984994640943, "percentage": 7.86, "elapsed_time": "1:36:27", "remaining_time": "18:50:23"} +{"current_steps": 340, "total_steps": 4197, "loss": 0.4336, "lr": 8.095238095238096e-05, "epoch": 0.24294390853876385, "percentage": 8.1, "elapsed_time": "1:38:08", "remaining_time": "18:33:24"} +{"current_steps": 350, "total_steps": 4197, "loss": 0.3628, "lr": 8.333333333333334e-05, "epoch": 0.2500893176134334, "percentage": 8.34, "elapsed_time": "1:39:43", "remaining_time": "18:16:11"} +{"current_steps": 360, "total_steps": 4197, "loss": 0.4113, "lr": 8.571428571428571e-05, "epoch": 0.2572347266881029, "percentage": 8.58, "elapsed_time": "1:41:17", "remaining_time": "17:59:31"} +{"current_steps": 370, "total_steps": 4197, "loss": 0.4009, "lr": 8.80952380952381e-05, "epoch": 0.2643801357627724, "percentage": 8.82, "elapsed_time": "1:42:59", "remaining_time": "17:45:13"} +{"current_steps": 380, "total_steps": 4197, "loss": 0.3399, "lr": 9.047619047619048e-05, "epoch": 0.27152554483744196, "percentage": 9.05, "elapsed_time": "1:44:26", "remaining_time": "17:29:07"} +{"current_steps": 390, "total_steps": 4197, "loss": 0.326, "lr": 9.285714285714286e-05, "epoch": 0.27867095391211144, "percentage": 9.29, "elapsed_time": "1:45:54", "remaining_time": "17:13:49"} +{"current_steps": 400, "total_steps": 4197, "loss": 0.4228, "lr": 9.523809523809524e-05, "epoch": 0.285816362986781, "percentage": 9.53, "elapsed_time": "1:47:18", "remaining_time": "16:58:40"} +{"current_steps": 400, "total_steps": 4197, "epoch": 0.285816362986781, "percentage": 9.53, "elapsed_time": "2:04:01", "remaining_time": "19:37:20"} +{"current_steps": 410, "total_steps": 4197, "loss": 0.3184, "lr": 9.761904761904762e-05, "epoch": 0.2929617720614505, "percentage": 9.77, "elapsed_time": "2:05:28", "remaining_time": "19:18:54"} +{"current_steps": 420, "total_steps": 4197, "loss": 0.473, "lr": 0.0001, "epoch": 0.30010718113612006, "percentage": 10.01, "elapsed_time": "2:07:07", "remaining_time": "19:03:16"} +{"current_steps": 430, "total_steps": 4197, "loss": 0.392, "lr": 9.99982704095424e-05, "epoch": 0.30725259021078954, "percentage": 10.25, "elapsed_time": "2:08:43", "remaining_time": "18:47:42"} +{"current_steps": 440, "total_steps": 4197, "loss": 0.3824, "lr": 9.999308175782893e-05, "epoch": 0.3143979992854591, "percentage": 10.48, "elapsed_time": "2:10:00", "remaining_time": "18:30:02"} +{"current_steps": 450, "total_steps": 4197, "loss": 0.4001, "lr": 9.998443440382927e-05, "epoch": 0.3215434083601286, "percentage": 10.72, "elapsed_time": "2:11:17", "remaining_time": "18:13:17"} +{"current_steps": 460, "total_steps": 4197, "loss": 0.4144, "lr": 9.997232894579868e-05, "epoch": 0.32868881743479816, "percentage": 10.96, "elapsed_time": "2:12:50", "remaining_time": "17:59:15"} +{"current_steps": 470, "total_steps": 4197, "loss": 0.3094, "lr": 9.995676622123655e-05, "epoch": 0.33583422650946765, "percentage": 11.2, "elapsed_time": "2:14:19", "remaining_time": "17:45:06"} +{"current_steps": 480, "total_steps": 4197, "loss": 0.2966, "lr": 9.993774730682845e-05, "epoch": 0.3429796355841372, "percentage": 11.44, "elapsed_time": "2:15:39", "remaining_time": "17:30:28"} +{"current_steps": 490, "total_steps": 4197, "loss": 0.3274, "lr": 9.991527351837174e-05, "epoch": 0.35012504465880673, "percentage": 11.68, "elapsed_time": "2:17:03", "remaining_time": "17:16:51"} +{"current_steps": 500, "total_steps": 4197, "loss": 0.4301, "lr": 9.988934641068436e-05, "epoch": 0.35727045373347627, "percentage": 11.91, "elapsed_time": "2:18:40", "remaining_time": "17:05:21"} +{"current_steps": 500, "total_steps": 4197, "epoch": 0.35727045373347627, "percentage": 11.91, "elapsed_time": "2:35:23", "remaining_time": "19:08:56"} diff --git a/training_args.bin b/training_args.bin index 1f3ccd8f62a891e21c1c081d4a259bef677031db..164046c2da02585c19e1a5c44e00c9633feafc2f 100644 --- a/training_args.bin +++ b/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2782bf8a609c154eefb42753d138dafcba3aa0ea92b9d87f5184a3cbc4f0f0be +oid sha256:367406c6dccce148fee91e86e6fa08ac415e3c6a88231b8f42f8d49575bf98f6 size 5624