diff --git a/low-shot-task-specific-100-ex/coin_flip/best_model/adapter_config.json b/low-shot-task-specific-100-ex/coin_flip/best_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8089cffca57122c6474c8736277a1ee41f75c20e --- /dev/null +++ b/low-shot-task-specific-100-ex/coin_flip/best_model/adapter_config.json @@ -0,0 +1,21 @@ +{ + "base_model_name_or_path": "meta-llama/Llama-2-13b-hf", + "bias": "none", + "enable_lora": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "lora_alpha": 16, + "lora_dropout": 0.05, + "merge_weights": false, + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/low-shot-task-specific-100-ex/coin_flip/best_model/adapter_model.bin b/low-shot-task-specific-100-ex/coin_flip/best_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..43a34bff6604f26cbe9799377031a3f351b7bf22 --- /dev/null +++ b/low-shot-task-specific-100-ex/coin_flip/best_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd47cb9e4d59e5592dc86a4825506634b4b81810e2c8cce8d4ee00b654660681 +size 104973389 diff --git a/low-shot-task-specific-100-ex/coin_flip/best_model/optimizer.pt b/low-shot-task-specific-100-ex/coin_flip/best_model/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f71ab05a043e24abe7409c59ecdc058feae959e2 --- /dev/null +++ b/low-shot-task-specific-100-ex/coin_flip/best_model/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89cb77b938fcbec8ef8fc937366b88a4eb9b3dda7246bcdf5841e8d1ff389eb7 +size 209984517 diff --git a/low-shot-task-specific-100-ex/coin_flip/best_model/rng_state.pth b/low-shot-task-specific-100-ex/coin_flip/best_model/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..80a4b24ac39d2e283b0d0adbe575e539e7cb8444 --- /dev/null +++ b/low-shot-task-specific-100-ex/coin_flip/best_model/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b683f78ced98226c01d86c04f25e6a3295aa86e50560601c917694a914d68aad +size 14575 diff --git a/low-shot-task-specific-100-ex/coin_flip/best_model/scheduler.pt b/low-shot-task-specific-100-ex/coin_flip/best_model/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..879d7ab75d8479e9604f08d5ff328f1b267075dc --- /dev/null +++ b/low-shot-task-specific-100-ex/coin_flip/best_model/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb69dbf6f829f506b60594816da07bc6940e3d9adce52ab976bf73b294ac5127 +size 627 diff --git a/low-shot-task-specific-100-ex/coin_flip/best_model/trainer_state.json b/low-shot-task-specific-100-ex/coin_flip/best_model/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b8bacf4cac67d85ed54a334a42d91410fb2beef5 --- /dev/null +++ b/low-shot-task-specific-100-ex/coin_flip/best_model/trainer_state.json @@ -0,0 +1,95 @@ +{ + "best_metric": 0.7364377379417419, + "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-100-ex/coin_flip/checkpoint-20", + "epoch": 8.0, + "eval_steps": 500, + "global_step": 20, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.8, + "eval_loss": 4.770002365112305, + "eval_runtime": 0.7512, + "eval_samples_per_second": 26.626, + "eval_steps_per_second": 3.994, + "step": 2 + }, + { + "epoch": 2.0, + "eval_loss": 4.635828971862793, + "eval_runtime": 0.7546, + "eval_samples_per_second": 26.505, + "eval_steps_per_second": 3.976, + "step": 5 + }, + { + "epoch": 2.8, + "eval_loss": 4.354025363922119, + "eval_runtime": 0.7548, + "eval_samples_per_second": 26.499, + "eval_steps_per_second": 3.975, + "step": 7 + }, + { + "epoch": 4.0, + "learning_rate": 5.9999999999999995e-05, + "loss": 4.5475, + "step": 10 + }, + { + "epoch": 4.0, + "eval_loss": 3.8307082653045654, + "eval_runtime": 0.7548, + "eval_samples_per_second": 26.498, + "eval_steps_per_second": 3.975, + "step": 10 + }, + { + "epoch": 4.8, + "eval_loss": 3.6256070137023926, + "eval_runtime": 0.7542, + "eval_samples_per_second": 26.519, + "eval_steps_per_second": 3.978, + "step": 12 + }, + { + "epoch": 6.0, + "eval_loss": 2.8008601665496826, + "eval_runtime": 0.7532, + "eval_samples_per_second": 26.552, + "eval_steps_per_second": 3.983, + "step": 15 + }, + { + "epoch": 6.8, + "eval_loss": 1.8803138732910156, + "eval_runtime": 0.7625, + "eval_samples_per_second": 26.229, + "eval_steps_per_second": 3.934, + "step": 17 + }, + { + "epoch": 8.0, + "learning_rate": 0.00011399999999999999, + "loss": 2.7123, + "step": 20 + }, + { + "epoch": 8.0, + "eval_loss": 0.7364377379417419, + "eval_runtime": 0.7535, + "eval_samples_per_second": 26.542, + "eval_steps_per_second": 3.981, + "step": 20 + } + ], + "logging_steps": 10, + "max_steps": 20, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 2393497935544320.0, + "trial_name": null, + "trial_params": null +} diff --git a/low-shot-task-specific-100-ex/coin_flip/best_model/training_args.bin b/low-shot-task-specific-100-ex/coin_flip/best_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..cca1d50ab2c5b9626b78953f59c1d21278960586 --- /dev/null +++ b/low-shot-task-specific-100-ex/coin_flip/best_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfd2f888c4bb95dd2b9338dcfeb755b0ed743f36bb3535194839914be97ca407 +size 4091 diff --git a/low-shot-task-specific-100-ex/cola/best_model/adapter_config.json b/low-shot-task-specific-100-ex/cola/best_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8089cffca57122c6474c8736277a1ee41f75c20e --- /dev/null +++ b/low-shot-task-specific-100-ex/cola/best_model/adapter_config.json @@ -0,0 +1,21 @@ +{ + "base_model_name_or_path": "meta-llama/Llama-2-13b-hf", + "bias": "none", + "enable_lora": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "lora_alpha": 16, + "lora_dropout": 0.05, + "merge_weights": false, + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/low-shot-task-specific-100-ex/cola/best_model/adapter_model.bin b/low-shot-task-specific-100-ex/cola/best_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..f15e5e26d98e66d031af59fce1b51b20006f0b67 --- /dev/null +++ b/low-shot-task-specific-100-ex/cola/best_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1223c9b19186bca2d1f9a40d1afb7b9d667bb02a7b7fdf5a29875a3ba0ad2c8f +size 104973389 diff --git a/low-shot-task-specific-100-ex/cola/best_model/optimizer.pt b/low-shot-task-specific-100-ex/cola/best_model/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..2bd8e3fbb9fc30c3147c92e091603ec921a195de --- /dev/null +++ b/low-shot-task-specific-100-ex/cola/best_model/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52d58812c7b440cb5008d9896abfb5be121a3021dd1ec3e407cf5ece524078cb +size 209984517 diff --git a/low-shot-task-specific-100-ex/cola/best_model/rng_state.pth b/low-shot-task-specific-100-ex/cola/best_model/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..411f311183b61678aeb511e12775b50c9f09b1bf --- /dev/null +++ b/low-shot-task-specific-100-ex/cola/best_model/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cfb4b1b5009a9b015311afd9778b0ef0a4a8bc389c04629a1d2e68a84aeea44 +size 14575 diff --git a/low-shot-task-specific-100-ex/cola/best_model/scheduler.pt b/low-shot-task-specific-100-ex/cola/best_model/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..10701e0231c43fd7fbee03df6bea6a9e4d381788 --- /dev/null +++ b/low-shot-task-specific-100-ex/cola/best_model/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b63e6b896793d947452e2a5278b1333dd1dcc71d94b37f92a47c6766156ba1e +size 627 diff --git a/low-shot-task-specific-100-ex/cola/best_model/trainer_state.json b/low-shot-task-specific-100-ex/cola/best_model/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a4b2df95bc9f3a5231487274377d693b7b50f521 --- /dev/null +++ b/low-shot-task-specific-100-ex/cola/best_model/trainer_state.json @@ -0,0 +1,95 @@ +{ + "best_metric": 3.2643978595733643, + "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-100-ex/cola/checkpoint-20", + "epoch": 8.0, + "eval_steps": 500, + "global_step": 20, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.8, + "eval_loss": 7.496130466461182, + "eval_runtime": 0.651, + "eval_samples_per_second": 30.721, + "eval_steps_per_second": 4.608, + "step": 2 + }, + { + "epoch": 2.0, + "eval_loss": 7.389729976654053, + "eval_runtime": 0.6506, + "eval_samples_per_second": 30.74, + "eval_steps_per_second": 4.611, + "step": 5 + }, + { + "epoch": 2.8, + "eval_loss": 7.235182285308838, + "eval_runtime": 0.6495, + "eval_samples_per_second": 30.792, + "eval_steps_per_second": 4.619, + "step": 7 + }, + { + "epoch": 4.0, + "learning_rate": 5.9999999999999995e-05, + "loss": 7.3487, + "step": 10 + }, + { + "epoch": 4.0, + "eval_loss": 6.905265808105469, + "eval_runtime": 0.6482, + "eval_samples_per_second": 30.855, + "eval_steps_per_second": 4.628, + "step": 10 + }, + { + "epoch": 4.8, + "eval_loss": 6.568731784820557, + "eval_runtime": 0.6499, + "eval_samples_per_second": 30.774, + "eval_steps_per_second": 4.616, + "step": 12 + }, + { + "epoch": 6.0, + "eval_loss": 5.829730987548828, + "eval_runtime": 0.6495, + "eval_samples_per_second": 30.791, + "eval_steps_per_second": 4.619, + "step": 15 + }, + { + "epoch": 6.8, + "eval_loss": 5.088259220123291, + "eval_runtime": 0.6485, + "eval_samples_per_second": 30.84, + "eval_steps_per_second": 4.626, + "step": 17 + }, + { + "epoch": 8.0, + "learning_rate": 0.00011999999999999999, + "loss": 5.6641, + "step": 20 + }, + { + "epoch": 8.0, + "eval_loss": 3.2643978595733643, + "eval_runtime": 0.6478, + "eval_samples_per_second": 30.872, + "eval_steps_per_second": 4.631, + "step": 20 + } + ], + "logging_steps": 10, + "max_steps": 20, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 862945020149760.0, + "trial_name": null, + "trial_params": null +} diff --git a/low-shot-task-specific-100-ex/cola/best_model/training_args.bin b/low-shot-task-specific-100-ex/cola/best_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4a37896dcde66f6bf0b41321830abc936c5c42ec --- /dev/null +++ b/low-shot-task-specific-100-ex/cola/best_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7584dccd05bece2a9ec3f622ce5aa37500e33036a484978bd2e232bdb92a42bd +size 4091 diff --git a/low-shot-task-specific-100-ex/commonsense_qa/best_model/adapter_config.json b/low-shot-task-specific-100-ex/commonsense_qa/best_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8089cffca57122c6474c8736277a1ee41f75c20e --- /dev/null +++ b/low-shot-task-specific-100-ex/commonsense_qa/best_model/adapter_config.json @@ -0,0 +1,21 @@ +{ + "base_model_name_or_path": "meta-llama/Llama-2-13b-hf", + "bias": "none", + "enable_lora": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "lora_alpha": 16, + "lora_dropout": 0.05, + "merge_weights": false, + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/low-shot-task-specific-100-ex/commonsense_qa/best_model/adapter_model.bin b/low-shot-task-specific-100-ex/commonsense_qa/best_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..a4da00c0b315fec7a594473ba0df88a8a17210e4 --- /dev/null +++ b/low-shot-task-specific-100-ex/commonsense_qa/best_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f63d75b63e7b225c36c047b776e506742a0eb06a2cb34d6ac32cbc8faea89420 +size 104973389 diff --git a/low-shot-task-specific-100-ex/commonsense_qa/best_model/optimizer.pt b/low-shot-task-specific-100-ex/commonsense_qa/best_model/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..766f0661a1d679216d2d8ff1b310ed3035de7d12 --- /dev/null +++ b/low-shot-task-specific-100-ex/commonsense_qa/best_model/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fdd8a85dbed2d83977bbce5185352392812b7736e12294c276e5e9046d46672 +size 209984517 diff --git a/low-shot-task-specific-100-ex/commonsense_qa/best_model/rng_state.pth b/low-shot-task-specific-100-ex/commonsense_qa/best_model/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..2cd9ca3bea976d1f3c3e6223b8d4dbc35042bcc9 --- /dev/null +++ b/low-shot-task-specific-100-ex/commonsense_qa/best_model/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e7d44b7a4cdc1eb16e6709c586b407ac6e8b61b77e11a4182c69dce6b3efbf4 +size 14575 diff --git a/low-shot-task-specific-100-ex/commonsense_qa/best_model/scheduler.pt b/low-shot-task-specific-100-ex/commonsense_qa/best_model/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..10701e0231c43fd7fbee03df6bea6a9e4d381788 --- /dev/null +++ b/low-shot-task-specific-100-ex/commonsense_qa/best_model/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b63e6b896793d947452e2a5278b1333dd1dcc71d94b37f92a47c6766156ba1e +size 627 diff --git a/low-shot-task-specific-100-ex/commonsense_qa/best_model/trainer_state.json b/low-shot-task-specific-100-ex/commonsense_qa/best_model/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2e9b1545afdbb64590d3888db8909a566c358f7a --- /dev/null +++ b/low-shot-task-specific-100-ex/commonsense_qa/best_model/trainer_state.json @@ -0,0 +1,95 @@ +{ + "best_metric": 1.0019607543945312, + "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-100-ex/commonsense_qa/checkpoint-20", + "epoch": 8.0, + "eval_steps": 500, + "global_step": 20, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.8, + "eval_loss": 5.86135196685791, + "eval_runtime": 0.8193, + "eval_samples_per_second": 24.41, + "eval_steps_per_second": 3.662, + "step": 2 + }, + { + "epoch": 2.0, + "eval_loss": 5.789961814880371, + "eval_runtime": 0.8276, + "eval_samples_per_second": 24.167, + "eval_steps_per_second": 3.625, + "step": 5 + }, + { + "epoch": 2.8, + "eval_loss": 5.628936767578125, + "eval_runtime": 0.8224, + "eval_samples_per_second": 24.32, + "eval_steps_per_second": 3.648, + "step": 7 + }, + { + "epoch": 4.0, + "learning_rate": 5.9999999999999995e-05, + "loss": 5.5941, + "step": 10 + }, + { + "epoch": 4.0, + "eval_loss": 4.905825614929199, + "eval_runtime": 0.8285, + "eval_samples_per_second": 24.14, + "eval_steps_per_second": 3.621, + "step": 10 + }, + { + "epoch": 4.8, + "eval_loss": 4.051631927490234, + "eval_runtime": 0.8299, + "eval_samples_per_second": 24.099, + "eval_steps_per_second": 3.615, + "step": 12 + }, + { + "epoch": 6.0, + "eval_loss": 3.0537314414978027, + "eval_runtime": 0.8237, + "eval_samples_per_second": 24.28, + "eval_steps_per_second": 3.642, + "step": 15 + }, + { + "epoch": 6.8, + "eval_loss": 2.2741684913635254, + "eval_runtime": 0.8216, + "eval_samples_per_second": 24.344, + "eval_steps_per_second": 3.652, + "step": 17 + }, + { + "epoch": 8.0, + "learning_rate": 0.00011999999999999999, + "loss": 3.1561, + "step": 20 + }, + { + "epoch": 8.0, + "eval_loss": 1.0019607543945312, + "eval_runtime": 0.8235, + "eval_samples_per_second": 24.287, + "eval_steps_per_second": 3.643, + "step": 20 + } + ], + "logging_steps": 10, + "max_steps": 20, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 3078414183628800.0, + "trial_name": null, + "trial_params": null +} diff --git a/low-shot-task-specific-100-ex/commonsense_qa/best_model/training_args.bin b/low-shot-task-specific-100-ex/commonsense_qa/best_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..265b74d9fafc8f7f824bf4da4a2d7247f2888a72 --- /dev/null +++ b/low-shot-task-specific-100-ex/commonsense_qa/best_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcaa3c829539bb3746b5f7e0dafb15dbfccde57e3d5650c68eb42e3fbbacfa3e +size 4091 diff --git a/low-shot-task-specific-100-ex/emotion/best_model/adapter_config.json b/low-shot-task-specific-100-ex/emotion/best_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8089cffca57122c6474c8736277a1ee41f75c20e --- /dev/null +++ b/low-shot-task-specific-100-ex/emotion/best_model/adapter_config.json @@ -0,0 +1,21 @@ +{ + "base_model_name_or_path": "meta-llama/Llama-2-13b-hf", + "bias": "none", + "enable_lora": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "lora_alpha": 16, + "lora_dropout": 0.05, + "merge_weights": false, + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/low-shot-task-specific-100-ex/emotion/best_model/adapter_model.bin b/low-shot-task-specific-100-ex/emotion/best_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..c013d8b3d8472cd3f790e9de0e3092238a10f4e3 --- /dev/null +++ b/low-shot-task-specific-100-ex/emotion/best_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23178e724f805ef456b15b82726bde7476bb7f85943ce21b94a72364d19f8459 +size 104973389 diff --git a/low-shot-task-specific-100-ex/emotion/best_model/optimizer.pt b/low-shot-task-specific-100-ex/emotion/best_model/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0131cbd4a3dc71d5fbad3200d6611eccd0cd1cc3 --- /dev/null +++ b/low-shot-task-specific-100-ex/emotion/best_model/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73979bacc3db61859edeb2672ed3234900273717c07987eb84d60c9ebb4e30cc +size 209984517 diff --git a/low-shot-task-specific-100-ex/emotion/best_model/rng_state.pth b/low-shot-task-specific-100-ex/emotion/best_model/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..6069cccc74ba7a762067123d6dd43795e852a812 --- /dev/null +++ b/low-shot-task-specific-100-ex/emotion/best_model/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a608d60cb0a08df4efb2f7fa2cd8590824d7f4a718f101d397116dc4b5272878 +size 14575 diff --git a/low-shot-task-specific-100-ex/emotion/best_model/scheduler.pt b/low-shot-task-specific-100-ex/emotion/best_model/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..10701e0231c43fd7fbee03df6bea6a9e4d381788 --- /dev/null +++ b/low-shot-task-specific-100-ex/emotion/best_model/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b63e6b896793d947452e2a5278b1333dd1dcc71d94b37f92a47c6766156ba1e +size 627 diff --git a/low-shot-task-specific-100-ex/emotion/best_model/trainer_state.json b/low-shot-task-specific-100-ex/emotion/best_model/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3b9c1fbef0340dbd00e8dc48234d836ad4430c31 --- /dev/null +++ b/low-shot-task-specific-100-ex/emotion/best_model/trainer_state.json @@ -0,0 +1,95 @@ +{ + "best_metric": 3.7403335571289062, + "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-100-ex/emotion/checkpoint-20", + "epoch": 8.0, + "eval_steps": 500, + "global_step": 20, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.8, + "eval_loss": 6.759609222412109, + "eval_runtime": 0.7242, + "eval_samples_per_second": 27.617, + "eval_steps_per_second": 4.143, + "step": 2 + }, + { + "epoch": 2.0, + "eval_loss": 6.701653957366943, + "eval_runtime": 0.7245, + "eval_samples_per_second": 27.606, + "eval_steps_per_second": 4.141, + "step": 5 + }, + { + "epoch": 2.8, + "eval_loss": 6.61182165145874, + "eval_runtime": 0.7269, + "eval_samples_per_second": 27.515, + "eval_steps_per_second": 4.127, + "step": 7 + }, + { + "epoch": 4.0, + "learning_rate": 5.9999999999999995e-05, + "loss": 6.5973, + "step": 10 + }, + { + "epoch": 4.0, + "eval_loss": 6.34386682510376, + "eval_runtime": 0.7251, + "eval_samples_per_second": 27.583, + "eval_steps_per_second": 4.137, + "step": 10 + }, + { + "epoch": 4.8, + "eval_loss": 5.928864479064941, + "eval_runtime": 0.7256, + "eval_samples_per_second": 27.565, + "eval_steps_per_second": 4.135, + "step": 12 + }, + { + "epoch": 6.0, + "eval_loss": 5.030377388000488, + "eval_runtime": 0.7248, + "eval_samples_per_second": 27.593, + "eval_steps_per_second": 4.139, + "step": 15 + }, + { + "epoch": 6.8, + "eval_loss": 4.649694442749023, + "eval_runtime": 0.7259, + "eval_samples_per_second": 27.554, + "eval_steps_per_second": 4.133, + "step": 17 + }, + { + "epoch": 8.0, + "learning_rate": 0.00011999999999999999, + "loss": 5.1116, + "step": 20 + }, + { + "epoch": 8.0, + "eval_loss": 3.7403335571289062, + "eval_runtime": 0.7246, + "eval_samples_per_second": 27.602, + "eval_steps_per_second": 4.14, + "step": 20 + } + ], + "logging_steps": 10, + "max_steps": 20, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 2252558490992640.0, + "trial_name": null, + "trial_params": null +} diff --git a/low-shot-task-specific-100-ex/emotion/best_model/training_args.bin b/low-shot-task-specific-100-ex/emotion/best_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e87b4c75f75da916f77116eb0307ea835da6d074 --- /dev/null +++ b/low-shot-task-specific-100-ex/emotion/best_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cec69ccd4746da9921e4026850479fcbb626150ed87fa80dc42db47d3486b211 +size 4091 diff --git a/low-shot-task-specific-100-ex/social_i_qa/best_model/adapter_config.json b/low-shot-task-specific-100-ex/social_i_qa/best_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8089cffca57122c6474c8736277a1ee41f75c20e --- /dev/null +++ b/low-shot-task-specific-100-ex/social_i_qa/best_model/adapter_config.json @@ -0,0 +1,21 @@ +{ + "base_model_name_or_path": "meta-llama/Llama-2-13b-hf", + "bias": "none", + "enable_lora": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "lora_alpha": 16, + "lora_dropout": 0.05, + "merge_weights": false, + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/low-shot-task-specific-100-ex/social_i_qa/best_model/adapter_model.bin b/low-shot-task-specific-100-ex/social_i_qa/best_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..52344dc0bb0985fe987f51ee88dd4152fd3ab084 --- /dev/null +++ b/low-shot-task-specific-100-ex/social_i_qa/best_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef60f33031060f9266bfe61d170eb7c269e012245e37652a45d7c3131c8d3add +size 104973389 diff --git a/low-shot-task-specific-100-ex/social_i_qa/best_model/optimizer.pt b/low-shot-task-specific-100-ex/social_i_qa/best_model/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..494b1dd20be0861d0affb9f8875a792f9d811f1c --- /dev/null +++ b/low-shot-task-specific-100-ex/social_i_qa/best_model/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abbeb1912756da0877b31d08f01c2fd522bbb55dc819144a235dd9eae39560ec +size 209984517 diff --git a/low-shot-task-specific-100-ex/social_i_qa/best_model/rng_state.pth b/low-shot-task-specific-100-ex/social_i_qa/best_model/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..82786a327b2ffca4e7e788c6a2882ecaa836bf13 --- /dev/null +++ b/low-shot-task-specific-100-ex/social_i_qa/best_model/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ab0262a43d8b8116e7c1b8d394e85687822d3581f13149fbc20dd3f065e85a6 +size 14575 diff --git a/low-shot-task-specific-100-ex/social_i_qa/best_model/scheduler.pt b/low-shot-task-specific-100-ex/social_i_qa/best_model/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..10701e0231c43fd7fbee03df6bea6a9e4d381788 --- /dev/null +++ b/low-shot-task-specific-100-ex/social_i_qa/best_model/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b63e6b896793d947452e2a5278b1333dd1dcc71d94b37f92a47c6766156ba1e +size 627 diff --git a/low-shot-task-specific-100-ex/social_i_qa/best_model/trainer_state.json b/low-shot-task-specific-100-ex/social_i_qa/best_model/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..bffe5fbe450f94a29147bbf72491e20ef61973e9 --- /dev/null +++ b/low-shot-task-specific-100-ex/social_i_qa/best_model/trainer_state.json @@ -0,0 +1,95 @@ +{ + "best_metric": 0.821982741355896, + "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-100-ex/social_i_qa/checkpoint-20", + "epoch": 8.0, + "eval_steps": 500, + "global_step": 20, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.8, + "eval_loss": 6.487459659576416, + "eval_runtime": 0.8925, + "eval_samples_per_second": 22.408, + "eval_steps_per_second": 3.361, + "step": 2 + }, + { + "epoch": 2.0, + "eval_loss": 6.3755621910095215, + "eval_runtime": 0.897, + "eval_samples_per_second": 22.297, + "eval_steps_per_second": 3.345, + "step": 5 + }, + { + "epoch": 2.8, + "eval_loss": 6.16649866104126, + "eval_runtime": 0.8963, + "eval_samples_per_second": 22.314, + "eval_steps_per_second": 3.347, + "step": 7 + }, + { + "epoch": 4.0, + "learning_rate": 5.9999999999999995e-05, + "loss": 6.1511, + "step": 10 + }, + { + "epoch": 4.0, + "eval_loss": 5.644223690032959, + "eval_runtime": 0.8985, + "eval_samples_per_second": 22.259, + "eval_steps_per_second": 3.339, + "step": 10 + }, + { + "epoch": 4.8, + "eval_loss": 4.884535312652588, + "eval_runtime": 0.9023, + "eval_samples_per_second": 22.166, + "eval_steps_per_second": 3.325, + "step": 12 + }, + { + "epoch": 6.0, + "eval_loss": 3.0946044921875, + "eval_runtime": 0.8991, + "eval_samples_per_second": 22.245, + "eval_steps_per_second": 3.337, + "step": 15 + }, + { + "epoch": 6.8, + "eval_loss": 2.1555423736572266, + "eval_runtime": 0.9036, + "eval_samples_per_second": 22.133, + "eval_steps_per_second": 3.32, + "step": 17 + }, + { + "epoch": 8.0, + "learning_rate": 0.00011999999999999999, + "loss": 3.3011, + "step": 20 + }, + { + "epoch": 8.0, + "eval_loss": 0.821982741355896, + "eval_runtime": 0.8939, + "eval_samples_per_second": 22.373, + "eval_steps_per_second": 3.356, + "step": 20 + } + ], + "logging_steps": 10, + "max_steps": 20, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 3338039476224000.0, + "trial_name": null, + "trial_params": null +} diff --git a/low-shot-task-specific-100-ex/social_i_qa/best_model/training_args.bin b/low-shot-task-specific-100-ex/social_i_qa/best_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..dd47b3dd7b4b1b9240e4c9bb5f73e480a555a266 --- /dev/null +++ b/low-shot-task-specific-100-ex/social_i_qa/best_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b54b052f5cac8f07391027a3b0c53c8bffff1028fc30595e86e27a07c6b887e +size 4091 diff --git a/low-shot-task-specific-100-ex/sst/best_model/adapter_config.json b/low-shot-task-specific-100-ex/sst/best_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8089cffca57122c6474c8736277a1ee41f75c20e --- /dev/null +++ b/low-shot-task-specific-100-ex/sst/best_model/adapter_config.json @@ -0,0 +1,21 @@ +{ + "base_model_name_or_path": "meta-llama/Llama-2-13b-hf", + "bias": "none", + "enable_lora": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "lora_alpha": 16, + "lora_dropout": 0.05, + "merge_weights": false, + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/low-shot-task-specific-100-ex/sst/best_model/adapter_model.bin b/low-shot-task-specific-100-ex/sst/best_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..dc2e7d8a3b7651d93dfc40765b488c8706f670f8 --- /dev/null +++ b/low-shot-task-specific-100-ex/sst/best_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0ab51ab7c25129d35e69c6a5c9f19421507d7aea82814b2fbf3fbb9e27e0120 +size 104973389 diff --git a/low-shot-task-specific-100-ex/sst/best_model/optimizer.pt b/low-shot-task-specific-100-ex/sst/best_model/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ae601c20ca349aeaac8ffc25b67793bbfc5c66c8 --- /dev/null +++ b/low-shot-task-specific-100-ex/sst/best_model/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75b2aab283007757d7da6dabd8a4d1a3e6a88f4f3b3c863b51c6af953abd76f6 +size 209984517 diff --git a/low-shot-task-specific-100-ex/sst/best_model/rng_state.pth b/low-shot-task-specific-100-ex/sst/best_model/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..3a7c4ffda07384c11f19852f28450dcb1eebe30b --- /dev/null +++ b/low-shot-task-specific-100-ex/sst/best_model/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd5c668f408d63f497259a79124b33e953210b9fb047bdfd418e598fe5018820 +size 14575 diff --git a/low-shot-task-specific-100-ex/sst/best_model/scheduler.pt b/low-shot-task-specific-100-ex/sst/best_model/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..920b76ebb92522ccb9b9a6f0fea021ad71358e06 --- /dev/null +++ b/low-shot-task-specific-100-ex/sst/best_model/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:daa948d8422c0ff5c84b04b3a278a49a8c2106994063cdc84b33b076944943d4 +size 627 diff --git a/low-shot-task-specific-100-ex/sst/best_model/trainer_state.json b/low-shot-task-specific-100-ex/sst/best_model/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..53314106524931b4dac2ac4cb000909c7e3466e9 --- /dev/null +++ b/low-shot-task-specific-100-ex/sst/best_model/trainer_state.json @@ -0,0 +1,95 @@ +{ + "best_metric": 4.71249532699585, + "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-100-ex/sst/checkpoint-20", + "epoch": 8.0, + "eval_steps": 500, + "global_step": 20, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.8, + "eval_loss": 7.7774858474731445, + "eval_runtime": 0.6674, + "eval_samples_per_second": 29.965, + "eval_steps_per_second": 4.495, + "step": 2 + }, + { + "epoch": 2.0, + "eval_loss": 7.758080959320068, + "eval_runtime": 0.6648, + "eval_samples_per_second": 30.083, + "eval_steps_per_second": 4.512, + "step": 5 + }, + { + "epoch": 2.8, + "eval_loss": 7.7222137451171875, + "eval_runtime": 0.6677, + "eval_samples_per_second": 29.954, + "eval_steps_per_second": 4.493, + "step": 7 + }, + { + "epoch": 4.0, + "learning_rate": 4.2e-05, + "loss": 7.716, + "step": 10 + }, + { + "epoch": 4.0, + "eval_loss": 7.386101722717285, + "eval_runtime": 0.6691, + "eval_samples_per_second": 29.892, + "eval_steps_per_second": 4.484, + "step": 10 + }, + { + "epoch": 4.8, + "eval_loss": 7.176695346832275, + "eval_runtime": 0.6671, + "eval_samples_per_second": 29.982, + "eval_steps_per_second": 4.497, + "step": 12 + }, + { + "epoch": 6.0, + "eval_loss": 6.573421478271484, + "eval_runtime": 0.6678, + "eval_samples_per_second": 29.951, + "eval_steps_per_second": 4.493, + "step": 15 + }, + { + "epoch": 6.8, + "eval_loss": 6.066993236541748, + "eval_runtime": 0.6693, + "eval_samples_per_second": 29.881, + "eval_steps_per_second": 4.482, + "step": 17 + }, + { + "epoch": 8.0, + "learning_rate": 0.000102, + "loss": 6.5057, + "step": 20 + }, + { + "epoch": 8.0, + "eval_loss": 4.71249532699585, + "eval_runtime": 0.6679, + "eval_samples_per_second": 29.944, + "eval_steps_per_second": 4.492, + "step": 20 + } + ], + "logging_steps": 10, + "max_steps": 20, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 1760506745978880.0, + "trial_name": null, + "trial_params": null +} diff --git a/low-shot-task-specific-100-ex/sst/best_model/training_args.bin b/low-shot-task-specific-100-ex/sst/best_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..cf1ab3fc037dc8dc2c606c56b7f3e1e9fa0ff075 --- /dev/null +++ b/low-shot-task-specific-100-ex/sst/best_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82db56540ff893c3d36fa725b2ccaa3a282af73561fa81b1cd27f7673a28d02f +size 4091 diff --git a/low-shot-task-specific-100-ex/sum/best_model/adapter_config.json b/low-shot-task-specific-100-ex/sum/best_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8089cffca57122c6474c8736277a1ee41f75c20e --- /dev/null +++ b/low-shot-task-specific-100-ex/sum/best_model/adapter_config.json @@ -0,0 +1,21 @@ +{ + "base_model_name_or_path": "meta-llama/Llama-2-13b-hf", + "bias": "none", + "enable_lora": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "lora_alpha": 16, + "lora_dropout": 0.05, + "merge_weights": false, + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/low-shot-task-specific-100-ex/sum/best_model/adapter_model.bin b/low-shot-task-specific-100-ex/sum/best_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..567056a0d5ef8311e0f4a4652ddf71405394affd --- /dev/null +++ b/low-shot-task-specific-100-ex/sum/best_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7cc16d00544954ce77b8cae15ccb1f3d592abfd2e8a630f9f4afd1fecf5a7be +size 104973389 diff --git a/low-shot-task-specific-100-ex/sum/best_model/optimizer.pt b/low-shot-task-specific-100-ex/sum/best_model/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..64a3a073ab327ab9ebc672cd8cab68b7901b6f2b --- /dev/null +++ b/low-shot-task-specific-100-ex/sum/best_model/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a11eba00d1ce3e5f0d03227932d4e26433f2f6b6a7c8dc1f7f157da65ae61a16 +size 209984517 diff --git a/low-shot-task-specific-100-ex/sum/best_model/rng_state.pth b/low-shot-task-specific-100-ex/sum/best_model/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..411f311183b61678aeb511e12775b50c9f09b1bf --- /dev/null +++ b/low-shot-task-specific-100-ex/sum/best_model/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cfb4b1b5009a9b015311afd9778b0ef0a4a8bc389c04629a1d2e68a84aeea44 +size 14575 diff --git a/low-shot-task-specific-100-ex/sum/best_model/scheduler.pt b/low-shot-task-specific-100-ex/sum/best_model/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..10701e0231c43fd7fbee03df6bea6a9e4d381788 --- /dev/null +++ b/low-shot-task-specific-100-ex/sum/best_model/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b63e6b896793d947452e2a5278b1333dd1dcc71d94b37f92a47c6766156ba1e +size 627 diff --git a/low-shot-task-specific-100-ex/sum/best_model/trainer_state.json b/low-shot-task-specific-100-ex/sum/best_model/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f505dad02db5ae5dcecca92c17803301bdb6dcfd --- /dev/null +++ b/low-shot-task-specific-100-ex/sum/best_model/trainer_state.json @@ -0,0 +1,95 @@ +{ + "best_metric": 1.7415841817855835, + "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-100-ex/sum/checkpoint-20", + "epoch": 8.0, + "eval_steps": 500, + "global_step": 20, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.8, + "eval_loss": 3.6972098350524902, + "eval_runtime": 0.6541, + "eval_samples_per_second": 30.578, + "eval_steps_per_second": 4.587, + "step": 2 + }, + { + "epoch": 2.0, + "eval_loss": 3.5827407836914062, + "eval_runtime": 0.6516, + "eval_samples_per_second": 30.695, + "eval_steps_per_second": 4.604, + "step": 5 + }, + { + "epoch": 2.8, + "eval_loss": 3.435373306274414, + "eval_runtime": 0.6534, + "eval_samples_per_second": 30.611, + "eval_steps_per_second": 4.592, + "step": 7 + }, + { + "epoch": 4.0, + "learning_rate": 5.9999999999999995e-05, + "loss": 3.5457, + "step": 10 + }, + { + "epoch": 4.0, + "eval_loss": 3.144989013671875, + "eval_runtime": 0.6516, + "eval_samples_per_second": 30.692, + "eval_steps_per_second": 4.604, + "step": 10 + }, + { + "epoch": 4.8, + "eval_loss": 2.9391090869903564, + "eval_runtime": 0.6511, + "eval_samples_per_second": 30.718, + "eval_steps_per_second": 4.608, + "step": 12 + }, + { + "epoch": 6.0, + "eval_loss": 2.312290668487549, + "eval_runtime": 0.6505, + "eval_samples_per_second": 30.745, + "eval_steps_per_second": 4.612, + "step": 15 + }, + { + "epoch": 6.8, + "eval_loss": 1.938306450843811, + "eval_runtime": 0.6508, + "eval_samples_per_second": 30.732, + "eval_steps_per_second": 4.61, + "step": 17 + }, + { + "epoch": 8.0, + "learning_rate": 0.00011999999999999999, + "loss": 2.4305, + "step": 20 + }, + { + "epoch": 8.0, + "eval_loss": 1.7415841817855835, + "eval_runtime": 0.6535, + "eval_samples_per_second": 30.603, + "eval_steps_per_second": 4.59, + "step": 20 + } + ], + "logging_steps": 10, + "max_steps": 20, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 1186858480435200.0, + "trial_name": null, + "trial_params": null +} diff --git a/low-shot-task-specific-100-ex/sum/best_model/training_args.bin b/low-shot-task-specific-100-ex/sum/best_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..5f47ff9df2b2338467f66974fee13bb6446c709a --- /dev/null +++ b/low-shot-task-specific-100-ex/sum/best_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9b167b4b835edf4fa5e13863bdad1f68e733f002739a766fe1eb9a9eb6f48df +size 4091 diff --git a/low-shot-task-specific-100-ex/svamp/best_model/adapter_config.json b/low-shot-task-specific-100-ex/svamp/best_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8089cffca57122c6474c8736277a1ee41f75c20e --- /dev/null +++ b/low-shot-task-specific-100-ex/svamp/best_model/adapter_config.json @@ -0,0 +1,21 @@ +{ + "base_model_name_or_path": "meta-llama/Llama-2-13b-hf", + "bias": "none", + "enable_lora": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "lora_alpha": 16, + "lora_dropout": 0.05, + "merge_weights": false, + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/low-shot-task-specific-100-ex/svamp/best_model/adapter_model.bin b/low-shot-task-specific-100-ex/svamp/best_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..de4d065bce814b54125559ba6465e6c94d71dc49 --- /dev/null +++ b/low-shot-task-specific-100-ex/svamp/best_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f29211dc7f547a0a871873375eefed004d258e206cecfee346d1c18ddb963d3 +size 104973389 diff --git a/low-shot-task-specific-100-ex/svamp/best_model/optimizer.pt b/low-shot-task-specific-100-ex/svamp/best_model/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..2255368a029550268469229d48f8da09b16e27b8 --- /dev/null +++ b/low-shot-task-specific-100-ex/svamp/best_model/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2b859941698531e737b7ef66017873e12852369c66d25abd3373ec677259117 +size 209984517 diff --git a/low-shot-task-specific-100-ex/svamp/best_model/rng_state.pth b/low-shot-task-specific-100-ex/svamp/best_model/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..91b504fd87b8370f2678923974333a9a245291ee --- /dev/null +++ b/low-shot-task-specific-100-ex/svamp/best_model/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5077750d4ebc29a06666f871f496d7ff06c1cb14dbd8f8954de69896bf4426ab +size 14575 diff --git a/low-shot-task-specific-100-ex/svamp/best_model/scheduler.pt b/low-shot-task-specific-100-ex/svamp/best_model/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..10701e0231c43fd7fbee03df6bea6a9e4d381788 --- /dev/null +++ b/low-shot-task-specific-100-ex/svamp/best_model/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b63e6b896793d947452e2a5278b1333dd1dcc71d94b37f92a47c6766156ba1e +size 627 diff --git a/low-shot-task-specific-100-ex/svamp/best_model/trainer_state.json b/low-shot-task-specific-100-ex/svamp/best_model/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9e63ddf622281a1ebae94d6b0818fe08d6111dd4 --- /dev/null +++ b/low-shot-task-specific-100-ex/svamp/best_model/trainer_state.json @@ -0,0 +1,95 @@ +{ + "best_metric": 1.6565091609954834, + "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-100-ex/svamp/checkpoint-20", + "epoch": 8.0, + "eval_steps": 500, + "global_step": 20, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.8, + "eval_loss": 5.190371036529541, + "eval_runtime": 0.8961, + "eval_samples_per_second": 22.319, + "eval_steps_per_second": 3.348, + "step": 2 + }, + { + "epoch": 2.0, + "eval_loss": 5.100682735443115, + "eval_runtime": 0.9032, + "eval_samples_per_second": 22.144, + "eval_steps_per_second": 3.322, + "step": 5 + }, + { + "epoch": 2.8, + "eval_loss": 4.914952278137207, + "eval_runtime": 0.9065, + "eval_samples_per_second": 22.062, + "eval_steps_per_second": 3.309, + "step": 7 + }, + { + "epoch": 4.0, + "learning_rate": 5.9999999999999995e-05, + "loss": 4.937, + "step": 10 + }, + { + "epoch": 4.0, + "eval_loss": 4.304265022277832, + "eval_runtime": 0.9048, + "eval_samples_per_second": 22.105, + "eval_steps_per_second": 3.316, + "step": 10 + }, + { + "epoch": 4.8, + "eval_loss": 3.561671733856201, + "eval_runtime": 0.9075, + "eval_samples_per_second": 22.04, + "eval_steps_per_second": 3.306, + "step": 12 + }, + { + "epoch": 6.0, + "eval_loss": 2.8418495655059814, + "eval_runtime": 0.9069, + "eval_samples_per_second": 22.052, + "eval_steps_per_second": 3.308, + "step": 15 + }, + { + "epoch": 6.8, + "eval_loss": 2.3915348052978516, + "eval_runtime": 0.9035, + "eval_samples_per_second": 22.136, + "eval_steps_per_second": 3.32, + "step": 17 + }, + { + "epoch": 8.0, + "learning_rate": 0.00011999999999999999, + "loss": 3.0207, + "step": 20 + }, + { + "epoch": 8.0, + "eval_loss": 1.6565091609954834, + "eval_runtime": 0.905, + "eval_samples_per_second": 22.099, + "eval_steps_per_second": 3.315, + "step": 20 + } + ], + "logging_steps": 10, + "max_steps": 20, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 3347929963560960.0, + "trial_name": null, + "trial_params": null +} diff --git a/low-shot-task-specific-100-ex/svamp/best_model/training_args.bin b/low-shot-task-specific-100-ex/svamp/best_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..785156c063423357186f06ea80e2ca6be6fce34d --- /dev/null +++ b/low-shot-task-specific-100-ex/svamp/best_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b132a2d5ba1c5592adec662b49a14662d09b6d2d689b790811b0c67535815c7d +size 4091 diff --git a/low-shot-task-specific-100-ex/word-sorting/best_model/adapter_config.json b/low-shot-task-specific-100-ex/word-sorting/best_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8089cffca57122c6474c8736277a1ee41f75c20e --- /dev/null +++ b/low-shot-task-specific-100-ex/word-sorting/best_model/adapter_config.json @@ -0,0 +1,21 @@ +{ + "base_model_name_or_path": "meta-llama/Llama-2-13b-hf", + "bias": "none", + "enable_lora": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "lora_alpha": 16, + "lora_dropout": 0.05, + "merge_weights": false, + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/low-shot-task-specific-100-ex/word-sorting/best_model/adapter_model.bin b/low-shot-task-specific-100-ex/word-sorting/best_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..d05555687d58d80a9ed4c7765b75a028df73a1ab --- /dev/null +++ b/low-shot-task-specific-100-ex/word-sorting/best_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c3c5a2be24d9f05a68e2795dcb390c54ada897a16d601b0dd2b44cab21718f0 +size 104973389 diff --git a/low-shot-task-specific-100-ex/word-sorting/best_model/optimizer.pt b/low-shot-task-specific-100-ex/word-sorting/best_model/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e0270bac01057e744fcc9121afc064d938b54e7b --- /dev/null +++ b/low-shot-task-specific-100-ex/word-sorting/best_model/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d4146bb863afab1cb22cde74559a41111b3d731420d88faada97d9deff22af9 +size 209984517 diff --git a/low-shot-task-specific-100-ex/word-sorting/best_model/rng_state.pth b/low-shot-task-specific-100-ex/word-sorting/best_model/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..9b4d383cdfd8717456d5b802d5ec9ba07a37a9cd --- /dev/null +++ b/low-shot-task-specific-100-ex/word-sorting/best_model/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28c03a1d71c19dc35634d295a6d0b4ae49b617b7f63b6772fee61d4c6abab573 +size 14575 diff --git a/low-shot-task-specific-100-ex/word-sorting/best_model/scheduler.pt b/low-shot-task-specific-100-ex/word-sorting/best_model/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..10701e0231c43fd7fbee03df6bea6a9e4d381788 --- /dev/null +++ b/low-shot-task-specific-100-ex/word-sorting/best_model/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b63e6b896793d947452e2a5278b1333dd1dcc71d94b37f92a47c6766156ba1e +size 627 diff --git a/low-shot-task-specific-100-ex/word-sorting/best_model/trainer_state.json b/low-shot-task-specific-100-ex/word-sorting/best_model/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..fd7bbbbd5e763361e260a3d8d594b2ceba8121ef --- /dev/null +++ b/low-shot-task-specific-100-ex/word-sorting/best_model/trainer_state.json @@ -0,0 +1,95 @@ +{ + "best_metric": 0.0560903362929821, + "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-100-ex/word-sorting/checkpoint-20", + "epoch": 8.0, + "eval_steps": 500, + "global_step": 20, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.8, + "eval_loss": 0.3754672408103943, + "eval_runtime": 1.8736, + "eval_samples_per_second": 10.675, + "eval_steps_per_second": 1.601, + "step": 2 + }, + { + "epoch": 2.0, + "eval_loss": 0.35889530181884766, + "eval_runtime": 1.8792, + "eval_samples_per_second": 10.643, + "eval_steps_per_second": 1.596, + "step": 5 + }, + { + "epoch": 2.8, + "eval_loss": 0.3372827470302582, + "eval_runtime": 1.8739, + "eval_samples_per_second": 10.673, + "eval_steps_per_second": 1.601, + "step": 7 + }, + { + "epoch": 4.0, + "learning_rate": 5.9999999999999995e-05, + "loss": 0.3948, + "step": 10 + }, + { + "epoch": 4.0, + "eval_loss": 0.272649347782135, + "eval_runtime": 1.8766, + "eval_samples_per_second": 10.658, + "eval_steps_per_second": 1.599, + "step": 10 + }, + { + "epoch": 4.8, + "eval_loss": 0.2144305408000946, + "eval_runtime": 1.8768, + "eval_samples_per_second": 10.656, + "eval_steps_per_second": 1.598, + "step": 12 + }, + { + "epoch": 6.0, + "eval_loss": 0.11929650604724884, + "eval_runtime": 1.8692, + "eval_samples_per_second": 10.7, + "eval_steps_per_second": 1.605, + "step": 15 + }, + { + "epoch": 6.8, + "eval_loss": 0.06970790028572083, + "eval_runtime": 1.869, + "eval_samples_per_second": 10.701, + "eval_steps_per_second": 1.605, + "step": 17 + }, + { + "epoch": 8.0, + "learning_rate": 0.00011999999999999999, + "loss": 0.1551, + "step": 20 + }, + { + "epoch": 8.0, + "eval_loss": 0.0560903362929821, + "eval_runtime": 1.8656, + "eval_samples_per_second": 10.721, + "eval_steps_per_second": 1.608, + "step": 20 + } + ], + "logging_steps": 10, + "max_steps": 20, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 8184378271334400.0, + "trial_name": null, + "trial_params": null +} diff --git a/low-shot-task-specific-100-ex/word-sorting/best_model/training_args.bin b/low-shot-task-specific-100-ex/word-sorting/best_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d1bbfa1c279a135c632e653e5d383d86e036022f --- /dev/null +++ b/low-shot-task-specific-100-ex/word-sorting/best_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc2a8b2e2f9ddeafce47d8123a31984865c2c00940312ef44f0f086aa55882d8 +size 4091