Pingzhi Li
committed on
Commit
·
b7d5f24
1
Parent(s):
62f8325
init
Browse files
- bigbench/checkpoints/nomic_instruction_embedding.pt +3 -0
- bigbench/events.out.tfevents.1726071103.unites3.cs.unc.edu.336166.0 +3 -0
- bigbench/events.out.tfevents.1726071165.unites3.cs.unc.edu.338337.0 +3 -0
- bigbench/logs/initial_config.json +85 -0
- bigbench/logs/log.txt +223 -0
- t0-v3/checkpoints/nomic_instruction_embedding.pt +3 -0
- t0-v3/events.out.tfevents.1726072268.unites3.cs.unc.edu.355379.0 +3 -0
- t0-v3/logs/initial_config.json +85 -0
- t0-v3/logs/log.txt +141 -0
bigbench/checkpoints/nomic_instruction_embedding.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dec21e9d5b2a57bd526cc3e7d27356266ab1a531064188cd809e95a3a6fc7dd4
|
| 3 |
+
size 152547
|
bigbench/events.out.tfevents.1726071103.unites3.cs.unc.edu.336166.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c8431667d2c5189597ea006ca78735bc891e39b401d671f761e73a08e699bfdf
|
| 3 |
+
size 40
|
bigbench/events.out.tfevents.1726071165.unites3.cs.unc.edu.338337.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4e74dfdcc49e54fde7fb0311d7ffa2b4889cef136dff1ca16050492c1052c67e
|
| 3 |
+
size 40
|
bigbench/logs/initial_config.json
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"debug": false,
|
| 3 |
+
"project_name": "chatgpt-instruction-nomic-embedding",
|
| 4 |
+
"name": "t0-bigbench",
|
| 5 |
+
"project_dir": "/home/pingzhi/phatgoose-cl/src_simple",
|
| 6 |
+
"data_dir": "/nas-hdd/prateek/data",
|
| 7 |
+
"output_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs",
|
| 8 |
+
"config_dir": "/home/pingzhi/phatgoose-cl/src_simple/configs",
|
| 9 |
+
"seed": 42,
|
| 10 |
+
"hf_write_token": null,
|
| 11 |
+
"origin_model": "google/t5-xl-lm-adapt",
|
| 12 |
+
"model_class": "seq2seq_lm",
|
| 13 |
+
"model_type": "encdec",
|
| 14 |
+
"peft_type": "lora",
|
| 15 |
+
"load_model_dtype": "float32",
|
| 16 |
+
"val_fraction": 0.2,
|
| 17 |
+
"dataset": [
|
| 18 |
+
"t0-bigbench"
|
| 19 |
+
],
|
| 20 |
+
"eval_dataset": null,
|
| 21 |
+
"eval_split": "val",
|
| 22 |
+
"num_steps": 1500,
|
| 23 |
+
"effective_train_batch_size": 128,
|
| 24 |
+
"patience": 3,
|
| 25 |
+
"verbose": false,
|
| 26 |
+
"do_test": false,
|
| 27 |
+
"eval_steps": 100,
|
| 28 |
+
"save_last": true,
|
| 29 |
+
"save_best": true,
|
| 30 |
+
"logging_steps": 5,
|
| 31 |
+
"gradient_checkpointing": false,
|
| 32 |
+
"moe_inference": false,
|
| 33 |
+
"inference_batch_size_scale": 1,
|
| 34 |
+
"checkpoint_dir_or_path": null,
|
| 35 |
+
"cl_checkpoint_path": null,
|
| 36 |
+
"load_checkpoint_dataset": null,
|
| 37 |
+
"ae_checkpoint_dir": null,
|
| 38 |
+
"init_datasets": [
|
| 39 |
+
"t0-cl-init1"
|
| 40 |
+
],
|
| 41 |
+
"selected_expert_ids": null,
|
| 42 |
+
"merge_num_clusters": null,
|
| 43 |
+
"global_clustering": false,
|
| 44 |
+
"hierarchical_num_clusters": null,
|
| 45 |
+
"hierarchical_cluster_token_routing": false,
|
| 46 |
+
"save_router_state_dict": false,
|
| 47 |
+
"bias_router_embedding_path": null,
|
| 48 |
+
"bias_input_embedding_path": null,
|
| 49 |
+
"optimizer": "adamw",
|
| 50 |
+
"lr": 0.003,
|
| 51 |
+
"trainable_param_names": ".*lora.*",
|
| 52 |
+
"scheduler": "linear_decay_with_warmup",
|
| 53 |
+
"warmup_steps": null,
|
| 54 |
+
"warmup_ratio": 0.02,
|
| 55 |
+
"weight_decay": 0,
|
| 56 |
+
"scale_parameter": true,
|
| 57 |
+
"mix_precision": "bf16",
|
| 58 |
+
"gradient_clipping": 1.0,
|
| 59 |
+
"target_modules": "all-linear",
|
| 60 |
+
"lora_rank": 16,
|
| 61 |
+
"lora_alpha": 1,
|
| 62 |
+
"lora_dropout": 0.0,
|
| 63 |
+
"use_rslora": false,
|
| 64 |
+
"init_lora_weights": true,
|
| 65 |
+
"lora_bias": "none",
|
| 66 |
+
"moe_router_aux_loss_coef": 0.0,
|
| 67 |
+
"moe_top_k": 2,
|
| 68 |
+
"moe_top_p": 1.0,
|
| 69 |
+
"moe_reweight_output": true,
|
| 70 |
+
"bias_routing_scale": 0,
|
| 71 |
+
"bias_routing_dim": -1,
|
| 72 |
+
"lora_init_method": "usage-based",
|
| 73 |
+
"gate_init_method": "zero",
|
| 74 |
+
"zeroshot_tolerance": 0.05,
|
| 75 |
+
"upper_bound_tolerance": 0.05,
|
| 76 |
+
"single_lora_gate_train_steps": 200,
|
| 77 |
+
"molora_gate_train_samples": 1000,
|
| 78 |
+
"molora_gate_train_steps": 100,
|
| 79 |
+
"layer_norm_after_train_single_lora": true,
|
| 80 |
+
"cpu_cont": 96,
|
| 81 |
+
"run_output_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-bigbench",
|
| 82 |
+
"log_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-bigbench/logs",
|
| 83 |
+
"prediction_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-bigbench/prediction",
|
| 84 |
+
"checkpoint_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-bigbench/checkpoints"
|
| 85 |
+
}
|
bigbench/logs/log.txt
ADDED
|
@@ -0,0 +1,223 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2024-09-11 12:11:43,168 - log.txt - [INFO] - Start experiment chatgpt-instruction-nomic-embedding/t0-bigbench
|
| 2 |
+
2024-09-11 12:11:43,168 - log.txt - [INFO] - {
|
| 3 |
+
"debug": false,
|
| 4 |
+
"project_name": "chatgpt-instruction-nomic-embedding",
|
| 5 |
+
"name": "t0-bigbench",
|
| 6 |
+
"project_dir": "/home/pingzhi/phatgoose-cl/src_simple",
|
| 7 |
+
"data_dir": "/nas-hdd/prateek/data",
|
| 8 |
+
"output_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs",
|
| 9 |
+
"config_dir": "/home/pingzhi/phatgoose-cl/src_simple/configs",
|
| 10 |
+
"seed": 42,
|
| 11 |
+
"hf_write_token": null,
|
| 12 |
+
"origin_model": "google/t5-xl-lm-adapt",
|
| 13 |
+
"model_class": "seq2seq_lm",
|
| 14 |
+
"model_type": "encdec",
|
| 15 |
+
"peft_type": "lora",
|
| 16 |
+
"load_model_dtype": "float32",
|
| 17 |
+
"val_fraction": 0.2,
|
| 18 |
+
"dataset": [
|
| 19 |
+
"t0-bigbench"
|
| 20 |
+
],
|
| 21 |
+
"eval_dataset": null,
|
| 22 |
+
"eval_split": "val",
|
| 23 |
+
"num_steps": 1500,
|
| 24 |
+
"effective_train_batch_size": 128,
|
| 25 |
+
"patience": 3,
|
| 26 |
+
"verbose": false,
|
| 27 |
+
"do_test": false,
|
| 28 |
+
"eval_steps": 100,
|
| 29 |
+
"save_last": true,
|
| 30 |
+
"save_best": true,
|
| 31 |
+
"logging_steps": 5,
|
| 32 |
+
"gradient_checkpointing": false,
|
| 33 |
+
"moe_inference": false,
|
| 34 |
+
"inference_batch_size_scale": 1,
|
| 35 |
+
"checkpoint_dir_or_path": null,
|
| 36 |
+
"cl_checkpoint_path": null,
|
| 37 |
+
"load_checkpoint_dataset": null,
|
| 38 |
+
"ae_checkpoint_dir": null,
|
| 39 |
+
"init_datasets": [
|
| 40 |
+
"t0-cl-init1"
|
| 41 |
+
],
|
| 42 |
+
"selected_expert_ids": null,
|
| 43 |
+
"merge_num_clusters": null,
|
| 44 |
+
"global_clustering": false,
|
| 45 |
+
"hierarchical_num_clusters": null,
|
| 46 |
+
"hierarchical_cluster_token_routing": false,
|
| 47 |
+
"save_router_state_dict": false,
|
| 48 |
+
"bias_router_embedding_path": null,
|
| 49 |
+
"bias_input_embedding_path": null,
|
| 50 |
+
"optimizer": "adamw",
|
| 51 |
+
"lr": 0.003,
|
| 52 |
+
"trainable_param_names": ".*lora.*",
|
| 53 |
+
"scheduler": "linear_decay_with_warmup",
|
| 54 |
+
"warmup_steps": null,
|
| 55 |
+
"warmup_ratio": 0.02,
|
| 56 |
+
"weight_decay": 0,
|
| 57 |
+
"scale_parameter": true,
|
| 58 |
+
"mix_precision": "bf16",
|
| 59 |
+
"gradient_clipping": 1.0,
|
| 60 |
+
"target_modules": "all-linear",
|
| 61 |
+
"lora_rank": 16,
|
| 62 |
+
"lora_alpha": 1,
|
| 63 |
+
"lora_dropout": 0.0,
|
| 64 |
+
"use_rslora": false,
|
| 65 |
+
"init_lora_weights": true,
|
| 66 |
+
"lora_bias": "none",
|
| 67 |
+
"moe_router_aux_loss_coef": 0.0,
|
| 68 |
+
"moe_top_k": 2,
|
| 69 |
+
"moe_top_p": 1.0,
|
| 70 |
+
"moe_reweight_output": true,
|
| 71 |
+
"bias_routing_scale": 0,
|
| 72 |
+
"bias_routing_dim": -1,
|
| 73 |
+
"lora_init_method": "usage-based",
|
| 74 |
+
"gate_init_method": "zero",
|
| 75 |
+
"zeroshot_tolerance": 0.05,
|
| 76 |
+
"upper_bound_tolerance": 0.05,
|
| 77 |
+
"single_lora_gate_train_steps": 200,
|
| 78 |
+
"molora_gate_train_samples": 1000,
|
| 79 |
+
"molora_gate_train_steps": 100,
|
| 80 |
+
"layer_norm_after_train_single_lora": true,
|
| 81 |
+
"cpu_cont": 96,
|
| 82 |
+
"run_output_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-bigbench",
|
| 83 |
+
"log_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-bigbench/logs",
|
| 84 |
+
"prediction_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-bigbench/prediction",
|
| 85 |
+
"checkpoint_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-bigbench/checkpoints",
|
| 86 |
+
"finish_flag_file": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-bigbench/exp_completed.txt"
|
| 87 |
+
}
|
| 88 |
+
2024-09-11 12:12:45,792 - log.txt - [INFO] - Start experiment chatgpt-instruction-nomic-embedding/t0-bigbench
|
| 89 |
+
2024-09-11 12:12:45,792 - log.txt - [INFO] - {
|
| 90 |
+
"debug": false,
|
| 91 |
+
"project_name": "chatgpt-instruction-nomic-embedding",
|
| 92 |
+
"name": "t0-bigbench",
|
| 93 |
+
"project_dir": "/home/pingzhi/phatgoose-cl/src_simple",
|
| 94 |
+
"data_dir": "/nas-hdd/prateek/data",
|
| 95 |
+
"output_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs",
|
| 96 |
+
"config_dir": "/home/pingzhi/phatgoose-cl/src_simple/configs",
|
| 97 |
+
"seed": 42,
|
| 98 |
+
"hf_write_token": null,
|
| 99 |
+
"origin_model": "google/t5-xl-lm-adapt",
|
| 100 |
+
"model_class": "seq2seq_lm",
|
| 101 |
+
"model_type": "encdec",
|
| 102 |
+
"peft_type": "lora",
|
| 103 |
+
"load_model_dtype": "float32",
|
| 104 |
+
"val_fraction": 0.2,
|
| 105 |
+
"dataset": [
|
| 106 |
+
"t0-bigbench"
|
| 107 |
+
],
|
| 108 |
+
"eval_dataset": null,
|
| 109 |
+
"eval_split": "val",
|
| 110 |
+
"num_steps": 1500,
|
| 111 |
+
"effective_train_batch_size": 128,
|
| 112 |
+
"patience": 3,
|
| 113 |
+
"verbose": false,
|
| 114 |
+
"do_test": false,
|
| 115 |
+
"eval_steps": 100,
|
| 116 |
+
"save_last": true,
|
| 117 |
+
"save_best": true,
|
| 118 |
+
"logging_steps": 5,
|
| 119 |
+
"gradient_checkpointing": false,
|
| 120 |
+
"moe_inference": false,
|
| 121 |
+
"inference_batch_size_scale": 1,
|
| 122 |
+
"checkpoint_dir_or_path": null,
|
| 123 |
+
"cl_checkpoint_path": null,
|
| 124 |
+
"load_checkpoint_dataset": null,
|
| 125 |
+
"ae_checkpoint_dir": null,
|
| 126 |
+
"init_datasets": [
|
| 127 |
+
"t0-cl-init1"
|
| 128 |
+
],
|
| 129 |
+
"selected_expert_ids": null,
|
| 130 |
+
"merge_num_clusters": null,
|
| 131 |
+
"global_clustering": false,
|
| 132 |
+
"hierarchical_num_clusters": null,
|
| 133 |
+
"hierarchical_cluster_token_routing": false,
|
| 134 |
+
"save_router_state_dict": false,
|
| 135 |
+
"bias_router_embedding_path": null,
|
| 136 |
+
"bias_input_embedding_path": null,
|
| 137 |
+
"optimizer": "adamw",
|
| 138 |
+
"lr": 0.003,
|
| 139 |
+
"trainable_param_names": ".*lora.*",
|
| 140 |
+
"scheduler": "linear_decay_with_warmup",
|
| 141 |
+
"warmup_steps": null,
|
| 142 |
+
"warmup_ratio": 0.02,
|
| 143 |
+
"weight_decay": 0,
|
| 144 |
+
"scale_parameter": true,
|
| 145 |
+
"mix_precision": "bf16",
|
| 146 |
+
"gradient_clipping": 1.0,
|
| 147 |
+
"target_modules": "all-linear",
|
| 148 |
+
"lora_rank": 16,
|
| 149 |
+
"lora_alpha": 1,
|
| 150 |
+
"lora_dropout": 0.0,
|
| 151 |
+
"use_rslora": false,
|
| 152 |
+
"init_lora_weights": true,
|
| 153 |
+
"lora_bias": "none",
|
| 154 |
+
"moe_router_aux_loss_coef": 0.0,
|
| 155 |
+
"moe_top_k": 2,
|
| 156 |
+
"moe_top_p": 1.0,
|
| 157 |
+
"moe_reweight_output": true,
|
| 158 |
+
"bias_routing_scale": 0,
|
| 159 |
+
"bias_routing_dim": -1,
|
| 160 |
+
"lora_init_method": "usage-based",
|
| 161 |
+
"gate_init_method": "zero",
|
| 162 |
+
"zeroshot_tolerance": 0.05,
|
| 163 |
+
"upper_bound_tolerance": 0.05,
|
| 164 |
+
"single_lora_gate_train_steps": 200,
|
| 165 |
+
"molora_gate_train_samples": 1000,
|
| 166 |
+
"molora_gate_train_steps": 100,
|
| 167 |
+
"layer_norm_after_train_single_lora": true,
|
| 168 |
+
"cpu_cont": 96,
|
| 169 |
+
"run_output_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-bigbench",
|
| 170 |
+
"log_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-bigbench/logs",
|
| 171 |
+
"prediction_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-bigbench/prediction",
|
| 172 |
+
"checkpoint_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-bigbench/checkpoints",
|
| 173 |
+
"finish_flag_file": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-bigbench/exp_completed.txt"
|
| 174 |
+
}
|
| 175 |
+
2024-09-11 12:12:52,740 - log.txt - [INFO] - Tasks ['bbbooleanexpressions', 'bbcausaljudgement', 'bbdateunderstanding', 'bbdisambiguationqa', 'bbdycklanguages', 'bbformalfallacies', 'bbgeometricshapes', 'bbhyperbaton', 'bblogicaldeduction', 'bbmovierecommendation', 'bbmultisteparithmetictwo', 'bbnavigate', 'bbobjectcounting', 'bbpenguinsinatable', 'bbreasoningaboutcoloredobjects', 'bbruinnames', 'bbsalienttranslationerrordetection', 'bbsnarks', 'bbsportsunderstanding', 'bbtemporalsequences', 'bbtrackingshuffledobjects', 'bbweboflies', 'bbwordsorting', 'bbautodebugging', 'bbbbqlitejson', 'bbcodelinedescription', 'bbconceptualcombinations', 'bbconlangtranslation', 'bbemojimovie', 'bbhinduknowledge', 'bbknownunknowns', 'bblanguageidentification', 'bblinguisticspuzzles', 'bblogicgridpuzzle', 'bbmisconceptionsrussian', 'bbnovelconcepts', 'bboperators', 'bbparsinlureadingcomprehension', 'bbplaydialogsameordifferent', 'bbrepeatcopylogic', 'bbstrangestories', 'bbstrategyqa', 'bbsymbolinterpretation', 'bbvitamincfactverification', 'bbwinowhy']
|
| 176 |
+
2024-09-11 12:12:53,268 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'lukaemon/bbh', 'boolean_expressions'] Datasize 128
|
| 177 |
+
2024-09-11 12:12:54,203 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'causal_judgment'] Datasize 38
|
| 178 |
+
2024-09-11 12:12:55,111 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'date_understanding'] Datasize 73
|
| 179 |
+
2024-09-11 12:12:55,785 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'disambiguation_qa'] Datasize 51
|
| 180 |
+
2024-09-11 12:12:56,465 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'dyck_languages'] Datasize 128
|
| 181 |
+
2024-09-11 12:12:57,202 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'formal_fallacies_syllogisms_negation'] Datasize 128
|
| 182 |
+
2024-09-11 12:12:57,833 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'geometric_shapes'] Datasize 71
|
| 183 |
+
2024-09-11 12:12:58,288 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'hyperbaton'] Datasize 128
|
| 184 |
+
2024-09-11 12:12:58,944 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'logical_deduction'] Datasize 128
|
| 185 |
+
2024-09-11 12:12:59,695 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'movie_recommendation'] Datasize 100
|
| 186 |
+
2024-09-11 12:13:00,333 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'lukaemon/bbh', 'multistep_arithmetic_two'] Datasize 128
|
| 187 |
+
2024-09-11 12:13:01,013 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'navigate'] Datasize 128
|
| 188 |
+
2024-09-11 12:13:01,652 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'object_counting'] Datasize 128
|
| 189 |
+
2024-09-11 12:13:02,102 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'penguins_in_a_table'] Datasize 29
|
| 190 |
+
2024-09-11 12:13:03,039 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'reasoning_about_colored_objects'] Datasize 128
|
| 191 |
+
2024-09-11 12:13:03,485 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'ruin_names'] Datasize 89
|
| 192 |
+
2024-09-11 12:13:04,229 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'salient_translation_error_detection'] Datasize 128
|
| 193 |
+
2024-09-11 12:13:04,895 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'snarks'] Datasize 36
|
| 194 |
+
2024-09-11 12:13:05,470 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'sports_understanding'] Datasize 128
|
| 195 |
+
2024-09-11 12:13:06,097 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'temporal_sequences'] Datasize 128
|
| 196 |
+
2024-09-11 12:13:06,888 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'tracking_shuffled_objects'] Datasize 128
|
| 197 |
+
2024-09-11 12:13:07,509 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'lukaemon/bbh', 'web_of_lies'] Datasize 128
|
| 198 |
+
2024-09-11 12:13:08,207 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'word_sorting'] Datasize 128
|
| 199 |
+
2024-09-11 12:13:08,872 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'auto_debugging'] Datasize 16
|
| 200 |
+
2024-09-11 12:13:09,336 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'bbq_lite_json'] Datasize 128
|
| 201 |
+
2024-09-11 12:13:10,051 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'code_line_description'] Datasize 16
|
| 202 |
+
2024-09-11 12:13:10,666 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'conceptual_combinations'] Datasize 19
|
| 203 |
+
2024-09-11 12:13:11,334 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'conlang_translation'] Datasize 32
|
| 204 |
+
2024-09-11 12:13:12,346 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'emoji_movie'] Datasize 20
|
| 205 |
+
2024-09-11 12:13:13,003 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'hindu_knowledge'] Datasize 35
|
| 206 |
+
2024-09-11 12:13:13,628 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'known_unknowns'] Datasize 16
|
| 207 |
+
2024-09-11 12:13:14,295 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'language_identification'] Datasize 128
|
| 208 |
+
2024-09-11 12:13:14,927 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'linguistics_puzzles'] Datasize 128
|
| 209 |
+
2024-09-11 12:13:15,592 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'logic_grid_puzzle'] Datasize 128
|
| 210 |
+
2024-09-11 12:13:16,266 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'misconceptions_russian'] Datasize 16
|
| 211 |
+
2024-09-11 12:13:17,076 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'novel_concepts'] Datasize 16
|
| 212 |
+
2024-09-11 12:13:17,559 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'operators'] Datasize 42
|
| 213 |
+
2024-09-11 12:13:19,634 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'parsinlu_reading_comprehension'] Datasize 103
|
| 214 |
+
2024-09-11 12:13:20,109 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'play_dialog_same_or_different'] Datasize 128
|
| 215 |
+
2024-09-11 12:13:20,768 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'repeat_copy_logic'] Datasize 16
|
| 216 |
+
2024-09-11 12:13:21,397 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'strange_stories'] Datasize 34
|
| 217 |
+
2024-09-11 12:13:22,057 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'strategyqa'] Datasize 128
|
| 218 |
+
2024-09-11 12:13:22,720 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'symbol_interpretation'] Datasize 128
|
| 219 |
+
2024-09-11 12:13:23,385 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'vitaminc_fact_verification'] Datasize 128
|
| 220 |
+
2024-09-11 12:13:24,006 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'winowhy'] Datasize 128
|
| 221 |
+
2024-09-11 12:13:24,210 - sentence_transformers.SentenceTransformer - [INFO] - Use pytorch device_name: cuda
|
| 222 |
+
2024-09-11 12:13:24,210 - sentence_transformers.SentenceTransformer - [INFO] - Load pretrained SentenceTransformer: nomic-ai/nomic-embed-text-v1.5
|
| 223 |
+
2024-09-11 12:13:31,883 - transformers_modules.nomic-ai.nomic-bert-2048.4bb68f63016e88e53e48df904c6ab4e6f718e198.modeling_hf_nomic_bert - [WARNING] - <All keys matched successfully>
|
t0-v3/checkpoints/nomic_instruction_embedding.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c6812ce093d78304741dbc17bb4c587171786408024f3e4e925f5f923013f63c
|
| 3 |
+
size 155508
|
t0-v3/events.out.tfevents.1726072268.unites3.cs.unc.edu.355379.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:19dc6309a28a8d55423de12ea314aed49b4227984ecfe957fe26188360a345b0
|
| 3 |
+
size 40
|
t0-v3/logs/initial_config.json
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"debug": false,
|
| 3 |
+
"project_name": "chatgpt-instruction-nomic-embedding",
|
| 4 |
+
"name": "t0-v3",
|
| 5 |
+
"project_dir": "/home/pingzhi/phatgoose-cl/src_simple",
|
| 6 |
+
"data_dir": "/nas-hdd/prateek/data",
|
| 7 |
+
"output_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs",
|
| 8 |
+
"config_dir": "/home/pingzhi/phatgoose-cl/src_simple/configs",
|
| 9 |
+
"seed": 42,
|
| 10 |
+
"hf_write_token": null,
|
| 11 |
+
"origin_model": "google/t5-xl-lm-adapt",
|
| 12 |
+
"model_class": "seq2seq_lm",
|
| 13 |
+
"model_type": "encdec",
|
| 14 |
+
"peft_type": "lora",
|
| 15 |
+
"load_model_dtype": "float32",
|
| 16 |
+
"val_fraction": 0.2,
|
| 17 |
+
"dataset": [
|
| 18 |
+
"t0"
|
| 19 |
+
],
|
| 20 |
+
"eval_dataset": null,
|
| 21 |
+
"eval_split": "val",
|
| 22 |
+
"num_steps": 1500,
|
| 23 |
+
"effective_train_batch_size": 128,
|
| 24 |
+
"patience": 3,
|
| 25 |
+
"verbose": false,
|
| 26 |
+
"do_test": false,
|
| 27 |
+
"eval_steps": 100,
|
| 28 |
+
"save_last": true,
|
| 29 |
+
"save_best": true,
|
| 30 |
+
"logging_steps": 5,
|
| 31 |
+
"gradient_checkpointing": false,
|
| 32 |
+
"moe_inference": false,
|
| 33 |
+
"inference_batch_size_scale": 1,
|
| 34 |
+
"checkpoint_dir_or_path": null,
|
| 35 |
+
"cl_checkpoint_path": null,
|
| 36 |
+
"load_checkpoint_dataset": null,
|
| 37 |
+
"ae_checkpoint_dir": null,
|
| 38 |
+
"init_datasets": [
|
| 39 |
+
"t0-cl-init1"
|
| 40 |
+
],
|
| 41 |
+
"selected_expert_ids": null,
|
| 42 |
+
"merge_num_clusters": null,
|
| 43 |
+
"global_clustering": false,
|
| 44 |
+
"hierarchical_num_clusters": null,
|
| 45 |
+
"hierarchical_cluster_token_routing": false,
|
| 46 |
+
"save_router_state_dict": false,
|
| 47 |
+
"bias_router_embedding_path": null,
|
| 48 |
+
"bias_input_embedding_path": null,
|
| 49 |
+
"optimizer": "adamw",
|
| 50 |
+
"lr": 0.003,
|
| 51 |
+
"trainable_param_names": ".*lora.*",
|
| 52 |
+
"scheduler": "linear_decay_with_warmup",
|
| 53 |
+
"warmup_steps": null,
|
| 54 |
+
"warmup_ratio": 0.02,
|
| 55 |
+
"weight_decay": 0,
|
| 56 |
+
"scale_parameter": true,
|
| 57 |
+
"mix_precision": "bf16",
|
| 58 |
+
"gradient_clipping": 1.0,
|
| 59 |
+
"target_modules": "all-linear",
|
| 60 |
+
"lora_rank": 16,
|
| 61 |
+
"lora_alpha": 1,
|
| 62 |
+
"lora_dropout": 0.0,
|
| 63 |
+
"use_rslora": false,
|
| 64 |
+
"init_lora_weights": true,
|
| 65 |
+
"lora_bias": "none",
|
| 66 |
+
"moe_router_aux_loss_coef": 0.0,
|
| 67 |
+
"moe_top_k": 2,
|
| 68 |
+
"moe_top_p": 1.0,
|
| 69 |
+
"moe_reweight_output": true,
|
| 70 |
+
"bias_routing_scale": 0,
|
| 71 |
+
"bias_routing_dim": -1,
|
| 72 |
+
"lora_init_method": "usage-based",
|
| 73 |
+
"gate_init_method": "zero",
|
| 74 |
+
"zeroshot_tolerance": 0.05,
|
| 75 |
+
"upper_bound_tolerance": 0.05,
|
| 76 |
+
"single_lora_gate_train_steps": 200,
|
| 77 |
+
"molora_gate_train_samples": 1000,
|
| 78 |
+
"molora_gate_train_steps": 100,
|
| 79 |
+
"layer_norm_after_train_single_lora": true,
|
| 80 |
+
"cpu_cont": 96,
|
| 81 |
+
"run_output_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-v3",
|
| 82 |
+
"log_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-v3/logs",
|
| 83 |
+
"prediction_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-v3/prediction",
|
| 84 |
+
"checkpoint_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-v3/checkpoints"
|
| 85 |
+
}
|
t0-v3/logs/log.txt
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2024-09-11 12:31:08,919 - log.txt - [INFO] - Start experiment chatgpt-instruction-nomic-embedding/t0-v3
|
| 2 |
+
2024-09-11 12:31:08,919 - log.txt - [INFO] - {
|
| 3 |
+
"debug": false,
|
| 4 |
+
"project_name": "chatgpt-instruction-nomic-embedding",
|
| 5 |
+
"name": "t0-v3",
|
| 6 |
+
"project_dir": "/home/pingzhi/phatgoose-cl/src_simple",
|
| 7 |
+
"data_dir": "/nas-hdd/prateek/data",
|
| 8 |
+
"output_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs",
|
| 9 |
+
"config_dir": "/home/pingzhi/phatgoose-cl/src_simple/configs",
|
| 10 |
+
"seed": 42,
|
| 11 |
+
"hf_write_token": null,
|
| 12 |
+
"origin_model": "google/t5-xl-lm-adapt",
|
| 13 |
+
"model_class": "seq2seq_lm",
|
| 14 |
+
"model_type": "encdec",
|
| 15 |
+
"peft_type": "lora",
|
| 16 |
+
"load_model_dtype": "float32",
|
| 17 |
+
"val_fraction": 0.2,
|
| 18 |
+
"dataset": [
|
| 19 |
+
"t0"
|
| 20 |
+
],
|
| 21 |
+
"eval_dataset": null,
|
| 22 |
+
"eval_split": "val",
|
| 23 |
+
"num_steps": 1500,
|
| 24 |
+
"effective_train_batch_size": 128,
|
| 25 |
+
"patience": 3,
|
| 26 |
+
"verbose": false,
|
| 27 |
+
"do_test": false,
|
| 28 |
+
"eval_steps": 100,
|
| 29 |
+
"save_last": true,
|
| 30 |
+
"save_best": true,
|
| 31 |
+
"logging_steps": 5,
|
| 32 |
+
"gradient_checkpointing": false,
|
| 33 |
+
"moe_inference": false,
|
| 34 |
+
"inference_batch_size_scale": 1,
|
| 35 |
+
"checkpoint_dir_or_path": null,
|
| 36 |
+
"cl_checkpoint_path": null,
|
| 37 |
+
"load_checkpoint_dataset": null,
|
| 38 |
+
"ae_checkpoint_dir": null,
|
| 39 |
+
"init_datasets": [
|
| 40 |
+
"t0-cl-init1"
|
| 41 |
+
],
|
| 42 |
+
"selected_expert_ids": null,
|
| 43 |
+
"merge_num_clusters": null,
|
| 44 |
+
"global_clustering": false,
|
| 45 |
+
"hierarchical_num_clusters": null,
|
| 46 |
+
"hierarchical_cluster_token_routing": false,
|
| 47 |
+
"save_router_state_dict": false,
|
| 48 |
+
"bias_router_embedding_path": null,
|
| 49 |
+
"bias_input_embedding_path": null,
|
| 50 |
+
"optimizer": "adamw",
|
| 51 |
+
"lr": 0.003,
|
| 52 |
+
"trainable_param_names": ".*lora.*",
|
| 53 |
+
"scheduler": "linear_decay_with_warmup",
|
| 54 |
+
"warmup_steps": null,
|
| 55 |
+
"warmup_ratio": 0.02,
|
| 56 |
+
"weight_decay": 0,
|
| 57 |
+
"scale_parameter": true,
|
| 58 |
+
"mix_precision": "bf16",
|
| 59 |
+
"gradient_clipping": 1.0,
|
| 60 |
+
"target_modules": "all-linear",
|
| 61 |
+
"lora_rank": 16,
|
| 62 |
+
"lora_alpha": 1,
|
| 63 |
+
"lora_dropout": 0.0,
|
| 64 |
+
"use_rslora": false,
|
| 65 |
+
"init_lora_weights": true,
|
| 66 |
+
"lora_bias": "none",
|
| 67 |
+
"moe_router_aux_loss_coef": 0.0,
|
| 68 |
+
"moe_top_k": 2,
|
| 69 |
+
"moe_top_p": 1.0,
|
| 70 |
+
"moe_reweight_output": true,
|
| 71 |
+
"bias_routing_scale": 0,
|
| 72 |
+
"bias_routing_dim": -1,
|
| 73 |
+
"lora_init_method": "usage-based",
|
| 74 |
+
"gate_init_method": "zero",
|
| 75 |
+
"zeroshot_tolerance": 0.05,
|
| 76 |
+
"upper_bound_tolerance": 0.05,
|
| 77 |
+
"single_lora_gate_train_steps": 200,
|
| 78 |
+
"molora_gate_train_samples": 1000,
|
| 79 |
+
"molora_gate_train_steps": 100,
|
| 80 |
+
"layer_norm_after_train_single_lora": true,
|
| 81 |
+
"cpu_cont": 96,
|
| 82 |
+
"run_output_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-v3",
|
| 83 |
+
"log_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-v3/logs",
|
| 84 |
+
"prediction_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-v3/prediction",
|
| 85 |
+
"checkpoint_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-v3/checkpoints",
|
| 86 |
+
"finish_flag_file": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-v3/exp_completed.txt"
|
| 87 |
+
}
|
| 88 |
+
2024-09-11 12:31:16,181 - log.txt - [INFO] - Tasks ['p3socialiqa', 'p3wiqa', 'p3cosmosqa', 'p3quail', 'p3quartz', 'p3qasc', 'p3commonsenseqa', 'p3quarel', 'p3dream', 'p3sciq', 'p3wikihop', 'p3ropes', 'p3adversarialqa', 'p3duorc', 'p3quoref', 'p3hotpotqa', 'p3wikiqa', 'p3amazonpolarity', 'p3appreviews', 'p3rottentomatoes', 'p3imdb', 'p3yelp', 'p3agnews', 'p3dbpedia14', 'p3trec', 'p3wikibio', 'p3commongen', 'p3cnndailymail', 'p3multinews', 'p3gigaword', 'p3samsum', 'p3xsum', 'p3paws', 'p3qqp', 'p3mrpc', 'p3hswag', 'p3copa', 'p3storycloze', 'p3cb', 'p3rte', 'p3anlir1', 'p3anlir2', 'p3anlir3', 'p3winogrande', 'p3wscfixed', 'p3wic']
|
| 89 |
+
2024-09-11 12:31:19,691 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'social_i_qa'] Num Templates: 4 Datasize 128
|
| 90 |
+
2024-09-11 12:31:27,056 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'wiqa'] Num Templates: 2 Datasize 128
|
| 91 |
+
2024-09-11 12:31:36,032 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'cosmos_qa'] Num Templates: 10 Datasize 128
|
| 92 |
+
2024-09-11 12:31:41,061 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'quail'] Num Templates: 10 Datasize 128
|
| 93 |
+
2024-09-11 12:31:45,655 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'quartz'] Num Templates: 8 Datasize 128
|
| 94 |
+
2024-09-11 12:31:49,910 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'qasc'] Num Templates: 5 Datasize 128
|
| 95 |
+
2024-09-11 12:31:53,940 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'commonsense_qa'] Num Templates: 4 Datasize 128
|
| 96 |
+
2024-09-11 12:31:59,871 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'quarel'] Num Templates: 5 Datasize 128
|
| 97 |
+
2024-09-11 12:32:03,248 - datasets_modules.datasets.dream.0835c7949b04e4dc7d094375c7b502ae12c6b17dae8e715d8c363257a391545a.dream - [INFO] - ⏳ Generating examples from = /home/pingzhi/.cache/huggingface/datasets/downloads/dbb86d6157ce35037d870dd075e10618881304a1fc78d42a73fff127aba929db
|
| 98 |
+
2024-09-11 12:32:03,875 - datasets_modules.datasets.dream.0835c7949b04e4dc7d094375c7b502ae12c6b17dae8e715d8c363257a391545a.dream - [INFO] - ⏳ Generating examples from = /home/pingzhi/.cache/huggingface/datasets/downloads/3c262ee1f86e2b935d3198f592e78680f0d2c509ce07037e657c4ccdd111fb17
|
| 99 |
+
2024-09-11 12:32:03,980 - datasets_modules.datasets.dream.0835c7949b04e4dc7d094375c7b502ae12c6b17dae8e715d8c363257a391545a.dream - [INFO] - ⏳ Generating examples from = /home/pingzhi/.cache/huggingface/datasets/downloads/1df6bad6ef1eba61e7a1b53e1a7a8b9213070120a2576e235726c41d68775bd1
|
| 100 |
+
2024-09-11 12:32:04,129 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'dream'] Num Templates: 2 Datasize 128
|
| 101 |
+
2024-09-11 12:32:07,731 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'sciq'] Num Templates: 4 Datasize 128
|
| 102 |
+
2024-09-11 12:32:17,431 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'MoE-UNC/wikihop'] Num Templates: 5 Datasize 128
|
| 103 |
+
2024-09-11 12:32:21,595 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'ropes'] Num Templates: 10 Datasize 128
|
| 104 |
+
2024-09-11 12:32:26,485 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'adversarial_qa', 'adversarialQA'] Num Templates: 4 Datasize 128
|
| 105 |
+
2024-09-11 12:32:34,160 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'duorc', 'ParaphraseRC'] Num Templates: 5 Datasize 128
|
| 106 |
+
2024-09-11 12:32:39,060 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'quoref'] Num Templates: 10 Datasize 128
|
| 107 |
+
2024-09-11 12:33:11,624 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'hotpot_qa', 'fullwiki'] Num Templates: 5 Datasize 128
|
| 108 |
+
2024-09-11 12:33:15,430 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'wiki_qa'] Num Templates: 5 Datasize 128
|
| 109 |
+
2024-09-11 12:33:46,127 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'amazon_polarity'] Num Templates: 9 Datasize 128
|
| 110 |
+
2024-09-11 12:33:49,335 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'app_reviews'] Num Templates: 1 Datasize 128
|
| 111 |
+
2024-09-11 12:33:52,788 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'rotten_tomatoes'] Num Templates: 10 Datasize 128
|
| 112 |
+
2024-09-11 12:33:58,154 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'imdb'] Num Templates: 10 Datasize 128
|
| 113 |
+
2024-09-11 12:34:12,928 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'yelp_review_full'] Num Templates: 7 Datasize 128
|
| 114 |
+
2024-09-11 12:34:16,251 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'ag_news'] Num Templates: 7 Datasize 128
|
| 115 |
+
2024-09-11 12:34:23,493 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'dbpedia_14'] Num Templates: 4 Datasize 128
|
| 116 |
+
2024-09-11 12:34:26,596 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'trec'] Num Templates: 1 Datasize 100
|
| 117 |
+
2024-09-11 12:36:25,678 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'wiki_bio'] Num Templates: 1 Datasize 128
|
| 118 |
+
2024-09-11 12:36:31,310 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'common_gen'] Num Templates: 6 Datasize 128
|
| 119 |
+
2024-09-11 12:36:54,360 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'cnn_dailymail', '3.0.0'] Num Templates: 7 Datasize 128
|
| 120 |
+
2024-09-11 12:37:18,511 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'multi_news'] Num Templates: 5 Datasize 128
|
| 121 |
+
2024-09-11 12:38:48,856 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'gigaword'] Num Templates: 7 Datasize 128
|
| 122 |
+
2024-09-11 12:38:53,303 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'samsum'] Num Templates: 6 Datasize 128
|
| 123 |
+
2024-09-11 12:39:56,978 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'xsum'] Num Templates: 10 Datasize 128
|
| 124 |
+
2024-09-11 12:40:02,461 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'paws', 'labeled_final'] Num Templates: 11 Datasize 128
|
| 125 |
+
2024-09-11 12:40:08,618 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'glue', 'qqp'] Num Templates: 5 Datasize 128
|
| 126 |
+
2024-09-11 12:40:11,539 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'glue', 'mrpc'] Num Templates: 5 Datasize 128
|
| 127 |
+
2024-09-11 12:40:24,009 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'hellaswag'] Num Templates: 4 Datasize 128
|
| 128 |
+
2024-09-11 12:40:26,356 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'super_glue', 'copa'] Num Templates: 8 Datasize 128
|
| 129 |
+
2024-09-11 12:40:30,240 - root - [WARNING] - Tried instantiating `DatasetTemplates` for MoE-UNC/story_cloze, but no prompts found. Please ignore this warning if you are creating new prompts for this dataset.
|
| 130 |
+
2024-09-11 12:40:30,274 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'MoE-UNC/story_cloze'] Num Templates: 5 Datasize 128
|
| 131 |
+
2024-09-11 12:40:31,536 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'super_glue', 'cb'] Num Templates: 15 Datasize 128
|
| 132 |
+
2024-09-11 12:40:33,106 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'super_glue', 'rte'] Num Templates: 10 Datasize 128
|
| 133 |
+
2024-09-11 12:40:43,150 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'anli'] Num Templates: 15 Datasize 128
|
| 134 |
+
2024-09-11 12:40:46,111 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'anli'] Num Templates: 15 Datasize 128
|
| 135 |
+
2024-09-11 12:40:49,644 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'anli'] Num Templates: 15 Datasize 128
|
| 136 |
+
2024-09-11 12:40:52,538 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'winogrande', 'winogrande_xl'] Num Templates: 5 Datasize 128
|
| 137 |
+
2024-09-11 12:40:53,754 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'super_glue', 'wsc.fixed'] Num Templates: 10 Datasize 128
|
| 138 |
+
2024-09-11 12:40:56,186 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'super_glue', 'wic'] Num Templates: 10 Datasize 128
|
| 139 |
+
2024-09-11 12:40:56,448 - sentence_transformers.SentenceTransformer - [INFO] - Use pytorch device_name: cuda
|
| 140 |
+
2024-09-11 12:40:56,448 - sentence_transformers.SentenceTransformer - [INFO] - Load pretrained SentenceTransformer: nomic-ai/nomic-embed-text-v1.5
|
| 141 |
+
2024-09-11 12:40:58,768 - transformers_modules.nomic-ai.nomic-bert-2048.4bb68f63016e88e53e48df904c6ab4e6f718e198.modeling_hf_nomic_bert - [WARNING] - <All keys matched successfully>
|