Pingzhi Li committed on
Commit
d77ca67
·
1 Parent(s): b7d5f24
t0-v4/checkpoints/nomic_instruction_embedding.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:363206603ad9b63334955f262080289db9a70e012f1371ffb23392766206de52
3
+ size 155508
t0-v4/events.out.tfevents.1726078631.unites3.cs.unc.edu.462460.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48efc75bcfa6edab7796f25bc682e1922a1c8867468a1e17c6efe7c0e39bb5a7
3
+ size 40
t0-v4/logs/initial_config.json ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "debug": false,
3
+ "project_name": "chatgpt-instruction-nomic-embedding",
4
+ "name": "t0-v4",
5
+ "project_dir": "/home/pingzhi/phatgoose-cl/src_simple",
6
+ "data_dir": "/nas-hdd/prateek/data",
7
+ "output_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs",
8
+ "config_dir": "/home/pingzhi/phatgoose-cl/src_simple/configs",
9
+ "seed": 42,
10
+ "hf_write_token": null,
11
+ "origin_model": "google/t5-xl-lm-adapt",
12
+ "model_class": "seq2seq_lm",
13
+ "model_type": "encdec",
14
+ "peft_type": "lora",
15
+ "load_model_dtype": "float32",
16
+ "val_fraction": 0.2,
17
+ "dataset": [
18
+ "t0"
19
+ ],
20
+ "eval_dataset": null,
21
+ "eval_split": "val",
22
+ "num_steps": 1500,
23
+ "effective_train_batch_size": 128,
24
+ "patience": 3,
25
+ "verbose": false,
26
+ "do_test": false,
27
+ "eval_steps": 100,
28
+ "save_last": true,
29
+ "save_best": true,
30
+ "logging_steps": 5,
31
+ "gradient_checkpointing": false,
32
+ "moe_inference": false,
33
+ "inference_batch_size_scale": 1,
34
+ "checkpoint_dir_or_path": null,
35
+ "cl_checkpoint_path": null,
36
+ "load_checkpoint_dataset": null,
37
+ "ae_checkpoint_dir": null,
38
+ "init_datasets": [
39
+ "t0-cl-init1"
40
+ ],
41
+ "selected_expert_ids": null,
42
+ "merge_num_clusters": null,
43
+ "global_clustering": false,
44
+ "hierarchical_num_clusters": null,
45
+ "hierarchical_cluster_token_routing": false,
46
+ "save_router_state_dict": false,
47
+ "bias_router_embedding_path": null,
48
+ "bias_input_embedding_path": null,
49
+ "optimizer": "adamw",
50
+ "lr": 0.003,
51
+ "trainable_param_names": ".*lora.*",
52
+ "scheduler": "linear_decay_with_warmup",
53
+ "warmup_steps": null,
54
+ "warmup_ratio": 0.02,
55
+ "weight_decay": 0,
56
+ "scale_parameter": true,
57
+ "mix_precision": "bf16",
58
+ "gradient_clipping": 1.0,
59
+ "target_modules": "all-linear",
60
+ "lora_rank": 16,
61
+ "lora_alpha": 1,
62
+ "lora_dropout": 0.0,
63
+ "use_rslora": false,
64
+ "init_lora_weights": true,
65
+ "lora_bias": "none",
66
+ "moe_router_aux_loss_coef": 0.0,
67
+ "moe_top_k": 2,
68
+ "moe_top_p": 1.0,
69
+ "moe_reweight_output": true,
70
+ "bias_routing_scale": 0,
71
+ "bias_routing_dim": -1,
72
+ "lora_init_method": "usage-based",
73
+ "gate_init_method": "zero",
74
+ "zeroshot_tolerance": 0.05,
75
+ "upper_bound_tolerance": 0.05,
76
+ "single_lora_gate_train_steps": 200,
77
+ "molora_gate_train_samples": 1000,
78
+ "molora_gate_train_steps": 100,
79
+ "layer_norm_after_train_single_lora": true,
80
+ "cpu_cont": 96,
81
+ "run_output_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-v4",
82
+ "log_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-v4/logs",
83
+ "prediction_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-v4/prediction",
84
+ "checkpoint_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-v4/checkpoints"
85
+ }
t0-v4/logs/log.txt ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-09-11 14:17:11,625 - log.txt - [INFO] - Start experiment chatgpt-instruction-nomic-embedding/t0-v4
2
+ 2024-09-11 14:17:11,625 - log.txt - [INFO] - {
3
+ "debug": false,
4
+ "project_name": "chatgpt-instruction-nomic-embedding",
5
+ "name": "t0-v4",
6
+ "project_dir": "/home/pingzhi/phatgoose-cl/src_simple",
7
+ "data_dir": "/nas-hdd/prateek/data",
8
+ "output_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs",
9
+ "config_dir": "/home/pingzhi/phatgoose-cl/src_simple/configs",
10
+ "seed": 42,
11
+ "hf_write_token": null,
12
+ "origin_model": "google/t5-xl-lm-adapt",
13
+ "model_class": "seq2seq_lm",
14
+ "model_type": "encdec",
15
+ "peft_type": "lora",
16
+ "load_model_dtype": "float32",
17
+ "val_fraction": 0.2,
18
+ "dataset": [
19
+ "t0"
20
+ ],
21
+ "eval_dataset": null,
22
+ "eval_split": "val",
23
+ "num_steps": 1500,
24
+ "effective_train_batch_size": 128,
25
+ "patience": 3,
26
+ "verbose": false,
27
+ "do_test": false,
28
+ "eval_steps": 100,
29
+ "save_last": true,
30
+ "save_best": true,
31
+ "logging_steps": 5,
32
+ "gradient_checkpointing": false,
33
+ "moe_inference": false,
34
+ "inference_batch_size_scale": 1,
35
+ "checkpoint_dir_or_path": null,
36
+ "cl_checkpoint_path": null,
37
+ "load_checkpoint_dataset": null,
38
+ "ae_checkpoint_dir": null,
39
+ "init_datasets": [
40
+ "t0-cl-init1"
41
+ ],
42
+ "selected_expert_ids": null,
43
+ "merge_num_clusters": null,
44
+ "global_clustering": false,
45
+ "hierarchical_num_clusters": null,
46
+ "hierarchical_cluster_token_routing": false,
47
+ "save_router_state_dict": false,
48
+ "bias_router_embedding_path": null,
49
+ "bias_input_embedding_path": null,
50
+ "optimizer": "adamw",
51
+ "lr": 0.003,
52
+ "trainable_param_names": ".*lora.*",
53
+ "scheduler": "linear_decay_with_warmup",
54
+ "warmup_steps": null,
55
+ "warmup_ratio": 0.02,
56
+ "weight_decay": 0,
57
+ "scale_parameter": true,
58
+ "mix_precision": "bf16",
59
+ "gradient_clipping": 1.0,
60
+ "target_modules": "all-linear",
61
+ "lora_rank": 16,
62
+ "lora_alpha": 1,
63
+ "lora_dropout": 0.0,
64
+ "use_rslora": false,
65
+ "init_lora_weights": true,
66
+ "lora_bias": "none",
67
+ "moe_router_aux_loss_coef": 0.0,
68
+ "moe_top_k": 2,
69
+ "moe_top_p": 1.0,
70
+ "moe_reweight_output": true,
71
+ "bias_routing_scale": 0,
72
+ "bias_routing_dim": -1,
73
+ "lora_init_method": "usage-based",
74
+ "gate_init_method": "zero",
75
+ "zeroshot_tolerance": 0.05,
76
+ "upper_bound_tolerance": 0.05,
77
+ "single_lora_gate_train_steps": 200,
78
+ "molora_gate_train_samples": 1000,
79
+ "molora_gate_train_steps": 100,
80
+ "layer_norm_after_train_single_lora": true,
81
+ "cpu_cont": 96,
82
+ "run_output_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-v4",
83
+ "log_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-v4/logs",
84
+ "prediction_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-v4/prediction",
85
+ "checkpoint_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-v4/checkpoints",
86
+ "finish_flag_file": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-v4/exp_completed.txt"
87
+ }
88
+ 2024-09-11 14:17:18,316 - log.txt - [INFO] - Tasks ['p3socialiqa', 'p3wiqa', 'p3cosmosqa', 'p3quail', 'p3quartz', 'p3qasc', 'p3commonsenseqa', 'p3quarel', 'p3dream', 'p3sciq', 'p3wikihop', 'p3ropes', 'p3adversarialqa', 'p3duorc', 'p3quoref', 'p3hotpotqa', 'p3wikiqa', 'p3amazonpolarity', 'p3appreviews', 'p3rottentomatoes', 'p3imdb', 'p3yelp', 'p3agnews', 'p3dbpedia14', 'p3trec', 'p3wikibio', 'p3commongen', 'p3cnndailymail', 'p3multinews', 'p3gigaword', 'p3samsum', 'p3xsum', 'p3paws', 'p3qqp', 'p3mrpc', 'p3hswag', 'p3copa', 'p3storycloze', 'p3cb', 'p3rte', 'p3anlir1', 'p3anlir2', 'p3anlir3', 'p3winogrande', 'p3wscfixed', 'p3wic']
89
+ 2024-09-11 14:17:19,935 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'social_i_qa'] Num Templates: 4 Datasize 128
90
+ 2024-09-11 14:17:22,070 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'wiqa'] Num Templates: 2 Datasize 128
91
+ 2024-09-11 14:17:24,069 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'cosmos_qa'] Num Templates: 10 Datasize 128
92
+ 2024-09-11 14:17:26,907 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'quail'] Num Templates: 10 Datasize 128
93
+ 2024-09-11 14:17:29,561 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'quartz'] Num Templates: 8 Datasize 128
94
+ 2024-09-11 14:17:31,981 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'qasc'] Num Templates: 5 Datasize 128
95
+ 2024-09-11 14:17:34,492 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'commonsense_qa'] Num Templates: 4 Datasize 128
96
+ 2024-09-11 14:17:36,947 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'quarel'] Num Templates: 5 Datasize 128
97
+ 2024-09-11 14:17:38,760 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'dream'] Num Templates: 2 Datasize 128
98
+ 2024-09-11 14:17:41,104 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'sciq'] Num Templates: 4 Datasize 128
99
+ 2024-09-11 14:17:42,704 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'MoE-UNC/wikihop'] Num Templates: 5 Datasize 128
100
+ 2024-09-11 14:17:45,187 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'ropes'] Num Templates: 10 Datasize 128
101
+ 2024-09-11 14:17:47,879 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'adversarial_qa', 'adversarialQA'] Num Templates: 4 Datasize 128
102
+ 2024-09-11 14:17:52,220 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'duorc', 'ParaphraseRC'] Num Templates: 5 Datasize 128
103
+ 2024-09-11 14:17:54,156 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'quoref'] Num Templates: 10 Datasize 128
104
+ 2024-09-11 14:17:55,658 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'hotpot_qa', 'fullwiki'] Num Templates: 5 Datasize 128
105
+ 2024-09-11 14:17:59,481 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'wiki_qa'] Num Templates: 5 Datasize 128
106
+ 2024-09-11 14:18:03,108 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'amazon_polarity'] Num Templates: 9 Datasize 128
107
+ 2024-09-11 14:18:05,068 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'app_reviews'] Num Templates: 1 Datasize 128
108
+ 2024-09-11 14:18:06,970 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'rotten_tomatoes'] Num Templates: 10 Datasize 128
109
+ 2024-09-11 14:18:09,884 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'imdb'] Num Templates: 10 Datasize 128
110
+ 2024-09-11 14:18:12,193 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'yelp_review_full'] Num Templates: 7 Datasize 128
111
+ 2024-09-11 14:18:14,277 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'ag_news'] Num Templates: 7 Datasize 128
112
+ 2024-09-11 14:18:16,545 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'dbpedia_14'] Num Templates: 4 Datasize 128
113
+ 2024-09-11 14:18:18,280 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'trec'] Num Templates: 1 Datasize 100
114
+ 2024-09-11 14:18:21,112 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'wiki_bio'] Num Templates: 1 Datasize 128
115
+ 2024-09-11 14:18:23,877 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'common_gen'] Num Templates: 6 Datasize 128
116
+ 2024-09-11 14:18:27,213 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'cnn_dailymail', '3.0.0'] Num Templates: 7 Datasize 128
117
+ 2024-09-11 14:18:29,546 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'multi_news'] Num Templates: 5 Datasize 128
118
+ 2024-09-11 14:18:32,866 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'gigaword'] Num Templates: 7 Datasize 128
119
+ 2024-09-11 14:18:35,238 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'samsum'] Num Templates: 6 Datasize 128
120
+ 2024-09-11 14:18:37,088 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'xsum'] Num Templates: 10 Datasize 128
121
+ 2024-09-11 14:18:40,582 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'paws', 'labeled_final'] Num Templates: 11 Datasize 128
122
+ 2024-09-11 14:18:43,367 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'glue', 'qqp'] Num Templates: 5 Datasize 128
123
+ 2024-09-11 14:18:45,352 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'glue', 'mrpc'] Num Templates: 5 Datasize 128
124
+ 2024-09-11 14:18:47,112 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'hellaswag'] Num Templates: 4 Datasize 128
125
+ 2024-09-11 14:18:48,387 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'super_glue', 'copa'] Num Templates: 8 Datasize 128
126
+ 2024-09-11 14:18:49,854 - root - [WARNING] - Tried instantiating `DatasetTemplates` for MoE-UNC/story_cloze, but no prompts found. Please ignore this warning if you are creating new prompts for this dataset.
127
+ 2024-09-11 14:18:49,886 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'MoE-UNC/story_cloze'] Num Templates: 5 Datasize 128
128
+ 2024-09-11 14:18:51,053 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'super_glue', 'cb'] Num Templates: 15 Datasize 128
129
+ 2024-09-11 14:18:52,171 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'super_glue', 'rte'] Num Templates: 10 Datasize 128
130
+ 2024-09-11 14:18:56,259 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'anli'] Num Templates: 15 Datasize 128
131
+ 2024-09-11 14:18:59,502 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'anli'] Num Templates: 15 Datasize 128
132
+ 2024-09-11 14:19:03,139 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'anli'] Num Templates: 15 Datasize 128
133
+ 2024-09-11 14:19:04,440 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'winogrande', 'winogrande_xl'] Num Templates: 5 Datasize 128
134
+ 2024-09-11 14:19:05,559 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'super_glue', 'wsc.fixed'] Num Templates: 10 Datasize 128
135
+ 2024-09-11 14:19:06,690 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'super_glue', 'wic'] Num Templates: 10 Datasize 128
136
+ 2024-09-11 14:19:06,926 - sentence_transformers.SentenceTransformer - [INFO] - Use pytorch device_name: cuda
137
+ 2024-09-11 14:19:06,926 - sentence_transformers.SentenceTransformer - [INFO] - Load pretrained SentenceTransformer: nomic-ai/nomic-embed-text-v1.5
138
+ 2024-09-11 14:19:09,181 - transformers_modules.nomic-ai.nomic-bert-2048.4bb68f63016e88e53e48df904c6ab4e6f718e198.modeling_hf_nomic_bert - [WARNING] - <All keys matched successfully>