Phando commited on
Commit
3427225
·
verified ·
1 Parent(s): ff3ab7a

Delete bigbench/logs

Browse files
bigbench/logs/initial_config.json DELETED
@@ -1,85 +0,0 @@
1
- {
2
- "debug": false,
3
- "project_name": "chatgpt-instruction-nomic-embedding",
4
- "name": "t0-bigbench",
5
- "project_dir": "/home/pingzhi/phatgoose-cl/src_simple",
6
- "data_dir": "/nas-hdd/prateek/data",
7
- "output_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs",
8
- "config_dir": "/home/pingzhi/phatgoose-cl/src_simple/configs",
9
- "seed": 42,
10
- "hf_write_token": null,
11
- "origin_model": "google/t5-xl-lm-adapt",
12
- "model_class": "seq2seq_lm",
13
- "model_type": "encdec",
14
- "peft_type": "lora",
15
- "load_model_dtype": "float32",
16
- "val_fraction": 0.2,
17
- "dataset": [
18
- "t0-bigbench"
19
- ],
20
- "eval_dataset": null,
21
- "eval_split": "val",
22
- "num_steps": 1500,
23
- "effective_train_batch_size": 128,
24
- "patience": 3,
25
- "verbose": false,
26
- "do_test": false,
27
- "eval_steps": 100,
28
- "save_last": true,
29
- "save_best": true,
30
- "logging_steps": 5,
31
- "gradient_checkpointing": false,
32
- "moe_inference": false,
33
- "inference_batch_size_scale": 1,
34
- "checkpoint_dir_or_path": null,
35
- "cl_checkpoint_path": null,
36
- "load_checkpoint_dataset": null,
37
- "ae_checkpoint_dir": null,
38
- "init_datasets": [
39
- "t0-cl-init1"
40
- ],
41
- "selected_expert_ids": null,
42
- "merge_num_clusters": null,
43
- "global_clustering": false,
44
- "hierarchical_num_clusters": null,
45
- "hierarchical_cluster_token_routing": false,
46
- "save_router_state_dict": false,
47
- "bias_router_embedding_path": null,
48
- "bias_input_embedding_path": null,
49
- "optimizer": "adamw",
50
- "lr": 0.003,
51
- "trainable_param_names": ".*lora.*",
52
- "scheduler": "linear_decay_with_warmup",
53
- "warmup_steps": null,
54
- "warmup_ratio": 0.02,
55
- "weight_decay": 0,
56
- "scale_parameter": true,
57
- "mix_precision": "bf16",
58
- "gradient_clipping": 1.0,
59
- "target_modules": "all-linear",
60
- "lora_rank": 16,
61
- "lora_alpha": 1,
62
- "lora_dropout": 0.0,
63
- "use_rslora": false,
64
- "init_lora_weights": true,
65
- "lora_bias": "none",
66
- "moe_router_aux_loss_coef": 0.0,
67
- "moe_top_k": 2,
68
- "moe_top_p": 1.0,
69
- "moe_reweight_output": true,
70
- "bias_routing_scale": 0,
71
- "bias_routing_dim": -1,
72
- "lora_init_method": "usage-based",
73
- "gate_init_method": "zero",
74
- "zeroshot_tolerance": 0.05,
75
- "upper_bound_tolerance": 0.05,
76
- "single_lora_gate_train_steps": 200,
77
- "molora_gate_train_samples": 1000,
78
- "molora_gate_train_steps": 100,
79
- "layer_norm_after_train_single_lora": true,
80
- "cpu_cont": 96,
81
- "run_output_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-bigbench",
82
- "log_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-bigbench/logs",
83
- "prediction_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-bigbench/prediction",
84
- "checkpoint_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-bigbench/checkpoints"
85
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bigbench/logs/log.txt DELETED
@@ -1,223 +0,0 @@
1
- 2024-09-11 12:11:43,168 - log.txt - [INFO] - Start experiment chatgpt-instruction-nomic-embedding/t0-bigbench
2
- 2024-09-11 12:11:43,168 - log.txt - [INFO] - {
3
- "debug": false,
4
- "project_name": "chatgpt-instruction-nomic-embedding",
5
- "name": "t0-bigbench",
6
- "project_dir": "/home/pingzhi/phatgoose-cl/src_simple",
7
- "data_dir": "/nas-hdd/prateek/data",
8
- "output_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs",
9
- "config_dir": "/home/pingzhi/phatgoose-cl/src_simple/configs",
10
- "seed": 42,
11
- "hf_write_token": null,
12
- "origin_model": "google/t5-xl-lm-adapt",
13
- "model_class": "seq2seq_lm",
14
- "model_type": "encdec",
15
- "peft_type": "lora",
16
- "load_model_dtype": "float32",
17
- "val_fraction": 0.2,
18
- "dataset": [
19
- "t0-bigbench"
20
- ],
21
- "eval_dataset": null,
22
- "eval_split": "val",
23
- "num_steps": 1500,
24
- "effective_train_batch_size": 128,
25
- "patience": 3,
26
- "verbose": false,
27
- "do_test": false,
28
- "eval_steps": 100,
29
- "save_last": true,
30
- "save_best": true,
31
- "logging_steps": 5,
32
- "gradient_checkpointing": false,
33
- "moe_inference": false,
34
- "inference_batch_size_scale": 1,
35
- "checkpoint_dir_or_path": null,
36
- "cl_checkpoint_path": null,
37
- "load_checkpoint_dataset": null,
38
- "ae_checkpoint_dir": null,
39
- "init_datasets": [
40
- "t0-cl-init1"
41
- ],
42
- "selected_expert_ids": null,
43
- "merge_num_clusters": null,
44
- "global_clustering": false,
45
- "hierarchical_num_clusters": null,
46
- "hierarchical_cluster_token_routing": false,
47
- "save_router_state_dict": false,
48
- "bias_router_embedding_path": null,
49
- "bias_input_embedding_path": null,
50
- "optimizer": "adamw",
51
- "lr": 0.003,
52
- "trainable_param_names": ".*lora.*",
53
- "scheduler": "linear_decay_with_warmup",
54
- "warmup_steps": null,
55
- "warmup_ratio": 0.02,
56
- "weight_decay": 0,
57
- "scale_parameter": true,
58
- "mix_precision": "bf16",
59
- "gradient_clipping": 1.0,
60
- "target_modules": "all-linear",
61
- "lora_rank": 16,
62
- "lora_alpha": 1,
63
- "lora_dropout": 0.0,
64
- "use_rslora": false,
65
- "init_lora_weights": true,
66
- "lora_bias": "none",
67
- "moe_router_aux_loss_coef": 0.0,
68
- "moe_top_k": 2,
69
- "moe_top_p": 1.0,
70
- "moe_reweight_output": true,
71
- "bias_routing_scale": 0,
72
- "bias_routing_dim": -1,
73
- "lora_init_method": "usage-based",
74
- "gate_init_method": "zero",
75
- "zeroshot_tolerance": 0.05,
76
- "upper_bound_tolerance": 0.05,
77
- "single_lora_gate_train_steps": 200,
78
- "molora_gate_train_samples": 1000,
79
- "molora_gate_train_steps": 100,
80
- "layer_norm_after_train_single_lora": true,
81
- "cpu_cont": 96,
82
- "run_output_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-bigbench",
83
- "log_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-bigbench/logs",
84
- "prediction_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-bigbench/prediction",
85
- "checkpoint_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-bigbench/checkpoints",
86
- "finish_flag_file": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-bigbench/exp_completed.txt"
87
- }
88
- 2024-09-11 12:12:45,792 - log.txt - [INFO] - Start experiment chatgpt-instruction-nomic-embedding/t0-bigbench
89
- 2024-09-11 12:12:45,792 - log.txt - [INFO] - {
90
- "debug": false,
91
- "project_name": "chatgpt-instruction-nomic-embedding",
92
- "name": "t0-bigbench",
93
- "project_dir": "/home/pingzhi/phatgoose-cl/src_simple",
94
- "data_dir": "/nas-hdd/prateek/data",
95
- "output_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs",
96
- "config_dir": "/home/pingzhi/phatgoose-cl/src_simple/configs",
97
- "seed": 42,
98
- "hf_write_token": null,
99
- "origin_model": "google/t5-xl-lm-adapt",
100
- "model_class": "seq2seq_lm",
101
- "model_type": "encdec",
102
- "peft_type": "lora",
103
- "load_model_dtype": "float32",
104
- "val_fraction": 0.2,
105
- "dataset": [
106
- "t0-bigbench"
107
- ],
108
- "eval_dataset": null,
109
- "eval_split": "val",
110
- "num_steps": 1500,
111
- "effective_train_batch_size": 128,
112
- "patience": 3,
113
- "verbose": false,
114
- "do_test": false,
115
- "eval_steps": 100,
116
- "save_last": true,
117
- "save_best": true,
118
- "logging_steps": 5,
119
- "gradient_checkpointing": false,
120
- "moe_inference": false,
121
- "inference_batch_size_scale": 1,
122
- "checkpoint_dir_or_path": null,
123
- "cl_checkpoint_path": null,
124
- "load_checkpoint_dataset": null,
125
- "ae_checkpoint_dir": null,
126
- "init_datasets": [
127
- "t0-cl-init1"
128
- ],
129
- "selected_expert_ids": null,
130
- "merge_num_clusters": null,
131
- "global_clustering": false,
132
- "hierarchical_num_clusters": null,
133
- "hierarchical_cluster_token_routing": false,
134
- "save_router_state_dict": false,
135
- "bias_router_embedding_path": null,
136
- "bias_input_embedding_path": null,
137
- "optimizer": "adamw",
138
- "lr": 0.003,
139
- "trainable_param_names": ".*lora.*",
140
- "scheduler": "linear_decay_with_warmup",
141
- "warmup_steps": null,
142
- "warmup_ratio": 0.02,
143
- "weight_decay": 0,
144
- "scale_parameter": true,
145
- "mix_precision": "bf16",
146
- "gradient_clipping": 1.0,
147
- "target_modules": "all-linear",
148
- "lora_rank": 16,
149
- "lora_alpha": 1,
150
- "lora_dropout": 0.0,
151
- "use_rslora": false,
152
- "init_lora_weights": true,
153
- "lora_bias": "none",
154
- "moe_router_aux_loss_coef": 0.0,
155
- "moe_top_k": 2,
156
- "moe_top_p": 1.0,
157
- "moe_reweight_output": true,
158
- "bias_routing_scale": 0,
159
- "bias_routing_dim": -1,
160
- "lora_init_method": "usage-based",
161
- "gate_init_method": "zero",
162
- "zeroshot_tolerance": 0.05,
163
- "upper_bound_tolerance": 0.05,
164
- "single_lora_gate_train_steps": 200,
165
- "molora_gate_train_samples": 1000,
166
- "molora_gate_train_steps": 100,
167
- "layer_norm_after_train_single_lora": true,
168
- "cpu_cont": 96,
169
- "run_output_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-bigbench",
170
- "log_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-bigbench/logs",
171
- "prediction_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-bigbench/prediction",
172
- "checkpoint_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-bigbench/checkpoints",
173
- "finish_flag_file": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-bigbench/exp_completed.txt"
174
- }
175
- 2024-09-11 12:12:52,740 - log.txt - [INFO] - Tasks ['bbbooleanexpressions', 'bbcausaljudgement', 'bbdateunderstanding', 'bbdisambiguationqa', 'bbdycklanguages', 'bbformalfallacies', 'bbgeometricshapes', 'bbhyperbaton', 'bblogicaldeduction', 'bbmovierecommendation', 'bbmultisteparithmetictwo', 'bbnavigate', 'bbobjectcounting', 'bbpenguinsinatable', 'bbreasoningaboutcoloredobjects', 'bbruinnames', 'bbsalienttranslationerrordetection', 'bbsnarks', 'bbsportsunderstanding', 'bbtemporalsequences', 'bbtrackingshuffledobjects', 'bbweboflies', 'bbwordsorting', 'bbautodebugging', 'bbbbqlitejson', 'bbcodelinedescription', 'bbconceptualcombinations', 'bbconlangtranslation', 'bbemojimovie', 'bbhinduknowledge', 'bbknownunknowns', 'bblanguageidentification', 'bblinguisticspuzzles', 'bblogicgridpuzzle', 'bbmisconceptionsrussian', 'bbnovelconcepts', 'bboperators', 'bbparsinlureadingcomprehension', 'bbplaydialogsameordifferent', 'bbrepeatcopylogic', 'bbstrangestories', 'bbstrategyqa', 'bbsymbolinterpretation', 'bbvitamincfactverification', 'bbwinowhy']
176
- 2024-09-11 12:12:53,268 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'lukaemon/bbh', 'boolean_expressions'] Datasize 128
177
- 2024-09-11 12:12:54,203 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'causal_judgment'] Datasize 38
178
- 2024-09-11 12:12:55,111 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'date_understanding'] Datasize 73
179
- 2024-09-11 12:12:55,785 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'disambiguation_qa'] Datasize 51
180
- 2024-09-11 12:12:56,465 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'dyck_languages'] Datasize 128
181
- 2024-09-11 12:12:57,202 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'formal_fallacies_syllogisms_negation'] Datasize 128
182
- 2024-09-11 12:12:57,833 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'geometric_shapes'] Datasize 71
183
- 2024-09-11 12:12:58,288 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'hyperbaton'] Datasize 128
184
- 2024-09-11 12:12:58,944 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'logical_deduction'] Datasize 128
185
- 2024-09-11 12:12:59,695 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'movie_recommendation'] Datasize 100
186
- 2024-09-11 12:13:00,333 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'lukaemon/bbh', 'multistep_arithmetic_two'] Datasize 128
187
- 2024-09-11 12:13:01,013 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'navigate'] Datasize 128
188
- 2024-09-11 12:13:01,652 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'object_counting'] Datasize 128
189
- 2024-09-11 12:13:02,102 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'penguins_in_a_table'] Datasize 29
190
- 2024-09-11 12:13:03,039 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'reasoning_about_colored_objects'] Datasize 128
191
- 2024-09-11 12:13:03,485 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'ruin_names'] Datasize 89
192
- 2024-09-11 12:13:04,229 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'salient_translation_error_detection'] Datasize 128
193
- 2024-09-11 12:13:04,895 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'snarks'] Datasize 36
194
- 2024-09-11 12:13:05,470 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'sports_understanding'] Datasize 128
195
- 2024-09-11 12:13:06,097 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'temporal_sequences'] Datasize 128
196
- 2024-09-11 12:13:06,888 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'tracking_shuffled_objects'] Datasize 128
197
- 2024-09-11 12:13:07,509 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'lukaemon/bbh', 'web_of_lies'] Datasize 128
198
- 2024-09-11 12:13:08,207 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'word_sorting'] Datasize 128
199
- 2024-09-11 12:13:08,872 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'auto_debugging'] Datasize 16
200
- 2024-09-11 12:13:09,336 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'bbq_lite_json'] Datasize 128
201
- 2024-09-11 12:13:10,051 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'code_line_description'] Datasize 16
202
- 2024-09-11 12:13:10,666 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'conceptual_combinations'] Datasize 19
203
- 2024-09-11 12:13:11,334 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'conlang_translation'] Datasize 32
204
- 2024-09-11 12:13:12,346 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'emoji_movie'] Datasize 20
205
- 2024-09-11 12:13:13,003 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'hindu_knowledge'] Datasize 35
206
- 2024-09-11 12:13:13,628 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'known_unknowns'] Datasize 16
207
- 2024-09-11 12:13:14,295 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'language_identification'] Datasize 128
208
- 2024-09-11 12:13:14,927 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'linguistics_puzzles'] Datasize 128
209
- 2024-09-11 12:13:15,592 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'logic_grid_puzzle'] Datasize 128
210
- 2024-09-11 12:13:16,266 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'misconceptions_russian'] Datasize 16
211
- 2024-09-11 12:13:17,076 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'novel_concepts'] Datasize 16
212
- 2024-09-11 12:13:17,559 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'operators'] Datasize 42
213
- 2024-09-11 12:13:19,634 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'parsinlu_reading_comprehension'] Datasize 103
214
- 2024-09-11 12:13:20,109 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'play_dialog_same_or_different'] Datasize 128
215
- 2024-09-11 12:13:20,768 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'repeat_copy_logic'] Datasize 16
216
- 2024-09-11 12:13:21,397 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'strange_stories'] Datasize 34
217
- 2024-09-11 12:13:22,057 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'strategyqa'] Datasize 128
218
- 2024-09-11 12:13:22,720 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'symbol_interpretation'] Datasize 128
219
- 2024-09-11 12:13:23,385 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'vitaminc_fact_verification'] Datasize 128
220
- 2024-09-11 12:13:24,006 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'tasksource/bigbench', 'winowhy'] Datasize 128
221
- 2024-09-11 12:13:24,210 - sentence_transformers.SentenceTransformer - [INFO] - Use pytorch device_name: cuda
222
- 2024-09-11 12:13:24,210 - sentence_transformers.SentenceTransformer - [INFO] - Load pretrained SentenceTransformer: nomic-ai/nomic-embed-text-v1.5
223
- 2024-09-11 12:13:31,883 - transformers_modules.nomic-ai.nomic-bert-2048.4bb68f63016e88e53e48df904c6ab4e6f718e198.modeling_hf_nomic_bert - [WARNING] - <All keys matched successfully>