MoE-UNC
/

gpt-generated-instruction-nomic-embeddings

Model card Files Files and versions Community

Phando committed on Oct 19, 2024

Commit

3427225

verified ·

1 Parent(s): ff3ab7a

Delete bigbench/logs

Browse files

Files changed (2) hide show

bigbench/logs/initial_config.json +0 -85
bigbench/logs/log.txt +0 -223

bigbench/logs/initial_config.json DELETED Viewed

@@ -1,85 +0,0 @@
-{
-    "debug": false,
-    "project_name": "chatgpt-instruction-nomic-embedding",
-    "name": "t0-bigbench",
-    "project_dir": "/home/pingzhi/phatgoose-cl/src_simple",
-    "data_dir": "/nas-hdd/prateek/data",
-    "output_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs",
-    "config_dir": "/home/pingzhi/phatgoose-cl/src_simple/configs",
-    "seed": 42,
-    "hf_write_token": null,
-    "origin_model": "google/t5-xl-lm-adapt",
-    "model_class": "seq2seq_lm",
-    "model_type": "encdec",
-    "peft_type": "lora",
-    "load_model_dtype": "float32",
-    "val_fraction": 0.2,
-    "dataset": [
-        "t0-bigbench"
-    ],
-    "eval_dataset": null,
-    "eval_split": "val",
-    "num_steps": 1500,
-    "effective_train_batch_size": 128,
-    "patience": 3,
-    "verbose": false,
-    "do_test": false,
-    "eval_steps": 100,
-    "save_last": true,
-    "save_best": true,
-    "logging_steps": 5,
-    "gradient_checkpointing": false,
-    "moe_inference": false,
-    "inference_batch_size_scale": 1,
-    "checkpoint_dir_or_path": null,
-    "cl_checkpoint_path": null,
-    "load_checkpoint_dataset": null,
-    "ae_checkpoint_dir": null,
-    "init_datasets": [
-        "t0-cl-init1"
-    ],
-    "selected_expert_ids": null,
-    "merge_num_clusters": null,
-    "global_clustering": false,
-    "hierarchical_num_clusters": null,
-    "hierarchical_cluster_token_routing": false,
-    "save_router_state_dict": false,
-    "bias_router_embedding_path": null,
-    "bias_input_embedding_path": null,
-    "optimizer": "adamw",
-    "lr": 0.003,
-    "trainable_param_names": ".*lora.*",
-    "scheduler": "linear_decay_with_warmup",
-    "warmup_steps": null,
-    "warmup_ratio": 0.02,
-    "weight_decay": 0,
-    "scale_parameter": true,
-    "mix_precision": "bf16",
-    "gradient_clipping": 1.0,
-    "target_modules": "all-linear",
-    "lora_rank": 16,
-    "lora_alpha": 1,
-    "lora_dropout": 0.0,
-    "use_rslora": false,
-    "init_lora_weights": true,
-    "lora_bias": "none",
-    "moe_router_aux_loss_coef": 0.0,
-    "moe_top_k": 2,
-    "moe_top_p": 1.0,
-    "moe_reweight_output": true,
-    "bias_routing_scale": 0,
-    "bias_routing_dim": -1,
-    "lora_init_method": "usage-based",
-    "gate_init_method": "zero",
-    "zeroshot_tolerance": 0.05,
-    "upper_bound_tolerance": 0.05,
-    "single_lora_gate_train_steps": 200,
-    "molora_gate_train_samples": 1000,
-    "molora_gate_train_steps": 100,
-    "layer_norm_after_train_single_lora": true,
-    "cpu_cont": 96,
-    "run_output_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-bigbench",
-    "log_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-bigbench/logs",
-    "prediction_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-bigbench/prediction",
-    "checkpoint_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-bigbench/checkpoints"
-}

bigbench/logs/log.txt DELETED Viewed

@@ -1,223 +0,0 @@
-2024-09-11 12:11:43,168 - log.txt - [INFO] - Start experiment chatgpt-instruction-nomic-embedding/t0-bigbench
-2024-09-11 12:11:43,168 - log.txt - [INFO] - {
-    "debug": false,
-    "project_name": "chatgpt-instruction-nomic-embedding",
-    "name": "t0-bigbench",
-    "project_dir": "/home/pingzhi/phatgoose-cl/src_simple",
-    "data_dir": "/nas-hdd/prateek/data",
-    "output_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs",
-    "config_dir": "/home/pingzhi/phatgoose-cl/src_simple/configs",
-    "seed": 42,
-    "hf_write_token": null,
-    "origin_model": "google/t5-xl-lm-adapt",
-    "model_class": "seq2seq_lm",
-    "model_type": "encdec",
-    "peft_type": "lora",
-    "load_model_dtype": "float32",
-    "val_fraction": 0.2,
-    "dataset": [
-        "t0-bigbench"
-    ],
-    "eval_dataset": null,
-    "eval_split": "val",
-    "num_steps": 1500,
-    "effective_train_batch_size": 128,
-    "patience": 3,
-    "verbose": false,
-    "do_test": false,
-    "eval_steps": 100,
-    "save_last": true,
-    "save_best": true,
-    "logging_steps": 5,
-    "gradient_checkpointing": false,
-    "moe_inference": false,
-    "inference_batch_size_scale": 1,
-    "checkpoint_dir_or_path": null,
-    "cl_checkpoint_path": null,
-    "load_checkpoint_dataset": null,
-    "ae_checkpoint_dir": null,
-    "init_datasets": [
-        "t0-cl-init1"
-    ],
-    "selected_expert_ids": null,
-    "merge_num_clusters": null,
-    "global_clustering": false,
-    "hierarchical_num_clusters": null,
-    "hierarchical_cluster_token_routing": false,
-    "save_router_state_dict": false,
-    "bias_router_embedding_path": null,
-    "bias_input_embedding_path": null,
-    "optimizer": "adamw",
-    "lr": 0.003,
-    "trainable_param_names": ".*lora.*",
-    "scheduler": "linear_decay_with_warmup",
-    "warmup_steps": null,
-    "warmup_ratio": 0.02,
-    "weight_decay": 0,
-    "scale_parameter": true,
-    "mix_precision": "bf16",
-    "gradient_clipping": 1.0,
-    "target_modules": "all-linear",
-    "lora_rank": 16,
-    "lora_alpha": 1,
-    "lora_dropout": 0.0,
-    "use_rslora": false,
-    "init_lora_weights": true,
-    "lora_bias": "none",
-    "moe_router_aux_loss_coef": 0.0,
-    "moe_top_k": 2,
-    "moe_top_p": 1.0,
-    "moe_reweight_output": true,
-    "bias_routing_scale": 0,
-    "bias_routing_dim": -1,
-    "lora_init_method": "usage-based",
-    "gate_init_method": "zero",
-    "zeroshot_tolerance": 0.05,
-    "upper_bound_tolerance": 0.05,
-    "single_lora_gate_train_steps": 200,
-    "molora_gate_train_samples": 1000,
-    "molora_gate_train_steps": 100,
-    "layer_norm_after_train_single_lora": true,
-    "cpu_cont": 96,
-    "run_output_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-bigbench",
-    "log_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-bigbench/logs",
-    "prediction_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-bigbench/prediction",
-    "checkpoint_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-bigbench/checkpoints",
-    "finish_flag_file": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-bigbench/exp_completed.txt"
-}
-2024-09-11 12:12:45,792 - log.txt - [INFO] - Start experiment chatgpt-instruction-nomic-embedding/t0-bigbench
-2024-09-11 12:12:45,792 - log.txt - [INFO] - {
-    "debug": false,
-    "project_name": "chatgpt-instruction-nomic-embedding",
-    "name": "t0-bigbench",
-    "project_dir": "/home/pingzhi/phatgoose-cl/src_simple",
-    "data_dir": "/nas-hdd/prateek/data",
-    "output_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs",
-    "config_dir": "/home/pingzhi/phatgoose-cl/src_simple/configs",
-    "seed": 42,
-    "hf_write_token": null,
-    "origin_model": "google/t5-xl-lm-adapt",
-    "model_class": "seq2seq_lm",
-    "model_type": "encdec",
-    "peft_type": "lora",
-    "load_model_dtype": "float32",
-    "val_fraction": 0.2,
-    "dataset": [
-        "t0-bigbench"
-    ],
-    "eval_dataset": null,
-    "eval_split": "val",
-    "num_steps": 1500,
-    "effective_train_batch_size": 128,
-    "patience": 3,
-    "verbose": false,
-    "do_test": false,
-    "eval_steps": 100,
-    "save_last": true,
-    "save_best": true,
-    "logging_steps": 5,
-    "gradient_checkpointing": false,
-    "moe_inference": false,
-    "inference_batch_size_scale": 1,
-    "checkpoint_dir_or_path": null,
-    "cl_checkpoint_path": null,
-    "load_checkpoint_dataset": null,
-    "ae_checkpoint_dir": null,
-    "init_datasets": [
-        "t0-cl-init1"
-    ],
-    "selected_expert_ids": null,
-    "merge_num_clusters": null,
-    "global_clustering": false,
-    "hierarchical_num_clusters": null,
-    "hierarchical_cluster_token_routing": false,
-    "save_router_state_dict": false,
-    "bias_router_embedding_path": null,
-    "bias_input_embedding_path": null,
-    "optimizer": "adamw",
-    "lr": 0.003,
-    "trainable_param_names": ".*lora.*",
-    "scheduler": "linear_decay_with_warmup",
-    "warmup_steps": null,
-    "warmup_ratio": 0.02,
-    "weight_decay": 0,
-    "scale_parameter": true,
-    "mix_precision": "bf16",
-    "gradient_clipping": 1.0,
-    "target_modules": "all-linear",
-    "lora_rank": 16,
-    "lora_alpha": 1,
-    "lora_dropout": 0.0,
-    "use_rslora": false,
-    "init_lora_weights": true,
-    "lora_bias": "none",
-    "moe_router_aux_loss_coef": 0.0,
-    "moe_top_k": 2,
-    "moe_top_p": 1.0,
-    "moe_reweight_output": true,
-    "bias_routing_scale": 0,
-    "bias_routing_dim": -1,
-    "lora_init_method": "usage-based",
-    "gate_init_method": "zero",
-    "zeroshot_tolerance": 0.05,
-    "upper_bound_tolerance": 0.05,
-    "single_lora_gate_train_steps": 200,
-    "molora_gate_train_samples": 1000,
-    "molora_gate_train_steps": 100,
-    "layer_norm_after_train_single_lora": true,
-    "cpu_cont": 96,
-    "run_output_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-bigbench",
-    "log_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-bigbench/logs",
-    "prediction_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-bigbench/prediction",
-    "checkpoint_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-bigbench/checkpoints",
-    "finish_flag_file": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-bigbench/exp_completed.txt"
-}
-2024-09-11 12:12:52,740 - log.txt - [INFO] - Tasks	['bbbooleanexpressions', 'bbcausaljudgement', 'bbdateunderstanding', 'bbdisambiguationqa', 'bbdycklanguages', 'bbformalfallacies', 'bbgeometricshapes', 'bbhyperbaton', 'bblogicaldeduction', 'bbmovierecommendation', 'bbmultisteparithmetictwo', 'bbnavigate', 'bbobjectcounting', 'bbpenguinsinatable', 'bbreasoningaboutcoloredobjects', 'bbruinnames', 'bbsalienttranslationerrordetection', 'bbsnarks', 'bbsportsunderstanding', 'bbtemporalsequences', 'bbtrackingshuffledobjects', 'bbweboflies', 'bbwordsorting', 'bbautodebugging', 'bbbbqlitejson', 'bbcodelinedescription', 'bbconceptualcombinations', 'bbconlangtranslation', 'bbemojimovie', 'bbhinduknowledge', 'bbknownunknowns', 'bblanguageidentification', 'bblinguisticspuzzles', 'bblogicgridpuzzle', 'bbmisconceptionsrussian', 'bbnovelconcepts', 'bboperators', 'bbparsinlureadingcomprehension', 'bbplaydialogsameordifferent', 'bbrepeatcopylogic', 'bbstrangestories', 'bbstrategyqa', 'bbsymbolinterpretation', 'bbvitamincfactverification', 'bbwinowhy']
-2024-09-11 12:12:53,268 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'lukaemon/bbh', 'boolean_expressions']	 Datasize 128
-2024-09-11 12:12:54,203 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'tasksource/bigbench', 'causal_judgment']	 Datasize 38
-2024-09-11 12:12:55,111 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'tasksource/bigbench', 'date_understanding']	 Datasize 73
-2024-09-11 12:12:55,785 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'tasksource/bigbench', 'disambiguation_qa']	 Datasize 51
-2024-09-11 12:12:56,465 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'tasksource/bigbench', 'dyck_languages']	 Datasize 128
-2024-09-11 12:12:57,202 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'tasksource/bigbench', 'formal_fallacies_syllogisms_negation']	 Datasize 128
-2024-09-11 12:12:57,833 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'tasksource/bigbench', 'geometric_shapes']	 Datasize 71
-2024-09-11 12:12:58,288 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'tasksource/bigbench', 'hyperbaton']	 Datasize 128
-2024-09-11 12:12:58,944 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'tasksource/bigbench', 'logical_deduction']	 Datasize 128
-2024-09-11 12:12:59,695 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'tasksource/bigbench', 'movie_recommendation']	 Datasize 100
-2024-09-11 12:13:00,333 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'lukaemon/bbh', 'multistep_arithmetic_two']	 Datasize 128
-2024-09-11 12:13:01,013 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'tasksource/bigbench', 'navigate']	 Datasize 128
-2024-09-11 12:13:01,652 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'tasksource/bigbench', 'object_counting']	 Datasize 128
-2024-09-11 12:13:02,102 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'tasksource/bigbench', 'penguins_in_a_table']	 Datasize 29
-2024-09-11 12:13:03,039 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'tasksource/bigbench', 'reasoning_about_colored_objects']	 Datasize 128
-2024-09-11 12:13:03,485 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'tasksource/bigbench', 'ruin_names']	 Datasize 89
-2024-09-11 12:13:04,229 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'tasksource/bigbench', 'salient_translation_error_detection']	 Datasize 128
-2024-09-11 12:13:04,895 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'tasksource/bigbench', 'snarks']	 Datasize 36
-2024-09-11 12:13:05,470 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'tasksource/bigbench', 'sports_understanding']	 Datasize 128
-2024-09-11 12:13:06,097 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'tasksource/bigbench', 'temporal_sequences']	 Datasize 128
-2024-09-11 12:13:06,888 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'tasksource/bigbench', 'tracking_shuffled_objects']	 Datasize 128
-2024-09-11 12:13:07,509 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'lukaemon/bbh', 'web_of_lies']	 Datasize 128
-2024-09-11 12:13:08,207 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'tasksource/bigbench', 'word_sorting']	 Datasize 128
-2024-09-11 12:13:08,872 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'tasksource/bigbench', 'auto_debugging']	 Datasize 16
-2024-09-11 12:13:09,336 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'tasksource/bigbench', 'bbq_lite_json']	 Datasize 128
-2024-09-11 12:13:10,051 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'tasksource/bigbench', 'code_line_description']	 Datasize 16
-2024-09-11 12:13:10,666 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'tasksource/bigbench', 'conceptual_combinations']	 Datasize 19
-2024-09-11 12:13:11,334 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'tasksource/bigbench', 'conlang_translation']	 Datasize 32
-2024-09-11 12:13:12,346 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'tasksource/bigbench', 'emoji_movie']	 Datasize 20
-2024-09-11 12:13:13,003 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'tasksource/bigbench', 'hindu_knowledge']	 Datasize 35
-2024-09-11 12:13:13,628 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'tasksource/bigbench', 'known_unknowns']	 Datasize 16
-2024-09-11 12:13:14,295 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'tasksource/bigbench', 'language_identification']	 Datasize 128
-2024-09-11 12:13:14,927 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'tasksource/bigbench', 'linguistics_puzzles']	 Datasize 128
-2024-09-11 12:13:15,592 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'tasksource/bigbench', 'logic_grid_puzzle']	 Datasize 128
-2024-09-11 12:13:16,266 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'tasksource/bigbench', 'misconceptions_russian']	 Datasize 16
-2024-09-11 12:13:17,076 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'tasksource/bigbench', 'novel_concepts']	 Datasize 16
-2024-09-11 12:13:17,559 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'tasksource/bigbench', 'operators']	 Datasize 42
-2024-09-11 12:13:19,634 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'tasksource/bigbench', 'parsinlu_reading_comprehension']	 Datasize 103
-2024-09-11 12:13:20,109 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'tasksource/bigbench', 'play_dialog_same_or_different']	 Datasize 128
-2024-09-11 12:13:20,768 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'tasksource/bigbench', 'repeat_copy_logic']	 Datasize 16
-2024-09-11 12:13:21,397 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'tasksource/bigbench', 'strange_stories']	 Datasize 34
-2024-09-11 12:13:22,057 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'tasksource/bigbench', 'strategyqa']	 Datasize 128
-2024-09-11 12:13:22,720 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'tasksource/bigbench', 'symbol_interpretation']	 Datasize 128
-2024-09-11 12:13:23,385 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'tasksource/bigbench', 'vitaminc_fact_verification']	 Datasize 128
-2024-09-11 12:13:24,006 - log.txt - [INFO] - Val	Dataset Path: ['huggingface', 'tasksource/bigbench', 'winowhy']	 Datasize 128
-2024-09-11 12:13:24,210 - sentence_transformers.SentenceTransformer - [INFO] - Use pytorch device_name: cuda
-2024-09-11 12:13:24,210 - sentence_transformers.SentenceTransformer - [INFO] - Load pretrained SentenceTransformer: nomic-ai/nomic-embed-text-v1.5
-2024-09-11 12:13:31,883 - transformers_modules.nomic-ai.nomic-bert-2048.4bb68f63016e88e53e48df904c6ab4e6f718e198.modeling_hf_nomic_bert - [WARNING] - <All keys matched successfully>