2024-09-11 12:31:08,919 - log.txt - [INFO] - Start experiment chatgpt-instruction-nomic-embedding/t0-v3 2024-09-11 12:31:08,919 - log.txt - [INFO] - { "debug": false, "project_name": "chatgpt-instruction-nomic-embedding", "name": "t0-v3", "project_dir": "/home/pingzhi/phatgoose-cl/src_simple", "data_dir": "/nas-hdd/prateek/data", "output_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs", "config_dir": "/home/pingzhi/phatgoose-cl/src_simple/configs", "seed": 42, "hf_write_token": null, "origin_model": "google/t5-xl-lm-adapt", "model_class": "seq2seq_lm", "model_type": "encdec", "peft_type": "lora", "load_model_dtype": "float32", "val_fraction": 0.2, "dataset": [ "t0" ], "eval_dataset": null, "eval_split": "val", "num_steps": 1500, "effective_train_batch_size": 128, "patience": 3, "verbose": false, "do_test": false, "eval_steps": 100, "save_last": true, "save_best": true, "logging_steps": 5, "gradient_checkpointing": false, "moe_inference": false, "inference_batch_size_scale": 1, "checkpoint_dir_or_path": null, "cl_checkpoint_path": null, "load_checkpoint_dataset": null, "ae_checkpoint_dir": null, "init_datasets": [ "t0-cl-init1" ], "selected_expert_ids": null, "merge_num_clusters": null, "global_clustering": false, "hierarchical_num_clusters": null, "hierarchical_cluster_token_routing": false, "save_router_state_dict": false, "bias_router_embedding_path": null, "bias_input_embedding_path": null, "optimizer": "adamw", "lr": 0.003, "trainable_param_names": ".*lora.*", "scheduler": "linear_decay_with_warmup", "warmup_steps": null, "warmup_ratio": 0.02, "weight_decay": 0, "scale_parameter": true, "mix_precision": "bf16", "gradient_clipping": 1.0, "target_modules": "all-linear", "lora_rank": 16, "lora_alpha": 1, "lora_dropout": 0.0, "use_rslora": false, "init_lora_weights": true, "lora_bias": "none", "moe_router_aux_loss_coef": 0.0, "moe_top_k": 2, "moe_top_p": 1.0, "moe_reweight_output": true, "bias_routing_scale": 0, "bias_routing_dim": -1, "lora_init_method": "usage-based", "gate_init_method": "zero", "zeroshot_tolerance": 0.05, "upper_bound_tolerance": 0.05, "single_lora_gate_train_steps": 200, "molora_gate_train_samples": 1000, "molora_gate_train_steps": 100, "layer_norm_after_train_single_lora": true, "cpu_cont": 96, "run_output_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-v3", "log_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-v3/logs", "prediction_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-v3/prediction", "checkpoint_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-v3/checkpoints", "finish_flag_file": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-v3/exp_completed.txt" } 2024-09-11 12:31:16,181 - log.txt - [INFO] - Tasks ['p3socialiqa', 'p3wiqa', 'p3cosmosqa', 'p3quail', 'p3quartz', 'p3qasc', 'p3commonsenseqa', 'p3quarel', 'p3dream', 'p3sciq', 'p3wikihop', 'p3ropes', 'p3adversarialqa', 'p3duorc', 'p3quoref', 'p3hotpotqa', 'p3wikiqa', 'p3amazonpolarity', 'p3appreviews', 'p3rottentomatoes', 'p3imdb', 'p3yelp', 'p3agnews', 'p3dbpedia14', 'p3trec', 'p3wikibio', 'p3commongen', 'p3cnndailymail', 'p3multinews', 'p3gigaword', 'p3samsum', 'p3xsum', 'p3paws', 'p3qqp', 'p3mrpc', 'p3hswag', 'p3copa', 'p3storycloze', 'p3cb', 'p3rte', 'p3anlir1', 'p3anlir2', 'p3anlir3', 'p3winogrande', 'p3wscfixed', 'p3wic'] 2024-09-11 12:31:19,691 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'social_i_qa'] Num Templates: 4 Datasize 128 2024-09-11 12:31:27,056 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'wiqa'] Num Templates: 2 Datasize 128 2024-09-11 12:31:36,032 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'cosmos_qa'] Num Templates: 10 Datasize 128 2024-09-11 12:31:41,061 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'quail'] Num Templates: 10 Datasize 128 2024-09-11 12:31:45,655 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'quartz'] Num Templates: 8 Datasize 128 2024-09-11 12:31:49,910 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'qasc'] Num Templates: 5 Datasize 128 2024-09-11 12:31:53,940 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'commonsense_qa'] Num Templates: 4 Datasize 128 2024-09-11 12:31:59,871 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'quarel'] Num Templates: 5 Datasize 128 2024-09-11 12:32:03,248 - datasets_modules.datasets.dream.0835c7949b04e4dc7d094375c7b502ae12c6b17dae8e715d8c363257a391545a.dream - [INFO] - ⏳ Generating examples from = /home/pingzhi/.cache/huggingface/datasets/downloads/dbb86d6157ce35037d870dd075e10618881304a1fc78d42a73fff127aba929db 2024-09-11 12:32:03,875 - datasets_modules.datasets.dream.0835c7949b04e4dc7d094375c7b502ae12c6b17dae8e715d8c363257a391545a.dream - [INFO] - ⏳ Generating examples from = /home/pingzhi/.cache/huggingface/datasets/downloads/3c262ee1f86e2b935d3198f592e78680f0d2c509ce07037e657c4ccdd111fb17 2024-09-11 12:32:03,980 - datasets_modules.datasets.dream.0835c7949b04e4dc7d094375c7b502ae12c6b17dae8e715d8c363257a391545a.dream - [INFO] - ⏳ Generating examples from = /home/pingzhi/.cache/huggingface/datasets/downloads/1df6bad6ef1eba61e7a1b53e1a7a8b9213070120a2576e235726c41d68775bd1 2024-09-11 12:32:04,129 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'dream'] Num Templates: 2 Datasize 128 2024-09-11 12:32:07,731 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'sciq'] Num Templates: 4 Datasize 128 2024-09-11 12:32:17,431 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'MoE-UNC/wikihop'] Num Templates: 5 Datasize 128 2024-09-11 12:32:21,595 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'ropes'] Num Templates: 10 Datasize 128 2024-09-11 12:32:26,485 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'adversarial_qa', 'adversarialQA'] Num Templates: 4 Datasize 128 2024-09-11 12:32:34,160 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'duorc', 'ParaphraseRC'] Num Templates: 5 Datasize 128 2024-09-11 12:32:39,060 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'quoref'] Num Templates: 10 Datasize 128 2024-09-11 12:33:11,624 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'hotpot_qa', 'fullwiki'] Num Templates: 5 Datasize 128 2024-09-11 12:33:15,430 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'wiki_qa'] Num Templates: 5 Datasize 128 2024-09-11 12:33:46,127 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'amazon_polarity'] Num Templates: 9 Datasize 128 2024-09-11 12:33:49,335 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'app_reviews'] Num Templates: 1 Datasize 128 2024-09-11 12:33:52,788 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'rotten_tomatoes'] Num Templates: 10 Datasize 128 2024-09-11 12:33:58,154 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'imdb'] Num Templates: 10 Datasize 128 2024-09-11 12:34:12,928 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'yelp_review_full'] Num Templates: 7 Datasize 128 2024-09-11 12:34:16,251 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'ag_news'] Num Templates: 7 Datasize 128 2024-09-11 12:34:23,493 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'dbpedia_14'] Num Templates: 4 Datasize 128 2024-09-11 12:34:26,596 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'trec'] Num Templates: 1 Datasize 100 2024-09-11 12:36:25,678 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'wiki_bio'] Num Templates: 1 Datasize 128 2024-09-11 12:36:31,310 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'common_gen'] Num Templates: 6 Datasize 128 2024-09-11 12:36:54,360 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'cnn_dailymail', '3.0.0'] Num Templates: 7 Datasize 128 2024-09-11 12:37:18,511 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'multi_news'] Num Templates: 5 Datasize 128 2024-09-11 12:38:48,856 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'gigaword'] Num Templates: 7 Datasize 128 2024-09-11 12:38:53,303 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'samsum'] Num Templates: 6 Datasize 128 2024-09-11 12:39:56,978 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'xsum'] Num Templates: 10 Datasize 128 2024-09-11 12:40:02,461 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'paws', 'labeled_final'] Num Templates: 11 Datasize 128 2024-09-11 12:40:08,618 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'glue', 'qqp'] Num Templates: 5 Datasize 128 2024-09-11 12:40:11,539 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'glue', 'mrpc'] Num Templates: 5 Datasize 128 2024-09-11 12:40:24,009 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'hellaswag'] Num Templates: 4 Datasize 128 2024-09-11 12:40:26,356 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'super_glue', 'copa'] Num Templates: 8 Datasize 128 2024-09-11 12:40:30,240 - root - [WARNING] - Tried instantiating `DatasetTemplates` for MoE-UNC/story_cloze, but no prompts found. Please ignore this warning if you are creating new prompts for this dataset. 2024-09-11 12:40:30,274 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'MoE-UNC/story_cloze'] Num Templates: 5 Datasize 128 2024-09-11 12:40:31,536 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'super_glue', 'cb'] Num Templates: 15 Datasize 128 2024-09-11 12:40:33,106 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'super_glue', 'rte'] Num Templates: 10 Datasize 128 2024-09-11 12:40:43,150 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'anli'] Num Templates: 15 Datasize 128 2024-09-11 12:40:46,111 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'anli'] Num Templates: 15 Datasize 128 2024-09-11 12:40:49,644 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'anli'] Num Templates: 15 Datasize 128 2024-09-11 12:40:52,538 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'winogrande', 'winogrande_xl'] Num Templates: 5 Datasize 128 2024-09-11 12:40:53,754 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'super_glue', 'wsc.fixed'] Num Templates: 10 Datasize 128 2024-09-11 12:40:56,186 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'super_glue', 'wic'] Num Templates: 10 Datasize 128 2024-09-11 12:40:56,448 - sentence_transformers.SentenceTransformer - [INFO] - Use pytorch device_name: cuda 2024-09-11 12:40:56,448 - sentence_transformers.SentenceTransformer - [INFO] - Load pretrained SentenceTransformer: nomic-ai/nomic-embed-text-v1.5 2024-09-11 12:40:58,768 - transformers_modules.nomic-ai.nomic-bert-2048.4bb68f63016e88e53e48df904c6ab4e6f718e198.modeling_hf_nomic_bert - [WARNING] -