Spaces:
Runtime error
Runtime error
File size: 3,008 Bytes
8f3eda5 a9853a7 8f3eda5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
from omegaconf import OmegaConf
from torch.cuda import is_available as use_cuda
# Plain-dict description of the model: model/tokenizer identifiers plus the
# list of parameter names stored under "inner_params". The configs in this
# file embed it under their "model" key.
model_config = {
    "name": "google/t5-large-ssm-nq",
    "class_name": "AutoModelForSeq2SeqLM",
    "tokenizer_class": "AutoTokenizer",
    "tokenizer_name": "google/t5-large-ssm-nq",
    # Expands to the DenseReluDense wi/wo weight names of blocks 22 and 23:
    # the four encoder entries (layer.1) first, then the four decoder
    # entries (layer.2), with wi before wo inside each block.
    "inner_params": [
        f"{stack}.block.{block}.layer.{layer}.DenseReluDense.{proj}.weight"
        for stack, layer in (("encoder", 1), ("decoder", 2))
        for block in (22, 23)
        for proj in ("wi", "wo")
    ],
    "pt": None,
    "small_name": "t5-small",
}
# OmegaConf config whose method-specific options sit under the "ft" key
# (with a nested "locality" group). "device" is "cuda" when use_cuda()
# (torch.cuda.is_available) returns true at import time, otherwise "cpu".
ft_config = OmegaConf.create(
    dict(
        device="cuda" if use_cuda() else "cpu",
        edit_lr=5e-6,
        train_base=False,
        grad_clip=100,
        ft=dict(
            verbose=False,
            max_edit_steps=100,
            time_limit=None,
            locality=dict(
                enabled=False,
                oracle=True,
                cedit=1e-2,
                batch_size=1,
            ),
            rank=None,
            opt="RMSprop",
            init_std=0.01,
        ),
        model=model_config,
    )
)
# OmegaConf config whose method-specific options sit under the "lu" key.
# "device" is "cuda" when use_cuda() (torch.cuda.is_available) returns true
# at import time, otherwise "cpu".
lu_config = OmegaConf.create(
    dict(
        device="cuda" if use_cuda() else "cpu",
        lu=dict(
            threshold=2.75,
            onehot_logit=1,
        ),
        model=model_config,
    )
)
# OmegaConf config with only top-level options (no method-specific group).
# "device" is "cuda" when use_cuda() (torch.cuda.is_available) returns true
# at import time, otherwise "cpu".
ke_config = OmegaConf.create(
    dict(
        device="cuda" if use_cuda() else "cpu",
        train_base=False,
        lr=1e-5,
        model=model_config,
    )
)
# OmegaConf config whose method-specific options sit under the "enn" key.
# "device" is "cuda" when use_cuda() (torch.cuda.is_available) returns true
# at import time, otherwise "cpu".
enn_config = OmegaConf.create(
    dict(
        device="cuda" if use_cuda() else "cpu",
        lr=1e-5,
        edit_lr=1e-2,
        lr_lr=1e-3,
        train_base=True,
        grad_clip=100,
        dropout=0,
        no_grad_layers=None,
        enn=dict(
            first_order=False,
            n_edit_steps=1,
        ),
        model=model_config,
    )
)
# OmegaConf config whose method-specific options sit under the "gtn" key.
# "device" is "cuda" when use_cuda() (torch.cuda.is_available) returns true
# at import time, otherwise "cpu".
mend_config = OmegaConf.create(
    dict(
        device="cuda" if use_cuda() else "cpu",
        lr=1e-6,
        edit_lr=1e-4,
        lr_lr=1e-4,
        train_base=True,
        grad_clip=100,
        dropout=0,
        no_grad_layers=None,
        gtn=dict(
            one_sided=False,
            n_hidden=1,
            hidden_dim=None,
            init="id",
            norm=True,
            combine=True,
            x_only=False,
            delta_only=False,
            act="relu",
            rank=1920,
            mlp_class="IDMLP",
            shared=True,
            descent=False,
        ),
        model=model_config,
    )
)
# OmegaConf config whose method-specific options sit under the "rep" key.
# "device" is "cuda" when use_cuda() (torch.cuda.is_available) returns true
# at import time, otherwise "cpu".
serac_config = OmegaConf.create(
    dict(
        device="cuda" if use_cuda() else "cpu",
        lr=1e-5,
        edit_lr=1e-2,
        lr_lr=0,
        train_base=False,
        grad_clip=100,
        dropout=0,
        no_grad_layers=None,
        rep=dict(
            cls_name="distilbert-base-cased",
            cls_class="AutoModel",
            # NOTE(review): this is the string "true", unlike the boolean
            # flags beside it -- confirm the consumer expects a string
            # before changing it.
            supervised="true",
            cos=False,
            freeze=None,
            square=True,
            bound_embeds=False,
            use_all_negatives=False,
            freeze_cntr=False,
            dist_heads=1,
            cross_attend=False,
            lora=None,
            soft_weighting=False,
            checkpoint_grad=False,
            cache_embeds=True,
        ),
        model=model_config,
    )
)
|