Delete ._saes_Qwen_Qwen2.5-Coder-32B-Instruct_batch_top_k/resid_post_layer_50/trainer_2/config.json
Browse files
._saes_Qwen_Qwen2.5-Coder-32B-Instruct_batch_top_k/resid_post_layer_50/trainer_2/config.json
DELETED
|
@@ -1,32 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"trainer": {
|
| 3 |
-
"trainer_class": "BatchTopKTrainer",
|
| 4 |
-
"dict_class": "BatchTopKSAE",
|
| 5 |
-
"lr": 0.0003,
|
| 6 |
-
"steps": 24414,
|
| 7 |
-
"auxk_alpha": 0.03125,
|
| 8 |
-
"warmup_steps": 1000,
|
| 9 |
-
"decay_start": 19531,
|
| 10 |
-
"threshold_beta": 0.999,
|
| 11 |
-
"threshold_start_step": 1000,
|
| 12 |
-
"top_k_aux": 2560,
|
| 13 |
-
"seed": 0,
|
| 14 |
-
"activation_dim": 5120,
|
| 15 |
-
"dict_size": 65536,
|
| 16 |
-
"k": 80,
|
| 17 |
-
"device": "cuda:0",
|
| 18 |
-
"layer": 50,
|
| 19 |
-
"lm_name": "Qwen/Qwen2.5-Coder-32B-Instruct",
|
| 20 |
-
"wandb_name": "BatchTopKTrainer-Qwen/Qwen2.5-Coder-32B-Instruct-resid_post_layer_50_trainer_2",
|
| 21 |
-
"submodule_name": "resid_post_layer_50"
|
| 22 |
-
},
|
| 23 |
-
"buffer": {
|
| 24 |
-
"d_submodule": 5120,
|
| 25 |
-
"io": "out",
|
| 26 |
-
"n_ctxs": 244,
|
| 27 |
-
"ctx_len": 1024,
|
| 28 |
-
"refresh_batch_size": 4,
|
| 29 |
-
"out_batch_size": 2048,
|
| 30 |
-
"device": "cuda:0"
|
| 31 |
-
}
|
| 32 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|