diff --git a/k128-sae-mlp-32k-seed2/config.json b/k128-sae-mlp-32k-seed2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..47b96306c29f5f0b34a755ef400fbd1b539a3a57 --- /dev/null +++ b/k128-sae-mlp-32k-seed2/config.json @@ -0,0 +1 @@ +{"sae": {"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 128, "multi_topk": false, "skip_connection": false}, "batch_size": 8, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.6.mlp"], "init_seeds": [12], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 1000, "log_to_wandb": true, "run_name": "k128-sae-mlp-32k-seed2", "wandb_log_frequency": 1, "model": "EleutherAI/pythia-160m", "dataset": "/mnt/ssd-1/pile_preshuffled/standard/document.bin", "split": "train", "ctx_len": 2049, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": 4000000, "resume": false, "text_column": "text", "finetune": null, "shuffle_seed": 42, "data_preprocessing_num_proc": 48} \ No newline at end of file diff --git a/k128-sae-mlp-32k-seed2/layers.6.mlp/cfg.json b/k128-sae-mlp-32k-seed2/layers.6.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ceca9fc3149cd1fb2534452364fa658bd9b3964c --- /dev/null +++ b/k128-sae-mlp-32k-seed2/layers.6.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 128, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k128-sae-mlp-32k-seed2/layers.6.mlp/sae.safetensors b/k128-sae-mlp-32k-seed2/layers.6.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14f4bad0448c324ac10e92fe4fe8c81105001c8b --- /dev/null +++ b/k128-sae-mlp-32k-seed2/layers.6.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:982e007f252a2a25ec7c710d4aa0f91c885105a4b1f306a29014fcae08791f34 +size 201461072 diff --git a/k128-sae-mlp-32k-seed2/lr_scheduler.pt b/k128-sae-mlp-32k-seed2/lr_scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..09d44e04064f4b6efd4e262a530222ffda2bae63 --- /dev/null +++ b/k128-sae-mlp-32k-seed2/lr_scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a645210450394a692a612289cb5fe097161dfa420f3c634f6516bc67841ac2b4 +size 1012 diff --git a/k128-sae-mlp-32k-seed2/optimizer.pt b/k128-sae-mlp-32k-seed2/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..041f80c240f87df58c260c344105fa3540d99c35 --- /dev/null +++ b/k128-sae-mlp-32k-seed2/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d2b3073f5967404890d8a79cbd64d43bb0d7b879689950b689c218dcd77656a +size 102316366 diff --git a/k128-sae-mlp-32k-seed2/state.pt b/k128-sae-mlp-32k-seed2/state.pt new file mode 100644 index 0000000000000000000000000000000000000000..01533c33130fc242a2bdcd63d1d2c558f7a84a1a --- /dev/null +++ b/k128-sae-mlp-32k-seed2/state.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:737bd1d625a5ef15ed9dfef1d3972156ee7a9eeb778f2e3314a0eb79957c340e +size 263314 diff --git a/k128-sae-mlp-32k/config.json b/k128-sae-mlp-32k/config.json new file mode 100644 index 0000000000000000000000000000000000000000..731995f4a2669fed9ea7d52a9377d80b6050ecc3 --- /dev/null +++ b/k128-sae-mlp-32k/config.json @@ -0,0 +1 @@ +{"sae": 
{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 128, "multi_topk": false, "skip_connection": false}, "batch_size": 8, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.6.mlp"], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 1000, "log_to_wandb": true, "run_name": "k128-sae-mlp-32k", "wandb_log_frequency": 1, "model": "EleutherAI/pythia-160m", "dataset": "/mnt/ssd-1/pile_preshuffled/standard/document.bin", "split": "train", "ctx_len": 2049, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": 4000000, "resume": false, "finetune": null, "seed": 42, "number_seeds": 1, "data_preprocessing_num_proc": 48} \ No newline at end of file diff --git a/k128-sae-mlp-32k/layers.0/cfg.json b/k128-sae-mlp-32k/layers.0/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ceca9fc3149cd1fb2534452364fa658bd9b3964c --- /dev/null +++ b/k128-sae-mlp-32k/layers.0/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 128, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k128-sae-mlp-32k/layers.0/sae.safetensors b/k128-sae-mlp-32k/layers.0/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..313753d206fd28686dbe2bb83361579006ee06a7 --- /dev/null +++ b/k128-sae-mlp-32k/layers.0/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:381e53945c4e729d56340c87186304271066abfbaaec1daea36570cb7305ba72 +size 201461072 diff --git a/k128-sae-mlp-32k/layers.3/cfg.json b/k128-sae-mlp-32k/layers.3/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ceca9fc3149cd1fb2534452364fa658bd9b3964c --- /dev/null +++ b/k128-sae-mlp-32k/layers.3/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 128, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k128-sae-mlp-32k/layers.3/sae.safetensors b/k128-sae-mlp-32k/layers.3/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5d8edd902e6bd6657b18340ecad135d0777e177d --- /dev/null +++ b/k128-sae-mlp-32k/layers.3/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e196f4c0d8dcecbcec820ecaf99740a7d57953e9e4ffeda6ab6706ef527e161 +size 201461072 diff --git a/k128-sae-mlp-32k/layers.6.mlp/cfg.json b/k128-sae-mlp-32k/layers.6.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ceca9fc3149cd1fb2534452364fa658bd9b3964c --- /dev/null +++ b/k128-sae-mlp-32k/layers.6.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 128, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k128-sae-mlp-32k/layers.6.mlp/sae.safetensors b/k128-sae-mlp-32k/layers.6.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8c6aa14fb741a06f51c1d2ebfd8be81525405d48 --- /dev/null +++ b/k128-sae-mlp-32k/layers.6.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be708e5c0f44bb82718ec6131451a2261ce932632a46891e752f0e5ca5ccd54c +size 201461072 diff --git a/k128-sae-mlp-32k/layers.6/cfg.json b/k128-sae-mlp-32k/layers.6/cfg.json new file mode 
100644 index 0000000000000000000000000000000000000000..ceca9fc3149cd1fb2534452364fa658bd9b3964c --- /dev/null +++ b/k128-sae-mlp-32k/layers.6/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 128, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k128-sae-mlp-32k/layers.6/sae.safetensors b/k128-sae-mlp-32k/layers.6/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6449f453087d4e97460b5278353316411f650263 --- /dev/null +++ b/k128-sae-mlp-32k/layers.6/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:841debc7b22d1c18f30034cfd317c17ad5335cbeea90963138e8d6672455a9f6 +size 201461072 diff --git a/k128-sae-mlp-32k/layers.9/cfg.json b/k128-sae-mlp-32k/layers.9/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ceca9fc3149cd1fb2534452364fa658bd9b3964c --- /dev/null +++ b/k128-sae-mlp-32k/layers.9/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 128, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k128-sae-mlp-32k/layers.9/sae.safetensors b/k128-sae-mlp-32k/layers.9/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..50082aa76a98d37da500bde7292d163da16024ab --- /dev/null +++ b/k128-sae-mlp-32k/layers.9/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfbe2c11cc22575ff61cdfe7217224065696228ac8dbe4e4b0bbb7b32b3ce072 +size 201461072 diff --git a/k128-sae-mlp-32k/lr_scheduler.pt b/k128-sae-mlp-32k/lr_scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..09d44e04064f4b6efd4e262a530222ffda2bae63 --- /dev/null +++ b/k128-sae-mlp-32k/lr_scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a645210450394a692a612289cb5fe097161dfa420f3c634f6516bc67841ac2b4 +size 1012 diff --git a/k128-sae-mlp-32k/optimizer.pt b/k128-sae-mlp-32k/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..10dfc9b6084e4fa733104dec2c791a1f323a5cf2 --- /dev/null +++ b/k128-sae-mlp-32k/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c57f542b71d5e3f8ac02a440577311bee95125ed3a6fe25cb1c426e743f436e1 +size 102316366 diff --git a/k128-sae-mlp-32k/state.pt b/k128-sae-mlp-32k/state.pt new file mode 100644 index 0000000000000000000000000000000000000000..03cc75942de1983c33f5c4788435759981d216bf --- /dev/null +++ b/k128-sae-mlp-32k/state.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0320a194ff05c45b557bd1f8239ddf3513695a8b17d1a50512bac5eb72e2f6b0 +size 263314 diff --git a/k256-gpt2-exp36-seed2/h.6/cfg.json b/k256-gpt2-exp36-seed2/h.6/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1b511ed9f5fb399d4207d4838a6fae2cb5808bb7 --- /dev/null +++ b/k256-gpt2-exp36-seed2/h.6/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 256, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k256-gpt2-exp36-seed2/h.6/sae.safetensors b/k256-gpt2-exp36-seed2/h.6/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4c3976fd176a72d7c12a9f46a9cf9c70fa60c23c --- /dev/null +++ b/k256-gpt2-exp36-seed2/h.6/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:32d9338237034971b45396431c0631a8e3d2e50e78b9a4541e5dc16d4ce25a56 +size 169983304 diff --git a/k256-gpt2-exp36/config.json b/k256-gpt2-exp36/config.json new file mode 100644 index 0000000000000000000000000000000000000000..afe8332b580a598da2ab850663c0cb43604028fc --- /dev/null +++ b/k256-gpt2-exp36/config.json @@ -0,0 +1 @@ +{"sae": {"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 256, "multi_topk": false, "skip_connection": false}, "batch_size": 8, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["h.6"], "init_seeds": [23, 2], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 1000, "log_to_wandb": true, "run_name": "k256-gpt2-exp36", "wandb_log_frequency": 1, "model": "openai-community/gpt2", "dataset": "Skylion007/openwebtext", "split": "train", "ctx_len": 2049, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": 4000000, "resume": false, "text_column": "text", "finetune": null, "shuffle_seed": 42, "data_preprocessing_num_proc": 48} \ No newline at end of file diff --git a/k256-gpt2-exp36/h.6/cfg.json b/k256-gpt2-exp36/h.6/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1b511ed9f5fb399d4207d4838a6fae2cb5808bb7 --- /dev/null +++ b/k256-gpt2-exp36/h.6/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 256, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k256-gpt2-exp36/h.6/sae.safetensors b/k256-gpt2-exp36/h.6/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5ff937028fecdaee8e6490ef631b9236a9bde0b7 --- /dev/null +++ b/k256-gpt2-exp36/h.6/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b703f22d92c8f998a44a756e843cfe57e3d822e72b2509b178547e006c7feab +size 169983304 diff --git a/k256-gpt2-exp36/lr_scheduler.pt b/k256-gpt2-exp36/lr_scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8011b6d8e24e211aa70896912cdcdf4cd828730c --- /dev/null +++ b/k256-gpt2-exp36/lr_scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b6c10047d444b83edad95e97d140ba3caa158f05ea2fcc70e7ead72b318db6c +size 1076 diff --git a/k256-gpt2-exp36/optimizer.pt b/k256-gpt2-exp36/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..502c407371083bf119594b01cbeae5adb0c44008 --- /dev/null +++ b/k256-gpt2-exp36/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8f701e3797f8a65ec9593c3c4cb571badb5656e01dc4cd1e883638731b82a1f +size 172659328 diff --git a/k256-gpt2-exp36/state.pt b/k256-gpt2-exp36/state.pt new file mode 100644 index 0000000000000000000000000000000000000000..166c82a47ce37a8e340bfd9d87d1518612c13de9 --- /dev/null +++ b/k256-gpt2-exp36/state.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9c839c952a20382a40df09ee7e54964e9325753e0981fea02fb600abc2dab03 +size 443724 diff --git a/k256-sae-mlp-32k-seed2/config.json b/k256-sae-mlp-32k-seed2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..08fa7f10bfba85b6e753ec96c74c55e716285868 --- /dev/null +++ b/k256-sae-mlp-32k-seed2/config.json @@ -0,0 +1 @@ +{"sae": {"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 256, "multi_topk": false, "skip_connection": false}, 
"batch_size": 8, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.6.mlp"], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 1000, "log_to_wandb": true, "run_name": "k256-sae-mlp-32k-seed2", "wandb_log_frequency": 1, "model": "EleutherAI/pythia-160m", "dataset": "/mnt/ssd-1/pile_preshuffled/standard/document.bin", "split": "train", "ctx_len": 2049, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": 4000000, "resume": false, "finetune": null, "seed": 5, "number_seeds": 1, "data_preprocessing_num_proc": 48} \ No newline at end of file diff --git a/k256-sae-mlp-32k-seed2/layers.0/cfg.json b/k256-sae-mlp-32k-seed2/layers.0/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..8875d035b6f110b2106215c1c42dff81b4a2c752 --- /dev/null +++ b/k256-sae-mlp-32k-seed2/layers.0/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 256, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k256-sae-mlp-32k-seed2/layers.0/sae.safetensors b/k256-sae-mlp-32k-seed2/layers.0/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eee3a548254f3a04b88fa537e424ddf042ff3b45 --- /dev/null +++ b/k256-sae-mlp-32k-seed2/layers.0/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8131653080e96b66b9d30ce0ee7677f72cbf8819060d73b2d2f2a37b25340357 +size 201461072 diff --git a/k256-sae-mlp-32k-seed2/layers.3/cfg.json b/k256-sae-mlp-32k-seed2/layers.3/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..8875d035b6f110b2106215c1c42dff81b4a2c752 --- /dev/null +++ b/k256-sae-mlp-32k-seed2/layers.3/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 256, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k256-sae-mlp-32k-seed2/layers.3/sae.safetensors b/k256-sae-mlp-32k-seed2/layers.3/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d3b9469ec9897fc83fd64e4e8417fc8edb6f5265 --- /dev/null +++ b/k256-sae-mlp-32k-seed2/layers.3/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4879c86e864cedc4443017bd665e3e15bfa580bc9a82028beeb24408a32f3e9d +size 201461072 diff --git a/k256-sae-mlp-32k-seed2/layers.6.mlp/cfg.json b/k256-sae-mlp-32k-seed2/layers.6.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..8875d035b6f110b2106215c1c42dff81b4a2c752 --- /dev/null +++ b/k256-sae-mlp-32k-seed2/layers.6.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 256, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k256-sae-mlp-32k-seed2/layers.6.mlp/sae.safetensors b/k256-sae-mlp-32k-seed2/layers.6.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8791fa229e6871361094f46dd158626563bc5303 --- /dev/null +++ b/k256-sae-mlp-32k-seed2/layers.6.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8101a6175d09bc3df7a1e2a279e48420a4420b6353b864b543a104875a26938f +size 201461072 diff --git a/k256-sae-mlp-32k-seed2/layers.6/cfg.json b/k256-sae-mlp-32k-seed2/layers.6/cfg.json new file mode 100644 
index 0000000000000000000000000000000000000000..8875d035b6f110b2106215c1c42dff81b4a2c752 --- /dev/null +++ b/k256-sae-mlp-32k-seed2/layers.6/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 256, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k256-sae-mlp-32k-seed2/layers.6/sae.safetensors b/k256-sae-mlp-32k-seed2/layers.6/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..13baec572231549ce193b3f88631f505ef54c7dc --- /dev/null +++ b/k256-sae-mlp-32k-seed2/layers.6/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dfc911a48ea63fdc6447df969fe85fa7271190d56ddff72859b13250de9644a +size 201461072 diff --git a/k256-sae-mlp-32k-seed2/layers.9/cfg.json b/k256-sae-mlp-32k-seed2/layers.9/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..8875d035b6f110b2106215c1c42dff81b4a2c752 --- /dev/null +++ b/k256-sae-mlp-32k-seed2/layers.9/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 256, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k256-sae-mlp-32k-seed2/layers.9/sae.safetensors b/k256-sae-mlp-32k-seed2/layers.9/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..63e426e7a037dd5358816747491ef28b983fc0ab --- /dev/null +++ b/k256-sae-mlp-32k-seed2/layers.9/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:874273198e718a5742f0b4356c43e4448e23e329a43fb32e81fe94fe1b4407f6 +size 201461072 diff --git a/k256-sae-mlp-32k-seed2/lr_scheduler.pt b/k256-sae-mlp-32k-seed2/lr_scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..09d44e04064f4b6efd4e262a530222ffda2bae63 --- /dev/null +++ b/k256-sae-mlp-32k-seed2/lr_scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a645210450394a692a612289cb5fe097161dfa420f3c634f6516bc67841ac2b4 +size 1012 diff --git a/k256-sae-mlp-32k-seed2/optimizer.pt b/k256-sae-mlp-32k-seed2/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5a205b3b24924180c35cc8de8fb8d00e9a5eef61 --- /dev/null +++ b/k256-sae-mlp-32k-seed2/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:193cb2b91a48d8aacbf04f4676a1bd79a0a0e6b768e59e883c904e6e8dbf566f +size 102316366 diff --git a/k256-sae-mlp-32k-seed2/state.pt b/k256-sae-mlp-32k-seed2/state.pt new file mode 100644 index 0000000000000000000000000000000000000000..bcf6408ce6e338f2202f0d1b465d7b88faec5fd7 --- /dev/null +++ b/k256-sae-mlp-32k-seed2/state.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cc77cffb9fe1350dafac9e95c6aee317cac10f58094321e9955bc6ed8f5a2db +size 263314 diff --git a/k256-sae-mlp-32k/config.json b/k256-sae-mlp-32k/config.json new file mode 100644 index 0000000000000000000000000000000000000000..36080533bbf7baed78e1d231d4a85b568810dc5f --- /dev/null +++ b/k256-sae-mlp-32k/config.json @@ -0,0 +1 @@ +{"sae": {"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 256, "multi_topk": false, "skip_connection": false}, "batch_size": 8, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.6.mlp"], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 
1000, "log_to_wandb": true, "run_name": "k256-sae-mlp-32k", "wandb_log_frequency": 1, "model": "EleutherAI/pythia-160m", "dataset": "/mnt/ssd-1/pile_preshuffled/standard/document.bin", "split": "train", "ctx_len": 2049, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": 4000000, "resume": false, "finetune": null, "seed": 5, "number_seeds": 1, "data_preprocessing_num_proc": 48} \ No newline at end of file diff --git a/k256-sae-mlp-32k/layers.6.mlp/cfg.json b/k256-sae-mlp-32k/layers.6.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..8875d035b6f110b2106215c1c42dff81b4a2c752 --- /dev/null +++ b/k256-sae-mlp-32k/layers.6.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 256, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k256-sae-mlp-32k/layers.6.mlp/sae.safetensors b/k256-sae-mlp-32k/layers.6.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..54b6abe4feb2089cb4f8e3183c518a99dd96f9b1 --- /dev/null +++ b/k256-sae-mlp-32k/layers.6.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4344706585162c50e35c2ba9e6124db8a62be22c981b2d145e6f2779aa1fb9fd +size 201461072 diff --git a/k256-sae-mlp-32k/lr_scheduler.pt b/k256-sae-mlp-32k/lr_scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..09d44e04064f4b6efd4e262a530222ffda2bae63 --- /dev/null +++ b/k256-sae-mlp-32k/lr_scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a645210450394a692a612289cb5fe097161dfa420f3c634f6516bc67841ac2b4 +size 1012 diff --git a/k256-sae-mlp-32k/optimizer.pt b/k256-sae-mlp-32k/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4b19eb62c074550c1520800a0ed757c565641db5 --- /dev/null +++ b/k256-sae-mlp-32k/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd3163d8e255e7f61e19f4e8289374c76e105631da8aea840d5017dd5b6444b9 +size 102316366 diff --git a/k256-sae-mlp-32k/state.pt b/k256-sae-mlp-32k/state.pt new file mode 100644 index 0000000000000000000000000000000000000000..abb2275e61ce79390f86a8a35c60b7f4254f6368 --- /dev/null +++ b/k256-sae-mlp-32k/state.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50c5737c16bf78e4e78cbc9284778cdf6e04acd915501c1957827310ddd71b98 +size 263314 diff --git a/k256-sae-mlp-4k-seed2/config.json b/k256-sae-mlp-4k-seed2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..dddf9de362040d5f5a8524bf12d9cf56dda07a27 --- /dev/null +++ b/k256-sae-mlp-4k-seed2/config.json @@ -0,0 +1 @@ +{"sae": {"expansion_factor": 32, "normalize_decoder": true, "num_latents": 4084, "k": 256, "multi_topk": false, "skip_connection": false}, "batch_size": 8, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.6.mlp"], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 1000, "log_to_wandb": true, "run_name": "k256-sae-mlp-4k-seed2", "wandb_log_frequency": 1, "model": "EleutherAI/pythia-160m", "dataset": "/mnt/ssd-1/pile_preshuffled/standard/document.bin", "split": "train", "ctx_len": 2049, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": 4000000, "resume": false, "finetune": null, "seed": 42, "number_seeds": 1, 
"data_preprocessing_num_proc": 48} \ No newline at end of file diff --git a/k256-sae-mlp-4k-seed2/layers.6.mlp/cfg.json b/k256-sae-mlp-4k-seed2/layers.6.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4607bbcb881aadd47e9b3503dadd88a8941fd085 --- /dev/null +++ b/k256-sae-mlp-4k-seed2/layers.6.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 4084, "k": 256, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k256-sae-mlp-4k-seed2/layers.6.mlp/sae.safetensors b/k256-sae-mlp-4k-seed2/layers.6.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d6b7aeeffc1a2dd4c5c685cee5ebaf0bd05e99ca --- /dev/null +++ b/k256-sae-mlp-4k-seed2/layers.6.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:643c62a449756f549e072d322919bf80b59f054524084bb9793b227ec04a38bc +size 25111832 diff --git a/k256-sae-mlp-4k-seed2/lr_scheduler.pt b/k256-sae-mlp-4k-seed2/lr_scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..67f4a53365c2255fe3d69e453e58b71d1d38c20f --- /dev/null +++ b/k256-sae-mlp-4k-seed2/lr_scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d094c2b89e5159477b912d683794fde025bc1ce77bae1a650a8d1fd7d9c5cef +size 1012 diff --git a/k256-sae-mlp-4k-seed2/optimizer.pt b/k256-sae-mlp-4k-seed2/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..2021bd79dd0ee3452fbf7fc6d6f3768e4f4445b4 --- /dev/null +++ b/k256-sae-mlp-4k-seed2/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4003789bb128f9c80b35dcb25303529e91d07739d247c837e9113358d9e15da5 +size 12787664 diff --git a/k256-sae-mlp-4k-seed2/state.pt b/k256-sae-mlp-4k-seed2/state.pt new file mode 100644 index 0000000000000000000000000000000000000000..95945661775f4a548ce7f2b46e91224c646e9965 --- /dev/null +++ b/k256-sae-mlp-4k-seed2/state.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32f3eb5ae34d54c29be1776ceeb45db155bab84593d5ebf57b0eb407bf043dd2 +size 33874 diff --git a/k256-sae-mlp-4k/config.json b/k256-sae-mlp-4k/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c0cec468881d2ddf3f33502d4c881df6da8bd5ce --- /dev/null +++ b/k256-sae-mlp-4k/config.json @@ -0,0 +1 @@ +{"sae": {"expansion_factor": 32, "normalize_decoder": true, "num_latents": 4084, "k": 256, "multi_topk": false, "skip_connection": false}, "batch_size": 8, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.6.mlp"], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 1000, "log_to_wandb": true, "run_name": "k256-sae-mlp-4k", "wandb_log_frequency": 1, "model": "EleutherAI/pythia-160m", "dataset": "/mnt/ssd-1/pile_preshuffled/standard/document.bin", "split": "train", "ctx_len": 2049, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": 4000000, "resume": false, "finetune": null, "seed": 42, "number_seeds": 1, "data_preprocessing_num_proc": 48} \ No newline at end of file diff --git a/k256-sae-mlp-4k/layers.6.mlp/cfg.json b/k256-sae-mlp-4k/layers.6.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4607bbcb881aadd47e9b3503dadd88a8941fd085 --- /dev/null +++ b/k256-sae-mlp-4k/layers.6.mlp/cfg.json @@ -0,0 +1 @@ 
+{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 4084, "k": 256, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k256-sae-mlp-4k/layers.6.mlp/sae.safetensors b/k256-sae-mlp-4k/layers.6.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5d7b54721d2b2b1ca1f3a5fed6a6266585001aa2 --- /dev/null +++ b/k256-sae-mlp-4k/layers.6.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f308e8c65879e180a6a494f83d002bb27f71d1957e6141c74cc6d16a17f7093a +size 25111832 diff --git a/k256-sae-mlp-4k/lr_scheduler.pt b/k256-sae-mlp-4k/lr_scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..67f4a53365c2255fe3d69e453e58b71d1d38c20f --- /dev/null +++ b/k256-sae-mlp-4k/lr_scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d094c2b89e5159477b912d683794fde025bc1ce77bae1a650a8d1fd7d9c5cef +size 1012 diff --git a/k256-sae-mlp-4k/optimizer.pt b/k256-sae-mlp-4k/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..144faeaabb72a0bc7552bd8f4dac496217d39659 --- /dev/null +++ b/k256-sae-mlp-4k/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:724eee7d2f81db6156967a190fa2ea487de5d289b79ba40e4b9961d7409e06b2 +size 12787664 diff --git a/k256-sae-mlp-4k/state.pt b/k256-sae-mlp-4k/state.pt new file mode 100644 index 0000000000000000000000000000000000000000..1543f8995028ebf1050d779a3484f1693b6370a0 --- /dev/null +++ b/k256-sae-mlp-4k/state.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5286d5a82a13ea733127c641b2e8e41317c372d43f47b55460715e2c3a1c7de4 +size 33874 diff --git a/k32-gpt2-exp36-seed2/config.json b/k32-gpt2-exp36-seed2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2c104c29cbbaf9f1476b93c8475ba8af7877de5f --- /dev/null +++ b/k32-gpt2-exp36-seed2/config.json @@ -0,0 +1 @@ +{"sae": {"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false}, "batch_size": 8, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["h.2.mlp", "h.4.mlp", "h.8.mlp"], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 1000, "log_to_wandb": false, "run_name": "k32-smoll-exp36-seed2", "wandb_log_frequency": 1, "model": "openai-community/gpt2", "dataset": "Skylion007/openwebtext", "split": "train", "ctx_len": 2049, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": 4000000, "resume": false, "finetune": null, "seed": 42, "data_preprocessing_num_proc": 64} \ No newline at end of file diff --git a/k32-gpt2-exp36-seed2/h.0.mlp/cfg.json b/k32-gpt2-exp36-seed2/h.0.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..f076a730eafb72f9907a5f57634c346feba760c2 --- /dev/null +++ b/k32-gpt2-exp36-seed2/h.0.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "d_in": 768} \ No newline at end of file diff --git a/k32-gpt2-exp36-seed2/h.0.mlp/sae.safetensors b/k32-gpt2-exp36-seed2/h.0.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..44867039af646ce4b1efbccfe62fab7684e781c2 --- /dev/null +++ b/k32-gpt2-exp36-seed2/h.0.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:8b6322eedf3ab145020f9445e279c6ab68e5578ed4f5dfd845c663c6466d195d +size 169983304 diff --git a/k32-gpt2-exp36-seed2/h.11.mlp/cfg.json b/k32-gpt2-exp36-seed2/h.11.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..f076a730eafb72f9907a5f57634c346feba760c2 --- /dev/null +++ b/k32-gpt2-exp36-seed2/h.11.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "d_in": 768} \ No newline at end of file diff --git a/k32-gpt2-exp36-seed2/h.11.mlp/sae.safetensors b/k32-gpt2-exp36-seed2/h.11.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4208ad7265066e8b40aa21fb3642fdacd30b0184 --- /dev/null +++ b/k32-gpt2-exp36-seed2/h.11.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68786118331fbd548174503ce0f2f37f41eb1e9c1bfe99e57c99c523df5c14b8 +size 169983304 diff --git a/k32-gpt2-exp36-seed2/h.2.mlp/cfg.json b/k32-gpt2-exp36-seed2/h.2.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..f076a730eafb72f9907a5f57634c346feba760c2 --- /dev/null +++ b/k32-gpt2-exp36-seed2/h.2.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "d_in": 768} \ No newline at end of file diff --git a/k32-gpt2-exp36-seed2/h.2.mlp/sae.safetensors b/k32-gpt2-exp36-seed2/h.2.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b31b0a8d4ea527d5dae61bf4ea1515b0c983cf2f --- /dev/null +++ b/k32-gpt2-exp36-seed2/h.2.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fbb630e9955759781970201278d4ebad2b5f65ab68992e758d9d904854e3114 +size 169983304 diff --git a/k32-gpt2-exp36-seed2/h.4.mlp/cfg.json b/k32-gpt2-exp36-seed2/h.4.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..f076a730eafb72f9907a5f57634c346feba760c2 --- /dev/null +++ b/k32-gpt2-exp36-seed2/h.4.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "d_in": 768} \ No newline at end of file diff --git a/k32-gpt2-exp36-seed2/h.4.mlp/sae.safetensors b/k32-gpt2-exp36-seed2/h.4.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e8b26d096b93bbd5335d5af3affaa64ef7f14e23 --- /dev/null +++ b/k32-gpt2-exp36-seed2/h.4.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:daeed99721676853cb45b1d16a0a368a1455412365525242043ff8afa46ecf4e +size 169983304 diff --git a/k32-gpt2-exp36-seed2/h.4/cfg.json b/k32-gpt2-exp36-seed2/h.4/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fda3d79a785f6cfe6bc38fd86a17bb2509a25deb --- /dev/null +++ b/k32-gpt2-exp36-seed2/h.4/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-gpt2-exp36-seed2/h.4/sae.safetensors b/k32-gpt2-exp36-seed2/h.4/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7419dc2e5dc878f8115f5ba1560b38f3960733d2 --- /dev/null +++ b/k32-gpt2-exp36-seed2/h.4/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18fd172ef3aec6218008d2c10c16d8900da8eae091376cc628fe841c0286ea97 +size 169983304 diff --git 
a/k32-gpt2-exp36-seed2/h.6.mlp/cfg.json b/k32-gpt2-exp36-seed2/h.6.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..f076a730eafb72f9907a5f57634c346feba760c2 --- /dev/null +++ b/k32-gpt2-exp36-seed2/h.6.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "d_in": 768} \ No newline at end of file diff --git a/k32-gpt2-exp36-seed2/h.6.mlp/sae.safetensors b/k32-gpt2-exp36-seed2/h.6.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bf2c0b740420190a2d505281f3fab76d5db6160c --- /dev/null +++ b/k32-gpt2-exp36-seed2/h.6.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d7b31e0dcc56098858c4065ed1f69fcdd9b52a50a0ff35ad9830545cbce58db +size 169983304 diff --git a/k32-gpt2-exp36-seed2/h.6/cfg.json b/k32-gpt2-exp36-seed2/h.6/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fda3d79a785f6cfe6bc38fd86a17bb2509a25deb --- /dev/null +++ b/k32-gpt2-exp36-seed2/h.6/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-gpt2-exp36-seed2/h.6/sae.safetensors b/k32-gpt2-exp36-seed2/h.6/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3867e22f7f925898c25da153c2b6c15df52e2826 --- /dev/null +++ b/k32-gpt2-exp36-seed2/h.6/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:794f717de934822b54189469b0ee579de328faeb3a03dbae0bba29ec670b7c58 +size 169983304 diff --git a/k32-gpt2-exp36-seed2/h.8.mlp/cfg.json b/k32-gpt2-exp36-seed2/h.8.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..f076a730eafb72f9907a5f57634c346feba760c2 --- /dev/null +++ b/k32-gpt2-exp36-seed2/h.8.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "d_in": 768} \ No newline at end of file diff --git a/k32-gpt2-exp36-seed2/h.8.mlp/sae.safetensors b/k32-gpt2-exp36-seed2/h.8.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fa43083adf4eb00a64007705d8bb6d232ab3eb85 --- /dev/null +++ b/k32-gpt2-exp36-seed2/h.8.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0617974b7524a464264041b1d0657ea61224438008c46ef20b593c263062729 +size 169983304 diff --git a/k32-gpt2-exp36-seed2/h.8/cfg.json b/k32-gpt2-exp36-seed2/h.8/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fda3d79a785f6cfe6bc38fd86a17bb2509a25deb --- /dev/null +++ b/k32-gpt2-exp36-seed2/h.8/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-gpt2-exp36-seed2/h.8/sae.safetensors b/k32-gpt2-exp36-seed2/h.8/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..feba3eba381fbd3f765b5401159de1ea2b0aa2ab --- /dev/null +++ b/k32-gpt2-exp36-seed2/h.8/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4f80fb2a1cb990813214705eadcbe96cd52882b0802394ab0add9b1980fe984 +size 169983304 diff --git a/k32-gpt2-exp36-seed2/lr_scheduler.pt b/k32-gpt2-exp36-seed2/lr_scheduler.pt new file mode 100644 index 
0000000000000000000000000000000000000000..2f6a156aafa8c2f5831de1ae273484b15a2dfa5c --- /dev/null +++ b/k32-gpt2-exp36-seed2/lr_scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4c761b8d86e462826be2b76a4c50a444881e344be1754340f574f420a531079 +size 1076 diff --git a/k32-gpt2-exp36-seed2/optimizer.pt b/k32-gpt2-exp36-seed2/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..3d3f39a0da0037372dffd296af5a2aa10f7836eb --- /dev/null +++ b/k32-gpt2-exp36-seed2/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70b45c6f14dfd7daa4fab6bf3a8343b7a7eb2f5a32aa06a9ad0df345c51bdcca +size 1019908602 diff --git a/k32-gpt2-exp36-seed2/state.pt b/k32-gpt2-exp36-seed2/state.pt new file mode 100644 index 0000000000000000000000000000000000000000..fe12375a193774082c8810e59eae12e6328cbce4 --- /dev/null +++ b/k32-gpt2-exp36-seed2/state.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df1c641bdd7981a699147b1c2967605acd230d2fdb5c3135c55aa3a930f5f8ec +size 665094 diff --git a/k32-gpt2-exp36-short-seed2/config.json b/k32-gpt2-exp36-short-seed2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c178a6374f294a21a21fd806cdd61fbd1fe45def --- /dev/null +++ b/k32-gpt2-exp36-short-seed2/config.json @@ -0,0 +1 @@ +{"sae": {"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false}, "batch_size": 8, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["h.0.mlp", "h.1.mlp", "h.2.mlp", "h.3.mlp", "h.4.mlp", "h.5.mlp", "h.6.mlp", "h.7.mlp", "h.8.mlp", "h.9.mlp", "h.10.mlp", "h.11.mlp"], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 1000, "log_to_wandb": true, "run_name": "k32-gpt2-exp36-seed2", "wandb_log_frequency": 1, "model": "openai-community/gpt2", "dataset": "Skylion007/openwebtext", "split": "train", "ctx_len": 2049, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": 400000, "resume": false, "finetune": null, "seed": 52, "number_seeds": 1, "data_preprocessing_num_proc": 48} \ No newline at end of file diff --git a/k32-gpt2-exp36-short-seed2/h.0.mlp/cfg.json b/k32-gpt2-exp36-short-seed2/h.0.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fda3d79a785f6cfe6bc38fd86a17bb2509a25deb --- /dev/null +++ b/k32-gpt2-exp36-short-seed2/h.0.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-gpt2-exp36-short-seed2/h.0.mlp/sae.safetensors b/k32-gpt2-exp36-short-seed2/h.0.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..25d29681afc3c85b559d62f7e8ac2e25596f775b --- /dev/null +++ b/k32-gpt2-exp36-short-seed2/h.0.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1f49882a53617389d504845ed2d85df3297cc7727c9e1ce166dbd7212349a34 +size 169983304 diff --git a/k32-gpt2-exp36-short-seed2/h.1.mlp/cfg.json b/k32-gpt2-exp36-short-seed2/h.1.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fda3d79a785f6cfe6bc38fd86a17bb2509a25deb --- /dev/null +++ b/k32-gpt2-exp36-short-seed2/h.1.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, 
"num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-gpt2-exp36-short-seed2/h.1.mlp/sae.safetensors b/k32-gpt2-exp36-short-seed2/h.1.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..02dddee1d961b2ad4d52dddc83ab3aa90775d8ec --- /dev/null +++ b/k32-gpt2-exp36-short-seed2/h.1.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:912c7f4d04a3f83b3686353a1a2b4d15a9bc86a0a9d51e16b875b6869227cccb +size 169983304 diff --git a/k32-gpt2-exp36-short-seed2/h.10.mlp/cfg.json b/k32-gpt2-exp36-short-seed2/h.10.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fda3d79a785f6cfe6bc38fd86a17bb2509a25deb --- /dev/null +++ b/k32-gpt2-exp36-short-seed2/h.10.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-gpt2-exp36-short-seed2/h.10.mlp/sae.safetensors b/k32-gpt2-exp36-short-seed2/h.10.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..67c232fa06b0a98e8b5446c184f105e3095df0ce --- /dev/null +++ b/k32-gpt2-exp36-short-seed2/h.10.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0ba9f769bb73b8236f3d793262461a285d754e484537316b750015f9c4f69f1 +size 169983304 diff --git a/k32-gpt2-exp36-short-seed2/h.11.mlp/cfg.json b/k32-gpt2-exp36-short-seed2/h.11.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fda3d79a785f6cfe6bc38fd86a17bb2509a25deb --- /dev/null +++ b/k32-gpt2-exp36-short-seed2/h.11.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-gpt2-exp36-short-seed2/h.11.mlp/sae.safetensors b/k32-gpt2-exp36-short-seed2/h.11.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2d03d302b5d5b8703b893423a2655e217b0f7f11 --- /dev/null +++ b/k32-gpt2-exp36-short-seed2/h.11.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11e35b5745a25154fe50ff65b9eb544f2cd63a16774d1aaac237e28fe7a5126a +size 169983304 diff --git a/k32-gpt2-exp36-short-seed2/h.2.mlp/cfg.json b/k32-gpt2-exp36-short-seed2/h.2.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fda3d79a785f6cfe6bc38fd86a17bb2509a25deb --- /dev/null +++ b/k32-gpt2-exp36-short-seed2/h.2.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-gpt2-exp36-short-seed2/h.2.mlp/sae.safetensors b/k32-gpt2-exp36-short-seed2/h.2.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7093836b7fde28f73842b9cd20f2c50c9a7a2fc9 --- /dev/null +++ b/k32-gpt2-exp36-short-seed2/h.2.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de254cbba9aac03818236fc9e349b3df9e1d761ffba40514de641b0dab35abfe +size 169983304 diff --git a/k32-gpt2-exp36-short-seed2/h.3.mlp/cfg.json b/k32-gpt2-exp36-short-seed2/h.3.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fda3d79a785f6cfe6bc38fd86a17bb2509a25deb --- /dev/null +++ 
b/k32-gpt2-exp36-short-seed2/h.3.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-gpt2-exp36-short-seed2/h.3.mlp/sae.safetensors b/k32-gpt2-exp36-short-seed2/h.3.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..939004ca2a2fa54e691991c0b0a25b06049f27f6 --- /dev/null +++ b/k32-gpt2-exp36-short-seed2/h.3.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f360e2252c1bd7b87067f1175700efe063fb2da76137a32c9c1057c2be398e2c +size 169983304 diff --git a/k32-gpt2-exp36-short-seed2/h.4.mlp/cfg.json b/k32-gpt2-exp36-short-seed2/h.4.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fda3d79a785f6cfe6bc38fd86a17bb2509a25deb --- /dev/null +++ b/k32-gpt2-exp36-short-seed2/h.4.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-gpt2-exp36-short-seed2/h.4.mlp/sae.safetensors b/k32-gpt2-exp36-short-seed2/h.4.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bf472c3f0f2369e59d4b22843c04fbf167187b6d --- /dev/null +++ b/k32-gpt2-exp36-short-seed2/h.4.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9ef64c198d5be6456f8388309316132ca8c8d6724505136fe845f5c442bcdd4 +size 169983304 diff --git a/k32-gpt2-exp36-short-seed2/h.5.mlp/cfg.json b/k32-gpt2-exp36-short-seed2/h.5.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fda3d79a785f6cfe6bc38fd86a17bb2509a25deb --- /dev/null +++ b/k32-gpt2-exp36-short-seed2/h.5.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-gpt2-exp36-short-seed2/h.5.mlp/sae.safetensors b/k32-gpt2-exp36-short-seed2/h.5.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..777b82768ef5825a28c02b8b37a745613279888f --- /dev/null +++ b/k32-gpt2-exp36-short-seed2/h.5.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:082568ec7199904ab552adbcbf9d12ae3ad752d0c1a2cc846828860b7ab9767e +size 169983304 diff --git a/k32-gpt2-exp36-short-seed2/h.6.mlp/cfg.json b/k32-gpt2-exp36-short-seed2/h.6.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fda3d79a785f6cfe6bc38fd86a17bb2509a25deb --- /dev/null +++ b/k32-gpt2-exp36-short-seed2/h.6.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-gpt2-exp36-short-seed2/h.6.mlp/sae.safetensors b/k32-gpt2-exp36-short-seed2/h.6.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..80f17c0299b24dcca547f0528a6a47ad07e0e03e --- /dev/null +++ b/k32-gpt2-exp36-short-seed2/h.6.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33bcc747e5aae04164fb9e3707ab28c397c467e2904156c892312462f1b4b747 +size 169983304 diff --git a/k32-gpt2-exp36-short-seed2/h.7.mlp/cfg.json b/k32-gpt2-exp36-short-seed2/h.7.mlp/cfg.json new file mode 100644 index 
0000000000000000000000000000000000000000..fda3d79a785f6cfe6bc38fd86a17bb2509a25deb --- /dev/null +++ b/k32-gpt2-exp36-short-seed2/h.7.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-gpt2-exp36-short-seed2/h.7.mlp/sae.safetensors b/k32-gpt2-exp36-short-seed2/h.7.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c51124b8d058da86ffe26d173d544a64acee33a1 --- /dev/null +++ b/k32-gpt2-exp36-short-seed2/h.7.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e98a95a92714ea247679981339efcc836b4306021c0bba2d49380159540043b +size 169983304 diff --git a/k32-gpt2-exp36-short-seed2/h.8.mlp/cfg.json b/k32-gpt2-exp36-short-seed2/h.8.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fda3d79a785f6cfe6bc38fd86a17bb2509a25deb --- /dev/null +++ b/k32-gpt2-exp36-short-seed2/h.8.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-gpt2-exp36-short-seed2/h.8.mlp/sae.safetensors b/k32-gpt2-exp36-short-seed2/h.8.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c90d877eca8b997576a91f47e35fecdd0592cd42 --- /dev/null +++ b/k32-gpt2-exp36-short-seed2/h.8.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f5062ca83da03ac84ee605553b1e02474d079014683d860820516f7c978bb15 +size 169983304 diff --git a/k32-gpt2-exp36-short-seed2/h.9.mlp/cfg.json b/k32-gpt2-exp36-short-seed2/h.9.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fda3d79a785f6cfe6bc38fd86a17bb2509a25deb --- /dev/null +++ b/k32-gpt2-exp36-short-seed2/h.9.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-gpt2-exp36-short-seed2/h.9.mlp/sae.safetensors b/k32-gpt2-exp36-short-seed2/h.9.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e0fa67037e13cfd0a47776c0cbaf23879bd99e11 --- /dev/null +++ b/k32-gpt2-exp36-short-seed2/h.9.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f1237bb93ae13ba3240fa74d01628eb9592312cbf9ee85d68bef524e685c6da +size 169983304 diff --git a/k32-gpt2-exp36-short-seed2/lr_scheduler.pt b/k32-gpt2-exp36-short-seed2/lr_scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..cb95da51ee741a5452c1f5d9786a8cb4bb0a4137 --- /dev/null +++ b/k32-gpt2-exp36-short-seed2/lr_scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf26bbff97a93a091edc846ee9792eb8ecc027d5ff8cb3040840a41528d1f924 +size 1268 diff --git a/k32-gpt2-exp36-short-seed2/optimizer.pt b/k32-gpt2-exp36-short-seed2/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d23ac2ea99efb3b723428bc929d03d880cb440f1 --- /dev/null +++ b/k32-gpt2-exp36-short-seed2/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:250e14c971a4956df55ebd9c718a35b484a19d78b73b543129ebb9480d47e428 +size 1035940730 diff --git a/k32-gpt2-exp36-short-seed2/state.pt b/k32-gpt2-exp36-short-seed2/state.pt new file mode 100644 index 
0000000000000000000000000000000000000000..4adadefd036b2e2d74d77a4a3f5a3e1476e918f0 --- /dev/null +++ b/k32-gpt2-exp36-short-seed2/state.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:492276a7cc4781e7f7443ed06a8376e5439e3ccd0346367c395e340e8db7b3be +size 2657554 diff --git a/k32-gpt2-exp36-short/config.json b/k32-gpt2-exp36-short/config.json new file mode 100644 index 0000000000000000000000000000000000000000..14b8c9d690f87532d9a7711797b55359cc64c4e9 --- /dev/null +++ b/k32-gpt2-exp36-short/config.json @@ -0,0 +1 @@ +{"sae": {"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false}, "batch_size": 8, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["h.0.mlp", "h.1.mlp", "h.2.mlp", "h.3.mlp", "h.4.mlp", "h.5.mlp", "h.6.mlp", "h.7.mlp", "h.8.mlp", "h.9.mlp", "h.10.mlp", "h.11.mlp"], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 1000, "log_to_wandb": true, "run_name": "k32-gpt2-exp36", "wandb_log_frequency": 1, "model": "openai-community/gpt2", "dataset": "Skylion007/openwebtext", "split": "train", "ctx_len": 2049, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": 400000, "resume": false, "finetune": null, "seed": 42, "number_seeds": 1, "data_preprocessing_num_proc": 48} \ No newline at end of file diff --git a/k32-gpt2-exp36-short/h.0.mlp/cfg.json b/k32-gpt2-exp36-short/h.0.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fda3d79a785f6cfe6bc38fd86a17bb2509a25deb --- /dev/null +++ b/k32-gpt2-exp36-short/h.0.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-gpt2-exp36-short/h.0.mlp/sae.safetensors b/k32-gpt2-exp36-short/h.0.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8311af034d98d80e4ba643004ac2819d8e093d1c --- /dev/null +++ b/k32-gpt2-exp36-short/h.0.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14355ec073f51837ae4df9341f1557d4d948ac306426073572fc77271510bfd1 +size 169983304 diff --git a/k32-gpt2-exp36-short/h.1.mlp/cfg.json b/k32-gpt2-exp36-short/h.1.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fda3d79a785f6cfe6bc38fd86a17bb2509a25deb --- /dev/null +++ b/k32-gpt2-exp36-short/h.1.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-gpt2-exp36-short/h.1.mlp/sae.safetensors b/k32-gpt2-exp36-short/h.1.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..90a1bb75b990df865b6903c2c5c84e07b11d1a44 --- /dev/null +++ b/k32-gpt2-exp36-short/h.1.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17b8dbccd05d09e6077acd501dca7e2cec626eb4f3217979da9cef623d38e744 +size 169983304 diff --git a/k32-gpt2-exp36-short/h.10.mlp/cfg.json b/k32-gpt2-exp36-short/h.10.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fda3d79a785f6cfe6bc38fd86a17bb2509a25deb --- /dev/null +++ b/k32-gpt2-exp36-short/h.10.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": 
true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-gpt2-exp36-short/h.10.mlp/sae.safetensors b/k32-gpt2-exp36-short/h.10.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a83fb2e444cd16ea59889ed255b74001b9415a92 --- /dev/null +++ b/k32-gpt2-exp36-short/h.10.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6758ab9ce9f241d20a50bf5ba3826f2d3c95872a8086b4161d338f194b9309be +size 169983304 diff --git a/k32-gpt2-exp36-short/h.11.mlp/cfg.json b/k32-gpt2-exp36-short/h.11.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fda3d79a785f6cfe6bc38fd86a17bb2509a25deb --- /dev/null +++ b/k32-gpt2-exp36-short/h.11.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-gpt2-exp36-short/h.11.mlp/sae.safetensors b/k32-gpt2-exp36-short/h.11.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..55539665c390d1c7ecc1f717d5feba8d35114f08 --- /dev/null +++ b/k32-gpt2-exp36-short/h.11.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:049a789e91212894aaed76d5c9a8cc90a3a1070a32fe3d1c5ff13bacf57472ff +size 169983304 diff --git a/k32-gpt2-exp36-short/h.2.mlp/cfg.json b/k32-gpt2-exp36-short/h.2.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fda3d79a785f6cfe6bc38fd86a17bb2509a25deb --- /dev/null +++ b/k32-gpt2-exp36-short/h.2.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-gpt2-exp36-short/h.2.mlp/sae.safetensors b/k32-gpt2-exp36-short/h.2.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..22ed3f5ad60f714f40629c7c453bafb8784c1f3a --- /dev/null +++ b/k32-gpt2-exp36-short/h.2.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e59dd7d8b8acfd5da0cd4f2e6bc15c548da54a385f30266cfa87fc60449104f4 +size 169983304 diff --git a/k32-gpt2-exp36-short/h.3.mlp/cfg.json b/k32-gpt2-exp36-short/h.3.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fda3d79a785f6cfe6bc38fd86a17bb2509a25deb --- /dev/null +++ b/k32-gpt2-exp36-short/h.3.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-gpt2-exp36-short/h.3.mlp/sae.safetensors b/k32-gpt2-exp36-short/h.3.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3972e7cffc1ed119be9e29b20d505f76ff5457dd --- /dev/null +++ b/k32-gpt2-exp36-short/h.3.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f4b7916d649b1781eee6d038888a3eaeac2fce3cc3823440975a5c91b6d80fa +size 169983304 diff --git a/k32-gpt2-exp36-short/h.4.mlp/cfg.json b/k32-gpt2-exp36-short/h.4.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fda3d79a785f6cfe6bc38fd86a17bb2509a25deb --- /dev/null +++ b/k32-gpt2-exp36-short/h.4.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": 
false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-gpt2-exp36-short/h.4.mlp/sae.safetensors b/k32-gpt2-exp36-short/h.4.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..73996ca5293b99a8a4092436fa7b51370ce0f2b8 --- /dev/null +++ b/k32-gpt2-exp36-short/h.4.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01fb0cc99784e6f0a536babb0ba812f10f850dec6e4e43c588e89b54d2ece21d +size 169983304 diff --git a/k32-gpt2-exp36-short/h.5.mlp/cfg.json b/k32-gpt2-exp36-short/h.5.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fda3d79a785f6cfe6bc38fd86a17bb2509a25deb --- /dev/null +++ b/k32-gpt2-exp36-short/h.5.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-gpt2-exp36-short/h.5.mlp/sae.safetensors b/k32-gpt2-exp36-short/h.5.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..934683f3320952c425b684917ab04568a748d819 --- /dev/null +++ b/k32-gpt2-exp36-short/h.5.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db9d384af781d16c8ef3143f61b5c62c8bf0d9f415def2fbccbbf530d7e6f6df +size 169983304 diff --git a/k32-gpt2-exp36-short/h.6.mlp/cfg.json b/k32-gpt2-exp36-short/h.6.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fda3d79a785f6cfe6bc38fd86a17bb2509a25deb --- /dev/null +++ b/k32-gpt2-exp36-short/h.6.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-gpt2-exp36-short/h.6.mlp/sae.safetensors b/k32-gpt2-exp36-short/h.6.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dcfdff2904a44a86db91b7f6306939697c066352 --- /dev/null +++ b/k32-gpt2-exp36-short/h.6.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51fbe1655b256dfd75a938500d75d63ab4c3dd7e3444a9964af1fd8bdd202665 +size 169983304 diff --git a/k32-gpt2-exp36-short/h.7.mlp/cfg.json b/k32-gpt2-exp36-short/h.7.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fda3d79a785f6cfe6bc38fd86a17bb2509a25deb --- /dev/null +++ b/k32-gpt2-exp36-short/h.7.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-gpt2-exp36-short/h.7.mlp/sae.safetensors b/k32-gpt2-exp36-short/h.7.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9081134b7f587f912a8ce3a075962889e1086468 --- /dev/null +++ b/k32-gpt2-exp36-short/h.7.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:415c4d4c3ca89ef2baf964d0ce0319ba4c9ef7cd38d4a545bc0e4fe755f7f44b +size 169983304 diff --git a/k32-gpt2-exp36-short/h.8.mlp/cfg.json b/k32-gpt2-exp36-short/h.8.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fda3d79a785f6cfe6bc38fd86a17bb2509a25deb --- /dev/null +++ b/k32-gpt2-exp36-short/h.8.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline 
at end of file diff --git a/k32-gpt2-exp36-short/h.8.mlp/sae.safetensors b/k32-gpt2-exp36-short/h.8.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9b39aaef792522cf020f473684ee2bc798094a12 --- /dev/null +++ b/k32-gpt2-exp36-short/h.8.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cfa6c0edffe7bc38bb50f7f9eaf7e138fca9025daebd435a67d90b6e3a0283e +size 169983304 diff --git a/k32-gpt2-exp36-short/h.9.mlp/cfg.json b/k32-gpt2-exp36-short/h.9.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fda3d79a785f6cfe6bc38fd86a17bb2509a25deb --- /dev/null +++ b/k32-gpt2-exp36-short/h.9.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-gpt2-exp36-short/h.9.mlp/sae.safetensors b/k32-gpt2-exp36-short/h.9.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a0213219b3a4319bface3aad264f812d3d349cac --- /dev/null +++ b/k32-gpt2-exp36-short/h.9.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:639ceffc6382fe0e6a8b7ba0f7c28849978338d68d39bffcfba22074da0ba88a +size 169983304 diff --git a/k32-gpt2-exp36-short/lr_scheduler.pt b/k32-gpt2-exp36-short/lr_scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..cb95da51ee741a5452c1f5d9786a8cb4bb0a4137 --- /dev/null +++ b/k32-gpt2-exp36-short/lr_scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf26bbff97a93a091edc846ee9792eb8ecc027d5ff8cb3040840a41528d1f924 +size 1268 diff --git a/k32-gpt2-exp36-short/optimizer.pt b/k32-gpt2-exp36-short/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b271851301ae59b329cfe83453512034851cfbca --- /dev/null +++ b/k32-gpt2-exp36-short/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:778c42eb41bea857498ef4bdc5167bee69a42105dc25be9f37ff3464ef648ce2 +size 1035940730 diff --git a/k32-gpt2-exp36-short/state.pt b/k32-gpt2-exp36-short/state.pt new file mode 100644 index 0000000000000000000000000000000000000000..5ff02b297b6534d819ae6f94e2f0844dcd510179 --- /dev/null +++ b/k32-gpt2-exp36-short/state.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13eac9c89e35a6cd31c238878711f000bc18a2eda20d308c8153347a30af462a +size 2657554 diff --git a/k32-gpt2-exp36/config.json b/k32-gpt2-exp36/config.json new file mode 100644 index 0000000000000000000000000000000000000000..452d6d6739800a09d7a049376138c2bb0c50f3ad --- /dev/null +++ b/k32-gpt2-exp36/config.json @@ -0,0 +1 @@ +{"sae": {"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false}, "batch_size": 8, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["h.2.mlp", "h.4.mlp", "h.8.mlp"], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 1000, "log_to_wandb": false, "run_name": "k32-smoll-exp36", "wandb_log_frequency": 1, "model": "openai-community/gpt2", "dataset": "Skylion007/openwebtext", "split": "train", "ctx_len": 2049, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": 4000000, "resume": false, "finetune": null, "seed": 42, "data_preprocessing_num_proc": 64} \ No newline at end of file diff 
--git a/k32-gpt2-exp36/h.0.mlp/cfg.json b/k32-gpt2-exp36/h.0.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..f076a730eafb72f9907a5f57634c346feba760c2 --- /dev/null +++ b/k32-gpt2-exp36/h.0.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "d_in": 768} \ No newline at end of file diff --git a/k32-gpt2-exp36/h.0.mlp/sae.safetensors b/k32-gpt2-exp36/h.0.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..66d388dc5ff87ed57389abb8f18598ea2b745cb4 --- /dev/null +++ b/k32-gpt2-exp36/h.0.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3bab045982e272e5ea354beee6668deae53e07600c0e671ac7d506fa6337c91 +size 169983304 diff --git a/k32-gpt2-exp36/h.11.mlp/cfg.json b/k32-gpt2-exp36/h.11.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..f076a730eafb72f9907a5f57634c346feba760c2 --- /dev/null +++ b/k32-gpt2-exp36/h.11.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "d_in": 768} \ No newline at end of file diff --git a/k32-gpt2-exp36/h.11.mlp/sae.safetensors b/k32-gpt2-exp36/h.11.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..356074de4168243c2a56643d176011084c4e4440 --- /dev/null +++ b/k32-gpt2-exp36/h.11.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac601136b22dd28ab77594bdc2a2993154710698cb0287f8d454f5871bb979d5 +size 169983304 diff --git a/k32-gpt2-exp36/h.2.mlp/cfg.json b/k32-gpt2-exp36/h.2.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..f076a730eafb72f9907a5f57634c346feba760c2 --- /dev/null +++ b/k32-gpt2-exp36/h.2.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "d_in": 768} \ No newline at end of file diff --git a/k32-gpt2-exp36/h.2.mlp/sae.safetensors b/k32-gpt2-exp36/h.2.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..37c045e726afab180b97e031494c6e5fb3fbf0c2 --- /dev/null +++ b/k32-gpt2-exp36/h.2.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c9e36f57a4f9a7ce91009336208402bd355212bf6f81b2a6f5a33547ea9c0f4 +size 169983304 diff --git a/k32-gpt2-exp36/h.4.mlp/cfg.json b/k32-gpt2-exp36/h.4.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..f076a730eafb72f9907a5f57634c346feba760c2 --- /dev/null +++ b/k32-gpt2-exp36/h.4.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "d_in": 768} \ No newline at end of file diff --git a/k32-gpt2-exp36/h.4.mlp/sae.safetensors b/k32-gpt2-exp36/h.4.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ce84c3fcd935feaf4d923661a5c11b79c2d8450d --- /dev/null +++ b/k32-gpt2-exp36/h.4.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6814a7764687786fcbdbd27fb1f4d6ef3a110f82f0a6c59e18ccb4fc3652af9f +size 169983304 diff --git a/k32-gpt2-exp36/h.4/cfg.json b/k32-gpt2-exp36/h.4/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fda3d79a785f6cfe6bc38fd86a17bb2509a25deb --- /dev/null +++ b/k32-gpt2-exp36/h.4/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, 
"num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-gpt2-exp36/h.4/sae.safetensors b/k32-gpt2-exp36/h.4/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..16273d18e80ce967c7aa2692fc4efca234ef00f1 --- /dev/null +++ b/k32-gpt2-exp36/h.4/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f244633b973a8f515eb853d9d433ddce279219250bfd1bb60ca338132a95631b +size 169983304 diff --git a/k32-gpt2-exp36/h.6.mlp/cfg.json b/k32-gpt2-exp36/h.6.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..f076a730eafb72f9907a5f57634c346feba760c2 --- /dev/null +++ b/k32-gpt2-exp36/h.6.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "d_in": 768} \ No newline at end of file diff --git a/k32-gpt2-exp36/h.6.mlp/sae.safetensors b/k32-gpt2-exp36/h.6.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..601baf8d78e2ea6f48ad018205dbf25f7c6cbfec --- /dev/null +++ b/k32-gpt2-exp36/h.6.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b74e39f2a8c522fe36fa8fea6d21012e1e0106f613a656f999d9222d5683eb4 +size 169983304 diff --git a/k32-gpt2-exp36/h.6/cfg.json b/k32-gpt2-exp36/h.6/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fda3d79a785f6cfe6bc38fd86a17bb2509a25deb --- /dev/null +++ b/k32-gpt2-exp36/h.6/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-gpt2-exp36/h.6/sae.safetensors b/k32-gpt2-exp36/h.6/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..94422ef4de00f6b14158a77623a70e5277c9de52 --- /dev/null +++ b/k32-gpt2-exp36/h.6/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fda1b0c031a8be3f3826ec58e117d368e751ff08d508d17a48d67a365b3519b3 +size 169983304 diff --git a/k32-gpt2-exp36/h.8.mlp/cfg.json b/k32-gpt2-exp36/h.8.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..f076a730eafb72f9907a5f57634c346feba760c2 --- /dev/null +++ b/k32-gpt2-exp36/h.8.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "d_in": 768} \ No newline at end of file diff --git a/k32-gpt2-exp36/h.8.mlp/sae.safetensors b/k32-gpt2-exp36/h.8.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8131e4e6ad6d27adf1ea7d2f117f4f3a27efdedd --- /dev/null +++ b/k32-gpt2-exp36/h.8.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0b5aec20723a1db12f67f8307bec43b97ec72e82eca175c9041e300737e8c7a +size 169983304 diff --git a/k32-gpt2-exp36/h.8/cfg.json b/k32-gpt2-exp36/h.8/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fda3d79a785f6cfe6bc38fd86a17bb2509a25deb --- /dev/null +++ b/k32-gpt2-exp36/h.8/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-gpt2-exp36/h.8/sae.safetensors b/k32-gpt2-exp36/h.8/sae.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..56d2a5f095fb27d2ad8531cf537bbdb28cbba34c --- /dev/null +++ b/k32-gpt2-exp36/h.8/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95411e550bbfd09f641a29e0d5bc51a47af528388e32995b3acaf2eb05c15ff9 +size 169983304 diff --git a/k32-gpt2-exp36/lr_scheduler.pt b/k32-gpt2-exp36/lr_scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2f6a156aafa8c2f5831de1ae273484b15a2dfa5c --- /dev/null +++ b/k32-gpt2-exp36/lr_scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4c761b8d86e462826be2b76a4c50a444881e344be1754340f574f420a531079 +size 1076 diff --git a/k32-gpt2-exp36/optimizer.pt b/k32-gpt2-exp36/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f314c231f11968df0d6dcbfe2bf17493d232570c --- /dev/null +++ b/k32-gpt2-exp36/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43458ef875e528deb6f224cbe10d00f49acb3ec11e4e40a9db90557b725b69e7 +size 1019908602 diff --git a/k32-gpt2-exp36/state.pt b/k32-gpt2-exp36/state.pt new file mode 100644 index 0000000000000000000000000000000000000000..2448fe94cab1713098d27c464b3889b484f3d3c6 --- /dev/null +++ b/k32-gpt2-exp36/state.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a5bc2db10b9f36a309333d812571b7f8ce7b655901621c84cec10821d41707c +size 665094 diff --git a/k32-gpt2-exp6-seed2/h.6.mlp/cfg.json b/k32-gpt2-exp6-seed2/h.6.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..8b892e77b606c9df4198ffcb322dcae7eebc3e47 --- /dev/null +++ b/k32-gpt2-exp6-seed2/h.6.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 6, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-gpt2-exp6-seed2/h.6.mlp/sae.safetensors b/k32-gpt2-exp6-seed2/h.6.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1758c7914e8c7d7879672422edfe90e5f955e6ec --- /dev/null +++ b/k32-gpt2-exp6-seed2/h.6.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2cc7cc7b28504fb8a7711cef1d9a23bec26a4906e55de6aebf21dd2a4cd9d1d +size 28333384 diff --git a/k32-gpt2-exp6/config.json b/k32-gpt2-exp6/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8f07d97bffcc4ae3150369896883e6a1a621592b --- /dev/null +++ b/k32-gpt2-exp6/config.json @@ -0,0 +1 @@ +{"sae": {"expansion_factor": 6, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false}, "batch_size": 8, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["h.6.mlp"], "init_seeds": [23, 24], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 1000, "log_to_wandb": true, "run_name": "k32-gp2-exp6", "wandb_log_frequency": 1, "model": "openai-community/gpt2", "dataset": "Skylion007/openwebtext", "split": "train", "ctx_len": 2049, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": 4000000, "resume": false, "text_column": "text", "finetune": null, "shuffle_seed": 42, "data_preprocessing_num_proc": 48} \ No newline at end of file diff --git a/k32-gpt2-exp6/h.6.mlp/cfg.json b/k32-gpt2-exp6/h.6.mlp/cfg.json new file mode 100644 index 
0000000000000000000000000000000000000000..8b892e77b606c9df4198ffcb322dcae7eebc3e47 --- /dev/null +++ b/k32-gpt2-exp6/h.6.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 6, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-gpt2-exp6/h.6.mlp/sae.safetensors b/k32-gpt2-exp6/h.6.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cf46243edbffb89e0a3cafde4d61a3876b3d204f --- /dev/null +++ b/k32-gpt2-exp6/h.6.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45199e893bd1d618fdad2dcb32260d4b1539a3ea752ae34ca906cc9e3c82b57d +size 28333384 diff --git a/k32-gpt2-exp6/lr_scheduler.pt b/k32-gpt2-exp6/lr_scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..eee1e34c4de8a0328b4752fe9a5f12f31d564cec --- /dev/null +++ b/k32-gpt2-exp6/lr_scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:813530d7a1e15d504320663b106a38c61d68aa05d1f187df7c2a6be8253bbb50 +size 1076 diff --git a/k32-gpt2-exp6/optimizer.pt b/k32-gpt2-exp6/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c918e523d11eab615c22ea153950d9b4051ddfe9 --- /dev/null +++ b/k32-gpt2-exp6/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fb2d706159b185fa6922551a426bd79d4bd8065fc34507dbc895210f4423a32 +size 28796224 diff --git a/k32-gpt2-exp6/state.pt b/k32-gpt2-exp6/state.pt new file mode 100644 index 0000000000000000000000000000000000000000..f64dedbd7a79011837876c06962508a15309b6f9 --- /dev/null +++ b/k32-gpt2-exp6/state.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef03332071f8f2abf965b6f1f0a9c54663801e8314d520cbdad4b10dc7c9a90a +size 75084 diff --git a/k32-llama-mlp-exp36-seed2/layers.16.mlp/cfg.json b/k32-llama-mlp-exp36-seed2/layers.16.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..40cc895d610ecd334f677c690bf82d4f4945664d --- /dev/null +++ b/k32-llama-mlp-exp36-seed2/layers.16.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 4096} \ No newline at end of file diff --git a/k32-llama-mlp-exp36-seed2/layers.16.mlp/sae.safetensors b/k32-llama-mlp-exp36-seed2/layers.16.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4db036ae509ef301fc280aa51dc5e1486c16973e --- /dev/null +++ b/k32-llama-mlp-exp36-seed2/layers.16.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:603aa9561cf1f25f524ab029fc6e72ed832f05b8e59792777c5282efe10417e5 +size 4832444760 diff --git a/k32-llama-mlp-exp36/layers.16.mlp/cfg.json b/k32-llama-mlp-exp36/layers.16.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..40cc895d610ecd334f677c690bf82d4f4945664d --- /dev/null +++ b/k32-llama-mlp-exp36/layers.16.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 4096} \ No newline at end of file diff --git a/k32-llama-mlp-exp36/layers.16.mlp/sae.safetensors b/k32-llama-mlp-exp36/layers.16.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1d74c9802bdef18a7cacc7be223124ec1ea69389 --- /dev/null +++ b/k32-llama-mlp-exp36/layers.16.mlp/sae.safetensors @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6652e5d4329b75b27a6ab8ea4cd8b7d376d86d06b27f7b6c78e0826f236ec06 +size 4832444760 diff --git a/k32-sae-exp36-seed2/layers.6.mlp/cfg.json b/k32-sae-exp36-seed2/layers.6.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fda3d79a785f6cfe6bc38fd86a17bb2509a25deb --- /dev/null +++ b/k32-sae-exp36-seed2/layers.6.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-sae-exp36-seed2/layers.6.mlp/sae.safetensors b/k32-sae-exp36-seed2/layers.6.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7a0d20921790c74b9bc5ebf9abe5ffca5acc29ca --- /dev/null +++ b/k32-sae-exp36-seed2/layers.6.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed641d8ae3956fc9b11fb6b16b375c95fb93e1255d7a224642a28af5995391b2 +size 169983304 diff --git a/k32-sae-exp36-seed2/layers.6/cfg.json b/k32-sae-exp36-seed2/layers.6/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fda3d79a785f6cfe6bc38fd86a17bb2509a25deb --- /dev/null +++ b/k32-sae-exp36-seed2/layers.6/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-sae-exp36-seed2/layers.6/sae.safetensors b/k32-sae-exp36-seed2/layers.6/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0d3c498b3c79ca7bfbe6996f84f59420c962d298 --- /dev/null +++ b/k32-sae-exp36-seed2/layers.6/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbad05667c14d6273fc87787ae3cbee5e779c3808dd7248d620998191a4ab870 +size 169983304 diff --git a/k32-sae-exp36/layers.6.mlp/cfg.json b/k32-sae-exp36/layers.6.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fda3d79a785f6cfe6bc38fd86a17bb2509a25deb --- /dev/null +++ b/k32-sae-exp36/layers.6.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-sae-exp36/layers.6.mlp/sae.safetensors b/k32-sae-exp36/layers.6.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1a613e0768fa268e40d7e585969f6968bdc3e65e --- /dev/null +++ b/k32-sae-exp36/layers.6.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1789a08913af3fa364d7670639e67de3f066fd76591a11eac0d9dc683ab058be +size 169983304 diff --git a/k32-sae-exp36/layers.6/cfg.json b/k32-sae-exp36/layers.6/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fda3d79a785f6cfe6bc38fd86a17bb2509a25deb --- /dev/null +++ b/k32-sae-exp36/layers.6/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-sae-exp36/layers.6/sae.safetensors b/k32-sae-exp36/layers.6/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..215894f8247227d23794bb8b8e2c978d1782aa0f --- /dev/null +++ b/k32-sae-exp36/layers.6/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:d4d415dc9e1ccc473ffed19b38037e0059bd225b83689d9586eda750d1279328 +size 169983304 diff --git a/k32-sae-mlp-32k-long-seed2/layers.6.mlp/cfg.json b/k32-sae-mlp-32k-long-seed2/layers.6.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ba51bc4beefd90615925438ff54756a7c3161145 --- /dev/null +++ b/k32-sae-mlp-32k-long-seed2/layers.6.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-sae-mlp-32k-long-seed2/layers.6.mlp/sae.safetensors b/k32-sae-mlp-32k-long-seed2/layers.6.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f052bf88a56736627d725029b9e974ba6e986a46 --- /dev/null +++ b/k32-sae-mlp-32k-long-seed2/layers.6.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26c075d34b2715d112673fca39d8e3371f9c936fdf863f74dd39ef9676bd6b35 +size 201461072 diff --git a/k32-sae-mlp-32k-long/layers.6.mlp/cfg.json b/k32-sae-mlp-32k-long/layers.6.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ba51bc4beefd90615925438ff54756a7c3161145 --- /dev/null +++ b/k32-sae-mlp-32k-long/layers.6.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-sae-mlp-32k-long/layers.6.mlp/sae.safetensors b/k32-sae-mlp-32k-long/layers.6.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6a53c5ae7e943435513bdfa5ab00252f5aaeedbf --- /dev/null +++ b/k32-sae-mlp-32k-long/layers.6.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84e734c43c7c0c9ca72946cc0f0151c4a1cadbaa8b9d9894458faccf1084232b +size 201461072 diff --git a/k32-sae-mlp-32k-seed3/layers.6.mlp/sae.safetensors b/k32-sae-mlp-32k-seed3/layers.6.mlp/sae.safetensors index cf7314876bcf8068ee70eac2fd98c145217c913a..97fab08843e4adf6105c2a14522a7c5e44c079c1 100644 --- a/k32-sae-mlp-32k-seed3/layers.6.mlp/sae.safetensors +++ b/k32-sae-mlp-32k-seed3/layers.6.mlp/sae.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2ec7c8add04286ffe5865c742bc841dac765e946bd55ab465d6bda2beeea3a63 +oid sha256:be12be66d30b00d93af3725731ba5074db4ed6c53bd8188159cdd2b784596a9f size 201461072 diff --git a/k32-sae-mlp-32k-seed4/layers.6.mlp/cfg.json b/k32-sae-mlp-32k-seed4/layers.6.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ba51bc4beefd90615925438ff54756a7c3161145 --- /dev/null +++ b/k32-sae-mlp-32k-seed4/layers.6.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-sae-mlp-32k-seed4/layers.6.mlp/sae.safetensors b/k32-sae-mlp-32k-seed4/layers.6.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f3ce7def1ec1308af58a46abf14b08f3c006750c --- /dev/null +++ b/k32-sae-mlp-32k-seed4/layers.6.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14f6241c1e02e26623b79ed7fde936eb342358dbdc39f68acc54801089f4e62b +size 201461072 diff --git a/k32-sae-mlp-32k-seed4_wrong/layers.6.mlp/cfg.json b/k32-sae-mlp-32k-seed4_wrong/layers.6.mlp/cfg.json new file mode 100644 index 
0000000000000000000000000000000000000000..d415e3c6c98e6921210e830b7dc314f30fb01935 --- /dev/null +++ b/k32-sae-mlp-32k-seed4_wrong/layers.6.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32678, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-sae-mlp-32k-seed4_wrong/layers.6.mlp/sae.safetensors b/k32-sae-mlp-32k-seed4_wrong/layers.6.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..07a031a48624819032908990cd6186bbf1cdb602 --- /dev/null +++ b/k32-sae-mlp-32k-seed4_wrong/layers.6.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e95046bd22ec003dc4477830ed1de65626f885a693c9678afbaf63005c47673 +size 200907752 diff --git a/k32-sae-mlp-32k-seed5/layers.6.mlp/cfg.json b/k32-sae-mlp-32k-seed5/layers.6.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ba51bc4beefd90615925438ff54756a7c3161145 --- /dev/null +++ b/k32-sae-mlp-32k-seed5/layers.6.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-sae-mlp-32k-seed5/layers.6.mlp/sae.safetensors b/k32-sae-mlp-32k-seed5/layers.6.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e1598f9788148ac9c3d8d5ab56a225bee18e5f9e --- /dev/null +++ b/k32-sae-mlp-32k-seed5/layers.6.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ec991f8dc9e6c647e0b9fc90671b71c827b44c75761e34499261d15e0369f90 +size 201461072 diff --git a/k32-sae-mlp-32k-seed5_wrong/layers.6.mlp/cfg.json b/k32-sae-mlp-32k-seed5_wrong/layers.6.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..d415e3c6c98e6921210e830b7dc314f30fb01935 --- /dev/null +++ b/k32-sae-mlp-32k-seed5_wrong/layers.6.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32678, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-sae-mlp-32k-seed5_wrong/layers.6.mlp/sae.safetensors b/k32-sae-mlp-32k-seed5_wrong/layers.6.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5a22bddd141e6346089cb49873fcb4882bf9668c --- /dev/null +++ b/k32-sae-mlp-32k-seed5_wrong/layers.6.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1241204010c35f924bbe2967b672af4764a9ce248b1ea5b188f67dd94863119a +size 200907752 diff --git a/k32-sae-mlp-32k-seed6/config.json b/k32-sae-mlp-32k-seed6/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ec9ed68d9010334fb5d1609fd296d9b7b75faddf --- /dev/null +++ b/k32-sae-mlp-32k-seed6/config.json @@ -0,0 +1 @@ +{"sae": {"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 32, "multi_topk": false, "skip_connection": false}, "batch_size": 8, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.6.mlp"], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 1000, "log_to_wandb": true, "run_name": "k32-sae-mlp-32k-seed3", "wandb_log_frequency": 1, "model": "EleutherAI/pythia-160m", "dataset": "/mnt/ssd-1/pile_preshuffled/standard/document.bin", "split": 
"train", "ctx_len": 2049, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": 4000000, "resume": false, "finetune": null, "seed": 52, "data_preprocessing_num_proc": 48} \ No newline at end of file diff --git a/k32-sae-mlp-32k-seed6/layers.6.mlp/cfg.json b/k32-sae-mlp-32k-seed6/layers.6.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ba51bc4beefd90615925438ff54756a7c3161145 --- /dev/null +++ b/k32-sae-mlp-32k-seed6/layers.6.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-sae-mlp-32k-seed6/layers.6.mlp/sae.safetensors b/k32-sae-mlp-32k-seed6/layers.6.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cf7314876bcf8068ee70eac2fd98c145217c913a --- /dev/null +++ b/k32-sae-mlp-32k-seed6/layers.6.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ec7c8add04286ffe5865c742bc841dac765e946bd55ab465d6bda2beeea3a63 +size 201461072 diff --git a/k32-sae-mlp-32k-seed6/lr_scheduler.pt b/k32-sae-mlp-32k-seed6/lr_scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..10665724254e7c22a7a1404d8f7fb5c40849f60e --- /dev/null +++ b/k32-sae-mlp-32k-seed6/lr_scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11b5192fa27cf7089cdfbc43a42ead0e86f1257c5babcb88951ac1987b6c5dbf +size 1012 diff --git a/k32-sae-mlp-32k-seed6/optimizer.pt b/k32-sae-mlp-32k-seed6/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f080b79c8d6c39f844c14e8c0194b50c26d39267 --- /dev/null +++ b/k32-sae-mlp-32k-seed6/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d4f923bf2445b699c905a9104f88c726484606c5fd5cb1fe43e3a79d0e8e226 +size 102316366 diff --git a/k32-sae-mlp-32k-seed6/state.pt b/k32-sae-mlp-32k-seed6/state.pt new file mode 100644 index 0000000000000000000000000000000000000000..9cb821d79827d0ded7f45d22e4db52a78ba5b3fd --- /dev/null +++ b/k32-sae-mlp-32k-seed6/state.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f59861d2107118a34be3f2f6846765e1437ea3812ea7ed34e849aa1890bfa3d +size 263314 diff --git a/k32-sae-mlp-32k-seed6_wrong/config.json b/k32-sae-mlp-32k-seed6_wrong/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c69812206a9af50d4d3151e9e71a46fa6738459 --- /dev/null +++ b/k32-sae-mlp-32k-seed6_wrong/config.json @@ -0,0 +1 @@ +{"sae": {"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32678, "k": 32, "multi_topk": false, "skip_connection": false}, "batch_size": 8, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.6.mlp"], "init_seeds": [42, 52, 62], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 1000, "log_to_wandb": true, "run_name": "k32-sae-mlp-seds", "wandb_log_frequency": 1, "model": "EleutherAI/pythia-160m", "dataset": "/mnt/ssd-1/pile_preshuffled/standard/document.bin", "split": "train", "ctx_len": 2049, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": 4000000, "resume": false, "text_column": "text", "finetune": null, "shuffle_seed": 42, "data_preprocessing_num_proc": 48} \ No newline at end of file diff --git 
a/k32-sae-mlp-32k-seed6_wrong/layers.6.mlp/cfg.json b/k32-sae-mlp-32k-seed6_wrong/layers.6.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..d415e3c6c98e6921210e830b7dc314f30fb01935 --- /dev/null +++ b/k32-sae-mlp-32k-seed6_wrong/layers.6.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32678, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-sae-mlp-32k-seed6_wrong/layers.6.mlp/sae.safetensors b/k32-sae-mlp-32k-seed6_wrong/layers.6.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fa47b2300e61ed1cdb637e309aa7cf8d47a461ae --- /dev/null +++ b/k32-sae-mlp-32k-seed6_wrong/layers.6.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:015bf2de2ff85fa7df8c45b0be4e7489a6e8b5c375f12fa47779fc11ce306877 +size 200907752 diff --git a/k32-sae-mlp-32k-seed6_wrong/lr_scheduler.pt b/k32-sae-mlp-32k-seed6_wrong/lr_scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..86872254b447d74c47d49f6d6e2d9969acc94407 --- /dev/null +++ b/k32-sae-mlp-32k-seed6_wrong/lr_scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9b025a6f833455384062a261fc137ad90476a3a834bfd99aeb628b89ec6f81e +size 1076 diff --git a/k32-sae-mlp-32k-seed6_wrong/optimizer.pt b/k32-sae-mlp-32k-seed6_wrong/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a4a5952ddba0c5142037179c02600ac262bc142b --- /dev/null +++ b/k32-sae-mlp-32k-seed6_wrong/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fa009815094dfaf065d153fe72f805d387840bb6c1f6ecb9915d857e1340608 +size 306098866 diff --git a/k32-sae-mlp-32k-seed6_wrong/state.pt b/k32-sae-mlp-32k-seed6_wrong/state.pt new file mode 100644 index 0000000000000000000000000000000000000000..5b946d02b43b47cb6ea845517273bcd57918c623 --- /dev/null +++ b/k32-sae-mlp-32k-seed6_wrong/state.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b97303dedbec4ad73d50cbfc5ca00a61f824e5ec2593f638b125f481fc3a5f42 +size 785926 diff --git a/k32-sae-mlp-32k-seed7/layers.6.mlp/cfg.json b/k32-sae-mlp-32k-seed7/layers.6.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ba51bc4beefd90615925438ff54756a7c3161145 --- /dev/null +++ b/k32-sae-mlp-32k-seed7/layers.6.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-sae-mlp-32k-seed7/layers.6.mlp/sae.safetensors b/k32-sae-mlp-32k-seed7/layers.6.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1aeb54ec42e299b75128cf6fda69f9fae5b1aec6 --- /dev/null +++ b/k32-sae-mlp-32k-seed7/layers.6.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91c62cf5c68e3a53325e778df150647d425ca17f275964c69f7a65440ab8b399 +size 201461072 diff --git a/k32-sae-mlp-32k-seed8/layers.6.mlp/cfg.json b/k32-sae-mlp-32k-seed8/layers.6.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ba51bc4beefd90615925438ff54756a7c3161145 --- /dev/null +++ b/k32-sae-mlp-32k-seed8/layers.6.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline 
at end of file diff --git a/k32-sae-mlp-32k-seed8/layers.6.mlp/sae.safetensors b/k32-sae-mlp-32k-seed8/layers.6.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c013d56bd5aa47a099c78a1ded0a6762a3d4c938 --- /dev/null +++ b/k32-sae-mlp-32k-seed8/layers.6.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33280cb3792575c93d5456cd9f1f9e7d7d638e5a1b5712bc26df7dc8a9c04e87 +size 201461072 diff --git a/k32-sae-mlp-32k-seed9/layers.6.mlp/cfg.json b/k32-sae-mlp-32k-seed9/layers.6.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ba51bc4beefd90615925438ff54756a7c3161145 --- /dev/null +++ b/k32-sae-mlp-32k-seed9/layers.6.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-sae-mlp-32k-seed9/layers.6.mlp/sae.safetensors b/k32-sae-mlp-32k-seed9/layers.6.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..78ffe8b4e49f117dbefa08ea5c309ccf0e1c2091 --- /dev/null +++ b/k32-sae-mlp-32k-seed9/layers.6.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc011515786ecc72b1dc9857835fadb2c8c82647d883b7bbe336222fc709a029 +size 201461072 diff --git a/k32-sae-mlp-32k-short-seed2/config.json b/k32-sae-mlp-32k-short-seed2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a1a3f542f39eaaf351923e3aab509dd975ae396 --- /dev/null +++ b/k32-sae-mlp-32k-short-seed2/config.json @@ -0,0 +1 @@ +{"sae": {"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 32, "multi_topk": false, "skip_connection": false}, "batch_size": 8, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.0.mlp", "layers.1.mlp", "layers.2.mlp", "layers.3.mlp", "layers.4.mlp", "layers.5.mlp", "layers.6.mlp", "layers.7.mlp", "layers.8.mlp", "layers.9.mlp", "layers.10.mlp", "layers.11.mlp"], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 1000, "log_to_wandb": true, "run_name": "k32-sae-mlp-32k-seed6", "wandb_log_frequency": 1, "model": "EleutherAI/pythia-160m", "dataset": "/mnt/ssd-1/pile_preshuffled/standard/document.bin", "split": "train", "ctx_len": 2049, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": 400000, "resume": false, "finetune": null, "seed": 456, "number_seeds": 1, "data_preprocessing_num_proc": 48} \ No newline at end of file diff --git a/k32-sae-mlp-32k-short-seed2/layers.0.mlp/cfg.json b/k32-sae-mlp-32k-short-seed2/layers.0.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ba51bc4beefd90615925438ff54756a7c3161145 --- /dev/null +++ b/k32-sae-mlp-32k-short-seed2/layers.0.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-sae-mlp-32k-short-seed2/layers.0.mlp/sae.safetensors b/k32-sae-mlp-32k-short-seed2/layers.0.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..96c4061111429e540b356bda528c36a4aff2daa0 --- /dev/null +++ b/k32-sae-mlp-32k-short-seed2/layers.0.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 
+oid sha256:d22ade9bbd721e4ac86cefd4016df1b8f79d832c23cce87c66006d07a6d537ac +size 201461072 diff --git a/k32-sae-mlp-32k-short-seed2/layers.1.mlp/cfg.json b/k32-sae-mlp-32k-short-seed2/layers.1.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ba51bc4beefd90615925438ff54756a7c3161145 --- /dev/null +++ b/k32-sae-mlp-32k-short-seed2/layers.1.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-sae-mlp-32k-short-seed2/layers.1.mlp/sae.safetensors b/k32-sae-mlp-32k-short-seed2/layers.1.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..99d961c805099a408d2cfad0b285c95956092bbc --- /dev/null +++ b/k32-sae-mlp-32k-short-seed2/layers.1.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2110b13ad90fc822a6d3dca2b938c20c61d49a3c05759e90b359b0b2a2c55c5d +size 201461072 diff --git a/k32-sae-mlp-32k-short-seed2/layers.10.mlp/cfg.json b/k32-sae-mlp-32k-short-seed2/layers.10.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ba51bc4beefd90615925438ff54756a7c3161145 --- /dev/null +++ b/k32-sae-mlp-32k-short-seed2/layers.10.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-sae-mlp-32k-short-seed2/layers.10.mlp/sae.safetensors b/k32-sae-mlp-32k-short-seed2/layers.10.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..888d8c8bbafe63577cc9fe94ddb661e793adc70c --- /dev/null +++ b/k32-sae-mlp-32k-short-seed2/layers.10.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:387f67d6982a40f050a9fdb67435a6b6e8f15ef709f51d5e70e5cdbae48b4932 +size 201461072 diff --git a/k32-sae-mlp-32k-short-seed2/layers.11.mlp/cfg.json b/k32-sae-mlp-32k-short-seed2/layers.11.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ba51bc4beefd90615925438ff54756a7c3161145 --- /dev/null +++ b/k32-sae-mlp-32k-short-seed2/layers.11.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-sae-mlp-32k-short-seed2/layers.11.mlp/sae.safetensors b/k32-sae-mlp-32k-short-seed2/layers.11.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..57bfce5fd2f86c2b2d28d3695366ab101ce2673e --- /dev/null +++ b/k32-sae-mlp-32k-short-seed2/layers.11.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eed18deac8312c9c4c6f6d69eeaa2444b1e93d776515cbd1975654e6ea94868b +size 201461072 diff --git a/k32-sae-mlp-32k-short-seed2/layers.2.mlp/cfg.json b/k32-sae-mlp-32k-short-seed2/layers.2.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ba51bc4beefd90615925438ff54756a7c3161145 --- /dev/null +++ b/k32-sae-mlp-32k-short-seed2/layers.2.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-sae-mlp-32k-short-seed2/layers.2.mlp/sae.safetensors 
b/k32-sae-mlp-32k-short-seed2/layers.2.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f9e0670223e81dc4f35cceeed129f12eed88929f --- /dev/null +++ b/k32-sae-mlp-32k-short-seed2/layers.2.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35a9dcf9e97e5f27dd9741a386609cbf41b5ab6403f412181ab71f01e55e3110 +size 201461072 diff --git a/k32-sae-mlp-32k-short-seed2/layers.3.mlp/cfg.json b/k32-sae-mlp-32k-short-seed2/layers.3.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ba51bc4beefd90615925438ff54756a7c3161145 --- /dev/null +++ b/k32-sae-mlp-32k-short-seed2/layers.3.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-sae-mlp-32k-short-seed2/layers.3.mlp/sae.safetensors b/k32-sae-mlp-32k-short-seed2/layers.3.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2c2417b629e7088d5dd57479eaf42f932061c42f --- /dev/null +++ b/k32-sae-mlp-32k-short-seed2/layers.3.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:502d52aa37f1e78b463070bfbd00c19e9c237334a58d7c9d933285a87b6a436c +size 201461072 diff --git a/k32-sae-mlp-32k-short-seed2/layers.4.mlp/cfg.json b/k32-sae-mlp-32k-short-seed2/layers.4.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ba51bc4beefd90615925438ff54756a7c3161145 --- /dev/null +++ b/k32-sae-mlp-32k-short-seed2/layers.4.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-sae-mlp-32k-short-seed2/layers.4.mlp/sae.safetensors b/k32-sae-mlp-32k-short-seed2/layers.4.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..63386cdf632bc18e60630c4405de58d854b50de9 --- /dev/null +++ b/k32-sae-mlp-32k-short-seed2/layers.4.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b19e991374482a59822db4a508180e71a293d6abf7e5a57f310f5674f20cd31e +size 201461072 diff --git a/k32-sae-mlp-32k-short-seed2/layers.5.mlp/cfg.json b/k32-sae-mlp-32k-short-seed2/layers.5.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ba51bc4beefd90615925438ff54756a7c3161145 --- /dev/null +++ b/k32-sae-mlp-32k-short-seed2/layers.5.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-sae-mlp-32k-short-seed2/layers.5.mlp/sae.safetensors b/k32-sae-mlp-32k-short-seed2/layers.5.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f0d02c8d68cddeace979db7d8d9641faa9fee603 --- /dev/null +++ b/k32-sae-mlp-32k-short-seed2/layers.5.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7867786edb4da7376a9830e1429c1c973f3127251d99eeee6199425f7b65bec7 +size 201461072 diff --git a/k32-sae-mlp-32k-short-seed2/layers.6.mlp/cfg.json b/k32-sae-mlp-32k-short-seed2/layers.6.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ba51bc4beefd90615925438ff54756a7c3161145 --- /dev/null +++ b/k32-sae-mlp-32k-short-seed2/layers.6.mlp/cfg.json @@ -0,0 +1 
@@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-sae-mlp-32k-short-seed2/layers.6.mlp/sae.safetensors b/k32-sae-mlp-32k-short-seed2/layers.6.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f37f7702b1bc5669cf14bb0d35d040ed4734ddfc --- /dev/null +++ b/k32-sae-mlp-32k-short-seed2/layers.6.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b6bea8c6261b265fa68a12bf521cf8c885ea39d08db47ccad563e9005dd9aa1 +size 201461072 diff --git a/k32-sae-mlp-32k-short-seed2/layers.7.mlp/cfg.json b/k32-sae-mlp-32k-short-seed2/layers.7.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ba51bc4beefd90615925438ff54756a7c3161145 --- /dev/null +++ b/k32-sae-mlp-32k-short-seed2/layers.7.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-sae-mlp-32k-short-seed2/layers.7.mlp/sae.safetensors b/k32-sae-mlp-32k-short-seed2/layers.7.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..abe92d9a22d66ec011ceb638ab8132ca6a3540d3 --- /dev/null +++ b/k32-sae-mlp-32k-short-seed2/layers.7.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fbceff0d572e2314a7f8eab22d5fd6d848d985e4370fa6742232c94fc99604d +size 201461072 diff --git a/k32-sae-mlp-32k-short-seed2/layers.8.mlp/cfg.json b/k32-sae-mlp-32k-short-seed2/layers.8.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ba51bc4beefd90615925438ff54756a7c3161145 --- /dev/null +++ b/k32-sae-mlp-32k-short-seed2/layers.8.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-sae-mlp-32k-short-seed2/layers.8.mlp/sae.safetensors b/k32-sae-mlp-32k-short-seed2/layers.8.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9606cd4ba929fe524a8dda6aa7d80336bf09772c --- /dev/null +++ b/k32-sae-mlp-32k-short-seed2/layers.8.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dd96fe6ba31d87a45457108412ca0f67763f3dfeeac2c31cf9aaf111a33b2fd +size 201461072 diff --git a/k32-sae-mlp-32k-short-seed2/layers.9.mlp/cfg.json b/k32-sae-mlp-32k-short-seed2/layers.9.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ba51bc4beefd90615925438ff54756a7c3161145 --- /dev/null +++ b/k32-sae-mlp-32k-short-seed2/layers.9.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-sae-mlp-32k-short-seed2/layers.9.mlp/sae.safetensors b/k32-sae-mlp-32k-short-seed2/layers.9.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..62bfb4864f1116279514bcc522b7bf3e4cd0e91c --- /dev/null +++ b/k32-sae-mlp-32k-short-seed2/layers.9.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30def7ca48c6cac815e4c6ff73a71245c0eb16660e7009d6821979966991d1aa +size 201461072 diff --git a/k32-sae-mlp-32k-short-seed2/lr_scheduler.pt 
b/k32-sae-mlp-32k-short-seed2/lr_scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..306b13fe62a8f268b76f4e7ed99e7d8869edf98d --- /dev/null +++ b/k32-sae-mlp-32k-short-seed2/lr_scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f068cbc10ce1c19a87cbd872596d8b855440693bf8d877ad81d02e840f743378 +size 1268 diff --git a/k32-sae-mlp-32k-short-seed2/optimizer.pt b/k32-sae-mlp-32k-short-seed2/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..04b62639da7b4985adfa99ff1c2458b3552adff9 --- /dev/null +++ b/k32-sae-mlp-32k-short-seed2/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c7d3a3f1b6263aad54c65727a3c7250690958b34fc6b915a2444ce3442d2dfa +size 1227759482 diff --git a/k32-sae-mlp-32k-short-seed2/state.pt b/k32-sae-mlp-32k-short-seed2/state.pt new file mode 100644 index 0000000000000000000000000000000000000000..e317ea69e49e9cce0e171484ba81af0b0f133c3f --- /dev/null +++ b/k32-sae-mlp-32k-short-seed2/state.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c423825169cafc28077d2a2c34dca1cfddf024b6af1975eb3579cfd42a8c8be +size 3149138 diff --git a/k32-sae-mlp-32k-short/config.json b/k32-sae-mlp-32k-short/config.json new file mode 100644 index 0000000000000000000000000000000000000000..373d348811665f6034d790cd40e5cf2c3f5683a5 --- /dev/null +++ b/k32-sae-mlp-32k-short/config.json @@ -0,0 +1 @@ +{"sae": {"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 32, "multi_topk": false, "skip_connection": false}, "batch_size": 8, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.0.mlp", "layers.1.mlp", "layers.2.mlp", "layers.3.mlp", "layers.4.mlp", "layers.5.mlp", "layers.6.mlp", "layers.7.mlp", "layers.8.mlp", "layers.9.mlp", "layers.10.mlp", "layers.11.mlp"], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 1000, "log_to_wandb": true, "run_name": "k32-sae-mlp-32k-seed5", "wandb_log_frequency": 1, "model": "EleutherAI/pythia-160m", "dataset": "/mnt/ssd-1/pile_preshuffled/standard/document.bin", "split": "train", "ctx_len": 2049, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": 400000, "resume": false, "finetune": null, "seed": 123, "number_seeds": 1, "data_preprocessing_num_proc": 48} \ No newline at end of file diff --git a/k32-sae-mlp-32k-short/layers.0.mlp/cfg.json b/k32-sae-mlp-32k-short/layers.0.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ba51bc4beefd90615925438ff54756a7c3161145 --- /dev/null +++ b/k32-sae-mlp-32k-short/layers.0.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-sae-mlp-32k-short/layers.0.mlp/sae.safetensors b/k32-sae-mlp-32k-short/layers.0.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..11e35cbc6549829c57ad8db6a3499fb3a9868d04 --- /dev/null +++ b/k32-sae-mlp-32k-short/layers.0.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6786cc7dbc9a4e9ae0d24c5e7fa5cf2d24824cc3a582ebe34059bb4214e6fcc +size 201461072 diff --git a/k32-sae-mlp-32k-short/layers.1.mlp/cfg.json b/k32-sae-mlp-32k-short/layers.1.mlp/cfg.json new file mode 100644 index 
0000000000000000000000000000000000000000..ba51bc4beefd90615925438ff54756a7c3161145 --- /dev/null +++ b/k32-sae-mlp-32k-short/layers.1.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-sae-mlp-32k-short/layers.1.mlp/sae.safetensors b/k32-sae-mlp-32k-short/layers.1.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0fe855f5babe8bb6fb864f8a23560448b32e4fe9 --- /dev/null +++ b/k32-sae-mlp-32k-short/layers.1.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e00c21a791e31a71482fcd6a51913a9ad0b46d9d6c34e34ce6734e46cb531fd +size 201461072 diff --git a/k32-sae-mlp-32k-short/layers.10.mlp/cfg.json b/k32-sae-mlp-32k-short/layers.10.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ba51bc4beefd90615925438ff54756a7c3161145 --- /dev/null +++ b/k32-sae-mlp-32k-short/layers.10.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-sae-mlp-32k-short/layers.10.mlp/sae.safetensors b/k32-sae-mlp-32k-short/layers.10.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..42897a13c4162b7395f7ad3bd71960bd5436c57c --- /dev/null +++ b/k32-sae-mlp-32k-short/layers.10.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ec5508570829b621aaa55897803c76dfb79e1693ac8b4497e83a18b6a18e4a6 +size 201461072 diff --git a/k32-sae-mlp-32k-short/layers.11.mlp/cfg.json b/k32-sae-mlp-32k-short/layers.11.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ba51bc4beefd90615925438ff54756a7c3161145 --- /dev/null +++ b/k32-sae-mlp-32k-short/layers.11.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-sae-mlp-32k-short/layers.11.mlp/sae.safetensors b/k32-sae-mlp-32k-short/layers.11.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5a79b6e7331a5d3a2508db80488f0b0ce8ae3fa7 --- /dev/null +++ b/k32-sae-mlp-32k-short/layers.11.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a788d0738c15addf39abf073e6120eeb35895875da3d6c2337bd12443dcb329 +size 201461072 diff --git a/k32-sae-mlp-32k-short/layers.2.mlp/cfg.json b/k32-sae-mlp-32k-short/layers.2.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ba51bc4beefd90615925438ff54756a7c3161145 --- /dev/null +++ b/k32-sae-mlp-32k-short/layers.2.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-sae-mlp-32k-short/layers.2.mlp/sae.safetensors b/k32-sae-mlp-32k-short/layers.2.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cfff6d89c9867011e060d38ebaee9946b4888a0b --- /dev/null +++ b/k32-sae-mlp-32k-short/layers.2.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deb1d290226844e3eb6df84fe19f8f745d4ba32b993593395415a2cd495fb15b +size 201461072 diff --git 
a/k32-sae-mlp-32k-short/layers.3.mlp/cfg.json b/k32-sae-mlp-32k-short/layers.3.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ba51bc4beefd90615925438ff54756a7c3161145 --- /dev/null +++ b/k32-sae-mlp-32k-short/layers.3.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-sae-mlp-32k-short/layers.3.mlp/sae.safetensors b/k32-sae-mlp-32k-short/layers.3.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..db67a7fd1e272dce545ff2ed6612b3ef6634d608 --- /dev/null +++ b/k32-sae-mlp-32k-short/layers.3.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a480b6c29b6abbecc56e77ab5d3dd617321ce76bb375d2295ee7c50dc80bf122 +size 201461072 diff --git a/k32-sae-mlp-32k-short/layers.4.mlp/cfg.json b/k32-sae-mlp-32k-short/layers.4.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ba51bc4beefd90615925438ff54756a7c3161145 --- /dev/null +++ b/k32-sae-mlp-32k-short/layers.4.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-sae-mlp-32k-short/layers.4.mlp/sae.safetensors b/k32-sae-mlp-32k-short/layers.4.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ee93f9aa3c421d7e1949097975faaf91136fe522 --- /dev/null +++ b/k32-sae-mlp-32k-short/layers.4.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4676178b45e507bb3ba265a261e671da8d525bd7381eb02e9899cfe0b860b85b +size 201461072 diff --git a/k32-sae-mlp-32k-short/layers.5.mlp/cfg.json b/k32-sae-mlp-32k-short/layers.5.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ba51bc4beefd90615925438ff54756a7c3161145 --- /dev/null +++ b/k32-sae-mlp-32k-short/layers.5.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-sae-mlp-32k-short/layers.5.mlp/sae.safetensors b/k32-sae-mlp-32k-short/layers.5.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6317cb2478b3c12c02b55cc37d2cd1331c132dab --- /dev/null +++ b/k32-sae-mlp-32k-short/layers.5.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6ac69db5300732935daeb616065d26bfaf7a3d0fc18280469dbd030d4682202 +size 201461072 diff --git a/k32-sae-mlp-32k-short/layers.6.mlp/cfg.json b/k32-sae-mlp-32k-short/layers.6.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ba51bc4beefd90615925438ff54756a7c3161145 --- /dev/null +++ b/k32-sae-mlp-32k-short/layers.6.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-sae-mlp-32k-short/layers.6.mlp/sae.safetensors b/k32-sae-mlp-32k-short/layers.6.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5bce948b106439cee47458c0744c52011600b160 --- /dev/null +++ b/k32-sae-mlp-32k-short/layers.6.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 
+oid sha256:77a73ab2199a9d7c64593f3ac3e2beb00c640390c9a11a94303fd4bbacad9d9f +size 201461072 diff --git a/k32-sae-mlp-32k-short/layers.7.mlp/cfg.json b/k32-sae-mlp-32k-short/layers.7.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ba51bc4beefd90615925438ff54756a7c3161145 --- /dev/null +++ b/k32-sae-mlp-32k-short/layers.7.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-sae-mlp-32k-short/layers.7.mlp/sae.safetensors b/k32-sae-mlp-32k-short/layers.7.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e64731267ad0688245a922c44e9d7ba7e06b30a1 --- /dev/null +++ b/k32-sae-mlp-32k-short/layers.7.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8d1f241a05346401f6bc0c3a45c61f1e9228033399d2da70da8abc06742ce0e +size 201461072 diff --git a/k32-sae-mlp-32k-short/layers.8.mlp/cfg.json b/k32-sae-mlp-32k-short/layers.8.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ba51bc4beefd90615925438ff54756a7c3161145 --- /dev/null +++ b/k32-sae-mlp-32k-short/layers.8.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-sae-mlp-32k-short/layers.8.mlp/sae.safetensors b/k32-sae-mlp-32k-short/layers.8.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f3a5ecef248764ceefa9303cd948225d29b4621c --- /dev/null +++ b/k32-sae-mlp-32k-short/layers.8.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5977aa7986125dea31b75ba95884418409daa50d4eaa8ae81ba24eaad5841b9 +size 201461072 diff --git a/k32-sae-mlp-32k-short/layers.9.mlp/cfg.json b/k32-sae-mlp-32k-short/layers.9.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ba51bc4beefd90615925438ff54756a7c3161145 --- /dev/null +++ b/k32-sae-mlp-32k-short/layers.9.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k32-sae-mlp-32k-short/layers.9.mlp/sae.safetensors b/k32-sae-mlp-32k-short/layers.9.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9c2d9baa3199bb07fe541a6faa605924e0fcd45f --- /dev/null +++ b/k32-sae-mlp-32k-short/layers.9.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:443813a7280a36aeef1db0621c1216977baa31a026ff8a8d8b2824d0e4f71681 +size 201461072 diff --git a/k32-sae-mlp-32k-short/lr_scheduler.pt b/k32-sae-mlp-32k-short/lr_scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..306b13fe62a8f268b76f4e7ed99e7d8869edf98d --- /dev/null +++ b/k32-sae-mlp-32k-short/lr_scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f068cbc10ce1c19a87cbd872596d8b855440693bf8d877ad81d02e840f743378 +size 1268 diff --git a/k32-sae-mlp-32k-short/optimizer.pt b/k32-sae-mlp-32k-short/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..2122c2c3e51b3ee06fd25e78d180b11ea28c1040 --- /dev/null +++ b/k32-sae-mlp-32k-short/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 
+oid sha256:dc24222bd778e82f3b5ec5638889ba65ca67f2d65a15e2393f1d5971ce6b1b23 +size 1227759482 diff --git a/k32-sae-mlp-32k-short/state.pt b/k32-sae-mlp-32k-short/state.pt new file mode 100644 index 0000000000000000000000000000000000000000..fd496865a45a4250b9e836fe5a59b765296ab934 --- /dev/null +++ b/k32-sae-mlp-32k-short/state.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96bd4ff10079b25e54ce830de221c2285c9694eef6783904c116225ba8b42eea +size 3149138 diff --git a/k32-sae-mlp-32k-transcoder-seed2/layers.6.mlp/cfg.json b/k32-sae-mlp-32k-transcoder-seed2/layers.6.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..68c104b6daa13c25aa1fa799ac0cd619f8d1335c --- /dev/null +++ b/k32-sae-mlp-32k-transcoder-seed2/layers.6.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 32, "multi_topk": false, "skip_connection": true, "d_in": 768} \ No newline at end of file diff --git a/k32-sae-mlp-32k-transcoder-seed2/layers.6.mlp/sae.safetensors b/k32-sae-mlp-32k-transcoder-seed2/layers.6.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bd14b691fe4b00bec0aeadcf9cc09fec50c5db7d --- /dev/null +++ b/k32-sae-mlp-32k-transcoder-seed2/layers.6.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc96b7f97b3741c9d92248584be5d15dd7072e203a46f2b5256a927297272feb +size 203820448 diff --git a/k32-sae-mlp-32k-transcoder/config.json b/k32-sae-mlp-32k-transcoder/config.json new file mode 100644 index 0000000000000000000000000000000000000000..50798d2915e50cdd2cc994d3aee0f53654648007 --- /dev/null +++ b/k32-sae-mlp-32k-transcoder/config.json @@ -0,0 +1 @@ +{"sae": {"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 32, "multi_topk": false, "skip_connection": true}, "batch_size": 8, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.6.mlp"], "init_seeds": [23, 24], "layers": [], "layer_stride": 1, "transcode": true, "distribute_modules": false, "save_every": 1000, "log_to_wandb": true, "run_name": "k32-sae-mlp-32k-trancoder", "wandb_log_frequency": 1, "model": "EleutherAI/pythia-160m", "dataset": "/mnt/ssd-1/pile_preshuffled/standard/document.bin", "split": "train", "ctx_len": 2049, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": 4000000, "resume": false, "text_column": "text", "finetune": null, "shuffle_seed": 42, "data_preprocessing_num_proc": 48} \ No newline at end of file diff --git a/k32-sae-mlp-32k-transcoder/layers.6.mlp/cfg.json b/k32-sae-mlp-32k-transcoder/layers.6.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..68c104b6daa13c25aa1fa799ac0cd619f8d1335c --- /dev/null +++ b/k32-sae-mlp-32k-transcoder/layers.6.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 32, "multi_topk": false, "skip_connection": true, "d_in": 768} \ No newline at end of file diff --git a/k32-sae-mlp-32k-transcoder/layers.6.mlp/sae.safetensors b/k32-sae-mlp-32k-transcoder/layers.6.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dbb4a72ee5dabdf74e0da11ca3f6ec5a81ddd357 --- /dev/null +++ b/k32-sae-mlp-32k-transcoder/layers.6.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:ec7753357bd3c28811631fb3ace98809cbcaf286753ead2833baeba0dc80723e +size 203820448 diff --git a/k32-sae-mlp-32k-transcoder/lr_scheduler.pt b/k32-sae-mlp-32k-transcoder/lr_scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5874770cc3decc16ff7771b568ec835a60a41d9f --- /dev/null +++ b/k32-sae-mlp-32k-transcoder/lr_scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b6cb55abfc3e787335cfa1add99277f96c0248130e87f2085142e228b011c16 +size 1076 diff --git a/k32-sae-mlp-32k-transcoder/optimizer.pt b/k32-sae-mlp-32k-transcoder/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..07302ad954ea1e94bbea16151e1b9cd9824b8411 --- /dev/null +++ b/k32-sae-mlp-32k-transcoder/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ca36668c52350769d1444b3b20dfe097cd04d285856c380eac19ab856e0d360 +size 207027576 diff --git a/k32-sae-mlp-32k-transcoder/state.pt b/k32-sae-mlp-32k-transcoder/state.pt new file mode 100644 index 0000000000000000000000000000000000000000..67d3d4e32c97e71e20e7f6233c1a3d162b6c5ac3 --- /dev/null +++ b/k32-sae-mlp-32k-transcoder/state.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61b049d43f9fda5748759cf66e7b225985f9602b1f09d1908849131da8ad9f69 +size 525708 diff --git a/k32-smoll-exp36-seed2/config.json b/k32-smoll-exp36-seed2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8913015b842d96cc8af325ab7a4b35d2405d5da7 --- /dev/null +++ b/k32-smoll-exp36-seed2/config.json @@ -0,0 +1 @@ +{"sae": {"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false}, "batch_size": 8, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.15.mlp"], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 1000, "log_to_wandb": false, "run_name": "k32-smoll-exp36-seed2", "wandb_log_frequency": 1, "model": "HuggingFaceTB/SmolLM-135M", "dataset": "HuggingFaceFW/fineweb-edu", "split": "train", "ctx_len": 2049, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": 4000000, "resume": false, "finetune": null, "seed": 42, "data_preprocessing_num_proc": 64} \ No newline at end of file diff --git a/k32-smoll-exp36-seed2/layers.11.mlp/cfg.json b/k32-smoll-exp36-seed2/layers.11.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5aacab2b261661fe08964a4a2bd8b3314ed69a32 --- /dev/null +++ b/k32-smoll-exp36-seed2/layers.11.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-seed2/layers.11.mlp/sae.safetensors b/k32-smoll-exp36-seed2/layers.11.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1c68a1d4e17c375aa5d7ac74a5c1cc0753964eb2 --- /dev/null +++ b/k32-smoll-exp36-seed2/layers.11.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:350d73f6271536bed2f992ff4cb406e69a4a26bfa827cd11d7df10bc09a9e5ab +size 95637064 diff --git a/k32-smoll-exp36-seed2/layers.11/cfg.json b/k32-smoll-exp36-seed2/layers.11/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-seed2/layers.11/cfg.json @@ -0,0 +1 @@ 
+{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-seed2/layers.11/sae.safetensors b/k32-smoll-exp36-seed2/layers.11/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..63dd3eaa973ab8d0ce564fd7478e5d7055ba2a0d --- /dev/null +++ b/k32-smoll-exp36-seed2/layers.11/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a553ab39ba08009dfb165930301468ef2cd00b5030fe052f1f4177411e912a80 +size 95637064 diff --git a/k32-smoll-exp36-seed2/layers.15.mlp/cfg.json b/k32-smoll-exp36-seed2/layers.15.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5aacab2b261661fe08964a4a2bd8b3314ed69a32 --- /dev/null +++ b/k32-smoll-exp36-seed2/layers.15.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-seed2/layers.15.mlp/sae.safetensors b/k32-smoll-exp36-seed2/layers.15.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b92fadd45a46562c68095fea3bb2e1a0cd8005bb --- /dev/null +++ b/k32-smoll-exp36-seed2/layers.15.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad670b3773b1989af0add960304157f21f3921d0b01ea1903d8d8d7a34d92b0b +size 95637064 diff --git a/k32-smoll-exp36-seed2/layers.15/cfg.json b/k32-smoll-exp36-seed2/layers.15/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-seed2/layers.15/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-seed2/layers.15/sae.safetensors b/k32-smoll-exp36-seed2/layers.15/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b1ff0ef12cbaf388a09efe8b56e5ff0163ac2cac --- /dev/null +++ b/k32-smoll-exp36-seed2/layers.15/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d84b8e5cdf84c355670b88032db5493ec5004b05b0f34c9d91c02d7f2efcbc1 +size 95637064 diff --git a/k32-smoll-exp36-seed2/layers.17.mlp/cfg.json b/k32-smoll-exp36-seed2/layers.17.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5aacab2b261661fe08964a4a2bd8b3314ed69a32 --- /dev/null +++ b/k32-smoll-exp36-seed2/layers.17.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-seed2/layers.17.mlp/sae.safetensors b/k32-smoll-exp36-seed2/layers.17.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..96adcfa589b594dedc83ff5a995a5da93348a769 --- /dev/null +++ b/k32-smoll-exp36-seed2/layers.17.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa3c21935a9810aa7717c6f83a26714eef8e745e268eb8156c92effadc33c25e +size 95637064 diff --git a/k32-smoll-exp36-seed2/layers.17/cfg.json b/k32-smoll-exp36-seed2/layers.17/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-seed2/layers.17/cfg.json @@ -0,0 
+1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-seed2/layers.17/sae.safetensors b/k32-smoll-exp36-seed2/layers.17/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a4ea0fe4b8f575ee652be81a593524e9124b6231 --- /dev/null +++ b/k32-smoll-exp36-seed2/layers.17/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab887485d6fa73dcf57bf3f4bb5a248d8a02b1205650169143b6e9cdd7ffe5b1 +size 95637064 diff --git a/k32-smoll-exp36-seed2/lr_scheduler.pt b/k32-smoll-exp36-seed2/lr_scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c86675364a838521f4168d0185dd99949e8642b0 --- /dev/null +++ b/k32-smoll-exp36-seed2/lr_scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:437a643ee39546a5024f7ebc2d48563fe0be45c37d846916a9928d34e9d4d10c +size 1012 diff --git a/k32-smoll-exp36-seed2/optimizer.pt b/k32-smoll-exp36-seed2/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a59f4a194449c7dde7c4ab9d57a81de198cd2b3d --- /dev/null +++ b/k32-smoll-exp36-seed2/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0515514802ac578dfb5d2ce28146fca991bbbaf4d5e0ee5d4661d6747611691d +size 191277778 diff --git a/k32-smoll-exp36-seed2/state.pt b/k32-smoll-exp36-seed2/state.pt new file mode 100644 index 0000000000000000000000000000000000000000..f64b3cba19b7ca3dda02b3eb2946a1e0c3ad4b58 --- /dev/null +++ b/k32-smoll-exp36-seed2/state.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32feb976944d16baa55538d9f98a7009bb15fea5e33cb71e509cbf2556a122de +size 167058 diff --git a/k32-smoll-exp36-short-seed2/config.json b/k32-smoll-exp36-short-seed2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ded0772892fc9a53e69952f3e2c5d40aa20f73f6 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/config.json @@ -0,0 +1 @@ +{"sae": {"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false}, "batch_size": 8, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.0.mlp", "layers.1.mlp", "layers.2.mlp", "layers.3.mlp", "layers.4.mlp", "layers.5.mlp", "layers.6.mlp", "layers.7.mlp", "layers.8.mlp", "layers.9.mlp", "layers.10.mlp", "layers.11.mlp", "layers.12.mlp", "layers.13.mlp", "layers.14.mlp", "layers.15.mlp", "layers.16.mlp", "layers.17.mlp", "layers.18.mlp", "layers.19.mlp", "layers.20.mlp", "layers.21.mlp", "layers.22.mlp", "layers.23.mlp", "layers.24.mlp", "layers.25.mlp", "layers.26.mlp", "layers.27.mlp", "layers.28.mlp", "layers.29.mlp"], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 1000, "log_to_wandb": true, "run_name": "k32-smoll-exp6-seed2", "wandb_log_frequency": 1, "model": "HuggingFaceTB/SmolLM-135M", "dataset": "HuggingFaceFW/fineweb-edu", "split": "train", "ctx_len": 2049, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": 400000, "resume": false, "finetune": null, "seed": 22, "data_preprocessing_num_proc": 48} \ No newline at end of file diff --git a/k32-smoll-exp36-short-seed2/layers.0.mlp/cfg.json b/k32-smoll-exp36-short-seed2/layers.0.mlp/cfg.json new file mode 100644 index 
0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.0.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short-seed2/layers.0.mlp/sae.safetensors b/k32-smoll-exp36-short-seed2/layers.0.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..96f4e3e817f3af5277212474cd0357c5c609ae00 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.0.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01bea1296c69551fb591dd2cba77c15493fd943850f3dc5891205a633ad99254 +size 95637064 diff --git a/k32-smoll-exp36-short-seed2/layers.1.mlp/cfg.json b/k32-smoll-exp36-short-seed2/layers.1.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.1.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short-seed2/layers.1.mlp/sae.safetensors b/k32-smoll-exp36-short-seed2/layers.1.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bc02f224303b5132744bd95db10a35c1521be446 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.1.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d902b5cc3847c54ca6dd9852c29e746a96f035fd1d55c0dd72214de3a91da67d +size 95637064 diff --git a/k32-smoll-exp36-short-seed2/layers.10.mlp/cfg.json b/k32-smoll-exp36-short-seed2/layers.10.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.10.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short-seed2/layers.10.mlp/sae.safetensors b/k32-smoll-exp36-short-seed2/layers.10.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6e1def5bfbf68c863ed2ed570d41138a46aed1b4 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.10.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b448558eaecb3393ad022c1ff7b60ee984af6dffad91d4d7281602cc6c57799f +size 95637064 diff --git a/k32-smoll-exp36-short-seed2/layers.11.mlp/cfg.json b/k32-smoll-exp36-short-seed2/layers.11.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.11.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short-seed2/layers.11.mlp/sae.safetensors b/k32-smoll-exp36-short-seed2/layers.11.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8bee5579bdd1d33c4316cce3e3db08b3743c6b27 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.11.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:3d7fd37556e98f14d0b524af1cd40fa950fc65af3f23894899b2ecd0a0f515f9 +size 95637064 diff --git a/k32-smoll-exp36-short-seed2/layers.12.mlp/cfg.json b/k32-smoll-exp36-short-seed2/layers.12.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.12.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short-seed2/layers.12.mlp/sae.safetensors b/k32-smoll-exp36-short-seed2/layers.12.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c8b3dbc9823958f8178e7fa7b23ec56c01e6fc57 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.12.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c2dd6a58a36cc97941833ad581254ddeafb3eeed93ec3f163c2a00cf5a46506 +size 95637064 diff --git a/k32-smoll-exp36-short-seed2/layers.13.mlp/cfg.json b/k32-smoll-exp36-short-seed2/layers.13.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.13.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short-seed2/layers.13.mlp/sae.safetensors b/k32-smoll-exp36-short-seed2/layers.13.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a59e810f5eb81da571cc8afa6e36aec841d5ef17 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.13.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5eea6ae4012d0f46a70fe66ca4c49ca7d3992c1241ae00303c75567aad5f8d2d +size 95637064 diff --git a/k32-smoll-exp36-short-seed2/layers.14.mlp/cfg.json b/k32-smoll-exp36-short-seed2/layers.14.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.14.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short-seed2/layers.14.mlp/sae.safetensors b/k32-smoll-exp36-short-seed2/layers.14.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5bbd565c59932a56a6c3b25d0d2da2662dd5cffd --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.14.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b369da42d9344708e6bb1865abca83e10eaeb6ff22efa16e7fca06289b2a47c7 +size 95637064 diff --git a/k32-smoll-exp36-short-seed2/layers.15.mlp/cfg.json b/k32-smoll-exp36-short-seed2/layers.15.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.15.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short-seed2/layers.15.mlp/sae.safetensors 
b/k32-smoll-exp36-short-seed2/layers.15.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a3d0e9c80485c6e1f2554280f7d09ddcd20a451d --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.15.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d158fe14cb034b2d5062c5921b1867d76bf652fc7fccc53e402e7beb7aa35e7b +size 95637064 diff --git a/k32-smoll-exp36-short-seed2/layers.16.mlp/cfg.json b/k32-smoll-exp36-short-seed2/layers.16.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.16.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short-seed2/layers.16.mlp/sae.safetensors b/k32-smoll-exp36-short-seed2/layers.16.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b3dd87f28593791db1eac2986987bca169705605 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.16.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0572816811dd2c48cec76d30f2d5ebd6a7ff5107d531f0fedd2c9e53b209dfac +size 95637064 diff --git a/k32-smoll-exp36-short-seed2/layers.17.mlp/cfg.json b/k32-smoll-exp36-short-seed2/layers.17.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.17.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short-seed2/layers.17.mlp/sae.safetensors b/k32-smoll-exp36-short-seed2/layers.17.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..df8d02c31ec505b073c3cd21b262eb6d803cd3aa --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.17.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b48318a5d122c7ba7ec9bce357ef3ab60fa7ca06543d2d8224cdb9acaa7ef03 +size 95637064 diff --git a/k32-smoll-exp36-short-seed2/layers.18.mlp/cfg.json b/k32-smoll-exp36-short-seed2/layers.18.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.18.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short-seed2/layers.18.mlp/sae.safetensors b/k32-smoll-exp36-short-seed2/layers.18.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2457a11a4e909c83adda02540470b1b4d8474b8a --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.18.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eaa792863d9d550f40800f97bec7c9706a0d2bfbcccf5d597ce92a73d2ecb92a +size 95637064 diff --git a/k32-smoll-exp36-short-seed2/layers.19.mlp/cfg.json b/k32-smoll-exp36-short-seed2/layers.19.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.19.mlp/cfg.json @@ 
-0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short-seed2/layers.19.mlp/sae.safetensors b/k32-smoll-exp36-short-seed2/layers.19.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..36853f939dfb921175e7f6efde65754782190ed0 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.19.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db44b9bd23757606d0dfdc63c699771b6d2986f1acc4a84b0f074ba047d4f017 +size 95637064 diff --git a/k32-smoll-exp36-short-seed2/layers.2.mlp/cfg.json b/k32-smoll-exp36-short-seed2/layers.2.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.2.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short-seed2/layers.2.mlp/sae.safetensors b/k32-smoll-exp36-short-seed2/layers.2.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e4b4c5b671a2f17f58a748498e80c817b283990c --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.2.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a9a4d41e24843a1c2cd5b09f1243115d13c751088a5ebcc1282d89b8b1a802d +size 95637064 diff --git a/k32-smoll-exp36-short-seed2/layers.20.mlp/cfg.json b/k32-smoll-exp36-short-seed2/layers.20.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.20.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short-seed2/layers.20.mlp/sae.safetensors b/k32-smoll-exp36-short-seed2/layers.20.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9336d8aa3478bd1697c44702c4b818f2883d6dde --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.20.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d393932c93ee480601dfe70539053ce43862d663c9595af10ffb175a389e32c3 +size 95637064 diff --git a/k32-smoll-exp36-short-seed2/layers.21.mlp/cfg.json b/k32-smoll-exp36-short-seed2/layers.21.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.21.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short-seed2/layers.21.mlp/sae.safetensors b/k32-smoll-exp36-short-seed2/layers.21.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2595ff69fc4790066707bc5f0a286670d5a8a696 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.21.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d237e305b34d91f461351204439ceb9f6ede7de53dc1d31ea453337b3ed65e6f +size 95637064 diff --git 
a/k32-smoll-exp36-short-seed2/layers.22.mlp/cfg.json b/k32-smoll-exp36-short-seed2/layers.22.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.22.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short-seed2/layers.22.mlp/sae.safetensors b/k32-smoll-exp36-short-seed2/layers.22.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e8ed5d172966466713efffa26ac2e6cf894693ec --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.22.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c32e7c22d54e5f815f0da03dfb79087bef4485cb7412ec7df1ace13a3707896f +size 95637064 diff --git a/k32-smoll-exp36-short-seed2/layers.23.mlp/cfg.json b/k32-smoll-exp36-short-seed2/layers.23.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.23.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short-seed2/layers.23.mlp/sae.safetensors b/k32-smoll-exp36-short-seed2/layers.23.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e7a2ce718072987e222061954f77fd0af125499e --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.23.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51604a7e04d453bc8787926202505ff224679746e0a32802b3b67c3ffa059ebd +size 95637064 diff --git a/k32-smoll-exp36-short-seed2/layers.24.mlp/cfg.json b/k32-smoll-exp36-short-seed2/layers.24.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.24.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short-seed2/layers.24.mlp/sae.safetensors b/k32-smoll-exp36-short-seed2/layers.24.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..91e9feb14590efbb064d8c0ef9a3dd515f56e090 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.24.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30f4acb7018b96ea9540d992a0e24f66617cc43d9ff50e227a71aab6e9cc84a9 +size 95637064 diff --git a/k32-smoll-exp36-short-seed2/layers.25.mlp/cfg.json b/k32-smoll-exp36-short-seed2/layers.25.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.25.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short-seed2/layers.25.mlp/sae.safetensors b/k32-smoll-exp36-short-seed2/layers.25.mlp/sae.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..45f10456f0e8155c8cb9589b3e592aa38d0dc0ab --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.25.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:518e9c5c2c0a7b158b08388b7195b66add117e917325dd94af115747bd4cd6d9 +size 95637064 diff --git a/k32-smoll-exp36-short-seed2/layers.26.mlp/cfg.json b/k32-smoll-exp36-short-seed2/layers.26.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.26.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short-seed2/layers.26.mlp/sae.safetensors b/k32-smoll-exp36-short-seed2/layers.26.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..50d8626cb3be5d9780dbd6af7d558e7fad510d00 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.26.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2598fefa557a861ee0d24cbe452c1f85580c544e9b61d033fbf85cb8cc4360fc +size 95637064 diff --git a/k32-smoll-exp36-short-seed2/layers.27.mlp/cfg.json b/k32-smoll-exp36-short-seed2/layers.27.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.27.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short-seed2/layers.27.mlp/sae.safetensors b/k32-smoll-exp36-short-seed2/layers.27.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..60c5a80159c814601ed96e0d3beadd546ed331e1 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.27.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ede21fd26c5fe2a104bbf50f21118b2fb6cf1dde42997b6fa113711d5bf3edf3 +size 95637064 diff --git a/k32-smoll-exp36-short-seed2/layers.28.mlp/cfg.json b/k32-smoll-exp36-short-seed2/layers.28.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.28.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short-seed2/layers.28.mlp/sae.safetensors b/k32-smoll-exp36-short-seed2/layers.28.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..57025b6e8cd747ee624e39a3399702a6ad209bce --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.28.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01b2442a85b2d5728e4dd11a388ea7b053b4f70315da94df9aa59a5cee964b3f +size 95637064 diff --git a/k32-smoll-exp36-short-seed2/layers.29.mlp/cfg.json b/k32-smoll-exp36-short-seed2/layers.29.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.29.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 
32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short-seed2/layers.29.mlp/sae.safetensors b/k32-smoll-exp36-short-seed2/layers.29.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1ff48301244a6d2db692f554d09ea2b5ed436c3b --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.29.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2be13776eb8148eb5f415bf0ca9e335eeb0117ddcf00762dfd8fbf62edab8d40 +size 95637064 diff --git a/k32-smoll-exp36-short-seed2/layers.3.mlp/cfg.json b/k32-smoll-exp36-short-seed2/layers.3.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.3.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short-seed2/layers.3.mlp/sae.safetensors b/k32-smoll-exp36-short-seed2/layers.3.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8cca290c6bfca778bc7d75cfced73c4169f1c92e --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.3.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60b40c15289424cc25f32019588d583c742a24353b43d1e61b5f818fc2553be8 +size 95637064 diff --git a/k32-smoll-exp36-short-seed2/layers.4.mlp/cfg.json b/k32-smoll-exp36-short-seed2/layers.4.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.4.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short-seed2/layers.4.mlp/sae.safetensors b/k32-smoll-exp36-short-seed2/layers.4.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fc9d799b2f6ecd587b423c010aec5c92971f0b48 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.4.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8544c81723212dab0c768bf183a32cc88b66c1b27e18ee759985d050c45e5a1f +size 95637064 diff --git a/k32-smoll-exp36-short-seed2/layers.5.mlp/cfg.json b/k32-smoll-exp36-short-seed2/layers.5.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.5.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short-seed2/layers.5.mlp/sae.safetensors b/k32-smoll-exp36-short-seed2/layers.5.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3b8d502b06034081d691b1b7e4de84d2c57ef306 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.5.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c68f9f218e65dd0c1d0653c01ffb9f93fc2850b3616abb83e8add9834947f5f1 +size 95637064 diff --git a/k32-smoll-exp36-short-seed2/layers.6.mlp/cfg.json b/k32-smoll-exp36-short-seed2/layers.6.mlp/cfg.json new file mode 100644 index 
0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.6.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short-seed2/layers.6.mlp/sae.safetensors b/k32-smoll-exp36-short-seed2/layers.6.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e64c987e223963ef3cff80f19a925397a5c50f33 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.6.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e864ca8dad4ea2ba47e0cc915699faf1778909b69e0096355cb1430e9daae1cf +size 95637064 diff --git a/k32-smoll-exp36-short-seed2/layers.7.mlp/cfg.json b/k32-smoll-exp36-short-seed2/layers.7.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.7.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short-seed2/layers.7.mlp/sae.safetensors b/k32-smoll-exp36-short-seed2/layers.7.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8e22f7a9de80b1d37a1ebb36d2be732501f199f2 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.7.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:baf375e283f946409266135afa62d680f4af3db3512f1c04826db310d11dd0c7 +size 95637064 diff --git a/k32-smoll-exp36-short-seed2/layers.8.mlp/cfg.json b/k32-smoll-exp36-short-seed2/layers.8.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.8.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short-seed2/layers.8.mlp/sae.safetensors b/k32-smoll-exp36-short-seed2/layers.8.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..affa87f0055d910e3d67b196a73f7a68e14762b9 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.8.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a388b6f54ef1c81b47b1c60ccdc3acc80b06fcff3e77d247c46ca1754a67edc2 +size 95637064 diff --git a/k32-smoll-exp36-short-seed2/layers.9.mlp/cfg.json b/k32-smoll-exp36-short-seed2/layers.9.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.9.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short-seed2/layers.9.mlp/sae.safetensors b/k32-smoll-exp36-short-seed2/layers.9.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f2527b9538743c30de18ebc72340c35c67e88d79 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/layers.9.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:54aba46ed7b50b71a4cf58ffe3c0d7277c29e6aa92622109c08c5d1fb518ac15 +size 95637064 diff --git a/k32-smoll-exp36-short-seed2/lr_scheduler.pt b/k32-smoll-exp36-short-seed2/lr_scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d556a2dc4ad21f6fe40db78e3ede6b92d0fd670 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/lr_scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e59fc95f3bf7393aa8f959423ced7137037ce96a366e8b43636b5f2a116395e9 +size 1652 diff --git a/k32-smoll-exp36-short-seed2/optimizer.pt b/k32-smoll-exp36-short-seed2/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f1239b8dd3b80d3a7cc52be7d0636212bcb83e6d --- /dev/null +++ b/k32-smoll-exp36-short-seed2/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c76685931e68759007934f3bf42f40d6e34ba41151519300acf62ca5a453ddc +size 1457196858 diff --git a/k32-smoll-exp36-short-seed2/state.pt b/k32-smoll-exp36-short-seed2/state.pt new file mode 100644 index 0000000000000000000000000000000000000000..d2e62764cf778045346398c1aa6fc8cde3fd8d48 --- /dev/null +++ b/k32-smoll-exp36-short-seed2/state.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01a430013055575867f25b65ca10407fad6a6c488cb546480afbdf68796ce90f +size 4983864 diff --git a/k32-smoll-exp36-short/config.json b/k32-smoll-exp36-short/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3266cb3e7f3b2695a2ebefec9c8b2616e8080414 --- /dev/null +++ b/k32-smoll-exp36-short/config.json @@ -0,0 +1 @@ +{"sae": {"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false}, "batch_size": 8, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.0.mlp", "layers.1.mlp", "layers.2.mlp", "layers.3.mlp", "layers.4.mlp", "layers.5.mlp", "layers.6.mlp", "layers.7.mlp", "layers.8.mlp", "layers.9.mlp", "layers.10.mlp", "layers.11.mlp", "layers.12.mlp", "layers.13.mlp", "layers.14.mlp", "layers.15.mlp", "layers.16.mlp", "layers.17.mlp", "layers.18.mlp", "layers.19.mlp", "layers.20.mlp", "layers.21.mlp", "layers.22.mlp", "layers.23.mlp", "layers.24.mlp", "layers.25.mlp", "layers.26.mlp", "layers.27.mlp", "layers.28.mlp", "layers.29.mlp"], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 1000, "log_to_wandb": true, "run_name": "k32-smoll-exp36", "wandb_log_frequency": 1, "model": "HuggingFaceTB/SmolLM-135M", "dataset": "HuggingFaceFW/fineweb-edu", "split": "train", "ctx_len": 2049, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": 400000, "resume": false, "finetune": null, "seed": 42, "data_preprocessing_num_proc": 48} \ No newline at end of file diff --git a/k32-smoll-exp36-short/layers.0.mlp/cfg.json b/k32-smoll-exp36-short/layers.0.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short/layers.0.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short/layers.0.mlp/sae.safetensors b/k32-smoll-exp36-short/layers.0.mlp/sae.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..fcf845b9c7a22c1be902874bf2c236aa996c2925 --- /dev/null +++ b/k32-smoll-exp36-short/layers.0.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:caa554f706aef79c8e019fe062c4df62b0c898c35e4361cdf97918b84b6fa571 +size 95637064 diff --git a/k32-smoll-exp36-short/layers.1.mlp/cfg.json b/k32-smoll-exp36-short/layers.1.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short/layers.1.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short/layers.1.mlp/sae.safetensors b/k32-smoll-exp36-short/layers.1.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d937d70c182e7f56462261b2998649f43f1afa19 --- /dev/null +++ b/k32-smoll-exp36-short/layers.1.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4d77c4029b1553b4897e7d2dcf49f440ed3b9902d9767c1f0d22ee6ca76b83b +size 95637064 diff --git a/k32-smoll-exp36-short/layers.10.mlp/cfg.json b/k32-smoll-exp36-short/layers.10.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short/layers.10.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short/layers.10.mlp/sae.safetensors b/k32-smoll-exp36-short/layers.10.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..02867a9b27fac3fc0c0e6522bfbe745731c0019e --- /dev/null +++ b/k32-smoll-exp36-short/layers.10.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2580ebd1d55733e2ac3a580e2d9c1acf46f31e98fc9c537978c84f27e6cf6f9 +size 95637064 diff --git a/k32-smoll-exp36-short/layers.11.mlp/cfg.json b/k32-smoll-exp36-short/layers.11.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short/layers.11.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short/layers.11.mlp/sae.safetensors b/k32-smoll-exp36-short/layers.11.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e6314d7631161f8d3b5bd27b04260ac1eb4399c7 --- /dev/null +++ b/k32-smoll-exp36-short/layers.11.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8350813efa63f26ee348f92b96a0201cdcb2dd89dbc94014e89d84188dfe506e +size 95637064 diff --git a/k32-smoll-exp36-short/layers.12.mlp/cfg.json b/k32-smoll-exp36-short/layers.12.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short/layers.12.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git 
a/k32-smoll-exp36-short/layers.12.mlp/sae.safetensors b/k32-smoll-exp36-short/layers.12.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..16d7731c52a33bbeafa9974258ed5d02c7fb35df --- /dev/null +++ b/k32-smoll-exp36-short/layers.12.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd215939b5f1420dd4e401a0a2a32ea067543ae74e264f3e7a33497c93f909a9 +size 95637064 diff --git a/k32-smoll-exp36-short/layers.13.mlp/cfg.json b/k32-smoll-exp36-short/layers.13.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short/layers.13.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short/layers.13.mlp/sae.safetensors b/k32-smoll-exp36-short/layers.13.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a784cc4851734b797b34f2215d7cc8f353d298cc --- /dev/null +++ b/k32-smoll-exp36-short/layers.13.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3a15e32218b38211f7be9b410981eb1e5db33e9860e69191449d6897081e96e +size 95637064 diff --git a/k32-smoll-exp36-short/layers.14.mlp/cfg.json b/k32-smoll-exp36-short/layers.14.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short/layers.14.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short/layers.14.mlp/sae.safetensors b/k32-smoll-exp36-short/layers.14.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c1697a506352ab1c9ec53f4a01ff4aa7e63df5d0 --- /dev/null +++ b/k32-smoll-exp36-short/layers.14.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a844828aa4562340c23241e7ef6e171d67c0925069661972e54aaa4b4e4f8596 +size 95637064 diff --git a/k32-smoll-exp36-short/layers.15.mlp/cfg.json b/k32-smoll-exp36-short/layers.15.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short/layers.15.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short/layers.15.mlp/sae.safetensors b/k32-smoll-exp36-short/layers.15.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1fe316bcfabc72995a9ba31d163bb2406fa5a221 --- /dev/null +++ b/k32-smoll-exp36-short/layers.15.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a361a373c3e5fffeb9e47b068dfa92c563fa77e0e6156cb2c07e77b5ff2fc8c5 +size 95637064 diff --git a/k32-smoll-exp36-short/layers.16.mlp/cfg.json b/k32-smoll-exp36-short/layers.16.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short/layers.16.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, 
"k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short/layers.16.mlp/sae.safetensors b/k32-smoll-exp36-short/layers.16.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aaa9aa5ab5cf0ba367f92e2ff832717ef58498bd --- /dev/null +++ b/k32-smoll-exp36-short/layers.16.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98ac455d9f5fd0decb11af3cb17d2962df91d9588a0ffeb21afcd7ae30d986f7 +size 95637064 diff --git a/k32-smoll-exp36-short/layers.17.mlp/cfg.json b/k32-smoll-exp36-short/layers.17.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short/layers.17.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short/layers.17.mlp/sae.safetensors b/k32-smoll-exp36-short/layers.17.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b9f01bfda258de58fd33448c532c783a855cb419 --- /dev/null +++ b/k32-smoll-exp36-short/layers.17.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:772b14efbe144f78171480949296933777669e7b8f320c840295a6f227a42e71 +size 95637064 diff --git a/k32-smoll-exp36-short/layers.18.mlp/cfg.json b/k32-smoll-exp36-short/layers.18.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short/layers.18.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short/layers.18.mlp/sae.safetensors b/k32-smoll-exp36-short/layers.18.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7542993f2cd6cb3707a2bae38639a5402bf45b5e --- /dev/null +++ b/k32-smoll-exp36-short/layers.18.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7e8f058c3fd8550cd88cc7da5203ce437cec2d8c943e393048a4bd7e1396349 +size 95637064 diff --git a/k32-smoll-exp36-short/layers.19.mlp/cfg.json b/k32-smoll-exp36-short/layers.19.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short/layers.19.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short/layers.19.mlp/sae.safetensors b/k32-smoll-exp36-short/layers.19.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..178e6220a90ed0c02f90d931e06cfd628e71125f --- /dev/null +++ b/k32-smoll-exp36-short/layers.19.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6e0c18266f120366634e31021fcfd6d14f1b890d0d8c03706763c3bf70171d6 +size 95637064 diff --git a/k32-smoll-exp36-short/layers.2.mlp/cfg.json b/k32-smoll-exp36-short/layers.2.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ 
b/k32-smoll-exp36-short/layers.2.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short/layers.2.mlp/sae.safetensors b/k32-smoll-exp36-short/layers.2.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8aede6122c141c4d1b6dd77c2b7177c6518c3aca --- /dev/null +++ b/k32-smoll-exp36-short/layers.2.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6829c09e142279827466ffa36fcdd1c8c8f72a8f24bf2c1dafe4a4f8fef1cf13 +size 95637064 diff --git a/k32-smoll-exp36-short/layers.20.mlp/cfg.json b/k32-smoll-exp36-short/layers.20.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short/layers.20.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short/layers.20.mlp/sae.safetensors b/k32-smoll-exp36-short/layers.20.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ec69436cfc0657a047ea6a2824c7a0c61e702674 --- /dev/null +++ b/k32-smoll-exp36-short/layers.20.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce569aa5c14036975dc23280321535d5029e831caac69b87a3d8eafb64fb9ae2 +size 95637064 diff --git a/k32-smoll-exp36-short/layers.21.mlp/cfg.json b/k32-smoll-exp36-short/layers.21.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short/layers.21.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short/layers.21.mlp/sae.safetensors b/k32-smoll-exp36-short/layers.21.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3d53838d3c6e4a53574190d6e3a3f3f577e2884d --- /dev/null +++ b/k32-smoll-exp36-short/layers.21.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6eefe52afd6682f649a45f7f0964deab0263107abe83f0bbd8b4e692428fda2 +size 95637064 diff --git a/k32-smoll-exp36-short/layers.22.mlp/cfg.json b/k32-smoll-exp36-short/layers.22.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short/layers.22.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short/layers.22.mlp/sae.safetensors b/k32-smoll-exp36-short/layers.22.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..21e9d63f3dd143fa4d5f31786fa56937d5052322 --- /dev/null +++ b/k32-smoll-exp36-short/layers.22.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddbf8c03ba8098bf94cf2b9fe79f67ab1852d37f7f7b7ba7311eef7582e3d5e4 +size 95637064 diff --git a/k32-smoll-exp36-short/layers.23.mlp/cfg.json b/k32-smoll-exp36-short/layers.23.mlp/cfg.json new file mode 100644 index 
0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short/layers.23.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short/layers.23.mlp/sae.safetensors b/k32-smoll-exp36-short/layers.23.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0a870f142b8d1a289fd26843a9fa200879227831 --- /dev/null +++ b/k32-smoll-exp36-short/layers.23.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:796701040ed4d08859daf16296ec1b2409c07829dfe32cbe7a9b321d85a92379 +size 95637064 diff --git a/k32-smoll-exp36-short/layers.24.mlp/cfg.json b/k32-smoll-exp36-short/layers.24.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short/layers.24.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short/layers.24.mlp/sae.safetensors b/k32-smoll-exp36-short/layers.24.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bc361e541a123ec3fd7959371ae6ac9941a6bb6e --- /dev/null +++ b/k32-smoll-exp36-short/layers.24.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20e2de71a0f537d462ea8b0b8e0b50e1284bf0907e6930217c75452a9b924452 +size 95637064 diff --git a/k32-smoll-exp36-short/layers.25.mlp/cfg.json b/k32-smoll-exp36-short/layers.25.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short/layers.25.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short/layers.25.mlp/sae.safetensors b/k32-smoll-exp36-short/layers.25.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..99ea5161e410e92cf750da2f538a3fb7d3b97c6d --- /dev/null +++ b/k32-smoll-exp36-short/layers.25.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f1ef826f73ccc94395cfc062c58c5f1b3c5dfc592882f5952ee563df8d20c70 +size 95637064 diff --git a/k32-smoll-exp36-short/layers.26.mlp/cfg.json b/k32-smoll-exp36-short/layers.26.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short/layers.26.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short/layers.26.mlp/sae.safetensors b/k32-smoll-exp36-short/layers.26.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cb888d6e776ed3431014610a5567b69d297d301b --- /dev/null +++ b/k32-smoll-exp36-short/layers.26.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b82c9362bf189811305bb8105dce2b1ae4b4d5c1e425d5e449b55a9a515fc7be +size 95637064 diff --git 
a/k32-smoll-exp36-short/layers.27.mlp/cfg.json b/k32-smoll-exp36-short/layers.27.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short/layers.27.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short/layers.27.mlp/sae.safetensors b/k32-smoll-exp36-short/layers.27.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b6ebd2f271911076933e84e56eff3d01e247304b --- /dev/null +++ b/k32-smoll-exp36-short/layers.27.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a075b07799dd739677dcf5311c0688191ccf39454c258c489890410b0222911d +size 95637064 diff --git a/k32-smoll-exp36-short/layers.28.mlp/cfg.json b/k32-smoll-exp36-short/layers.28.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short/layers.28.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short/layers.28.mlp/sae.safetensors b/k32-smoll-exp36-short/layers.28.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..607ed335d45f3e5df7d09cc9df999f4faa662730 --- /dev/null +++ b/k32-smoll-exp36-short/layers.28.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:938e6b9b3cf324eb8b1468a1eefe43e38af5e825989fbb0ee78a8071748cc74d +size 95637064 diff --git a/k32-smoll-exp36-short/layers.29.mlp/cfg.json b/k32-smoll-exp36-short/layers.29.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short/layers.29.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short/layers.29.mlp/sae.safetensors b/k32-smoll-exp36-short/layers.29.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..913c5c6697535d5b9ce35a356c9da25fb534abb3 --- /dev/null +++ b/k32-smoll-exp36-short/layers.29.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3d66131652c0dbc2f7614681f5aef2717b61c75e63808698cadd9d0dfec45df +size 95637064 diff --git a/k32-smoll-exp36-short/layers.3.mlp/cfg.json b/k32-smoll-exp36-short/layers.3.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short/layers.3.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short/layers.3.mlp/sae.safetensors b/k32-smoll-exp36-short/layers.3.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a2a6f43ca78b35cae545c2a06486ad939d89cc14 --- /dev/null +++ b/k32-smoll-exp36-short/layers.3.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 
+oid sha256:1fee9304ceeca3ca953bc7e7d559ceaaf5a9c9213bfd646674f8a11b7cec662a +size 95637064 diff --git a/k32-smoll-exp36-short/layers.4.mlp/cfg.json b/k32-smoll-exp36-short/layers.4.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short/layers.4.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short/layers.4.mlp/sae.safetensors b/k32-smoll-exp36-short/layers.4.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0751446f87a1dadc8ca6fecd2598819d3815c1ec --- /dev/null +++ b/k32-smoll-exp36-short/layers.4.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e920b093c9bedf34a3c5f4711fa9ac1f9f4d74bb583123513b637ac55b59266b +size 95637064 diff --git a/k32-smoll-exp36-short/layers.5.mlp/cfg.json b/k32-smoll-exp36-short/layers.5.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short/layers.5.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short/layers.5.mlp/sae.safetensors b/k32-smoll-exp36-short/layers.5.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1e53363a6655c43c6c0bea576f6d467413b28ee8 --- /dev/null +++ b/k32-smoll-exp36-short/layers.5.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc72802d9d1d786a075ea4857ccd99b63177387c11b07925d169dc72a4286150 +size 95637064 diff --git a/k32-smoll-exp36-short/layers.6.mlp/cfg.json b/k32-smoll-exp36-short/layers.6.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short/layers.6.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short/layers.6.mlp/sae.safetensors b/k32-smoll-exp36-short/layers.6.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..43df95f6de4ad27af9a9e9a92a92c0db105f8b80 --- /dev/null +++ b/k32-smoll-exp36-short/layers.6.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d2c48f9786d720ae77983f227c268a8b01d4061dc4a8bede6dbe7e8166a9d1e +size 95637064 diff --git a/k32-smoll-exp36-short/layers.7.mlp/cfg.json b/k32-smoll-exp36-short/layers.7.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short/layers.7.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short/layers.7.mlp/sae.safetensors b/k32-smoll-exp36-short/layers.7.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ade89ec2dc8e1b47b440bcc1902499b8f4014f53 --- /dev/null +++ 
b/k32-smoll-exp36-short/layers.7.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c85bd85183e86fcb44c6467841cce5307dd9def345b802ea176a09db1396ac1f +size 95637064 diff --git a/k32-smoll-exp36-short/layers.8.mlp/cfg.json b/k32-smoll-exp36-short/layers.8.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short/layers.8.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short/layers.8.mlp/sae.safetensors b/k32-smoll-exp36-short/layers.8.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..57920c0b56b5ebd9fcf5e2e1e755b208cf8853b6 --- /dev/null +++ b/k32-smoll-exp36-short/layers.8.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1564de4e3f8bb8a885e87ef356314b309e90ea54b7a722d41ad1a5677908ed23 +size 95637064 diff --git a/k32-smoll-exp36-short/layers.9.mlp/cfg.json b/k32-smoll-exp36-short/layers.9.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36-short/layers.9.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36-short/layers.9.mlp/sae.safetensors b/k32-smoll-exp36-short/layers.9.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d889ac3814ef069afa73981a3ef5526ec5d363c0 --- /dev/null +++ b/k32-smoll-exp36-short/layers.9.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51d91dffa818d1da591b8b40bd70908bf6fe1007d99f279fb08d8d2ac1a3cf90 +size 95637064 diff --git a/k32-smoll-exp36-short/lr_scheduler.pt b/k32-smoll-exp36-short/lr_scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d556a2dc4ad21f6fe40db78e3ede6b92d0fd670 --- /dev/null +++ b/k32-smoll-exp36-short/lr_scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e59fc95f3bf7393aa8f959423ced7137037ce96a366e8b43636b5f2a116395e9 +size 1652 diff --git a/k32-smoll-exp36-short/optimizer.pt b/k32-smoll-exp36-short/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6f6983f4e7d4132b270699d4cc7072462037268d --- /dev/null +++ b/k32-smoll-exp36-short/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b5ee6bbf6b025a7080d0a04aabc0d61037a31d0be4e0d3fa7a491a38bb6914e +size 1457196858 diff --git a/k32-smoll-exp36-short/state.pt b/k32-smoll-exp36-short/state.pt new file mode 100644 index 0000000000000000000000000000000000000000..d68d454b852c972b13285864452764546ca8d532 --- /dev/null +++ b/k32-smoll-exp36-short/state.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:545f4543a229c32b41b017d16f87589880ac1d73e153f69623f6aec54578a2e9 +size 4983864 diff --git a/k32-smoll-exp36/config.json b/k32-smoll-exp36/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6a35573a46bcef82fbe2b8539bf0e46f89168baf --- /dev/null +++ b/k32-smoll-exp36/config.json @@ -0,0 +1 @@ +{"sae": {"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, 
"multi_topk": false}, "batch_size": 8, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.11.mlp", "layers.17.mlp"], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 1000, "log_to_wandb": false, "run_name": "k32-smoll-exp36", "wandb_log_frequency": 1, "model": "HuggingFaceTB/SmolLM-135M", "dataset": "HuggingFaceFW/fineweb-edu", "split": "train", "ctx_len": 2049, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": 4000000, "resume": false, "finetune": null, "seed": 42, "data_preprocessing_num_proc": 64} \ No newline at end of file diff --git a/k32-smoll-exp36/layers.11.mlp/cfg.json b/k32-smoll-exp36/layers.11.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5aacab2b261661fe08964a4a2bd8b3314ed69a32 --- /dev/null +++ b/k32-smoll-exp36/layers.11.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36/layers.11.mlp/sae.safetensors b/k32-smoll-exp36/layers.11.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5404959ebb473a7ea897978b99876919658059c6 --- /dev/null +++ b/k32-smoll-exp36/layers.11.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5b09146b63d4dc1273222d85e4ec26ba3f0c0a8f0d8a1529c2a1ebc87a4d941 +size 95637064 diff --git a/k32-smoll-exp36/layers.11/cfg.json b/k32-smoll-exp36/layers.11/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36/layers.11/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36/layers.11/sae.safetensors b/k32-smoll-exp36/layers.11/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a32068fd6f4b9a74ac05815b0aa0ee596555a34d --- /dev/null +++ b/k32-smoll-exp36/layers.11/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2033ec564ae10d0bc4d95f8ac3eb4e39715f4bbdc45b910928260b68510d69a +size 95637064 diff --git a/k32-smoll-exp36/layers.15.mlp/cfg.json b/k32-smoll-exp36/layers.15.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5aacab2b261661fe08964a4a2bd8b3314ed69a32 --- /dev/null +++ b/k32-smoll-exp36/layers.15.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36/layers.15.mlp/sae.safetensors b/k32-smoll-exp36/layers.15.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0788ddd5ecb1b343ce065dc4a2666d7e8c5161e2 --- /dev/null +++ b/k32-smoll-exp36/layers.15.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:201bc1a57a38bd06771a90f758362ec4ae2f689ad60b3b11937f6a3017ecd423 +size 95637064 diff --git a/k32-smoll-exp36/layers.15/cfg.json b/k32-smoll-exp36/layers.15/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36/layers.15/cfg.json @@ -0,0 +1 @@ 
+{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36/layers.15/sae.safetensors b/k32-smoll-exp36/layers.15/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..95a2f37c102ec1231d0df54bfd257edf37ae763f --- /dev/null +++ b/k32-smoll-exp36/layers.15/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6d3e247db0236dac9119d3e9c9494d857df3b7636fc8f7aba4c2b9b4622eb7f +size 95637064 diff --git a/k32-smoll-exp36/layers.17.mlp/cfg.json b/k32-smoll-exp36/layers.17.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5aacab2b261661fe08964a4a2bd8b3314ed69a32 --- /dev/null +++ b/k32-smoll-exp36/layers.17.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36/layers.17.mlp/sae.safetensors b/k32-smoll-exp36/layers.17.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cf1b79e0be88e2832f685ca703b79311a7383c46 --- /dev/null +++ b/k32-smoll-exp36/layers.17.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e854b3600951d1c2a0ac8b5bf79d2b0b10d5786f1cb40ac4f3c25eb8e53bd58 +size 95637064 diff --git a/k32-smoll-exp36/layers.17/cfg.json b/k32-smoll-exp36/layers.17/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dce8b777e248175e8ae43d67f21d2bb896934f0 --- /dev/null +++ b/k32-smoll-exp36/layers.17/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp36/layers.17/sae.safetensors b/k32-smoll-exp36/layers.17/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7b54304635a04914ead2e10fb4bcc80eb2aa3357 --- /dev/null +++ b/k32-smoll-exp36/layers.17/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b3d0b30e750ab02691b9f5eae1eb2acb310f28c1056274b77abf5ad3242defd +size 95637064 diff --git a/k32-smoll-exp36/lr_scheduler.pt b/k32-smoll-exp36/lr_scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..251b7970b9b3de1804131b8d3115d5f1bb376932 --- /dev/null +++ b/k32-smoll-exp36/lr_scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2df93f792b8a1a78aa0348b3dae03ffe0d47c407c8b4d3fc6da431739a0c789c +size 1076 diff --git a/k32-smoll-exp36/optimizer.pt b/k32-smoll-exp36/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..766f927b40c880416eacbde4901fb5f7fee62f2f --- /dev/null +++ b/k32-smoll-exp36/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:675c8ca93192f270aea7218163c832f1bf93194073e0865b66505739d20db85c +size 382554374 diff --git a/k32-smoll-exp36/state.pt b/k32-smoll-exp36/state.pt new file mode 100644 index 0000000000000000000000000000000000000000..abed4fe60c114e8cd543982ad74b7a28246de7e0 --- /dev/null +++ b/k32-smoll-exp36/state.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2554f4c61597a3f14bb62af4073fe0c66867b3d9b7466d18cb4e0d5b374d1492 +size 333132 diff --git a/k32-smoll-exp6-seed2/config.json b/k32-smoll-exp6-seed2/config.json new file mode 100644 index 
0000000000000000000000000000000000000000..231be6cb1319435686557ec1821652f6d0cf491b --- /dev/null +++ b/k32-smoll-exp6-seed2/config.json @@ -0,0 +1 @@ +{"sae": {"expansion_factor": 6, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false}, "batch_size": 8, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.15.mlp"], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 1000, "log_to_wandb": false, "run_name": "k32-smoll-exp6-seed2", "wandb_log_frequency": 1, "model": "HuggingFaceTB/SmolLM-135M", "dataset": "HuggingFaceFW/fineweb-edu", "split": "train", "ctx_len": 2049, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": 4000000, "resume": false, "finetune": null, "seed": 42, "data_preprocessing_num_proc": 64} \ No newline at end of file diff --git a/k32-smoll-exp6-seed2/layers.15.mlp/cfg.json b/k32-smoll-exp6-seed2/layers.15.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..82022ecc98b1e503e02fe47225451aa8020b3d5c --- /dev/null +++ b/k32-smoll-exp6-seed2/layers.15.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 6, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp6-seed2/layers.15.mlp/sae.safetensors b/k32-smoll-exp6-seed2/layers.15.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..753cd696bd0ea4123cd1d8b3edb105295d29330f --- /dev/null +++ b/k32-smoll-exp6-seed2/layers.15.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:021e8fcdea8653a743bff6bef79da598f9980bcccd228a029ca085bd2197c1e3 +size 15941696 diff --git a/k32-smoll-exp6-seed2/lr_scheduler.pt b/k32-smoll-exp6-seed2/lr_scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..195753bac2a73f9433507c8f34c81e98f09c103b --- /dev/null +++ b/k32-smoll-exp6-seed2/lr_scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f18429d0c78427c93779b53e0c61b82f4c14405e73b970a5d611bb0761084d30 +size 1012 diff --git a/k32-smoll-exp6-seed2/optimizer.pt b/k32-smoll-exp6-seed2/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f3684b1fd5a46654c4b9f608b88384c4ef50abf2 --- /dev/null +++ b/k32-smoll-exp6-seed2/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a871a7d127e343fe33d510e4820ba7796f4bb26334a53642df18720019ee030b +size 31887058 diff --git a/k32-smoll-exp6-seed2/state.pt b/k32-smoll-exp6-seed2/state.pt new file mode 100644 index 0000000000000000000000000000000000000000..3555773520d29f23f09b7fe240d01fdd21f59293 --- /dev/null +++ b/k32-smoll-exp6-seed2/state.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6720579d1ba2264cfcc9ad4d12272084808324dc17aca2fde6adb01745c32d86 +size 28818 diff --git a/k32-smoll-exp6/config.json b/k32-smoll-exp6/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3bf2a6ad025f527e8955e1579d028de616c1fa35 --- /dev/null +++ b/k32-smoll-exp6/config.json @@ -0,0 +1 @@ +{"sae": {"expansion_factor": 6, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false}, "batch_size": 8, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.15.mlp"], "layers": [], 
"layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 1000, "log_to_wandb": false, "run_name": "k32-smoll-exp6", "wandb_log_frequency": 1, "model": "HuggingFaceTB/SmolLM-135M", "dataset": "HuggingFaceFW/fineweb-edu", "split": "train", "ctx_len": 2049, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": 4000000, "resume": false, "finetune": null, "seed": 42, "data_preprocessing_num_proc": 64} \ No newline at end of file diff --git a/k32-smoll-exp6/layers.15.mlp/cfg.json b/k32-smoll-exp6/layers.15.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..82022ecc98b1e503e02fe47225451aa8020b3d5c --- /dev/null +++ b/k32-smoll-exp6/layers.15.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 6, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "d_in": 576} \ No newline at end of file diff --git a/k32-smoll-exp6/layers.15.mlp/sae.safetensors b/k32-smoll-exp6/layers.15.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..93462e379c50619ba5e7c85dfdb98ec79b7faecd --- /dev/null +++ b/k32-smoll-exp6/layers.15.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fbe277fb395728e8b6ae95f20708e7010a565679dfa10223e39da0bd574a407 +size 15941696 diff --git a/k32-smoll-exp6/lr_scheduler.pt b/k32-smoll-exp6/lr_scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..195753bac2a73f9433507c8f34c81e98f09c103b --- /dev/null +++ b/k32-smoll-exp6/lr_scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f18429d0c78427c93779b53e0c61b82f4c14405e73b970a5d611bb0761084d30 +size 1012 diff --git a/k32-smoll-exp6/optimizer.pt b/k32-smoll-exp6/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e6fc1243f77868bce688e140f1eb518659f3ff19 --- /dev/null +++ b/k32-smoll-exp6/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6afe5096bfaae842896c0fc79a94b82833eb6314e2e5dac2d61937d61f268a74 +size 31887058 diff --git a/k32-smoll-exp6/state.pt b/k32-smoll-exp6/state.pt new file mode 100644 index 0000000000000000000000000000000000000000..501389d6db8fefa2c0da4909a828f5fd4c3f7572 --- /dev/null +++ b/k32-smoll-exp6/state.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57fd52ea99b5368ce0829abed44db97542affa29ba359c46ee6833a8af689423 +size 28818 diff --git a/k64-sae-mlp-32k-seed2/config.json b/k64-sae-mlp-32k-seed2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2c6c4415dfe058ac6e921fd81194e01af74db3b4 --- /dev/null +++ b/k64-sae-mlp-32k-seed2/config.json @@ -0,0 +1 @@ +{"sae": {"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 64, "multi_topk": false, "skip_connection": false}, "batch_size": 8, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.6.mlp"], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 1000, "log_to_wandb": true, "run_name": "k64-sae-mlp-32k-seed2", "wandb_log_frequency": 1, "model": "EleutherAI/pythia-160m", "dataset": "/mnt/ssd-1/pile_preshuffled/standard/document.bin", "split": "train", "ctx_len": 2049, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": 4000000, "resume": false, "finetune": null, "seed": 52, "number_seeds": 1, "data_preprocessing_num_proc": 48} \ No 
newline at end of file diff --git a/k64-sae-mlp-32k-seed2/layers.6.mlp/cfg.json b/k64-sae-mlp-32k-seed2/layers.6.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..c612cbe8ba8cea87706a7ff623e258c54f06edab --- /dev/null +++ b/k64-sae-mlp-32k-seed2/layers.6.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 64, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k64-sae-mlp-32k-seed2/layers.6.mlp/sae.safetensors b/k64-sae-mlp-32k-seed2/layers.6.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..10bcc8b5dcbb0b81f68713bb88a3f1431cdfba1f --- /dev/null +++ b/k64-sae-mlp-32k-seed2/layers.6.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cdfc22f8e7b69f8aa151719492929044c4431ed4943bc35dc83a4c18baf7f71 +size 201461072 diff --git a/k64-sae-mlp-32k-seed2/lr_scheduler.pt b/k64-sae-mlp-32k-seed2/lr_scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..09d44e04064f4b6efd4e262a530222ffda2bae63 --- /dev/null +++ b/k64-sae-mlp-32k-seed2/lr_scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a645210450394a692a612289cb5fe097161dfa420f3c634f6516bc67841ac2b4 +size 1012 diff --git a/k64-sae-mlp-32k-seed2/optimizer.pt b/k64-sae-mlp-32k-seed2/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..3c12905e10d15b5fe2d7339e115cab6a3b599c27 --- /dev/null +++ b/k64-sae-mlp-32k-seed2/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:651ac6aaf09d2836adcb81391ebd5489dd79f0472dd30151f8bf2af97794cb90 +size 102316366 diff --git a/k64-sae-mlp-32k-seed2/state.pt b/k64-sae-mlp-32k-seed2/state.pt new file mode 100644 index 0000000000000000000000000000000000000000..525d0c46223ea392222acd26c66a33cbb515dfdb --- /dev/null +++ b/k64-sae-mlp-32k-seed2/state.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8be4eddd2c6e55f9e229cd0a16ddba5f4e73c08909c67fdae808675fc1b7a115 +size 263314 diff --git a/k64-sae-mlp-32k/config.json b/k64-sae-mlp-32k/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e6b8698a14adc8c018f90f42c9145bf5f17ceef2 --- /dev/null +++ b/k64-sae-mlp-32k/config.json @@ -0,0 +1 @@ +{"sae": {"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 64, "multi_topk": false, "skip_connection": false}, "batch_size": 8, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.6.mlp"], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 1000, "log_to_wandb": true, "run_name": "k64-sae-mlp-32k", "wandb_log_frequency": 1, "model": "EleutherAI/pythia-160m", "dataset": "/mnt/ssd-1/pile_preshuffled/standard/document.bin", "split": "train", "ctx_len": 2049, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": 4000000, "resume": false, "finetune": null, "seed": 42, "number_seeds": 1, "data_preprocessing_num_proc": 48} \ No newline at end of file diff --git a/k64-sae-mlp-32k/layers.6.mlp/cfg.json b/k64-sae-mlp-32k/layers.6.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..c612cbe8ba8cea87706a7ff623e258c54f06edab --- /dev/null +++ b/k64-sae-mlp-32k/layers.6.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": 
true, "num_latents": 32768, "k": 64, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k64-sae-mlp-32k/layers.6.mlp/sae.safetensors b/k64-sae-mlp-32k/layers.6.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..27da55af5bcfb143450b93be5553ecc5aa7e3770 --- /dev/null +++ b/k64-sae-mlp-32k/layers.6.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afb8bc8091d9f14ecb4ae57fd8eebd7f667cc85ef21b38205fdf93628b3a9139 +size 201461072 diff --git a/k64-sae-mlp-32k/lr_scheduler.pt b/k64-sae-mlp-32k/lr_scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..09d44e04064f4b6efd4e262a530222ffda2bae63 --- /dev/null +++ b/k64-sae-mlp-32k/lr_scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a645210450394a692a612289cb5fe097161dfa420f3c634f6516bc67841ac2b4 +size 1012 diff --git a/k64-sae-mlp-32k/optimizer.pt b/k64-sae-mlp-32k/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..8a6f56195d9ac600008f4097645b964a5d5a8c03 --- /dev/null +++ b/k64-sae-mlp-32k/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7dc8defa5711e78a489fd1b00b6546d44363a64436e96362858563ab1369da5 +size 102316366 diff --git a/k64-sae-mlp-32k/state.pt b/k64-sae-mlp-32k/state.pt new file mode 100644 index 0000000000000000000000000000000000000000..1a4f67d504d9b0226a85ddfccb1a67d2cb875485 --- /dev/null +++ b/k64-sae-mlp-32k/state.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c2e92944813b68e7ef5cc4b2f0f3d570c94a670d0d872ef7f5db0d7aac4d22b +size 263314 diff --git a/k64-sae-mlp-4k-seed2/config.json b/k64-sae-mlp-4k-seed2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a958da7b69696d7e5b26d3bc38fd26702ed3c0c7 --- /dev/null +++ b/k64-sae-mlp-4k-seed2/config.json @@ -0,0 +1 @@ +{"sae": {"expansion_factor": 32, "normalize_decoder": true, "num_latents": 4084, "k": 64, "multi_topk": false, "skip_connection": false}, "batch_size": 8, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.6.mlp"], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 1000, "log_to_wandb": true, "run_name": "k64-sae-mlp-4k-seed2", "wandb_log_frequency": 1, "model": "EleutherAI/pythia-160m", "dataset": "/mnt/ssd-1/pile_preshuffled/standard/document.bin", "split": "train", "ctx_len": 2049, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": 4000000, "resume": false, "finetune": null, "seed": 42, "number_seeds": 1, "data_preprocessing_num_proc": 48} \ No newline at end of file diff --git a/k64-sae-mlp-4k-seed2/layers.6.mlp/cfg.json b/k64-sae-mlp-4k-seed2/layers.6.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..bf61b3c408a9e63acfbf24aa8a011b24494983ae --- /dev/null +++ b/k64-sae-mlp-4k-seed2/layers.6.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 4084, "k": 64, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k64-sae-mlp-4k-seed2/layers.6.mlp/sae.safetensors b/k64-sae-mlp-4k-seed2/layers.6.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8009de748a651d5c409411a704f1fa43022cfb4e --- /dev/null +++ 
b/k64-sae-mlp-4k-seed2/layers.6.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:846a92ecd53d744ab5e8005224107d89eb19d76b3b52f81ac8540641335c48dc +size 25111832 diff --git a/k64-sae-mlp-4k-seed2/lr_scheduler.pt b/k64-sae-mlp-4k-seed2/lr_scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..67f4a53365c2255fe3d69e453e58b71d1d38c20f --- /dev/null +++ b/k64-sae-mlp-4k-seed2/lr_scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d094c2b89e5159477b912d683794fde025bc1ce77bae1a650a8d1fd7d9c5cef +size 1012 diff --git a/k64-sae-mlp-4k-seed2/optimizer.pt b/k64-sae-mlp-4k-seed2/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6113949b9dae542e34ece5be4bba5a3bbcb6ab74 --- /dev/null +++ b/k64-sae-mlp-4k-seed2/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b43281f04e678376eaad747710d232cc6d80594b873314b4ab92b3f2d605c49 +size 12787664 diff --git a/k64-sae-mlp-4k-seed2/state.pt b/k64-sae-mlp-4k-seed2/state.pt new file mode 100644 index 0000000000000000000000000000000000000000..c5e14432f5d01ba0d9a345e441268e4b1f7af5c2 --- /dev/null +++ b/k64-sae-mlp-4k-seed2/state.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7a67ba19b20bbe45cb318bf14713fa5d2a6495dedbee09baff2a819af0dd666 +size 33874 diff --git a/k64-sae-mlp-4k/config.json b/k64-sae-mlp-4k/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4e68eb846f32de8b8e7539f4a942b92ba14ad466 --- /dev/null +++ b/k64-sae-mlp-4k/config.json @@ -0,0 +1 @@ +{"sae": {"expansion_factor": 32, "normalize_decoder": true, "num_latents": 4084, "k": 64, "multi_topk": false, "skip_connection": false}, "batch_size": 8, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.6.mlp"], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 1000, "log_to_wandb": true, "run_name": "k64-sae-mlp-4k", "wandb_log_frequency": 1, "model": "EleutherAI/pythia-160m", "dataset": "/mnt/ssd-1/pile_preshuffled/standard/document.bin", "split": "train", "ctx_len": 2049, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": 4000000, "resume": false, "finetune": null, "seed": 42, "number_seeds": 1, "data_preprocessing_num_proc": 48} \ No newline at end of file diff --git a/k64-sae-mlp-4k/layers.6.mlp/cfg.json b/k64-sae-mlp-4k/layers.6.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..bf61b3c408a9e63acfbf24aa8a011b24494983ae --- /dev/null +++ b/k64-sae-mlp-4k/layers.6.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 4084, "k": 64, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/k64-sae-mlp-4k/layers.6.mlp/sae.safetensors b/k64-sae-mlp-4k/layers.6.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..510cd925ce21cab674dd6715bf23a69a7cd3390a --- /dev/null +++ b/k64-sae-mlp-4k/layers.6.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d00cab7948d588866a0e6a39a5beb2cc7606648b774eeadd1bfa14cc36a0f97e +size 25111832 diff --git a/k64-sae-mlp-4k/lr_scheduler.pt b/k64-sae-mlp-4k/lr_scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..67f4a53365c2255fe3d69e453e58b71d1d38c20f --- /dev/null 
+++ b/k64-sae-mlp-4k/lr_scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d094c2b89e5159477b912d683794fde025bc1ce77bae1a650a8d1fd7d9c5cef +size 1012 diff --git a/k64-sae-mlp-4k/optimizer.pt b/k64-sae-mlp-4k/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..bcf94b27a39f0ba930e6cc59e653de42fb525c70 --- /dev/null +++ b/k64-sae-mlp-4k/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87c52408f6d3c911cd6937627e457852ea3d8f565d734779a688bf34f8a07ccd +size 12787664 diff --git a/k64-sae-mlp-4k/state.pt b/k64-sae-mlp-4k/state.pt new file mode 100644 index 0000000000000000000000000000000000000000..c5e14432f5d01ba0d9a345e441268e4b1f7af5c2 --- /dev/null +++ b/k64-sae-mlp-4k/state.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7a67ba19b20bbe45cb318bf14713fa5d2a6495dedbee09baff2a819af0dd666 +size 33874 diff --git a/llama-8b-exp32-short-seed2/layers.16.mlp/cfg.json b/llama-8b-exp32-short-seed2/layers.16.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..40cc895d610ecd334f677c690bf82d4f4945664d --- /dev/null +++ b/llama-8b-exp32-short-seed2/layers.16.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 4096} \ No newline at end of file diff --git a/llama-8b-exp32-short-seed2/layers.16.mlp/sae.safetensors b/llama-8b-exp32-short-seed2/layers.16.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0462b31d2f095d73ba33811130955be28e6d8465 --- /dev/null +++ b/llama-8b-exp32-short-seed2/layers.16.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66850bf12816986526642ba07ca35ed71d03850aca610b7a6c3b48fd41275c89 +size 4832444760 diff --git a/llama-8b-exp32-short-seed2/layers.16/cfg.json b/llama-8b-exp32-short-seed2/layers.16/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4b3039290562bf31311cf1ede76751732a2a5ff4 --- /dev/null +++ b/llama-8b-exp32-short-seed2/layers.16/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 4096} \ No newline at end of file diff --git a/llama-8b-exp32-short-seed2/layers.16/sae.safetensors b/llama-8b-exp32-short-seed2/layers.16/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..52dd4d3b68c6bedb909fa9e40e1e9eb222d9e74e --- /dev/null +++ b/llama-8b-exp32-short-seed2/layers.16/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea643e5d6c0cedaec85d998b2d8d9c9fe6821a1c2b49d48feda908c88041c432 +size 4295508312 diff --git a/llama-8b-exp32-short/config.json b/llama-8b-exp32-short/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9c621423060dcfa0d0ab4d1c0edb821217303c26 --- /dev/null +++ b/llama-8b-exp32-short/config.json @@ -0,0 +1 @@ +{"sae": {"expansion_factor": 6, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false}, "batch_size": 8, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.16.mlp"], "init_seeds": [0, 1], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 1000, "log_to_wandb": false, "run_name": null, 
"wandb_log_frequency": 1, "model": "meta-llama/Meta-Llama-3.1-8B", "dataset": "togethercomputer/RedPajama-Data-1T-Sample", "split": "train", "ctx_len": 2048, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": null, "resume": false, "text_column": "text", "finetune": null, "shuffle_seed": 42, "data_preprocessing_num_proc": 128} \ No newline at end of file diff --git a/llama-8b-exp32-short/layers.16/cfg.json b/llama-8b-exp32-short/layers.16/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4b3039290562bf31311cf1ede76751732a2a5ff4 --- /dev/null +++ b/llama-8b-exp32-short/layers.16/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 4096} \ No newline at end of file diff --git a/llama-8b-exp32-short/layers.16/sae.safetensors b/llama-8b-exp32-short/layers.16/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dc5afa9eea562f4c0115efb12ad1b7b86e262734 --- /dev/null +++ b/llama-8b-exp32-short/layers.16/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85be4c737094efa4bb7b63f91de92a521dbefb505d67ddba508bea7cd3a48cfe +size 4295508312 diff --git a/llama-8b-exp32-short/layers.6.mlp/cfg.json b/llama-8b-exp32-short/layers.6.mlp/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ba51bc4beefd90615925438ff54756a7c3161145 --- /dev/null +++ b/llama-8b-exp32-short/layers.6.mlp/cfg.json @@ -0,0 +1 @@ +{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 768} \ No newline at end of file diff --git a/llama-8b-exp32-short/layers.6.mlp/sae.safetensors b/llama-8b-exp32-short/layers.6.mlp/sae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6a53c5ae7e943435513bdfa5ab00252f5aaeedbf --- /dev/null +++ b/llama-8b-exp32-short/layers.6.mlp/sae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84e734c43c7c0c9ca72946cc0f0151c4a1cadbaa8b9d9894458faccf1084232b +size 201461072 diff --git a/llama-8b-exp32-short/lr_scheduler.pt b/llama-8b-exp32-short/lr_scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..52e5722907fd7e4fae449d750d79b80136e16b5c --- /dev/null +++ b/llama-8b-exp32-short/lr_scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e53e11d5822e2a845461169f6371ef22d834466f027506283d63de94b640fa6 +size 1076 diff --git a/llama-8b-exp32-short/optimizer.pt b/llama-8b-exp32-short/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..121c6e9b16dffed0cd2ced8caf945138847b6d78 --- /dev/null +++ b/llama-8b-exp32-short/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88ea74e0ae6729efe3baf0e23fa53a886de6d2aee8babcc7239d8c663f4957a1 +size 3221691654 diff --git a/llama-8b-exp32-short/state.pt b/llama-8b-exp32-short/state.pt new file mode 100644 index 0000000000000000000000000000000000000000..8dce4be0a28795e9eddb9cf14f429b536cecd5e3 --- /dev/null +++ b/llama-8b-exp32-short/state.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bc5b517ad87bbf9cce5ea454d4f9b777a38193253d69e84bcb6c2e7820d6b84 +size 394636