luciaquirke commited on
Commit
e942c28
·
verified ·
1 Parent(s): 67a4247

Add files using upload-large-folder tool

Browse files
config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sae": {"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false}, "batch_size": 2, "grad_acc_steps": 4, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.0.mlp", "layers.1.mlp", "layers.2.mlp", "layers.3.mlp", "layers.4.mlp", "layers.5.mlp", "layers.6.mlp", "layers.7.mlp", "layers.8.mlp", "layers.9.mlp", "layers.10.mlp", "layers.11.mlp", "layers.12.mlp", "layers.13.mlp", "layers.14.mlp", "layers.15.mlp"], "init_seeds": [0], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": true, "save_every": 1000, "log_to_wandb": true, "run_name": "sae-DeepSeek-R1-Distill-Qwen-1.5B-65k", "wandb_log_frequency": 1, "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "EleutherAI/fineweb-edu-dedup-10b", "subset": null, "split": "train", "ctx_len": 2048, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": null, "resume": false, "text_column": "text", "finetune": null, "shuffle_seed": 42, "data_preprocessing_num_proc": 48}
layers.0.mlp/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1536}
layers.0.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:170cc27083a99980de0f1b4607327e1a3a480f29ff2bcdb698e251d7ccfbeb71
3
+ size 805574992
layers.1.mlp/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1536}
layers.1.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55db1ef46c774e84a8cb5725fde8e400b0924103064d1234f9c2a418149d18c0
3
+ size 805574992
layers.10.mlp/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1536}
layers.10.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f3869c5b2d36fcf5c860f8f5327b168d4700abf19087e22e92ca0aa7da0e3dd
3
+ size 805574992
layers.11.mlp/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1536}
layers.11.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69567de11e84a64bfbf989d72cf61f7b6af92219030efcfbb215b142ca436b17
3
+ size 805574992
layers.12.mlp/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1536}
layers.12.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f053683f1fdbbde05e1377b144e6f8e6f2d768e0e6448d369f269991e16c7050
3
+ size 805574992
layers.13.mlp/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1536}
layers.13.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39c9be49563e7e1e7dd36890dd5f3923d95ff399c41d3cd4860826848369e2ee
3
+ size 805574992
layers.14.mlp/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1536}
layers.14.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0989fc5b566ab106636762f613464d21c6e29d9ee2b918704ff9f014ae6340b
3
+ size 805574992
layers.15.mlp/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1536}
layers.15.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0055be1ded241e33ca5fd04421776ca36f862f1af44a83a776bda6696f06c293
3
+ size 805574992
layers.2.mlp/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1536}
layers.2.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f4f1a470bbeff139a3d09f1e194e984a6ea44a48155488e81d1ad3f091b5f25
3
+ size 805574992
layers.3.mlp/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1536}
layers.3.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41928002b8e3d51c706db7d566ba67f16fec2b578bf93542b18762ec3b51d5ff
3
+ size 805574992
layers.4.mlp/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1536}
layers.4.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:737d422757b908a2df403a54a5282948f231de8ddbeb73b79ed7e75c30729bba
3
+ size 805574992
layers.5.mlp/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1536}
layers.5.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:198e9d1079e9eb1e9e960ced28f076f69de5e37d1bce7d7deb5a2074330c48e4
3
+ size 805574992
layers.6.mlp/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1536}
layers.6.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e30e882cb50ee4bf28f2e8dd19106c4a047c213de4afe8bc849ada872e70d04c
3
+ size 805574992
layers.7.mlp/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1536}
layers.7.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d8a6bb2a37c3515675cb0aedcf5b03c4894bc53b3fc295ca3cdf6690125c5a5
3
+ size 805574992
layers.8.mlp/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1536}
layers.8.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6580e5bc10a9910b5e99844235d31f7a605e85f177301c264b4cdda7d6e1107d
3
+ size 805574992
layers.9.mlp/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1536}
layers.9.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3c9bb68fd52384c8f74af15d49dbee2c4c1993fef6e482e59708cc2cb25bbd1
3
+ size 805574992
lr_scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf208a9524911862838668e1825fa34dc5e29ef9c92e1eb7f9258ce6cad96710
3
+ size 1076
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8965b87655e439a0800021d7d8c4a6de610a92c10de1cb6362844d62aa4b3a5f
3
+ size 818191552
state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edaae6a6dd3eecca5e933b467c1d0563006870a40881915dbf8ecd7a694420e5
3
+ size 1049996