adamkarvonen committed
Commit eabfa13 · verified · 1 parent: 8308b9e

Add files using upload-large-folder tool

gemma-2-2b_standard_new_width-2pow16_date-0107/resid_post_layer_12/trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:243365f839b4182319013232b2475750ad92068bcd4467950c91383dc5590d32
+ size 1208232616
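Each `ae.pt` entry is a Git LFS pointer (spec version, sha256 oid, size in bytes) rather than the ~1.2 GB weight file itself. A minimal sketch for fetching the real checkpoint from the Hub follows; the repo id is a placeholder, since the commit view does not name the repository, and it assumes the `huggingface_hub` package is installed.

```python
from huggingface_hub import hf_hub_download

# Placeholder repo id -- substitute the Hub repository this commit belongs to.
REPO_ID = "your-username/your-sae-repo"

# Resolves the LFS pointer and downloads the ~1.2 GB checkpoint into the local cache.
path = hf_hub_download(
    repo_id=REPO_ID,
    filename="gemma-2-2b_standard_new_width-2pow16_date-0107/resid_post_layer_12/trainer_0/ae.pt",
)
print(path)
```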
gemma-2-2b_standard_new_width-2pow16_date-0107/resid_post_layer_12/trainer_0/config.json ADDED
@@ -0,0 +1,29 @@
+ {
+   "trainer": {
+     "dict_class": "AutoEncoder",
+     "trainer_class": "StandardTrainerAprilUpdate",
+     "activation_dim": 2304,
+     "dict_size": 16384,
+     "lr": 0.0003,
+     "l1_penalty": 0.012,
+     "warmup_steps": 1000,
+     "sparsity_warmup_steps": 5000,
+     "steps": 244140,
+     "decay_start": 195312,
+     "seed": 0,
+     "device": "cuda:2",
+     "layer": 12,
+     "lm_name": "google/gemma-2-2b",
+     "wandb_name": "StandardTrainerNew-google/gemma-2-2b-resid_post_layer_12_trainer_0",
+     "submodule_name": "resid_post_layer_12"
+   },
+   "buffer": {
+     "d_submodule": 2304,
+     "io": "out",
+     "n_ctxs": 244,
+     "ctx_len": 1024,
+     "refresh_batch_size": 128,
+     "out_batch_size": 2048,
+     "device": "cuda:2"
+   }
+ }
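Each `config.json` pairs a `trainer` block (dictionary class, dimensions, learning rate, L1 penalty, schedule) with a `buffer` block describing how residual-stream activations were collected from layer 12 of `google/gemma-2-2b`. The sketch below reads that config and inspects the matching checkpoint; the `StandardSAE` class is a hypothetical illustration of a standard ReLU autoencoder with the recorded `activation_dim` and `dict_size`, not necessarily the exact module layout `ae.pt` was saved from.

```python
import json

import torch
import torch.nn as nn

base = "gemma-2-2b_standard_new_width-2pow16_date-0107/resid_post_layer_12/trainer_0"

# Hyperparameters recorded alongside the checkpoint.
with open(f"{base}/config.json") as f:
    cfg = json.load(f)["trainer"]
print(cfg["dict_class"], cfg["activation_dim"], cfg["dict_size"], cfg["l1_penalty"])

# Peek at the checkpoint; if it is a plain state_dict, list parameter names/shapes.
state = torch.load(f"{base}/ae.pt", map_location="cpu")
if isinstance(state, dict):
    for name, tensor in state.items():
        print(name, tuple(getattr(tensor, "shape", ())))

# Hypothetical standard L1 sparse autoencoder with the same dimensions.
class StandardSAE(nn.Module):
    def __init__(self, activation_dim: int, dict_size: int):
        super().__init__()
        self.encoder = nn.Linear(activation_dim, dict_size)
        self.decoder = nn.Linear(dict_size, activation_dim)

    def forward(self, x):
        f = torch.relu(self.encoder(x))  # sparse feature activations
        return self.decoder(f), f

sae = StandardSAE(cfg["activation_dim"], cfg["dict_size"])
```

The remaining trainer directories below repeat this layout; only `l1_penalty` and the `wandb_name` suffix change.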
gemma-2-2b_standard_new_width-2pow16_date-0107/resid_post_layer_12/trainer_1/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:57c34c16b2c40c0f5e02c00caf6cdc012e65681c77bb677634dcca519068a407
+ size 1208232616
gemma-2-2b_standard_new_width-2pow16_date-0107/resid_post_layer_12/trainer_1/config.json ADDED
@@ -0,0 +1,29 @@
+ {
+   "trainer": {
+     "dict_class": "AutoEncoder",
+     "trainer_class": "StandardTrainerAprilUpdate",
+     "activation_dim": 2304,
+     "dict_size": 16384,
+     "lr": 0.0003,
+     "l1_penalty": 0.015,
+     "warmup_steps": 1000,
+     "sparsity_warmup_steps": 5000,
+     "steps": 244140,
+     "decay_start": 195312,
+     "seed": 0,
+     "device": "cuda:2",
+     "layer": 12,
+     "lm_name": "google/gemma-2-2b",
+     "wandb_name": "StandardTrainerNew-google/gemma-2-2b-resid_post_layer_12_trainer_1",
+     "submodule_name": "resid_post_layer_12"
+   },
+   "buffer": {
+     "d_submodule": 2304,
+     "io": "out",
+     "n_ctxs": 244,
+     "ctx_len": 1024,
+     "refresh_batch_size": 128,
+     "out_batch_size": 2048,
+     "device": "cuda:2"
+   }
+ }
gemma-2-2b_standard_new_width-2pow16_date-0107/resid_post_layer_12/trainer_2/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2500aa1f28537c0e3de0b9533a47f2665183816c7652130d1439fff270f3c284
+ size 1208232616
gemma-2-2b_standard_new_width-2pow16_date-0107/resid_post_layer_12/trainer_2/config.json ADDED
@@ -0,0 +1,29 @@
+ {
+   "trainer": {
+     "dict_class": "AutoEncoder",
+     "trainer_class": "StandardTrainerAprilUpdate",
+     "activation_dim": 2304,
+     "dict_size": 16384,
+     "lr": 0.0003,
+     "l1_penalty": 0.02,
+     "warmup_steps": 1000,
+     "sparsity_warmup_steps": 5000,
+     "steps": 244140,
+     "decay_start": 195312,
+     "seed": 0,
+     "device": "cuda:2",
+     "layer": 12,
+     "lm_name": "google/gemma-2-2b",
+     "wandb_name": "StandardTrainerNew-google/gemma-2-2b-resid_post_layer_12_trainer_2",
+     "submodule_name": "resid_post_layer_12"
+   },
+   "buffer": {
+     "d_submodule": 2304,
+     "io": "out",
+     "n_ctxs": 244,
+     "ctx_len": 1024,
+     "refresh_batch_size": 128,
+     "out_batch_size": 2048,
+     "device": "cuda:2"
+   }
+ }
gemma-2-2b_standard_new_width-2pow16_date-0107/resid_post_layer_12/trainer_3/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8bb94b29cbf921038fea94ab85ca66c19f619f47c8139b9925e48d4e6480f6ac
+ size 1208232616
gemma-2-2b_standard_new_width-2pow16_date-0107/resid_post_layer_12/trainer_3/config.json ADDED
@@ -0,0 +1,29 @@
+ {
+   "trainer": {
+     "dict_class": "AutoEncoder",
+     "trainer_class": "StandardTrainerAprilUpdate",
+     "activation_dim": 2304,
+     "dict_size": 16384,
+     "lr": 0.0003,
+     "l1_penalty": 0.03,
+     "warmup_steps": 1000,
+     "sparsity_warmup_steps": 5000,
+     "steps": 244140,
+     "decay_start": 195312,
+     "seed": 0,
+     "device": "cuda:2",
+     "layer": 12,
+     "lm_name": "google/gemma-2-2b",
+     "wandb_name": "StandardTrainerNew-google/gemma-2-2b-resid_post_layer_12_trainer_3",
+     "submodule_name": "resid_post_layer_12"
+   },
+   "buffer": {
+     "d_submodule": 2304,
+     "io": "out",
+     "n_ctxs": 244,
+     "ctx_len": 1024,
+     "refresh_batch_size": 128,
+     "out_batch_size": 2048,
+     "device": "cuda:2"
+   }
+ }
gemma-2-2b_standard_new_width-2pow16_date-0107/resid_post_layer_12/trainer_4/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:00aa97c3a220aadb85a7e8fec705031c969280c9920250fd95ee34c6d331bf39
+ size 1208232616
gemma-2-2b_standard_new_width-2pow16_date-0107/resid_post_layer_12/trainer_4/config.json ADDED
@@ -0,0 +1,29 @@
+ {
+   "trainer": {
+     "dict_class": "AutoEncoder",
+     "trainer_class": "StandardTrainerAprilUpdate",
+     "activation_dim": 2304,
+     "dict_size": 16384,
+     "lr": 0.0003,
+     "l1_penalty": 0.04,
+     "warmup_steps": 1000,
+     "sparsity_warmup_steps": 5000,
+     "steps": 244140,
+     "decay_start": 195312,
+     "seed": 0,
+     "device": "cuda:2",
+     "layer": 12,
+     "lm_name": "google/gemma-2-2b",
+     "wandb_name": "StandardTrainerNew-google/gemma-2-2b-resid_post_layer_12_trainer_4",
+     "submodule_name": "resid_post_layer_12"
+   },
+   "buffer": {
+     "d_submodule": 2304,
+     "io": "out",
+     "n_ctxs": 244,
+     "ctx_len": 1024,
+     "refresh_batch_size": 128,
+     "out_batch_size": 2048,
+     "device": "cuda:2"
+   }
+ }
gemma-2-2b_standard_new_width-2pow16_date-0107/resid_post_layer_12/trainer_5/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:79506ebf3b1acb9871297dfa2c23b7892a0f58c41cb71a7e359eb39cd0a60b69
+ size 1208232616
gemma-2-2b_standard_new_width-2pow16_date-0107/resid_post_layer_12/trainer_5/config.json ADDED
@@ -0,0 +1,29 @@
+ {
+   "trainer": {
+     "dict_class": "AutoEncoder",
+     "trainer_class": "StandardTrainerAprilUpdate",
+     "activation_dim": 2304,
+     "dict_size": 16384,
+     "lr": 0.0003,
+     "l1_penalty": 0.06,
+     "warmup_steps": 1000,
+     "sparsity_warmup_steps": 5000,
+     "steps": 244140,
+     "decay_start": 195312,
+     "seed": 0,
+     "device": "cuda:2",
+     "layer": 12,
+     "lm_name": "google/gemma-2-2b",
+     "wandb_name": "StandardTrainerNew-google/gemma-2-2b-resid_post_layer_12_trainer_5",
+     "submodule_name": "resid_post_layer_12"
+   },
+   "buffer": {
+     "d_submodule": 2304,
+     "io": "out",
+     "n_ctxs": 244,
+     "ctx_len": 1024,
+     "refresh_batch_size": 128,
+     "out_batch_size": 2048,
+     "device": "cuda:2"
+   }
+ }
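Taken together, the six trainer directories in this commit form an L1-penalty sweep over otherwise identical hyperparameters; the values below are read directly from the configs above.

```python
# L1 sparsity penalties swept across the six trainers (all other
# hyperparameters in the configs above are identical).
l1_penalties = {
    "trainer_0": 0.012,
    "trainer_1": 0.015,
    "trainer_2": 0.02,
    "trainer_3": 0.03,
    "trainer_4": 0.04,
    "trainer_5": 0.06,
}
```

If each training step consumes one `out_batch_size` batch from the buffer, each dictionary would see roughly 244,140 × 2,048 ≈ 5.0 × 10^8 activations; that is an inference from the config values, not something the commit states directly.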