adamkarvonen
/

kl_finetunes

Model card · Files and versions · Community

adamkarvonen committed on Feb 21

Commit

eabfa13

verified ·

1 Parent(s): 8308b9e

Add files using upload-large-folder tool

Browse files

Files changed (12) hide show

gemma-2-2b_standard_new_width-2pow16_date-0107/resid_post_layer_12/trainer_0/ae.pt +3 -0
gemma-2-2b_standard_new_width-2pow16_date-0107/resid_post_layer_12/trainer_0/config.json +29 -0
gemma-2-2b_standard_new_width-2pow16_date-0107/resid_post_layer_12/trainer_1/ae.pt +3 -0
gemma-2-2b_standard_new_width-2pow16_date-0107/resid_post_layer_12/trainer_1/config.json +29 -0
gemma-2-2b_standard_new_width-2pow16_date-0107/resid_post_layer_12/trainer_2/ae.pt +3 -0
gemma-2-2b_standard_new_width-2pow16_date-0107/resid_post_layer_12/trainer_2/config.json +29 -0
gemma-2-2b_standard_new_width-2pow16_date-0107/resid_post_layer_12/trainer_3/ae.pt +3 -0
gemma-2-2b_standard_new_width-2pow16_date-0107/resid_post_layer_12/trainer_3/config.json +29 -0
gemma-2-2b_standard_new_width-2pow16_date-0107/resid_post_layer_12/trainer_4/ae.pt +3 -0
gemma-2-2b_standard_new_width-2pow16_date-0107/resid_post_layer_12/trainer_4/config.json +29 -0
gemma-2-2b_standard_new_width-2pow16_date-0107/resid_post_layer_12/trainer_5/ae.pt +3 -0
gemma-2-2b_standard_new_width-2pow16_date-0107/resid_post_layer_12/trainer_5/config.json +29 -0

gemma-2-2b_standard_new_width-2pow16_date-0107/resid_post_layer_12/trainer_0/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:243365f839b4182319013232b2475750ad92068bcd4467950c91383dc5590d32
+size 1208232616

gemma-2-2b_standard_new_width-2pow16_date-0107/resid_post_layer_12/trainer_0/config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+    "trainer": {
+        "dict_class": "AutoEncoder",
+        "trainer_class": "StandardTrainerAprilUpdate",
+        "activation_dim": 2304,
+        "dict_size": 16384,
+        "lr": 0.0003,
+        "l1_penalty": 0.012,
+        "warmup_steps": 1000,
+        "sparsity_warmup_steps": 5000,
+        "steps": 244140,
+        "decay_start": 195312,
+        "seed": 0,
+        "device": "cuda:2",
+        "layer": 12,
+        "lm_name": "google/gemma-2-2b",
+        "wandb_name": "StandardTrainerNew-google/gemma-2-2b-resid_post_layer_12_trainer_0",
+        "submodule_name": "resid_post_layer_12"
+    },
+    "buffer": {
+        "d_submodule": 2304,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 128,
+        "out_batch_size": 2048,
+        "device": "cuda:2"
+    }
+}

gemma-2-2b_standard_new_width-2pow16_date-0107/resid_post_layer_12/trainer_1/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:57c34c16b2c40c0f5e02c00caf6cdc012e65681c77bb677634dcca519068a407
+size 1208232616

gemma-2-2b_standard_new_width-2pow16_date-0107/resid_post_layer_12/trainer_1/config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+    "trainer": {
+        "dict_class": "AutoEncoder",
+        "trainer_class": "StandardTrainerAprilUpdate",
+        "activation_dim": 2304,
+        "dict_size": 16384,
+        "lr": 0.0003,
+        "l1_penalty": 0.015,
+        "warmup_steps": 1000,
+        "sparsity_warmup_steps": 5000,
+        "steps": 244140,
+        "decay_start": 195312,
+        "seed": 0,
+        "device": "cuda:2",
+        "layer": 12,
+        "lm_name": "google/gemma-2-2b",
+        "wandb_name": "StandardTrainerNew-google/gemma-2-2b-resid_post_layer_12_trainer_1",
+        "submodule_name": "resid_post_layer_12"
+    },
+    "buffer": {
+        "d_submodule": 2304,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 128,
+        "out_batch_size": 2048,
+        "device": "cuda:2"
+    }
+}

gemma-2-2b_standard_new_width-2pow16_date-0107/resid_post_layer_12/trainer_2/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2500aa1f28537c0e3de0b9533a47f2665183816c7652130d1439fff270f3c284
+size 1208232616

gemma-2-2b_standard_new_width-2pow16_date-0107/resid_post_layer_12/trainer_2/config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+    "trainer": {
+        "dict_class": "AutoEncoder",
+        "trainer_class": "StandardTrainerAprilUpdate",
+        "activation_dim": 2304,
+        "dict_size": 16384,
+        "lr": 0.0003,
+        "l1_penalty": 0.02,
+        "warmup_steps": 1000,
+        "sparsity_warmup_steps": 5000,
+        "steps": 244140,
+        "decay_start": 195312,
+        "seed": 0,
+        "device": "cuda:2",
+        "layer": 12,
+        "lm_name": "google/gemma-2-2b",
+        "wandb_name": "StandardTrainerNew-google/gemma-2-2b-resid_post_layer_12_trainer_2",
+        "submodule_name": "resid_post_layer_12"
+    },
+    "buffer": {
+        "d_submodule": 2304,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 128,
+        "out_batch_size": 2048,
+        "device": "cuda:2"
+    }
+}

gemma-2-2b_standard_new_width-2pow16_date-0107/resid_post_layer_12/trainer_3/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8bb94b29cbf921038fea94ab85ca66c19f619f47c8139b9925e48d4e6480f6ac
+size 1208232616

gemma-2-2b_standard_new_width-2pow16_date-0107/resid_post_layer_12/trainer_3/config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+    "trainer": {
+        "dict_class": "AutoEncoder",
+        "trainer_class": "StandardTrainerAprilUpdate",
+        "activation_dim": 2304,
+        "dict_size": 16384,
+        "lr": 0.0003,
+        "l1_penalty": 0.03,
+        "warmup_steps": 1000,
+        "sparsity_warmup_steps": 5000,
+        "steps": 244140,
+        "decay_start": 195312,
+        "seed": 0,
+        "device": "cuda:2",
+        "layer": 12,
+        "lm_name": "google/gemma-2-2b",
+        "wandb_name": "StandardTrainerNew-google/gemma-2-2b-resid_post_layer_12_trainer_3",
+        "submodule_name": "resid_post_layer_12"
+    },
+    "buffer": {
+        "d_submodule": 2304,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 128,
+        "out_batch_size": 2048,
+        "device": "cuda:2"
+    }
+}

gemma-2-2b_standard_new_width-2pow16_date-0107/resid_post_layer_12/trainer_4/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:00aa97c3a220aadb85a7e8fec705031c969280c9920250fd95ee34c6d331bf39
+size 1208232616

gemma-2-2b_standard_new_width-2pow16_date-0107/resid_post_layer_12/trainer_4/config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+    "trainer": {
+        "dict_class": "AutoEncoder",
+        "trainer_class": "StandardTrainerAprilUpdate",
+        "activation_dim": 2304,
+        "dict_size": 16384,
+        "lr": 0.0003,
+        "l1_penalty": 0.04,
+        "warmup_steps": 1000,
+        "sparsity_warmup_steps": 5000,
+        "steps": 244140,
+        "decay_start": 195312,
+        "seed": 0,
+        "device": "cuda:2",
+        "layer": 12,
+        "lm_name": "google/gemma-2-2b",
+        "wandb_name": "StandardTrainerNew-google/gemma-2-2b-resid_post_layer_12_trainer_4",
+        "submodule_name": "resid_post_layer_12"
+    },
+    "buffer": {
+        "d_submodule": 2304,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 128,
+        "out_batch_size": 2048,
+        "device": "cuda:2"
+    }
+}

gemma-2-2b_standard_new_width-2pow16_date-0107/resid_post_layer_12/trainer_5/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:79506ebf3b1acb9871297dfa2c23b7892a0f58c41cb71a7e359eb39cd0a60b69
+size 1208232616

gemma-2-2b_standard_new_width-2pow16_date-0107/resid_post_layer_12/trainer_5/config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+    "trainer": {
+        "dict_class": "AutoEncoder",
+        "trainer_class": "StandardTrainerAprilUpdate",
+        "activation_dim": 2304,
+        "dict_size": 16384,
+        "lr": 0.0003,
+        "l1_penalty": 0.06,
+        "warmup_steps": 1000,
+        "sparsity_warmup_steps": 5000,
+        "steps": 244140,
+        "decay_start": 195312,
+        "seed": 0,
+        "device": "cuda:2",
+        "layer": 12,
+        "lm_name": "google/gemma-2-2b",
+        "wandb_name": "StandardTrainerNew-google/gemma-2-2b-resid_post_layer_12_trainer_5",
+        "submodule_name": "resid_post_layer_12"
+    },
+    "buffer": {
+        "d_submodule": 2304,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 128,
+        "out_batch_size": 2048,
+        "device": "cuda:2"
+    }
+}