adamkarvonen commited on
Commit
32ebece
·
verified ·
1 Parent(s): 14c522c

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. eval_results_finetunes/core/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_3_custom_sae_eval_results.json +0 -0
  2. eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_0_custom_sae_eval_results.json +323 -0
  3. eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_1_custom_sae_eval_results.json +323 -0
  4. eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_2_custom_sae_eval_results.json +323 -0
  5. eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_3_custom_sae_eval_results.json +323 -0
  6. eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_4_custom_sae_eval_results.json +323 -0
  7. eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_5_custom_sae_eval_results.json +323 -0
  8. eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_0_custom_sae_eval_results.json +323 -0
  9. eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_1_custom_sae_eval_results.json +323 -0
  10. eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_2_custom_sae_eval_results.json +323 -0
  11. eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_3_custom_sae_eval_results.json +323 -0
  12. eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_4_custom_sae_eval_results.json +323 -0
  13. eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_5_custom_sae_eval_results.json +323 -0
  14. eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_0_custom_sae_eval_results.json +323 -0
  15. eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_1_custom_sae_eval_results.json +323 -0
  16. eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_2_custom_sae_eval_results.json +323 -0
  17. eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_3_custom_sae_eval_results.json +323 -0
  18. eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_4_custom_sae_eval_results.json +323 -0
  19. eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_5_custom_sae_eval_results.json +323 -0
  20. eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_0_custom_sae_eval_results.json +323 -0
  21. eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_1_custom_sae_eval_results.json +323 -0
  22. eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_2_custom_sae_eval_results.json +323 -0
  23. eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_3_custom_sae_eval_results.json +323 -0
  24. eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_4_custom_sae_eval_results.json +323 -0
  25. eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_5_custom_sae_eval_results.json +323 -0
  26. eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_0_custom_sae_eval_results.json +670 -0
  27. eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_1_custom_sae_eval_results.json +670 -0
  28. eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_2_custom_sae_eval_results.json +670 -0
  29. eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_3_custom_sae_eval_results.json +670 -0
  30. eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_4_custom_sae_eval_results.json +670 -0
  31. eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_5_custom_sae_eval_results.json +670 -0
  32. eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_0_custom_sae_eval_results.json +670 -0
  33. eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_1_custom_sae_eval_results.json +670 -0
  34. eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_2_custom_sae_eval_results.json +670 -0
  35. eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_3_custom_sae_eval_results.json +670 -0
  36. eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_4_custom_sae_eval_results.json +670 -0
  37. eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_5_custom_sae_eval_results.json +670 -0
  38. eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_0_custom_sae_eval_results.json +670 -0
  39. eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_1_custom_sae_eval_results.json +670 -0
  40. eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_2_custom_sae_eval_results.json +670 -0
  41. eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_3_custom_sae_eval_results.json +670 -0
  42. eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_4_custom_sae_eval_results.json +670 -0
  43. eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_5_custom_sae_eval_results.json +670 -0
  44. eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_0_custom_sae_eval_results.json +670 -0
  45. eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_1_custom_sae_eval_results.json +670 -0
  46. eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_2_custom_sae_eval_results.json +670 -0
  47. eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_3_custom_sae_eval_results.json +670 -0
  48. eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_4_custom_sae_eval_results.json +670 -0
  49. eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_5_custom_sae_eval_results.json +670 -0
  50. eval_results_finetunes/tpp/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_1_custom_sae_eval_results.json +414 -0
eval_results_finetunes/core/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_3_custom_sae_eval_results.json ADDED
The diff for this file is too large to render. See raw diff
 
eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_0_custom_sae_eval_results.json ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "scr",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "canrager/amazon_reviews_mcauley_1and5"
8
+ ],
9
+ "perform_scr": true,
10
+ "early_stopping_patience": 20,
11
+ "train_set_size": 4000,
12
+ "test_set_size": 1000,
13
+ "context_length": 128,
14
+ "probe_train_batch_size": 16,
15
+ "probe_test_batch_size": 500,
16
+ "probe_epochs": 20,
17
+ "probe_lr": 0.001,
18
+ "probe_l1_penalty": 0.001,
19
+ "sae_batch_size": 125,
20
+ "llm_batch_size": 32,
21
+ "llm_dtype": "bfloat16",
22
+ "lower_vram_usage": false,
23
+ "model_name": "gemma-2-2b",
24
+ "n_values": [
25
+ 2,
26
+ 5,
27
+ 10,
28
+ 20,
29
+ 50,
30
+ 100,
31
+ 500
32
+ ],
33
+ "column1_vals_lookup": {
34
+ "LabHC/bias_in_bios_class_set1": [
35
+ [
36
+ "professor",
37
+ "nurse"
38
+ ],
39
+ [
40
+ "architect",
41
+ "journalist"
42
+ ],
43
+ [
44
+ "surgeon",
45
+ "psychologist"
46
+ ],
47
+ [
48
+ "attorney",
49
+ "teacher"
50
+ ]
51
+ ],
52
+ "canrager/amazon_reviews_mcauley_1and5": [
53
+ [
54
+ "Books",
55
+ "CDs_and_Vinyl"
56
+ ],
57
+ [
58
+ "Software",
59
+ "Electronics"
60
+ ],
61
+ [
62
+ "Pet_Supplies",
63
+ "Office_Products"
64
+ ],
65
+ [
66
+ "Industrial_and_Scientific",
67
+ "Toys_and_Games"
68
+ ]
69
+ ]
70
+ }
71
+ },
72
+ "eval_id": "e6fce60a-4b46-45ef-b09a-7a006f64a2db",
73
+ "datetime_epoch_millis": 1740082963524,
74
+ "eval_result_metrics": {
75
+ "scr_metrics": {
76
+ "scr_dir1_threshold_2": 0.26737467007277516,
77
+ "scr_metric_threshold_2": 0.08705390331103469,
78
+ "scr_dir2_threshold_2": 0.08346717197493174,
79
+ "scr_dir1_threshold_5": 0.28605510212291974,
80
+ "scr_metric_threshold_5": 0.1372854731615903,
81
+ "scr_dir2_threshold_5": 0.13654972321385472,
82
+ "scr_dir1_threshold_10": 0.2816050435752508,
83
+ "scr_metric_threshold_10": 0.19185470369912228,
84
+ "scr_dir2_threshold_10": 0.18909057577594363,
85
+ "scr_dir1_threshold_20": 0.29103123629885963,
86
+ "scr_metric_threshold_20": 0.25478927649470895,
87
+ "scr_dir2_threshold_20": 0.25422214300007573,
88
+ "scr_dir1_threshold_50": 0.28252876336836275,
89
+ "scr_metric_threshold_50": 0.309279613291791,
90
+ "scr_dir2_threshold_50": 0.2907812511833868,
91
+ "scr_dir1_threshold_100": 0.26254096813606764,
92
+ "scr_metric_threshold_100": 0.3400787679092708,
93
+ "scr_dir2_threshold_100": 0.32139644345582796,
94
+ "scr_dir1_threshold_500": 0.1097068299040769,
95
+ "scr_metric_threshold_500": 0.3246955001770993,
96
+ "scr_dir2_threshold_500": 0.31708000678450293
97
+ }
98
+ },
99
+ "eval_result_details": [
100
+ {
101
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_professor_nurse_results",
102
+ "scr_dir1_threshold_2": 0.42857088793941894,
103
+ "scr_metric_threshold_2": 0.01474211549298745,
104
+ "scr_dir2_threshold_2": 0.01474211549298745,
105
+ "scr_dir1_threshold_5": 0.555555345309774,
106
+ "scr_metric_threshold_5": 0.022113026790745845,
107
+ "scr_dir2_threshold_5": 0.022113026790745845,
108
+ "scr_dir1_threshold_10": 0.5238097040201937,
109
+ "scr_metric_threshold_10": 0.04176908314890556,
110
+ "scr_dir2_threshold_10": 0.04176908314890556,
111
+ "scr_dir1_threshold_20": 0.555555345309774,
112
+ "scr_metric_threshold_20": 0.05405408176057155,
113
+ "scr_dir2_threshold_20": 0.05405408176057155,
114
+ "scr_dir1_threshold_50": 0.555555345309774,
115
+ "scr_metric_threshold_50": 0.08845210716298338,
116
+ "scr_dir2_threshold_50": 0.08845210716298338,
117
+ "scr_dir1_threshold_100": 0.444444654690226,
118
+ "scr_metric_threshold_100": 0.10565119308855696,
119
+ "scr_dir2_threshold_100": 0.10565119308855696,
120
+ "scr_dir1_threshold_500": 0.3650786592542414,
121
+ "scr_metric_threshold_500": 0.13759224805838266,
122
+ "scr_dir2_threshold_500": 0.13759224805838266
123
+ },
124
+ {
125
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_architect_journalist_results",
126
+ "scr_dir1_threshold_2": 0.2828283193172076,
127
+ "scr_metric_threshold_2": 0.12181298763313322,
128
+ "scr_dir2_threshold_2": 0.12181298763313322,
129
+ "scr_dir1_threshold_5": 0.25252507008062886,
130
+ "scr_metric_threshold_5": 0.1416430991917576,
131
+ "scr_dir2_threshold_5": 0.1416430991917576,
132
+ "scr_dir1_threshold_10": 0.2626259524704024,
133
+ "scr_metric_threshold_10": 0.20396592100070968,
134
+ "scr_dir2_threshold_10": 0.20396592100070968,
135
+ "scr_dir1_threshold_20": 0.24242418769085533,
136
+ "scr_metric_threshold_20": 0.26345608682489086,
137
+ "scr_dir2_threshold_20": 0.26345608682489086,
138
+ "scr_dir1_threshold_50": 0.11111091042202506,
139
+ "scr_metric_threshold_50": 0.33994337051954143,
140
+ "scr_dir2_threshold_50": 0.33994337051954143,
141
+ "scr_dir1_threshold_100": 0.0,
142
+ "scr_metric_threshold_100": 0.38243624962156114,
143
+ "scr_dir2_threshold_100": 0.38243624962156114,
144
+ "scr_dir1_threshold_500": -0.5959598878709939,
145
+ "scr_metric_threshold_500": 0.15297456738930126,
146
+ "scr_dir2_threshold_500": 0.15297456738930126
147
+ },
148
+ {
149
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_surgeon_psychologist_results",
150
+ "scr_dir1_threshold_2": 0.4999995193171997,
151
+ "scr_metric_threshold_2": 0.025252499406211347,
152
+ "scr_dir2_threshold_2": 0.025252499406211347,
153
+ "scr_dir1_threshold_5": 0.5806446340898319,
154
+ "scr_metric_threshold_5": 0.04545455913788811,
155
+ "scr_dir2_threshold_5": 0.04545455913788811,
156
+ "scr_dir1_threshold_10": 0.532258142045613,
157
+ "scr_metric_threshold_10": 0.07323242889813597,
158
+ "scr_dir2_threshold_10": 0.07323242889813597,
159
+ "scr_dir1_threshold_20": 0.5645158034084256,
160
+ "scr_metric_threshold_20": 0.1010101481416146,
161
+ "scr_dir2_threshold_20": 0.1010101481416146,
162
+ "scr_dir1_threshold_50": 0.46774185795438705,
163
+ "scr_metric_threshold_50": 0.1540403667913046,
164
+ "scr_dir2_threshold_50": 0.1540403667913046,
165
+ "scr_dir1_threshold_100": 0.45161302727298075,
166
+ "scr_metric_threshold_100": 0.20959595579503107,
167
+ "scr_dir2_threshold_100": 0.20959595579503107,
168
+ "scr_dir1_threshold_500": 0.17741906022667078,
169
+ "scr_metric_threshold_500": 0.06060602867826107,
170
+ "scr_dir2_threshold_500": 0.06060602867826107
171
+ },
172
+ {
173
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_attorney_teacher_results",
174
+ "scr_dir1_threshold_2": 0.3577237427057014,
175
+ "scr_metric_threshold_2": 0.06451600498511631,
176
+ "scr_dir2_threshold_2": 0.06451600498511631,
177
+ "scr_dir1_threshold_5": 0.2926829741063909,
178
+ "scr_metric_threshold_5": 0.12903218476393236,
179
+ "scr_dir2_threshold_5": 0.12903218476393236,
180
+ "scr_dir1_threshold_10": 0.24390263995216122,
181
+ "scr_metric_threshold_10": 0.17008790375159719,
182
+ "scr_dir2_threshold_10": 0.17008790375159719,
183
+ "scr_dir1_threshold_20": 0.13821151212590824,
184
+ "scr_metric_threshold_20": 0.2727272886176091,
185
+ "scr_dir2_threshold_20": 0.2727272886176091,
186
+ "scr_dir1_threshold_50": -0.008129974927287195,
187
+ "scr_metric_threshold_50": 0.3519062126924697,
188
+ "scr_dir2_threshold_50": 0.3519062126924697,
189
+ "scr_dir1_threshold_100": -0.08943069338117211,
190
+ "scr_metric_threshold_100": 0.40175947338154144,
191
+ "scr_dir2_threshold_100": 0.40175947338154144,
192
+ "scr_dir1_threshold_500": -0.6747966415939539,
193
+ "scr_metric_threshold_500": 0.070381207579754,
194
+ "scr_dir2_threshold_500": 0.070381207579754
195
+ },
196
+ {
197
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Books_CDs_and_Vinyl_results",
198
+ "scr_dir1_threshold_2": 0.04918033854781611,
199
+ "scr_metric_threshold_2": 0.13671883003552063,
200
+ "scr_dir2_threshold_2": 0.13671883003552063,
201
+ "scr_dir1_threshold_5": 0.021857964433295084,
202
+ "scr_metric_threshold_5": 0.2578126309672156,
203
+ "scr_dir2_threshold_5": 0.2578126309672156,
204
+ "scr_dir1_threshold_10": 0.016393554752069144,
205
+ "scr_metric_threshold_10": 0.42578128637978213,
206
+ "scr_dir2_threshold_10": 0.42578128637978213,
207
+ "scr_dir1_threshold_20": 0.027322374114521025,
208
+ "scr_metric_threshold_20": 0.5351562572759564,
209
+ "scr_dir2_threshold_20": 0.5351562572759564,
210
+ "scr_dir1_threshold_50": 0.1092894964580841,
211
+ "scr_metric_threshold_50": 0.621093800931695,
212
+ "scr_dir2_threshold_50": 0.621093800931695,
213
+ "scr_dir1_threshold_100": 0.01092881936245188,
214
+ "scr_metric_threshold_100": 0.6796874854480871,
215
+ "scr_dir2_threshold_100": 0.6796874854480871,
216
+ "scr_dir1_threshold_500": 0.09836067709563222,
217
+ "scr_metric_threshold_500": 0.7695312281721307,
218
+ "scr_dir2_threshold_500": 0.7695312281721307
219
+ },
220
+ {
221
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Software_Electronics_results",
222
+ "scr_dir1_threshold_2": 0.20512801702675515,
223
+ "scr_metric_threshold_2": 0.06451625307937271,
224
+ "scr_dir2_threshold_2": 0.06451625307937271,
225
+ "scr_dir1_threshold_5": 0.23076901915509954,
226
+ "scr_metric_threshold_5": 0.1008065853511987,
227
+ "scr_dir2_threshold_5": 0.1008065853511987,
228
+ "scr_dir1_threshold_10": 0.2666666055336954,
229
+ "scr_metric_threshold_10": 0.11693564862104187,
230
+ "scr_dir2_threshold_10": 0.11693564862104187,
231
+ "scr_dir1_threshold_20": 0.3179486097903842,
232
+ "scr_metric_threshold_20": 0.18951607282340924,
233
+ "scr_dir2_threshold_20": 0.18951607282340924,
234
+ "scr_dir1_threshold_50": 0.30769233120498896,
235
+ "scr_metric_threshold_50": 0.2862904524424683,
236
+ "scr_dir2_threshold_50": 0.2862904524424683,
237
+ "scr_dir1_threshold_100": 0.4051282004256689,
238
+ "scr_metric_threshold_100": 0.22983885116865949,
239
+ "scr_dir2_threshold_100": 0.22983885116865949,
240
+ "scr_dir1_threshold_500": 0.5230769324819956,
241
+ "scr_metric_threshold_500": 0.463709667728174,
242
+ "scr_dir2_threshold_500": 0.463709667728174
243
+ },
244
+ {
245
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Pet_Supplies_Office_Products_results",
246
+ "scr_dir1_threshold_2": 0.2297297224732598,
247
+ "scr_metric_threshold_2": 0.18303572260109305,
248
+ "scr_dir2_threshold_2": 0.18303572260109305,
249
+ "scr_dir1_threshold_5": 0.23423416892600485,
250
+ "scr_metric_threshold_5": 0.28125005820765137,
251
+ "scr_dir2_threshold_5": 0.28125005820765137,
252
+ "scr_dir1_threshold_10": 0.26126111613186265,
253
+ "scr_metric_threshold_10": 0.35714272409679687,
254
+ "scr_dir2_threshold_10": 0.35714272409679687,
255
+ "scr_dir1_threshold_20": 0.3063063861274755,
256
+ "scr_metric_threshold_20": 0.4464286046900865,
257
+ "scr_dir2_threshold_20": 0.4464286046900865,
258
+ "scr_dir1_threshold_50": 0.4594594449465196,
259
+ "scr_metric_threshold_50": 0.37499990021545476,
260
+ "scr_dir2_threshold_50": 0.37499990021545476,
261
+ "scr_dir1_threshold_100": 0.590090003012451,
262
+ "scr_metric_threshold_100": 0.4241072010647942,
263
+ "scr_dir2_threshold_100": 0.4241072010647942,
264
+ "scr_dir1_threshold_500": 0.6711711131194117,
265
+ "scr_metric_threshold_500": 0.6294643272911795,
266
+ "scr_dir2_threshold_500": 0.6294643272911795
267
+ },
268
+ {
269
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Industrial_and_Scientific_Toys_and_Games_results",
270
+ "scr_dir1_threshold_2": 0.08583681325484281,
271
+ "scr_metric_threshold_2": 0.08583681325484281,
272
+ "scr_dir2_threshold_2": 0.05714296256601923,
273
+ "scr_dir1_threshold_5": 0.12017164088233277,
274
+ "scr_metric_threshold_5": 0.12017164088233277,
275
+ "scr_dir2_threshold_5": 0.11428564130044823,
276
+ "scr_dir1_threshold_10": 0.1459226336960092,
277
+ "scr_metric_threshold_10": 0.1459226336960092,
278
+ "scr_dir2_threshold_10": 0.12380961031057988,
279
+ "scr_dir1_threshold_20": 0.17596567182353345,
280
+ "scr_metric_threshold_20": 0.17596567182353345,
281
+ "scr_dir2_threshold_20": 0.17142860386646747,
282
+ "scr_dir1_threshold_50": 0.25751069557841055,
283
+ "scr_metric_threshold_50": 0.25751069557841055,
284
+ "scr_dir2_threshold_50": 0.10952379871117751,
285
+ "scr_dir1_threshold_100": 0.2875537337059348,
286
+ "scr_metric_threshold_100": 0.2875537337059348,
287
+ "scr_dir2_threshold_100": 0.13809513807839202,
288
+ "scr_dir1_threshold_500": 0.3133047265196112,
289
+ "scr_metric_threshold_500": 0.3133047265196112,
290
+ "scr_dir2_threshold_500": 0.25238077937884024
291
+ }
292
+ ],
293
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
294
+ "sae_lens_id": "custom_sae",
295
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_0",
296
+ "sae_lens_version": "5.4.2",
297
+ "sae_cfg_dict": {
298
+ "model_name": "gemma-2-2b",
299
+ "d_in": 2304,
300
+ "d_sae": 16384,
301
+ "hook_layer": 12,
302
+ "hook_name": "blocks.12.hook_resid_post",
303
+ "context_size": null,
304
+ "hook_head_index": null,
305
+ "architecture": "standard_april_update",
306
+ "apply_b_dec_to_input": null,
307
+ "finetuning_scaling_factor": null,
308
+ "activation_fn_str": "",
309
+ "prepend_bos": true,
310
+ "normalize_activations": "none",
311
+ "dtype": "bfloat16",
312
+ "device": "",
313
+ "dataset_path": "",
314
+ "dataset_trust_remote_code": true,
315
+ "seqpos_slice": [
316
+ null
317
+ ],
318
+ "training_tokens": -100000,
319
+ "sae_lens_training_version": null,
320
+ "neuronpedia_id": null
321
+ },
322
+ "eval_result_unstructured": null
323
+ }
eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_1_custom_sae_eval_results.json ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "scr",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "canrager/amazon_reviews_mcauley_1and5"
8
+ ],
9
+ "perform_scr": true,
10
+ "early_stopping_patience": 20,
11
+ "train_set_size": 4000,
12
+ "test_set_size": 1000,
13
+ "context_length": 128,
14
+ "probe_train_batch_size": 16,
15
+ "probe_test_batch_size": 500,
16
+ "probe_epochs": 20,
17
+ "probe_lr": 0.001,
18
+ "probe_l1_penalty": 0.001,
19
+ "sae_batch_size": 125,
20
+ "llm_batch_size": 32,
21
+ "llm_dtype": "bfloat16",
22
+ "lower_vram_usage": false,
23
+ "model_name": "gemma-2-2b",
24
+ "n_values": [
25
+ 2,
26
+ 5,
27
+ 10,
28
+ 20,
29
+ 50,
30
+ 100,
31
+ 500
32
+ ],
33
+ "column1_vals_lookup": {
34
+ "LabHC/bias_in_bios_class_set1": [
35
+ [
36
+ "professor",
37
+ "nurse"
38
+ ],
39
+ [
40
+ "architect",
41
+ "journalist"
42
+ ],
43
+ [
44
+ "surgeon",
45
+ "psychologist"
46
+ ],
47
+ [
48
+ "attorney",
49
+ "teacher"
50
+ ]
51
+ ],
52
+ "canrager/amazon_reviews_mcauley_1and5": [
53
+ [
54
+ "Books",
55
+ "CDs_and_Vinyl"
56
+ ],
57
+ [
58
+ "Software",
59
+ "Electronics"
60
+ ],
61
+ [
62
+ "Pet_Supplies",
63
+ "Office_Products"
64
+ ],
65
+ [
66
+ "Industrial_and_Scientific",
67
+ "Toys_and_Games"
68
+ ]
69
+ ]
70
+ }
71
+ },
72
+ "eval_id": "4719686f-0d7e-4841-bcae-f644bc46795a",
73
+ "datetime_epoch_millis": 1740082518354,
74
+ "eval_result_metrics": {
75
+ "scr_metrics": {
76
+ "scr_dir1_threshold_2": 0.22266933547319315,
77
+ "scr_metric_threshold_2": 0.0950998553529223,
78
+ "scr_dir2_threshold_2": 0.09634145115775132,
79
+ "scr_dir1_threshold_5": 0.2348706385355691,
80
+ "scr_metric_threshold_5": 0.14317550189243988,
81
+ "scr_dir2_threshold_5": 0.1458425883914526,
82
+ "scr_dir1_threshold_10": 0.24392885587481383,
83
+ "scr_metric_threshold_10": 0.18902067513171725,
84
+ "scr_dir2_threshold_10": 0.19334828237177965,
85
+ "scr_dir1_threshold_20": 0.2334989723800943,
86
+ "scr_metric_threshold_20": 0.24689163259009905,
87
+ "scr_dir2_threshold_20": 0.24936709978647248,
88
+ "scr_dir1_threshold_50": 0.19609493476788953,
89
+ "scr_metric_threshold_50": 0.31694447301902434,
90
+ "scr_dir2_threshold_50": 0.31143149560891936,
91
+ "scr_dir1_threshold_100": 0.20405634997153388,
92
+ "scr_metric_threshold_100": 0.3411981134757655,
93
+ "scr_dir2_threshold_100": 0.3374121058794227,
94
+ "scr_dir1_threshold_500": 0.07297026604682132,
95
+ "scr_metric_threshold_500": 0.3165721209859854,
96
+ "scr_dir2_threshold_500": 0.32355915776745997
97
+ }
98
+ },
99
+ "eval_result_details": [
100
+ {
101
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_professor_nurse_results",
102
+ "scr_dir1_threshold_2": 0.42857088793941894,
103
+ "scr_metric_threshold_2": 0.017199085925573582,
104
+ "scr_dir2_threshold_2": 0.017199085925573582,
105
+ "scr_dir1_threshold_5": 0.444444654690226,
106
+ "scr_metric_threshold_5": 0.022113026790745845,
107
+ "scr_dir2_threshold_5": 0.022113026790745845,
108
+ "scr_dir1_threshold_10": 0.5396825246649839,
109
+ "scr_metric_threshold_10": 0.02948408453723957,
110
+ "scr_dir2_threshold_10": 0.02948408453723957,
111
+ "scr_dir1_threshold_20": 0.555555345309774,
112
+ "scr_metric_threshold_20": 0.061425139507065275,
113
+ "scr_dir2_threshold_20": 0.061425139507065275,
114
+ "scr_dir1_threshold_50": 0.4920631166245965,
115
+ "scr_metric_threshold_50": 0.08353816629781112,
116
+ "scr_dir2_threshold_50": 0.08353816629781112,
117
+ "scr_dir1_threshold_100": 0.47619029597980633,
118
+ "scr_metric_threshold_100": 0.1081081635211431,
119
+ "scr_dir2_threshold_100": 0.1081081635211431,
120
+ "scr_dir1_threshold_500": 0.3492058386094512,
121
+ "scr_metric_threshold_500": 0.12039316213280908,
122
+ "scr_dir2_threshold_500": 0.12039316213280908
123
+ },
124
+ {
125
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_architect_journalist_results",
126
+ "scr_dir1_threshold_2": 0.2626259524704024,
127
+ "scr_metric_threshold_2": 0.12747880615775103,
128
+ "scr_dir2_threshold_2": 0.12747880615775103,
129
+ "scr_dir1_threshold_5": 0.2323233053010818,
130
+ "scr_metric_threshold_5": 0.17280451009623365,
131
+ "scr_dir2_threshold_5": 0.17280451009623365,
132
+ "scr_dir1_threshold_10": 0.19191917367472955,
133
+ "scr_metric_threshold_10": 0.24929179379088426,
134
+ "scr_dir2_threshold_10": 0.24929179379088426,
135
+ "scr_dir1_threshold_20": 0.17171680682792437,
136
+ "scr_metric_threshold_20": 0.2549574434638101,
137
+ "scr_dir2_threshold_20": 0.2549574434638101,
138
+ "scr_dir1_threshold_50": 0.0,
139
+ "scr_metric_threshold_50": 0.354107663553548,
140
+ "scr_dir2_threshold_50": 0.354107663553548,
141
+ "scr_dir1_threshold_100": 0.040403529559094105,
142
+ "scr_metric_threshold_100": 0.40509918601664846,
143
+ "scr_dir2_threshold_100": 0.40509918601664846,
144
+ "scr_dir1_threshold_500": -0.6262631371075725,
145
+ "scr_metric_threshold_500": 0.15297456738930126,
146
+ "scr_dir2_threshold_500": 0.15297456738930126
147
+ },
148
+ {
149
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_surgeon_psychologist_results",
150
+ "scr_dir1_threshold_2": 0.4999995193171997,
151
+ "scr_metric_threshold_2": 0.03282830943478244,
152
+ "scr_dir2_threshold_2": 0.03282830943478244,
153
+ "scr_dir1_threshold_5": 0.5645158034084256,
154
+ "scr_metric_threshold_5": 0.050505149329191916,
155
+ "scr_dir2_threshold_5": 0.050505149329191916,
156
+ "scr_dir1_threshold_10": 0.5483869727270193,
157
+ "scr_metric_threshold_10": 0.07575764873540326,
158
+ "scr_dir2_threshold_10": 0.07575764873540326,
159
+ "scr_dir1_threshold_20": 0.516128349998606,
160
+ "scr_metric_threshold_20": 0.11363639784472028,
161
+ "scr_dir2_threshold_20": 0.11363639784472028,
162
+ "scr_dir1_threshold_50": 0.46774185795438705,
163
+ "scr_metric_threshold_50": 0.1590909569826084,
164
+ "scr_dir2_threshold_50": 0.1590909569826084,
165
+ "scr_dir1_threshold_100": 0.3064516284091226,
166
+ "scr_metric_threshold_100": 0.06565661886956488,
167
+ "scr_dir2_threshold_100": 0.06565661886956488,
168
+ "scr_dir1_threshold_500": -0.04838745340981958,
169
+ "scr_metric_threshold_500": 0.07828286857267056,
170
+ "scr_dir2_threshold_500": 0.07828286857267056
171
+ },
172
+ {
173
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_attorney_teacher_results",
174
+ "scr_dir1_threshold_2": 0.31707338347875896,
175
+ "scr_metric_threshold_2": 0.11436944046788777,
176
+ "scr_dir2_threshold_2": 0.11436944046788777,
177
+ "scr_dir1_threshold_5": 0.2845529991791037,
178
+ "scr_metric_threshold_5": 0.14662744296044594,
179
+ "scr_dir2_threshold_5": 0.14662744296044594,
180
+ "scr_dir1_threshold_10": 0.21951223057979316,
181
+ "scr_metric_threshold_10": 0.19061585064227946,
182
+ "scr_dir2_threshold_10": 0.19061585064227946,
183
+ "scr_dir1_threshold_20": -0.040650359226942455,
184
+ "scr_metric_threshold_20": 0.2668620860229714,
185
+ "scr_dir2_threshold_20": 0.2668620860229714,
186
+ "scr_dir1_threshold_50": -0.16260143690776982,
187
+ "scr_metric_threshold_50": 0.3519062126924697,
188
+ "scr_dir2_threshold_50": 0.3519062126924697,
189
+ "scr_dir1_threshold_100": -0.19512182120742508,
190
+ "scr_metric_threshold_100": 0.44868039496384393,
191
+ "scr_dir2_threshold_100": 0.44868039496384393,
192
+ "scr_dir1_threshold_500": -0.3495932831879077,
193
+ "scr_metric_threshold_500": 0.10850441266694984,
194
+ "scr_dir2_threshold_500": 0.10850441266694984
195
+ },
196
+ {
197
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Books_CDs_and_Vinyl_results",
198
+ "scr_dir1_threshold_2": 0.06010915791026799,
199
+ "scr_metric_threshold_2": 0.13671883003552063,
200
+ "scr_dir2_threshold_2": 0.13671883003552063,
201
+ "scr_dir1_threshold_5": 0.04918033854781611,
202
+ "scr_metric_threshold_5": 0.2890624563442615,
203
+ "scr_dir2_threshold_5": 0.2890624563442615,
204
+ "scr_dir1_threshold_10": 0.07650271266233713,
205
+ "scr_metric_threshold_10": 0.3906250291038257,
206
+ "scr_dir2_threshold_10": 0.3906250291038257,
207
+ "scr_dir1_threshold_20": 0.08743153202478901,
208
+ "scr_metric_threshold_20": 0.4843749708961743,
209
+ "scr_dir2_threshold_20": 0.4843749708961743,
210
+ "scr_dir1_threshold_50": 0.00546440968122594,
211
+ "scr_metric_threshold_50": 0.5937499417923486,
212
+ "scr_dir2_threshold_50": 0.5937499417923486,
213
+ "scr_dir1_threshold_100": 0.016393554752069144,
214
+ "scr_metric_threshold_100": 0.6640624563442614,
215
+ "scr_dir2_threshold_100": 0.6640624563442614,
216
+ "scr_dir1_threshold_500": 0.08196712234356307,
217
+ "scr_metric_threshold_500": 0.7304687718278693,
218
+ "scr_dir2_threshold_500": 0.7304687718278693
219
+ },
220
+ {
221
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Software_Electronics_results",
222
+ "scr_dir1_threshold_2": 0.06666642213478169,
223
+ "scr_metric_threshold_2": 0.04838718980952953,
224
+ "scr_dir2_threshold_2": 0.04838718980952953,
225
+ "scr_dir1_threshold_5": 0.1179487320563267,
226
+ "scr_metric_threshold_5": 0.1008065853511987,
227
+ "scr_dir2_threshold_5": 0.1008065853511987,
228
+ "scr_dir1_threshold_10": 0.16410229135546164,
229
+ "scr_metric_threshold_10": 0.12500006008532116,
230
+ "scr_dir2_threshold_10": 0.12500006008532116,
231
+ "scr_dir1_threshold_20": 0.2512818819907463,
232
+ "scr_metric_threshold_20": 0.2056451360932524,
233
+ "scr_dir2_threshold_20": 0.2056451360932524,
234
+ "scr_dir1_threshold_50": 0.27179474482639304,
235
+ "scr_metric_threshold_50": 0.2782258006369044,
236
+ "scr_dir2_threshold_50": 0.2782258006369044,
237
+ "scr_dir1_threshold_100": 0.3179486097903842,
238
+ "scr_metric_threshold_100": 0.2782258006369044,
239
+ "scr_dir2_threshold_100": 0.2782258006369044,
240
+ "scr_dir1_threshold_500": 0.38974347688271976,
241
+ "scr_metric_threshold_500": 0.43951619299405154,
242
+ "scr_dir2_threshold_500": 0.43951619299405154
243
+ },
244
+ {
245
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Pet_Supplies_Office_Products_results",
246
+ "scr_dir1_threshold_2": 0.09909916440732847,
247
+ "scr_metric_threshold_2": 0.23660698486494627,
248
+ "scr_dir2_threshold_2": 0.23660698486494627,
249
+ "scr_dir1_threshold_5": 0.1216216651604412,
250
+ "scr_metric_threshold_5": 0.2991072343263093,
251
+ "scr_dir2_threshold_5": 0.2991072343263093,
252
+ "scr_dir1_threshold_10": 0.11261250376556366,
253
+ "scr_metric_threshold_10": 0.35267849659016254,
254
+ "scr_dir2_threshold_10": 0.35267849659016254,
255
+ "scr_dir1_threshold_20": 0.1891891674197794,
256
+ "scr_metric_threshold_20": 0.4508928321967208,
257
+ "scr_dir2_threshold_20": 0.4508928321967208,
258
+ "scr_dir1_threshold_50": 0.2882883318271079,
259
+ "scr_metric_threshold_50": 0.508928588059329,
260
+ "scr_dir2_threshold_50": 0.508928588059329,
261
+ "scr_dir1_threshold_100": 0.4684683378520097,
262
+ "scr_metric_threshold_100": 0.5580356228165478,
263
+ "scr_dir2_threshold_100": 0.5580356228165478,
264
+ "scr_dir1_threshold_500": 0.5810811101069608,
265
+ "scr_metric_threshold_500": 0.6964285381670564,
266
+ "scr_dir2_threshold_500": 0.6964285381670564
267
+ },
268
+ {
269
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Industrial_and_Scientific_Toys_and_Games_results",
270
+ "scr_dir1_threshold_2": 0.047210196127387125,
271
+ "scr_metric_threshold_2": 0.047210196127387125,
272
+ "scr_dir2_threshold_2": 0.05714296256601923,
273
+ "scr_dir1_threshold_5": 0.0643776099411321,
274
+ "scr_metric_threshold_5": 0.0643776099411321,
275
+ "scr_dir2_threshold_5": 0.08571430193323373,
276
+ "scr_dir1_threshold_10": 0.09871243756862208,
277
+ "scr_metric_threshold_10": 0.09871243756862208,
278
+ "scr_dir2_threshold_10": 0.1333332954891213,
279
+ "scr_dir1_threshold_20": 0.13733905469607777,
280
+ "scr_metric_threshold_20": 0.13733905469607777,
281
+ "scr_dir2_threshold_20": 0.1571427922670651,
282
+ "scr_dir1_threshold_50": 0.20600845413717558,
283
+ "scr_metric_threshold_50": 0.20600845413717558,
284
+ "scr_dir2_threshold_50": 0.1619046348563358,
285
+ "scr_dir1_threshold_100": 0.20171666463720986,
286
+ "scr_metric_threshold_100": 0.20171666463720986,
287
+ "scr_dir2_threshold_100": 0.17142860386646747,
288
+ "scr_dir1_threshold_500": 0.20600845413717558,
289
+ "scr_metric_threshold_500": 0.20600845413717558,
290
+ "scr_dir2_threshold_500": 0.2619047483889719
291
+ }
292
+ ],
293
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
294
+ "sae_lens_id": "custom_sae",
295
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_1",
296
+ "sae_lens_version": "5.4.2",
297
+ "sae_cfg_dict": {
298
+ "model_name": "gemma-2-2b",
299
+ "d_in": 2304,
300
+ "d_sae": 16384,
301
+ "hook_layer": 12,
302
+ "hook_name": "blocks.12.hook_resid_post",
303
+ "context_size": null,
304
+ "hook_head_index": null,
305
+ "architecture": "standard_april_update",
306
+ "apply_b_dec_to_input": null,
307
+ "finetuning_scaling_factor": null,
308
+ "activation_fn_str": "",
309
+ "prepend_bos": true,
310
+ "normalize_activations": "none",
311
+ "dtype": "bfloat16",
312
+ "device": "",
313
+ "dataset_path": "",
314
+ "dataset_trust_remote_code": true,
315
+ "seqpos_slice": [
316
+ null
317
+ ],
318
+ "training_tokens": -100000,
319
+ "sae_lens_training_version": null,
320
+ "neuronpedia_id": null
321
+ },
322
+ "eval_result_unstructured": null
323
+ }
eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_2_custom_sae_eval_results.json ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "scr",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "canrager/amazon_reviews_mcauley_1and5"
8
+ ],
9
+ "perform_scr": true,
10
+ "early_stopping_patience": 20,
11
+ "train_set_size": 4000,
12
+ "test_set_size": 1000,
13
+ "context_length": 128,
14
+ "probe_train_batch_size": 16,
15
+ "probe_test_batch_size": 500,
16
+ "probe_epochs": 20,
17
+ "probe_lr": 0.001,
18
+ "probe_l1_penalty": 0.001,
19
+ "sae_batch_size": 125,
20
+ "llm_batch_size": 32,
21
+ "llm_dtype": "bfloat16",
22
+ "lower_vram_usage": false,
23
+ "model_name": "gemma-2-2b",
24
+ "n_values": [
25
+ 2,
26
+ 5,
27
+ 10,
28
+ 20,
29
+ 50,
30
+ 100,
31
+ 500
32
+ ],
33
+ "column1_vals_lookup": {
34
+ "LabHC/bias_in_bios_class_set1": [
35
+ [
36
+ "professor",
37
+ "nurse"
38
+ ],
39
+ [
40
+ "architect",
41
+ "journalist"
42
+ ],
43
+ [
44
+ "surgeon",
45
+ "psychologist"
46
+ ],
47
+ [
48
+ "attorney",
49
+ "teacher"
50
+ ]
51
+ ],
52
+ "canrager/amazon_reviews_mcauley_1and5": [
53
+ [
54
+ "Books",
55
+ "CDs_and_Vinyl"
56
+ ],
57
+ [
58
+ "Software",
59
+ "Electronics"
60
+ ],
61
+ [
62
+ "Pet_Supplies",
63
+ "Office_Products"
64
+ ],
65
+ [
66
+ "Industrial_and_Scientific",
67
+ "Toys_and_Games"
68
+ ]
69
+ ]
70
+ }
71
+ },
72
+ "eval_id": "5a121800-db8c-4d3e-9d4e-1b55f40d9036",
73
+ "datetime_epoch_millis": 1740083111563,
74
+ "eval_result_metrics": {
75
+ "scr_metrics": {
76
+ "scr_dir1_threshold_2": 0.183637619650288,
77
+ "scr_metric_threshold_2": 0.09466631741659062,
78
+ "scr_dir2_threshold_2": 0.10377115211902242,
79
+ "scr_dir1_threshold_5": 0.18218183309965136,
80
+ "scr_metric_threshold_5": 0.14141795858164735,
81
+ "scr_dir2_threshold_5": 0.15194828397826285,
82
+ "scr_dir1_threshold_10": 0.1989728943351617,
83
+ "scr_metric_threshold_10": 0.1877918743430218,
84
+ "scr_dir2_threshold_10": 0.197543039047675,
85
+ "scr_dir1_threshold_20": 0.18607776053085284,
86
+ "scr_metric_threshold_20": 0.23600972023153102,
87
+ "scr_dir2_threshold_20": 0.25374167258506203,
88
+ "scr_dir1_threshold_50": 0.1454219070820961,
89
+ "scr_metric_threshold_50": 0.2945515730056151,
90
+ "scr_dir2_threshold_50": 0.3086380019079313,
91
+ "scr_dir1_threshold_100": 0.12688973172655868,
92
+ "scr_metric_threshold_100": 0.3377141400492294,
93
+ "scr_dir2_threshold_100": 0.358825957976707,
94
+ "scr_dir1_threshold_500": -0.06518448160046333,
95
+ "scr_metric_threshold_500": 0.32155851770906513,
96
+ "scr_dir2_threshold_500": 0.3385036524130333
97
+ }
98
+ },
99
+ "eval_result_details": [
100
+ {
101
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_professor_nurse_results",
102
+ "scr_dir1_threshold_2": 0.3809524260050484,
103
+ "scr_metric_threshold_2": 0.01474211549298745,
104
+ "scr_dir2_threshold_2": 0.01474211549298745,
105
+ "scr_dir1_threshold_5": 0.41269806729462877,
106
+ "scr_metric_threshold_5": 0.022113026790745845,
107
+ "scr_dir2_threshold_5": 0.022113026790745845,
108
+ "scr_dir1_threshold_10": 0.42857088793941894,
109
+ "scr_metric_threshold_10": 0.02948408453723957,
110
+ "scr_dir2_threshold_10": 0.02948408453723957,
111
+ "scr_dir1_threshold_20": 0.42857088793941894,
112
+ "scr_metric_threshold_20": 0.04422605358149169,
113
+ "scr_dir2_threshold_20": 0.04422605358149169,
114
+ "scr_dir1_threshold_50": 0.20634856059430595,
115
+ "scr_metric_threshold_50": 0.07125316768614513,
116
+ "scr_dir2_threshold_50": 0.07125316768614513,
117
+ "scr_dir1_threshold_100": 0.2857136099242737,
118
+ "scr_metric_threshold_100": 0.09828013534206324,
119
+ "scr_dir2_threshold_100": 0.09828013534206324,
120
+ "scr_dir1_threshold_500": 0.04761846193437052,
121
+ "scr_metric_threshold_500": 0.1277642198793028,
122
+ "scr_dir2_threshold_500": 0.1277642198793028
123
+ },
124
+ {
125
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_architect_journalist_results",
126
+ "scr_dir1_threshold_2": 0.2323233053010818,
127
+ "scr_metric_threshold_2": 0.12464581246959615,
128
+ "scr_dir2_threshold_2": 0.12464581246959615,
129
+ "scr_dir1_threshold_5": 0.19191917367472955,
130
+ "scr_metric_threshold_5": 0.17280451009623365,
131
+ "scr_dir2_threshold_5": 0.17280451009623365,
132
+ "scr_dir1_threshold_10": 0.19191917367472955,
133
+ "scr_metric_threshold_10": 0.2096317395253275,
134
+ "scr_dir2_threshold_10": 0.2096317395253275,
135
+ "scr_dir1_threshold_20": 0.1818176892176979,
136
+ "scr_metric_threshold_20": 0.26628891166135377,
137
+ "scr_dir2_threshold_20": 0.26628891166135377,
138
+ "scr_dir1_threshold_50": 0.12121179281179859,
139
+ "scr_metric_threshold_50": 0.3427761953560044,
140
+ "scr_dir2_threshold_50": 0.3427761953560044,
141
+ "scr_dir1_threshold_100": -0.030303249236578716,
142
+ "scr_metric_threshold_100": 0.3767705999486353,
143
+ "scr_dir2_threshold_100": 0.3767705999486353,
144
+ "scr_dir1_threshold_500": -0.47474809505919524,
145
+ "scr_metric_threshold_500": 0.19830027132778386,
146
+ "scr_dir2_threshold_500": 0.19830027132778386
147
+ },
148
+ {
149
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_surgeon_psychologist_results",
150
+ "scr_dir1_threshold_2": 0.48387068863579336,
151
+ "scr_metric_threshold_2": 0.03535352927204973,
152
+ "scr_dir2_threshold_2": 0.03535352927204973,
153
+ "scr_dir1_threshold_5": 0.4999995193171997,
154
+ "scr_metric_threshold_5": 0.0530303691664592,
155
+ "scr_dir2_threshold_5": 0.0530303691664592,
156
+ "scr_dir1_threshold_10": 0.48387068863579336,
157
+ "scr_metric_threshold_10": 0.06818183870683217,
158
+ "scr_dir2_threshold_10": 0.06818183870683217,
159
+ "scr_dir1_threshold_20": 0.516128349998606,
160
+ "scr_metric_threshold_20": 0.12121220787329137,
161
+ "scr_dir2_threshold_20": 0.12121220787329137,
162
+ "scr_dir1_threshold_50": 0.43548323522597376,
163
+ "scr_metric_threshold_50": 0.16161617681987567,
164
+ "scr_dir2_threshold_50": 0.16161617681987567,
165
+ "scr_dir1_threshold_100": 0.24193534431789668,
166
+ "scr_metric_threshold_100": 0.23484845520124242,
167
+ "scr_dir2_threshold_100": 0.23484845520124242,
168
+ "scr_dir1_threshold_500": -0.06451628409122591,
169
+ "scr_metric_threshold_500": 0.11111117800745299,
170
+ "scr_dir2_threshold_500": 0.11111117800745299
171
+ },
172
+ {
173
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_attorney_teacher_results",
174
+ "scr_dir1_threshold_2": 0.2845529991791037,
175
+ "scr_metric_threshold_2": 0.13782990125903902,
176
+ "scr_dir2_threshold_2": 0.13782990125903902,
177
+ "scr_dir1_threshold_5": 0.23577218043436754,
178
+ "scr_metric_threshold_5": 0.17888562024670382,
179
+ "scr_dir2_threshold_5": 0.17888562024670382,
180
+ "scr_dir1_threshold_10": 0.22764220550708036,
181
+ "scr_metric_threshold_10": 0.23167156962994426,
182
+ "scr_dir2_threshold_10": 0.23167156962994426,
183
+ "scr_dir1_threshold_20": -0.032520384299655265,
184
+ "scr_metric_threshold_20": 0.29618757461506057,
185
+ "scr_dir2_threshold_20": 0.29618757461506057,
186
+ "scr_dir1_threshold_50": -0.24390215536165474,
187
+ "scr_metric_threshold_50": 0.36070375439387664,
188
+ "scr_dir2_threshold_50": 0.36070375439387664,
189
+ "scr_dir1_threshold_100": -0.21138177106199948,
190
+ "scr_metric_threshold_100": 0.46041062535941957,
191
+ "scr_dir2_threshold_100": 0.46041062535941957,
192
+ "scr_dir1_threshold_500": -0.5609755388404137,
193
+ "scr_metric_threshold_500": 0.18475064804764177,
194
+ "scr_dir2_threshold_500": 0.18475064804764177
195
+ },
196
+ {
197
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Books_CDs_and_Vinyl_results",
198
+ "scr_dir1_threshold_2": 0.021857964433295084,
199
+ "scr_metric_threshold_2": 0.125,
200
+ "scr_dir2_threshold_2": 0.125,
201
+ "scr_dir1_threshold_5": -0.027322374114521025,
202
+ "scr_metric_threshold_5": 0.23437497089617432,
203
+ "scr_dir2_threshold_5": 0.23437497089617432,
204
+ "scr_dir1_threshold_10": -0.021857964433295084,
205
+ "scr_metric_threshold_10": 0.3554687718278693,
206
+ "scr_dir2_threshold_10": 0.3554687718278693,
207
+ "scr_dir1_threshold_20": 0.0,
208
+ "scr_metric_threshold_20": 0.4921876018633899,
209
+ "scr_dir2_threshold_20": 0.4921876018633899,
210
+ "scr_dir1_threshold_50": -0.00546440968122594,
211
+ "scr_metric_threshold_50": 0.5703125145519129,
212
+ "scr_dir2_threshold_50": 0.5703125145519129,
213
+ "scr_dir1_threshold_100": 0.021857964433295084,
214
+ "scr_metric_threshold_100": 0.625,
215
+ "scr_dir2_threshold_100": 0.625,
216
+ "scr_dir1_threshold_500": -0.00546440968122594,
217
+ "scr_metric_threshold_500": 0.703124912688523,
218
+ "scr_dir2_threshold_500": 0.703124912688523
219
+ },
220
+ {
221
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Software_Electronics_results",
222
+ "scr_dir1_threshold_2": 0.025641002128344394,
223
+ "scr_metric_threshold_2": 0.05241939554166917,
224
+ "scr_dir2_threshold_2": 0.05241939554166917,
225
+ "scr_dir1_threshold_5": 0.05128200425668879,
226
+ "scr_metric_threshold_5": 0.1008065853511987,
227
+ "scr_dir2_threshold_5": 0.1008065853511987,
228
+ "scr_dir1_threshold_10": 0.1179487320563267,
229
+ "scr_metric_threshold_10": 0.1370969176230247,
230
+ "scr_dir2_threshold_10": 0.1370969176230247,
231
+ "scr_dir1_threshold_20": 0.18461515419110838,
232
+ "scr_metric_threshold_20": 0.16935480382142643,
233
+ "scr_dir2_threshold_20": 0.16935480382142643,
234
+ "scr_dir1_threshold_50": 0.24102560340535104,
235
+ "scr_metric_threshold_50": 0.23387105690079912,
236
+ "scr_dir2_threshold_50": 0.23387105690079912,
237
+ "scr_dir1_threshold_100": 0.2666666055336954,
238
+ "scr_metric_threshold_100": 0.2500001201706423,
239
+ "scr_dir2_threshold_100": 0.2500001201706423,
240
+ "scr_dir1_threshold_500": 0.30769233120498896,
241
+ "scr_metric_threshold_500": 0.4838709367301568,
242
+ "scr_dir2_threshold_500": 0.4838709367301568
243
+ },
244
+ {
245
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Pet_Supplies_Office_Products_results",
246
+ "scr_dir1_threshold_2": 0.027026947205857795,
247
+ "scr_metric_threshold_2": 0.2544641609836042,
248
+ "scr_dir2_threshold_2": 0.2544641609836042,
249
+ "scr_dir1_threshold_5": 0.06306305580659313,
250
+ "scr_metric_threshold_5": 0.3392855479781389,
251
+ "scr_dir2_threshold_5": 0.3392855479781389,
252
+ "scr_dir1_threshold_10": 0.10360361086007354,
253
+ "scr_metric_threshold_10": 0.41071425245277066,
254
+ "scr_dir2_threshold_10": 0.41071425245277066,
255
+ "scr_dir1_threshold_20": 0.1756755595721568,
256
+ "scr_metric_threshold_20": 0.46428578080874444,
257
+ "scr_dir2_threshold_20": 0.46428578080874444,
258
+ "scr_dir1_threshold_50": 0.29279277827985295,
259
+ "scr_metric_threshold_50": 0.4999998669539397,
260
+ "scr_dir2_threshold_50": 0.4999998669539397,
261
+ "scr_dir1_threshold_100": 0.3333333333333333,
262
+ "scr_metric_threshold_100": 0.5491071678032792,
263
+ "scr_dir2_threshold_100": 0.5491071678032792,
264
+ "scr_dir1_threshold_500": 0.1216216651604412,
265
+ "scr_metric_threshold_500": 0.6562499584231062,
266
+ "scr_dir2_threshold_500": 0.6562499584231062
267
+ },
268
+ {
269
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Industrial_and_Scientific_Toys_and_Games_results",
270
+ "scr_dir1_threshold_2": 0.012875624313779262,
271
+ "scr_metric_threshold_2": 0.012875624313779262,
272
+ "scr_dir2_threshold_2": 0.08571430193323373,
273
+ "scr_dir1_threshold_5": 0.030043038127524242,
274
+ "scr_metric_threshold_5": 0.030043038127524242,
275
+ "scr_dir2_threshold_5": 0.11428564130044823,
276
+ "scr_dir1_threshold_10": 0.060085820441166386,
277
+ "scr_metric_threshold_10": 0.060085820441166386,
278
+ "scr_dir2_threshold_10": 0.13809513807839202,
279
+ "scr_dir1_threshold_20": 0.03433482762748996,
280
+ "scr_metric_threshold_20": 0.03433482762748996,
281
+ "scr_dir2_threshold_20": 0.17619044645573817,
282
+ "scr_dir1_threshold_50": 0.11587985138236706,
283
+ "scr_metric_threshold_50": 0.11587985138236706,
284
+ "scr_dir2_threshold_50": 0.22857128260089646,
285
+ "scr_dir1_threshold_100": 0.10729601656855352,
286
+ "scr_metric_threshold_100": 0.10729601656855352,
287
+ "scr_dir2_threshold_100": 0.2761905599883743,
288
+ "scr_dir1_threshold_500": 0.10729601656855352,
289
+ "scr_metric_threshold_500": 0.10729601656855352,
290
+ "scr_dir2_threshold_500": 0.24285709420029883
291
+ }
292
+ ],
293
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
294
+ "sae_lens_id": "custom_sae",
295
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_2",
296
+ "sae_lens_version": "5.4.2",
297
+ "sae_cfg_dict": {
298
+ "model_name": "gemma-2-2b",
299
+ "d_in": 2304,
300
+ "d_sae": 16384,
301
+ "hook_layer": 12,
302
+ "hook_name": "blocks.12.hook_resid_post",
303
+ "context_size": null,
304
+ "hook_head_index": null,
305
+ "architecture": "standard_april_update",
306
+ "apply_b_dec_to_input": null,
307
+ "finetuning_scaling_factor": null,
308
+ "activation_fn_str": "",
309
+ "prepend_bos": true,
310
+ "normalize_activations": "none",
311
+ "dtype": "bfloat16",
312
+ "device": "",
313
+ "dataset_path": "",
314
+ "dataset_trust_remote_code": true,
315
+ "seqpos_slice": [
316
+ null
317
+ ],
318
+ "training_tokens": -100000,
319
+ "sae_lens_training_version": null,
320
+ "neuronpedia_id": null
321
+ },
322
+ "eval_result_unstructured": null
323
+ }
eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_3_custom_sae_eval_results.json ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "scr",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "canrager/amazon_reviews_mcauley_1and5"
8
+ ],
9
+ "perform_scr": true,
10
+ "early_stopping_patience": 20,
11
+ "train_set_size": 4000,
12
+ "test_set_size": 1000,
13
+ "context_length": 128,
14
+ "probe_train_batch_size": 16,
15
+ "probe_test_batch_size": 500,
16
+ "probe_epochs": 20,
17
+ "probe_lr": 0.001,
18
+ "probe_l1_penalty": 0.001,
19
+ "sae_batch_size": 125,
20
+ "llm_batch_size": 32,
21
+ "llm_dtype": "bfloat16",
22
+ "lower_vram_usage": false,
23
+ "model_name": "gemma-2-2b",
24
+ "n_values": [
25
+ 2,
26
+ 5,
27
+ 10,
28
+ 20,
29
+ 50,
30
+ 100,
31
+ 500
32
+ ],
33
+ "column1_vals_lookup": {
34
+ "LabHC/bias_in_bios_class_set1": [
35
+ [
36
+ "professor",
37
+ "nurse"
38
+ ],
39
+ [
40
+ "architect",
41
+ "journalist"
42
+ ],
43
+ [
44
+ "surgeon",
45
+ "psychologist"
46
+ ],
47
+ [
48
+ "attorney",
49
+ "teacher"
50
+ ]
51
+ ],
52
+ "canrager/amazon_reviews_mcauley_1and5": [
53
+ [
54
+ "Books",
55
+ "CDs_and_Vinyl"
56
+ ],
57
+ [
58
+ "Software",
59
+ "Electronics"
60
+ ],
61
+ [
62
+ "Pet_Supplies",
63
+ "Office_Products"
64
+ ],
65
+ [
66
+ "Industrial_and_Scientific",
67
+ "Toys_and_Games"
68
+ ]
69
+ ]
70
+ }
71
+ },
72
+ "eval_id": "abf2cf7d-99b8-40f2-930e-cf4d9a4a66b0",
73
+ "datetime_epoch_millis": 1740083259968,
74
+ "eval_result_metrics": {
75
+ "scr_metrics": {
76
+ "scr_dir1_threshold_2": 0.13531781556751246,
77
+ "scr_metric_threshold_2": 0.09607065744564652,
78
+ "scr_dir2_threshold_2": 0.09921546099599182,
79
+ "scr_dir1_threshold_5": 0.19140089806289062,
80
+ "scr_metric_threshold_5": 0.13960420664064727,
81
+ "scr_dir2_threshold_5": 0.14411574424901313,
82
+ "scr_dir1_threshold_10": 0.20292778467294417,
83
+ "scr_metric_threshold_10": 0.18089129135676427,
84
+ "scr_dir2_threshold_10": 0.18623312131239023,
85
+ "scr_dir1_threshold_20": 0.1878010365018607,
86
+ "scr_metric_threshold_20": 0.22001189820413825,
87
+ "scr_dir2_threshold_20": 0.22921893226369516,
88
+ "scr_dir1_threshold_50": 0.18834599243051853,
89
+ "scr_metric_threshold_50": 0.2630961197206862,
90
+ "scr_dir2_threshold_50": 0.27348596097217376,
91
+ "scr_dir1_threshold_100": 0.09007640039318705,
92
+ "scr_metric_threshold_100": 0.29354709832153386,
93
+ "scr_dir2_threshold_100": 0.30173225270792664,
94
+ "scr_dir1_threshold_500": -0.07302779195784508,
95
+ "scr_metric_threshold_500": 0.3027060667728086,
96
+ "scr_dir2_threshold_500": 0.32245108318658156
97
+ }
98
+ },
99
+ "eval_result_details": [
100
+ {
101
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_professor_nurse_results",
102
+ "scr_dir1_threshold_2": 0.2857136099242737,
103
+ "scr_metric_threshold_2": 0.0024571168813214595,
104
+ "scr_dir2_threshold_2": 0.0024571168813214595,
105
+ "scr_dir1_threshold_5": 0.3492058386094512,
106
+ "scr_metric_threshold_5": 0.01474211549298745,
107
+ "scr_dir2_threshold_5": 0.01474211549298745,
108
+ "scr_dir1_threshold_10": 0.3650786592542414,
109
+ "scr_metric_threshold_10": 0.027027114104653437,
110
+ "scr_dir2_threshold_10": 0.027027114104653437,
111
+ "scr_dir1_threshold_20": 0.3968252466498386,
112
+ "scr_metric_threshold_20": 0.04422605358149169,
113
+ "scr_dir2_threshold_20": 0.04422605358149169,
114
+ "scr_dir1_threshold_50": 0.333333017964661,
115
+ "scr_metric_threshold_50": 0.06879605080482366,
116
+ "scr_dir2_threshold_50": 0.06879605080482366,
117
+ "scr_dir1_threshold_100": 0.333333017964661,
118
+ "scr_metric_threshold_100": 0.07616710855131739,
119
+ "scr_dir2_threshold_100": 0.07616710855131739,
120
+ "scr_dir1_threshold_500": 0.11111069061954806,
121
+ "scr_metric_threshold_500": 0.12285013256539522,
122
+ "scr_dir2_threshold_500": 0.12285013256539522
123
+ },
124
+ {
125
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_architect_journalist_results",
126
+ "scr_dir1_threshold_2": 0.1313126752015721,
127
+ "scr_metric_threshold_2": 0.1671388604233078,
128
+ "scr_dir2_threshold_2": 0.1671388604233078,
129
+ "scr_dir1_threshold_5": 0.17171680682792437,
130
+ "scr_metric_threshold_5": 0.21246456436179043,
131
+ "scr_dir2_threshold_5": 0.21246456436179043,
132
+ "scr_dir1_threshold_10": 0.1818176892176979,
133
+ "scr_metric_threshold_10": 0.2691217364978167,
134
+ "scr_dir2_threshold_10": 0.2691217364978167,
135
+ "scr_dir1_threshold_20": 0.16161592443815084,
136
+ "scr_metric_threshold_20": 0.30878195961506544,
137
+ "scr_dir2_threshold_20": 0.30878195961506544,
138
+ "scr_dir1_threshold_50": 0.17171680682792437,
139
+ "scr_metric_threshold_50": 0.3456090201924673,
140
+ "scr_dir2_threshold_50": 0.3456090201924673,
141
+ "scr_dir1_threshold_100": -0.31313156855378627,
142
+ "scr_metric_threshold_100": 0.3569404883900109,
143
+ "scr_dir2_threshold_100": 0.3569404883900109,
144
+ "scr_dir1_threshold_500": -1.1313138793360884,
145
+ "scr_metric_threshold_500": 0.27478755502243446,
146
+ "scr_dir2_threshold_500": 0.27478755502243446
147
+ },
148
+ {
149
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_surgeon_psychologist_results",
150
+ "scr_dir1_threshold_2": 0.258064174999303,
151
+ "scr_metric_threshold_2": 0.025252499406211347,
152
+ "scr_dir2_threshold_2": 0.025252499406211347,
153
+ "scr_dir1_threshold_5": 0.37096695113474787,
154
+ "scr_metric_threshold_5": 0.04545455913788811,
155
+ "scr_dir2_threshold_5": 0.04545455913788811,
156
+ "scr_dir1_threshold_10": 0.3870967431817548,
157
+ "scr_metric_threshold_10": 0.06818183870683217,
158
+ "scr_dir2_threshold_10": 0.06818183870683217,
159
+ "scr_dir1_threshold_20": 0.3225804590905289,
160
+ "scr_metric_threshold_20": 0.09090911827577622,
161
+ "scr_dir2_threshold_20": 0.09090911827577622,
162
+ "scr_dir1_threshold_50": 0.29032183636211567,
163
+ "scr_metric_threshold_50": 0.13383845757639704,
164
+ "scr_dir2_threshold_50": 0.13383845757639704,
165
+ "scr_dir1_threshold_100": 0.14516043749825752,
166
+ "scr_metric_threshold_100": 0.17171720668571405,
167
+ "scr_dir2_threshold_100": 0.17171720668571405,
168
+ "scr_dir1_threshold_500": -0.14516139886385812,
169
+ "scr_metric_threshold_500": 0.16919198684844677,
170
+ "scr_dir2_threshold_500": 0.16919198684844677
171
+ },
172
+ {
173
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_attorney_teacher_results",
174
+ "scr_dir1_threshold_2": 0.12195107768082737,
175
+ "scr_metric_threshold_2": 0.17302041765206616,
176
+ "scr_dir2_threshold_2": 0.17302041765206616,
177
+ "scr_dir1_threshold_5": 0.20325228072521878,
178
+ "scr_metric_threshold_5": 0.20234608103785506,
179
+ "scr_dir2_threshold_5": 0.20234608103785506,
180
+ "scr_dir1_threshold_10": 0.19512182120742508,
181
+ "scr_metric_threshold_10": 0.24926682782645782,
182
+ "scr_dir2_threshold_10": 0.24926682782645782,
183
+ "scr_dir1_threshold_20": 0.02439040937236807,
184
+ "scr_metric_threshold_20": 0.2639295721225024,
185
+ "scr_dir2_threshold_20": 0.2639295721225024,
186
+ "scr_dir1_threshold_50": 0.0,
187
+ "scr_metric_threshold_50": 0.3108504937048049,
188
+ "scr_dir2_threshold_50": 0.3108504937048049,
189
+ "scr_dir1_threshold_100": -0.11382110275354018,
190
+ "scr_metric_threshold_100": 0.39296193168013455,
191
+ "scr_dir2_threshold_100": 0.39296193168013455,
192
+ "scr_dir1_threshold_500": -0.24390215536165474,
193
+ "scr_metric_threshold_500": 0.30205277720969825,
194
+ "scr_dir2_threshold_500": 0.30205277720969825
195
+ },
196
+ {
197
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Books_CDs_and_Vinyl_results",
198
+ "scr_dir1_threshold_2": 0.04918033854781611,
199
+ "scr_metric_threshold_2": 0.09375017462295412,
200
+ "scr_dir2_threshold_2": 0.09375017462295412,
201
+ "scr_dir1_threshold_5": 0.05464474822904205,
202
+ "scr_metric_threshold_5": 0.18750011641530276,
203
+ "scr_dir2_threshold_5": 0.18750011641530276,
204
+ "scr_dir1_threshold_10": 0.06557389329988525,
205
+ "scr_metric_threshold_10": 0.31250011641530273,
206
+ "scr_dir2_threshold_10": 0.31250011641530273,
207
+ "scr_dir1_threshold_20": 0.06010915791026799,
208
+ "scr_metric_threshold_20": 0.42968748544808716,
209
+ "scr_dir2_threshold_20": 0.42968748544808716,
210
+ "scr_dir1_threshold_50": 0.00546440968122594,
211
+ "scr_metric_threshold_50": 0.5273438591393463,
212
+ "scr_dir2_threshold_50": 0.5273438591393463,
213
+ "scr_dir1_threshold_100": -0.01092881936245188,
214
+ "scr_metric_threshold_100": 0.5,
215
+ "scr_dir2_threshold_100": 0.5,
216
+ "scr_dir1_threshold_500": 0.11475423184770137,
217
+ "scr_metric_threshold_500": 0.5859375436557386,
218
+ "scr_dir2_threshold_500": 0.5859375436557386
219
+ },
220
+ {
221
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Software_Electronics_results",
222
+ "scr_dir1_threshold_2": 0.07692300638503319,
223
+ "scr_metric_threshold_2": 0.07258066454365199,
224
+ "scr_dir2_threshold_2": 0.07258066454365199,
225
+ "scr_dir1_threshold_5": 0.16410229135546164,
226
+ "scr_metric_threshold_5": 0.10887099681547797,
227
+ "scr_dir2_threshold_5": 0.10887099681547797,
228
+ "scr_dir1_threshold_10": 0.18974359914866223,
229
+ "scr_metric_threshold_10": 0.10483879108333834,
230
+ "scr_dir2_threshold_10": 0.10483879108333834,
231
+ "scr_dir1_threshold_20": 0.2358974641126534,
232
+ "scr_metric_threshold_20": 0.1491935348194436,
233
+ "scr_dir2_threshold_20": 0.1491935348194436,
234
+ "scr_dir1_threshold_50": 0.2358974641126534,
235
+ "scr_metric_threshold_50": 0.19758072462897314,
236
+ "scr_dir2_threshold_50": 0.19758072462897314,
237
+ "scr_dir1_threshold_100": 0.28717946836934216,
238
+ "scr_metric_threshold_100": 0.2500001201706423,
239
+ "scr_dir2_threshold_100": 0.2500001201706423,
240
+ "scr_dir1_threshold_500": 0.32307674908308187,
241
+ "scr_metric_threshold_500": 0.3991936549900859,
242
+ "scr_dir2_threshold_500": 0.3991936549900859
243
+ },
244
+ {
245
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Pet_Supplies_Office_Products_results",
246
+ "scr_dir1_threshold_2": 0.10360361086007354,
247
+ "scr_metric_threshold_2": 0.17857149509445872,
248
+ "scr_dir2_threshold_2": 0.17857149509445872,
249
+ "scr_dir1_threshold_5": 0.148648612366299,
250
+ "scr_metric_threshold_5": 0.2767855646088965,
251
+ "scr_dir2_threshold_5": 0.2767855646088965,
252
+ "scr_dir1_threshold_10": 0.15315305881904406,
253
+ "scr_metric_threshold_10": 0.33035709296487026,
254
+ "scr_dir2_threshold_10": 0.33035709296487026,
255
+ "scr_dir1_threshold_20": 0.19369361387252446,
256
+ "scr_metric_threshold_20": 0.3660714452021861,
257
+ "scr_dir2_threshold_20": 0.3660714452021861,
258
+ "scr_dir1_threshold_50": 0.31981972548571075,
259
+ "scr_metric_threshold_50": 0.37053567270882043,
260
+ "scr_dir2_threshold_50": 0.37053567270882043,
261
+ "scr_dir1_threshold_100": 0.2297297224732598,
262
+ "scr_metric_threshold_100": 0.43749988358469727,
263
+ "scr_dir2_threshold_100": 0.43749988358469727,
264
+ "scr_dir1_threshold_500": 0.29279277827985295,
265
+ "scr_metric_threshold_500": 0.4732142358220131,
266
+ "scr_dir2_threshold_500": 0.4732142358220131
267
+ },
268
+ {
269
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Industrial_and_Scientific_Toys_and_Games_results",
270
+ "scr_dir1_threshold_2": 0.05579403094120067,
271
+ "scr_metric_threshold_2": 0.05579403094120067,
272
+ "scr_dir2_threshold_2": 0.08095245934396302,
273
+ "scr_dir1_threshold_5": 0.06866965525497992,
274
+ "scr_metric_threshold_5": 0.06866965525497992,
275
+ "scr_dir2_threshold_5": 0.10476195612190681,
276
+ "scr_dir1_threshold_10": 0.08583681325484281,
277
+ "scr_metric_threshold_10": 0.08583681325484281,
278
+ "scr_dir2_threshold_10": 0.12857145289985059,
279
+ "scr_dir1_threshold_20": 0.10729601656855352,
280
+ "scr_metric_threshold_20": 0.10729601656855352,
281
+ "scr_dir2_threshold_20": 0.18095228904500887,
282
+ "scr_dir1_threshold_50": 0.15021467900985702,
283
+ "scr_metric_threshold_50": 0.15021467900985702,
284
+ "scr_dir2_threshold_50": 0.2333334090217574,
285
+ "scr_dir1_threshold_100": 0.16309004750975417,
286
+ "scr_metric_threshold_100": 0.16309004750975417,
287
+ "scr_dir2_threshold_100": 0.22857128260089646,
288
+ "scr_dir1_threshold_500": 0.09442064806865635,
289
+ "scr_metric_threshold_500": 0.09442064806865635,
290
+ "scr_dir2_threshold_500": 0.25238077937884024
291
+ }
292
+ ],
293
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
294
+ "sae_lens_id": "custom_sae",
295
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_3",
296
+ "sae_lens_version": "5.4.2",
297
+ "sae_cfg_dict": {
298
+ "model_name": "gemma-2-2b",
299
+ "d_in": 2304,
300
+ "d_sae": 16384,
301
+ "hook_layer": 12,
302
+ "hook_name": "blocks.12.hook_resid_post",
303
+ "context_size": null,
304
+ "hook_head_index": null,
305
+ "architecture": "standard_april_update",
306
+ "apply_b_dec_to_input": null,
307
+ "finetuning_scaling_factor": null,
308
+ "activation_fn_str": "",
309
+ "prepend_bos": true,
310
+ "normalize_activations": "none",
311
+ "dtype": "bfloat16",
312
+ "device": "",
313
+ "dataset_path": "",
314
+ "dataset_trust_remote_code": true,
315
+ "seqpos_slice": [
316
+ null
317
+ ],
318
+ "training_tokens": -100000,
319
+ "sae_lens_training_version": null,
320
+ "neuronpedia_id": null
321
+ },
322
+ "eval_result_unstructured": null
323
+ }
eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_4_custom_sae_eval_results.json ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "scr",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "canrager/amazon_reviews_mcauley_1and5"
8
+ ],
9
+ "perform_scr": true,
10
+ "early_stopping_patience": 20,
11
+ "train_set_size": 4000,
12
+ "test_set_size": 1000,
13
+ "context_length": 128,
14
+ "probe_train_batch_size": 16,
15
+ "probe_test_batch_size": 500,
16
+ "probe_epochs": 20,
17
+ "probe_lr": 0.001,
18
+ "probe_l1_penalty": 0.001,
19
+ "sae_batch_size": 125,
20
+ "llm_batch_size": 32,
21
+ "llm_dtype": "bfloat16",
22
+ "lower_vram_usage": false,
23
+ "model_name": "gemma-2-2b",
24
+ "n_values": [
25
+ 2,
26
+ 5,
27
+ 10,
28
+ 20,
29
+ 50,
30
+ 100,
31
+ 500
32
+ ],
33
+ "column1_vals_lookup": {
34
+ "LabHC/bias_in_bios_class_set1": [
35
+ [
36
+ "professor",
37
+ "nurse"
38
+ ],
39
+ [
40
+ "architect",
41
+ "journalist"
42
+ ],
43
+ [
44
+ "surgeon",
45
+ "psychologist"
46
+ ],
47
+ [
48
+ "attorney",
49
+ "teacher"
50
+ ]
51
+ ],
52
+ "canrager/amazon_reviews_mcauley_1and5": [
53
+ [
54
+ "Books",
55
+ "CDs_and_Vinyl"
56
+ ],
57
+ [
58
+ "Software",
59
+ "Electronics"
60
+ ],
61
+ [
62
+ "Pet_Supplies",
63
+ "Office_Products"
64
+ ],
65
+ [
66
+ "Industrial_and_Scientific",
67
+ "Toys_and_Games"
68
+ ]
69
+ ]
70
+ }
71
+ },
72
+ "eval_id": "80b9520b-e291-4a4d-b46f-3f2bf20d50b3",
73
+ "datetime_epoch_millis": 1740082815106,
74
+ "eval_result_metrics": {
75
+ "scr_metrics": {
76
+ "scr_dir1_threshold_2": 0.13500621635387275,
77
+ "scr_metric_threshold_2": 0.07620098979953326,
78
+ "scr_dir2_threshold_2": 0.08250590731321146,
79
+ "scr_dir1_threshold_5": 0.1654151462479824,
80
+ "scr_metric_threshold_5": 0.11013616987779451,
81
+ "scr_dir2_threshold_5": 0.11959354789941858,
82
+ "scr_dir1_threshold_10": 0.16883747448802375,
83
+ "scr_metric_threshold_10": 0.1517570879734312,
84
+ "scr_dir2_threshold_10": 0.16371293604096565,
85
+ "scr_dir1_threshold_20": 0.15291293626476304,
86
+ "scr_metric_threshold_20": 0.1890154234781714,
87
+ "scr_dir2_threshold_20": 0.2019190486907705,
88
+ "scr_dir1_threshold_50": 0.14393894061181473,
89
+ "scr_metric_threshold_50": 0.24235281750516754,
90
+ "scr_dir2_threshold_50": 0.24822346419068866,
91
+ "scr_dir1_threshold_100": 0.04156931816221307,
92
+ "scr_metric_threshold_100": 0.2582905262149958,
93
+ "scr_dir2_threshold_100": 0.28013036967692434,
94
+ "scr_dir1_threshold_500": -0.0589299254074618,
95
+ "scr_metric_threshold_500": 0.23596095536001083,
96
+ "scr_dir2_threshold_500": 0.2646575713939985
97
+ }
98
+ },
99
+ "eval_result_details": [
100
+ {
101
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_professor_nurse_results",
102
+ "scr_dir1_threshold_2": 0.2857136099242737,
103
+ "scr_metric_threshold_2": -0.0024569704325861324,
104
+ "scr_dir2_threshold_2": -0.0024569704325861324,
105
+ "scr_dir1_threshold_5": 0.3492058386094512,
106
+ "scr_metric_threshold_5": 0.03439802540241183,
107
+ "scr_dir2_threshold_5": 0.03439802540241183,
108
+ "scr_dir1_threshold_10": 0.3809524260050484,
109
+ "scr_metric_threshold_10": 0.04668317046281315,
110
+ "scr_dir2_threshold_10": 0.04668317046281315,
111
+ "scr_dir1_threshold_20": 0.3809524260050484,
112
+ "scr_metric_threshold_20": 0.05159711132798542,
113
+ "scr_dir2_threshold_20": 0.05159711132798542,
114
+ "scr_dir1_threshold_50": 0.42857088793941894,
115
+ "scr_metric_threshold_50": 0.061425139507065275,
116
+ "scr_dir2_threshold_50": 0.061425139507065275,
117
+ "scr_dir1_threshold_100": 0.2698407892794835,
118
+ "scr_metric_threshold_100": 0.07371013811873126,
119
+ "scr_dir2_threshold_100": 0.07371013811873126,
120
+ "scr_dir1_threshold_500": -0.01587376675080703,
121
+ "scr_metric_threshold_500": 0.11547922126763682,
122
+ "scr_dir2_threshold_500": 0.11547922126763682
123
+ },
124
+ {
125
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_architect_journalist_results",
126
+ "scr_dir1_threshold_2": 0.1818176892176979,
127
+ "scr_metric_threshold_2": 0.13314445583067688,
128
+ "scr_dir2_threshold_2": 0.13314445583067688,
129
+ "scr_dir1_threshold_5": 0.19191917367472955,
130
+ "scr_metric_threshold_5": 0.15014157370114636,
131
+ "scr_dir2_threshold_5": 0.15014157370114636,
132
+ "scr_dir1_threshold_10": 0.20202005606450307,
133
+ "scr_metric_threshold_10": 0.20679891468886458,
134
+ "scr_dir2_threshold_10": 0.20679891468886458,
135
+ "scr_dir1_threshold_20": 0.1818176892176979,
136
+ "scr_metric_threshold_20": 0.2521246186273472,
137
+ "scr_dir2_threshold_20": 0.2521246186273472,
138
+ "scr_dir1_threshold_50": 0.15151504204837732,
139
+ "scr_metric_threshold_50": 0.3059489659269105,
140
+ "scr_dir2_threshold_50": 0.3059489659269105,
141
+ "scr_dir1_threshold_100": -0.3232324509435598,
142
+ "scr_metric_threshold_100": 0.3512748387170851,
143
+ "scr_dir2_threshold_100": 0.3512748387170851,
144
+ "scr_dir1_threshold_500": -0.5050507422285159,
145
+ "scr_metric_threshold_500": 0.21813038288640824,
146
+ "scr_dir2_threshold_500": 0.21813038288640824
147
+ },
148
+ {
149
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_surgeon_psychologist_results",
150
+ "scr_dir1_threshold_2": 0.3225804590905289,
151
+ "scr_metric_threshold_2": 0.025252499406211347,
152
+ "scr_dir2_threshold_2": 0.025252499406211347,
153
+ "scr_dir1_threshold_5": 0.37096695113474787,
154
+ "scr_metric_threshold_5": 0.03030308959751515,
155
+ "scr_dir2_threshold_5": 0.03030308959751515,
156
+ "scr_dir1_threshold_10": 0.3870967431817548,
157
+ "scr_metric_threshold_10": 0.055555589003726494,
158
+ "scr_dir2_threshold_10": 0.055555589003726494,
159
+ "scr_dir1_threshold_20": 0.24193534431789668,
160
+ "scr_metric_threshold_20": 0.08333330824720513,
161
+ "scr_dir2_threshold_20": 0.08333330824720513,
162
+ "scr_dir1_threshold_50": 0.16129022954526445,
163
+ "scr_metric_threshold_50": 0.11616161768198757,
164
+ "scr_dir2_threshold_50": 0.11616161768198757,
165
+ "scr_dir1_threshold_100": 0.20967672158948342,
166
+ "scr_metric_threshold_100": 0.1590909569826084,
167
+ "scr_dir2_threshold_100": 0.1590909569826084,
168
+ "scr_dir1_threshold_500": -0.22580747500209097,
169
+ "scr_metric_threshold_500": 0.10606058781614919,
170
+ "scr_dir2_threshold_500": 0.10606058781614919
171
+ },
172
+ {
173
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_attorney_teacher_results",
174
+ "scr_dir1_threshold_2": 0.1707318964255635,
175
+ "scr_metric_threshold_2": 0.10850441266694984,
176
+ "scr_dir2_threshold_2": 0.10850441266694984,
177
+ "scr_dir1_threshold_5": 0.21138225565250596,
178
+ "scr_metric_threshold_5": 0.14369492905997697,
179
+ "scr_dir2_threshold_5": 0.14369492905997697,
180
+ "scr_dir1_threshold_10": 0.11382110275354018,
181
+ "scr_metric_threshold_10": 0.19354836454274843,
182
+ "scr_dir2_threshold_10": 0.19354836454274843,
183
+ "scr_dir1_threshold_20": 0.06504076859931053,
184
+ "scr_metric_threshold_20": 0.2375365974308822,
185
+ "scr_dir2_threshold_20": 0.2375365974308822,
186
+ "scr_dir1_threshold_50": -0.04878033415422965,
187
+ "scr_metric_threshold_50": 0.2932550607145916,
188
+ "scr_dir2_threshold_50": 0.2932550607145916,
189
+ "scr_dir1_threshold_100": -0.0975606683084593,
190
+ "scr_metric_threshold_100": 0.35777124049340764,
191
+ "scr_dir2_threshold_100": 0.35777124049340764,
192
+ "scr_dir1_threshold_500": -0.032520384299655265,
193
+ "scr_metric_threshold_500": 0.17302041765206616,
194
+ "scr_dir2_threshold_500": 0.17302041765206616
195
+ },
196
+ {
197
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Books_CDs_and_Vinyl_results",
198
+ "scr_dir1_threshold_2": 0.016393554752069144,
199
+ "scr_metric_threshold_2": 0.09375017462295412,
200
+ "scr_dir2_threshold_2": 0.09375017462295412,
201
+ "scr_dir1_threshold_5": 0.021857964433295084,
202
+ "scr_metric_threshold_5": 0.16796888824317202,
203
+ "scr_dir2_threshold_5": 0.16796888824317202,
204
+ "scr_dir1_threshold_10": 0.04371592886659017,
205
+ "scr_metric_threshold_10": 0.2695312281721307,
206
+ "scr_dir2_threshold_10": 0.2695312281721307,
207
+ "scr_dir1_threshold_20": 0.0,
208
+ "scr_metric_threshold_20": 0.3554687718278693,
209
+ "scr_dir2_threshold_20": 0.3554687718278693,
210
+ "scr_dir1_threshold_50": -0.00546440968122594,
211
+ "scr_metric_threshold_50": 0.46484374272404355,
212
+ "scr_dir2_threshold_50": 0.46484374272404355,
213
+ "scr_dir1_threshold_100": 0.00546440968122594,
214
+ "scr_metric_threshold_100": 0.4609375436557385,
215
+ "scr_dir2_threshold_100": 0.4609375436557385,
216
+ "scr_dir1_threshold_500": 0.10382508677685816,
217
+ "scr_metric_threshold_500": 0.5312500582076514,
218
+ "scr_dir2_threshold_500": 0.5312500582076514
219
+ },
220
+ {
221
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Software_Electronics_results",
222
+ "scr_dir1_threshold_2": 0.0410254200064373,
223
+ "scr_metric_threshold_2": 0.060483807005948444,
224
+ "scr_dir2_threshold_2": 0.060483807005948444,
225
+ "scr_dir1_threshold_5": 0.07179486709233554,
226
+ "scr_metric_threshold_5": 0.0927419335456348,
227
+ "scr_dir2_threshold_5": 0.0927419335456348,
228
+ "scr_dir1_threshold_10": 0.11282028709877284,
229
+ "scr_metric_threshold_10": 0.1491935348194436,
230
+ "scr_dir2_threshold_10": 0.1491935348194436,
231
+ "scr_dir1_threshold_20": 0.158974152062764,
232
+ "scr_metric_threshold_20": 0.1733872498948507,
233
+ "scr_dir2_threshold_20": 0.1733872498948507,
234
+ "scr_dir1_threshold_50": 0.12820501064172196,
235
+ "scr_metric_threshold_50": 0.23790326263293876,
236
+ "scr_dir2_threshold_50": 0.23790326263293876,
237
+ "scr_dir1_threshold_100": 0.10769214780607521,
238
+ "scr_metric_threshold_100": 0.25403232590278196,
239
+ "scr_dir2_threshold_100": 0.25403232590278196,
240
+ "scr_dir1_threshold_500": 0.10769214780607521,
241
+ "scr_metric_threshold_500": 0.3629033227182599,
242
+ "scr_dir2_threshold_500": 0.3629033227182599
243
+ },
244
+ {
245
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Pet_Supplies_Office_Products_results",
246
+ "scr_dir1_threshold_2": 0.03603610860073534,
247
+ "scr_metric_threshold_2": 0.16517854648243513,
248
+ "scr_dir2_threshold_2": 0.16517854648243513,
249
+ "scr_dir1_threshold_5": 0.06756750225933819,
250
+ "scr_metric_threshold_5": 0.22321430234504325,
251
+ "scr_dir2_threshold_5": 0.22321430234504325,
252
+ "scr_dir1_threshold_10": 0.06306305580659313,
253
+ "scr_metric_threshold_10": 0.24553570597033553,
254
+ "scr_dir2_threshold_10": 0.24553570597033553,
255
+ "scr_dir1_threshold_20": 0.1216216651604412,
256
+ "scr_metric_threshold_20": 0.2857142857142857,
257
+ "scr_dir2_threshold_20": 0.2857142857142857,
258
+ "scr_dir1_threshold_50": 0.21171166817289214,
259
+ "scr_metric_threshold_50": 0.3348213204715046,
260
+ "scr_dir2_threshold_50": 0.3348213204715046,
261
+ "scr_dir1_threshold_100": 0.13063055806593132,
262
+ "scr_metric_threshold_100": 0.37946412772208915,
263
+ "scr_dir2_threshold_100": 0.37946412772208915,
264
+ "scr_dir1_threshold_500": 0.13063055806593132,
265
+ "scr_metric_threshold_500": 0.415178479959405,
266
+ "scr_dir2_threshold_500": 0.415178479959405
267
+ },
268
+ {
269
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Industrial_and_Scientific_Toys_and_Games_results",
270
+ "scr_dir1_threshold_2": 0.025750992813676425,
271
+ "scr_metric_threshold_2": 0.025750992813676425,
272
+ "scr_dir2_threshold_2": 0.07619033292310208,
273
+ "scr_dir1_threshold_5": 0.03862661712745569,
274
+ "scr_metric_threshold_5": 0.03862661712745569,
275
+ "scr_dir2_threshold_5": 0.11428564130044823,
276
+ "scr_dir1_threshold_10": 0.047210196127387125,
277
+ "scr_metric_threshold_10": 0.047210196127387125,
278
+ "scr_dir2_threshold_10": 0.14285698066766273,
279
+ "scr_dir1_threshold_20": 0.07296144475494565,
280
+ "scr_metric_threshold_20": 0.07296144475494565,
281
+ "scr_dir2_threshold_20": 0.17619044645573817,
282
+ "scr_dir1_threshold_50": 0.1244634303822985,
283
+ "scr_metric_threshold_50": 0.1244634303822985,
284
+ "scr_dir2_threshold_50": 0.17142860386646747,
285
+ "scr_dir1_threshold_100": 0.030043038127524242,
286
+ "scr_metric_threshold_100": 0.030043038127524242,
287
+ "scr_dir2_threshold_100": 0.20476178582295265,
288
+ "scr_dir1_threshold_500": -0.03433482762748996,
289
+ "scr_metric_threshold_500": -0.03433482762748996,
290
+ "scr_dir2_threshold_500": 0.19523810064441124
291
+ }
292
+ ],
293
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
294
+ "sae_lens_id": "custom_sae",
295
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_4",
296
+ "sae_lens_version": "5.4.2",
297
+ "sae_cfg_dict": {
298
+ "model_name": "gemma-2-2b",
299
+ "d_in": 2304,
300
+ "d_sae": 16384,
301
+ "hook_layer": 12,
302
+ "hook_name": "blocks.12.hook_resid_post",
303
+ "context_size": null,
304
+ "hook_head_index": null,
305
+ "architecture": "standard_april_update",
306
+ "apply_b_dec_to_input": null,
307
+ "finetuning_scaling_factor": null,
308
+ "activation_fn_str": "",
309
+ "prepend_bos": true,
310
+ "normalize_activations": "none",
311
+ "dtype": "bfloat16",
312
+ "device": "",
313
+ "dataset_path": "",
314
+ "dataset_trust_remote_code": true,
315
+ "seqpos_slice": [
316
+ null
317
+ ],
318
+ "training_tokens": -100000,
319
+ "sae_lens_training_version": null,
320
+ "neuronpedia_id": null
321
+ },
322
+ "eval_result_unstructured": null
323
+ }
eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_5_custom_sae_eval_results.json ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "scr",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "canrager/amazon_reviews_mcauley_1and5"
8
+ ],
9
+ "perform_scr": true,
10
+ "early_stopping_patience": 20,
11
+ "train_set_size": 4000,
12
+ "test_set_size": 1000,
13
+ "context_length": 128,
14
+ "probe_train_batch_size": 16,
15
+ "probe_test_batch_size": 500,
16
+ "probe_epochs": 20,
17
+ "probe_lr": 0.001,
18
+ "probe_l1_penalty": 0.001,
19
+ "sae_batch_size": 125,
20
+ "llm_batch_size": 32,
21
+ "llm_dtype": "bfloat16",
22
+ "lower_vram_usage": false,
23
+ "model_name": "gemma-2-2b",
24
+ "n_values": [
25
+ 2,
26
+ 5,
27
+ 10,
28
+ 20,
29
+ 50,
30
+ 100,
31
+ 500
32
+ ],
33
+ "column1_vals_lookup": {
34
+ "LabHC/bias_in_bios_class_set1": [
35
+ [
36
+ "professor",
37
+ "nurse"
38
+ ],
39
+ [
40
+ "architect",
41
+ "journalist"
42
+ ],
43
+ [
44
+ "surgeon",
45
+ "psychologist"
46
+ ],
47
+ [
48
+ "attorney",
49
+ "teacher"
50
+ ]
51
+ ],
52
+ "canrager/amazon_reviews_mcauley_1and5": [
53
+ [
54
+ "Books",
55
+ "CDs_and_Vinyl"
56
+ ],
57
+ [
58
+ "Software",
59
+ "Electronics"
60
+ ],
61
+ [
62
+ "Pet_Supplies",
63
+ "Office_Products"
64
+ ],
65
+ [
66
+ "Industrial_and_Scientific",
67
+ "Toys_and_Games"
68
+ ]
69
+ ]
70
+ }
71
+ },
72
+ "eval_id": "abd03969-7249-47d7-a3de-3a1a513a3034",
73
+ "datetime_epoch_millis": 1740082666829,
74
+ "eval_result_metrics": {
75
+ "scr_metrics": {
76
+ "scr_dir1_threshold_2": 0.10241346869022357,
77
+ "scr_metric_threshold_2": 0.05750254466386166,
78
+ "scr_dir2_threshold_2": 0.06380746217753987,
79
+ "scr_dir1_threshold_5": 0.12765051570703775,
80
+ "scr_metric_threshold_5": 0.09670225220919013,
81
+ "scr_dir2_threshold_5": 0.10097875977068997,
82
+ "scr_dir1_threshold_10": 0.13228307247039922,
83
+ "scr_metric_threshold_10": 0.1148942178058397,
84
+ "scr_dir2_threshold_10": 0.12226442573690877,
85
+ "scr_dir1_threshold_20": 0.12458719597248878,
86
+ "scr_metric_threshold_20": 0.1364630026577178,
87
+ "scr_dir2_threshold_20": 0.14746342012480057,
88
+ "scr_dir1_threshold_50": 0.03466888039020051,
89
+ "scr_metric_threshold_50": 0.18187353693424071,
90
+ "scr_dir2_threshold_50": 0.19960548928777175,
91
+ "scr_dir1_threshold_100": 0.01792347700788019,
92
+ "scr_metric_threshold_100": 0.21472853035616185,
93
+ "scr_dir2_threshold_100": 0.22650045155760637,
94
+ "scr_dir1_threshold_500": -0.06287465873674672,
95
+ "scr_metric_threshold_500": 0.18135098460217056,
96
+ "scr_dir2_threshold_500": 0.2096874003593654
97
+ }
98
+ },
99
+ "eval_result_details": [
100
+ {
101
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_professor_nurse_results",
102
+ "scr_dir1_threshold_2": 0.1746029193047256,
103
+ "scr_metric_threshold_2": 0.009828028179079856,
104
+ "scr_dir2_threshold_2": 0.009828028179079856,
105
+ "scr_dir1_threshold_5": 0.23809514798990317,
106
+ "scr_metric_threshold_5": 0.04422605358149169,
107
+ "scr_dir2_threshold_5": 0.04422605358149169,
108
+ "scr_dir1_threshold_10": 0.25396796863469334,
109
+ "scr_metric_threshold_10": 0.04668317046281315,
110
+ "scr_dir2_threshold_10": 0.04668317046281315,
111
+ "scr_dir1_threshold_20": 0.222222327345113,
112
+ "scr_metric_threshold_20": 0.04914014089539928,
113
+ "scr_dir2_threshold_20": 0.04914014089539928,
114
+ "scr_dir1_threshold_50": 0.12698351126433824,
115
+ "scr_metric_threshold_50": 0.0638821099396514,
116
+ "scr_dir2_threshold_50": 0.0638821099396514,
117
+ "scr_dir1_threshold_100": 0.11111069061954806,
118
+ "scr_metric_threshold_100": 0.05896816907447914,
119
+ "scr_dir2_threshold_100": 0.05896816907447914,
120
+ "scr_dir1_threshold_500": 0.031745641289580344,
121
+ "scr_metric_threshold_500": 0.08353816629781112,
122
+ "scr_dir2_threshold_500": 0.08353816629781112
123
+ },
124
+ {
125
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_architect_journalist_results",
126
+ "scr_dir1_threshold_2": 0.15151504204837732,
127
+ "scr_metric_threshold_2": 0.12464581246959615,
128
+ "scr_dir2_threshold_2": 0.12464581246959615,
129
+ "scr_dir1_threshold_5": 0.20202005606450307,
130
+ "scr_metric_threshold_5": 0.1388102743552947,
131
+ "scr_dir2_threshold_5": 0.1388102743552947,
132
+ "scr_dir1_threshold_10": 0.1818176892176979,
133
+ "scr_metric_threshold_10": 0.15580739222576417,
134
+ "scr_dir2_threshold_10": 0.15580739222576417,
135
+ "scr_dir1_threshold_20": 0.17171680682792437,
136
+ "scr_metric_threshold_20": 0.18413597829377729,
137
+ "scr_dir2_threshold_20": 0.18413597829377729,
138
+ "scr_dir1_threshold_50": -0.25252567214788696,
139
+ "scr_metric_threshold_50": 0.26345608682489086,
140
+ "scr_dir2_threshold_50": 0.26345608682489086,
141
+ "scr_dir1_threshold_100": -0.3232324509435598,
142
+ "scr_metric_threshold_100": 0.2974504914175218,
143
+ "scr_dir2_threshold_100": 0.2974504914175218,
144
+ "scr_dir1_threshold_500": -0.343434817790365,
145
+ "scr_metric_threshold_500": 0.2294616822322599,
146
+ "scr_dir2_threshold_500": 0.2294616822322599
147
+ },
148
+ {
149
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_surgeon_psychologist_results",
150
+ "scr_dir1_threshold_2": 0.22580651363649037,
151
+ "scr_metric_threshold_2": 0.012626249703105673,
152
+ "scr_dir2_threshold_2": 0.012626249703105673,
153
+ "scr_dir1_threshold_5": 0.258064174999303,
154
+ "scr_metric_threshold_5": 0.03282830943478244,
155
+ "scr_dir2_threshold_5": 0.03282830943478244,
156
+ "scr_dir1_threshold_10": 0.3064516284091226,
157
+ "scr_metric_threshold_10": 0.042929339300620824,
158
+ "scr_dir2_threshold_10": 0.042929339300620824,
159
+ "scr_dir1_threshold_20": 0.29032183636211567,
160
+ "scr_metric_threshold_20": 0.06313139903229759,
161
+ "scr_dir2_threshold_20": 0.06313139903229759,
162
+ "scr_dir1_threshold_50": 0.17741906022667078,
163
+ "scr_metric_threshold_50": 0.09848492830434731,
164
+ "scr_dir2_threshold_50": 0.09848492830434731,
165
+ "scr_dir1_threshold_100": 0.14516043749825752,
166
+ "scr_metric_threshold_100": 0.13888889725093162,
167
+ "scr_dir2_threshold_100": 0.13888889725093162,
168
+ "scr_dir1_threshold_500": -0.27419396704630994,
169
+ "scr_metric_threshold_500": 0.11868683751925485,
170
+ "scr_dir2_threshold_500": 0.11868683751925485
171
+ },
172
+ {
173
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_attorney_teacher_results",
174
+ "scr_dir1_threshold_2": 0.16260192149827632,
175
+ "scr_metric_threshold_2": 0.070381207579754,
176
+ "scr_dir2_threshold_2": 0.070381207579754,
177
+ "scr_dir1_threshold_5": 0.1707318964255635,
178
+ "scr_metric_threshold_5": 0.12023446826882572,
179
+ "scr_dir2_threshold_5": 0.12023446826882572,
180
+ "scr_dir1_threshold_10": 0.10569112782625298,
181
+ "scr_metric_threshold_10": 0.14956013165461465,
182
+ "scr_dir2_threshold_10": 0.14956013165461465,
183
+ "scr_dir1_threshold_20": 0.0894311779716786,
184
+ "scr_metric_threshold_20": 0.16129018725649052,
185
+ "scr_dir2_threshold_20": 0.16129018725649052,
186
+ "scr_dir1_threshold_50": 0.040650359226942455,
187
+ "scr_metric_threshold_50": 0.23167156962994426,
188
+ "scr_dir2_threshold_50": 0.23167156962994426,
189
+ "scr_dir1_threshold_100": -0.13821102753540176,
190
+ "scr_metric_threshold_100": 0.26099705822203345,
191
+ "scr_dir2_threshold_100": 0.26099705822203345,
192
+ "scr_dir1_threshold_500": -0.19512182120742508,
193
+ "scr_metric_threshold_500": 0.13196469866440133,
194
+ "scr_dir2_threshold_500": 0.13196469866440133
195
+ },
196
+ {
197
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Books_CDs_and_Vinyl_results",
198
+ "scr_dir1_threshold_2": 0.021857964433295084,
199
+ "scr_metric_threshold_2": 0.04296888824317201,
200
+ "scr_dir2_threshold_2": 0.04296888824317201,
201
+ "scr_dir1_threshold_5": 0.021857964433295084,
202
+ "scr_metric_threshold_5": 0.10937497089617432,
203
+ "scr_dir2_threshold_5": 0.10937497089617432,
204
+ "scr_dir1_threshold_10": 0.03278678379574697,
205
+ "scr_metric_threshold_10": 0.14453122817213074,
206
+ "scr_dir2_threshold_10": 0.14453122817213074,
207
+ "scr_dir1_threshold_20": 0.0,
208
+ "scr_metric_threshold_20": 0.20312514551912844,
209
+ "scr_dir2_threshold_20": 0.20312514551912844,
210
+ "scr_dir1_threshold_50": 0.016393554752069144,
211
+ "scr_metric_threshold_50": 0.292968888243172,
212
+ "scr_dir2_threshold_50": 0.292968888243172,
213
+ "scr_dir1_threshold_100": 0.03825119347697291,
214
+ "scr_metric_threshold_100": 0.3554687718278693,
215
+ "scr_dir2_threshold_100": 0.3554687718278693,
216
+ "scr_dir1_threshold_500": 0.06010915791026799,
217
+ "scr_metric_threshold_500": 0.3320313445874335,
218
+ "scr_dir2_threshold_500": 0.3320313445874335
219
+ },
220
+ {
221
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Software_Electronics_results",
222
+ "scr_dir1_threshold_2": 0.025641002128344394,
223
+ "scr_metric_threshold_2": 0.04435498407738989,
224
+ "scr_dir2_threshold_2": 0.04435498407738989,
225
+ "scr_dir1_threshold_5": 0.05641014354938642,
226
+ "scr_metric_threshold_5": 0.08467752208135552,
227
+ "scr_dir2_threshold_5": 0.08467752208135552,
228
+ "scr_dir1_threshold_10": 0.07692300638503319,
229
+ "scr_metric_threshold_10": 0.1008065853511987,
230
+ "scr_dir2_threshold_10": 0.1008065853511987,
231
+ "scr_dir1_threshold_20": 0.08205114567773082,
232
+ "scr_metric_threshold_20": 0.12096785435318151,
233
+ "scr_dir2_threshold_20": 0.12096785435318151,
234
+ "scr_dir1_threshold_50": 0.10256400851337757,
235
+ "scr_metric_threshold_50": 0.15322574055158325,
236
+ "scr_dir2_threshold_50": 0.15322574055158325,
237
+ "scr_dir1_threshold_100": 0.13846128922711723,
238
+ "scr_metric_threshold_100": 0.18951607282340924,
239
+ "scr_dir2_threshold_100": 0.18951607282340924,
240
+ "scr_dir1_threshold_500": 0.1179487320563267,
241
+ "scr_metric_threshold_500": 0.2419354683650784,
242
+ "scr_dir2_threshold_500": 0.2419354683650784
243
+ },
244
+ {
245
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Pet_Supplies_Office_Products_results",
246
+ "scr_dir1_threshold_2": 0.03153139365860286,
247
+ "scr_metric_threshold_2": 0.12946419424511926,
248
+ "scr_dir2_threshold_2": 0.12946419424511926,
249
+ "scr_dir1_threshold_5": 0.022522500753112733,
250
+ "scr_metric_threshold_5": 0.19196417761436174,
251
+ "scr_dir2_threshold_5": 0.19196417761436174,
252
+ "scr_dir1_threshold_10": 0.0405405550534804,
253
+ "scr_metric_threshold_10": 0.21875007483840891,
254
+ "scr_dir2_threshold_10": 0.21875007483840891,
255
+ "scr_dir1_threshold_20": 0.07657666365421574,
256
+ "scr_metric_threshold_20": 0.24553570597033553,
257
+ "scr_dir2_threshold_20": 0.24553570597033553,
258
+ "scr_dir1_threshold_50": 0.03153139365860286,
259
+ "scr_metric_threshold_50": 0.31696414435284664,
260
+ "scr_dir2_threshold_50": 0.31696414435284664,
261
+ "scr_dir1_threshold_100": 0.09459444946519599,
262
+ "scr_metric_threshold_100": 0.3392855479781389,
263
+ "scr_dir2_threshold_100": 0.3392855479781389,
264
+ "scr_dir1_threshold_500": 0.11711721870769615,
265
+ "scr_metric_threshold_500": 0.33035709296487026,
266
+ "scr_dir2_threshold_500": 0.33035709296487026
267
+ },
268
+ {
269
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Industrial_and_Scientific_Toys_and_Games_results",
270
+ "scr_dir1_threshold_2": 0.025750992813676425,
271
+ "scr_metric_threshold_2": 0.025750992813676425,
272
+ "scr_dir2_threshold_2": 0.07619033292310208,
273
+ "scr_dir1_threshold_5": 0.05150224144123495,
274
+ "scr_metric_threshold_5": 0.05150224144123495,
275
+ "scr_dir2_threshold_5": 0.08571430193323373,
276
+ "scr_dir1_threshold_10": 0.060085820441166386,
277
+ "scr_metric_threshold_10": 0.060085820441166386,
278
+ "scr_dir2_threshold_10": 0.11904748388971893,
279
+ "scr_dir1_threshold_20": 0.0643776099411321,
280
+ "scr_metric_threshold_20": 0.0643776099411321,
281
+ "scr_dir2_threshold_20": 0.1523809496777944,
282
+ "scr_dir1_threshold_50": 0.03433482762748996,
283
+ "scr_metric_threshold_50": 0.03433482762748996,
284
+ "scr_dir2_threshold_50": 0.17619044645573817,
285
+ "scr_dir1_threshold_100": 0.07725323425491137,
286
+ "scr_metric_threshold_100": 0.07725323425491137,
287
+ "scr_dir2_threshold_100": 0.17142860386646747,
288
+ "scr_dir1_threshold_500": -0.01716741381374498,
289
+ "scr_metric_threshold_500": -0.01716741381374498,
290
+ "scr_dir2_threshold_500": 0.20952391224381361
291
+ }
292
+ ],
293
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
294
+ "sae_lens_id": "custom_sae",
295
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_5",
296
+ "sae_lens_version": "5.4.2",
297
+ "sae_cfg_dict": {
298
+ "model_name": "gemma-2-2b",
299
+ "d_in": 2304,
300
+ "d_sae": 16384,
301
+ "hook_layer": 12,
302
+ "hook_name": "blocks.12.hook_resid_post",
303
+ "context_size": null,
304
+ "hook_head_index": null,
305
+ "architecture": "standard_april_update",
306
+ "apply_b_dec_to_input": null,
307
+ "finetuning_scaling_factor": null,
308
+ "activation_fn_str": "",
309
+ "prepend_bos": true,
310
+ "normalize_activations": "none",
311
+ "dtype": "bfloat16",
312
+ "device": "",
313
+ "dataset_path": "",
314
+ "dataset_trust_remote_code": true,
315
+ "seqpos_slice": [
316
+ null
317
+ ],
318
+ "training_tokens": -100000,
319
+ "sae_lens_training_version": null,
320
+ "neuronpedia_id": null
321
+ },
322
+ "eval_result_unstructured": null
323
+ }
eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_0_custom_sae_eval_results.json ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "scr",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "canrager/amazon_reviews_mcauley_1and5"
8
+ ],
9
+ "perform_scr": true,
10
+ "early_stopping_patience": 20,
11
+ "train_set_size": 4000,
12
+ "test_set_size": 1000,
13
+ "context_length": 128,
14
+ "probe_train_batch_size": 16,
15
+ "probe_test_batch_size": 500,
16
+ "probe_epochs": 20,
17
+ "probe_lr": 0.001,
18
+ "probe_l1_penalty": 0.001,
19
+ "sae_batch_size": 125,
20
+ "llm_batch_size": 32,
21
+ "llm_dtype": "bfloat16",
22
+ "lower_vram_usage": false,
23
+ "model_name": "gemma-2-2b",
24
+ "n_values": [
25
+ 2,
26
+ 5,
27
+ 10,
28
+ 20,
29
+ 50,
30
+ 100,
31
+ 500
32
+ ],
33
+ "column1_vals_lookup": {
34
+ "LabHC/bias_in_bios_class_set1": [
35
+ [
36
+ "professor",
37
+ "nurse"
38
+ ],
39
+ [
40
+ "architect",
41
+ "journalist"
42
+ ],
43
+ [
44
+ "surgeon",
45
+ "psychologist"
46
+ ],
47
+ [
48
+ "attorney",
49
+ "teacher"
50
+ ]
51
+ ],
52
+ "canrager/amazon_reviews_mcauley_1and5": [
53
+ [
54
+ "Books",
55
+ "CDs_and_Vinyl"
56
+ ],
57
+ [
58
+ "Software",
59
+ "Electronics"
60
+ ],
61
+ [
62
+ "Pet_Supplies",
63
+ "Office_Products"
64
+ ],
65
+ [
66
+ "Industrial_and_Scientific",
67
+ "Toys_and_Games"
68
+ ]
69
+ ]
70
+ }
71
+ },
72
+ "eval_id": "36f72b33-df41-4d38-bfd9-f854ca040630",
73
+ "datetime_epoch_millis": 1740118958907,
74
+ "eval_result_metrics": {
75
+ "scr_metrics": {
76
+ "scr_dir1_threshold_2": 0.24979050898109004,
77
+ "scr_metric_threshold_2": 0.055854359753664165,
78
+ "scr_dir2_threshold_2": 0.0516723626149536,
79
+ "scr_dir1_threshold_5": 0.27162040359297007,
80
+ "scr_metric_threshold_5": 0.09149941020572316,
81
+ "scr_dir2_threshold_5": 0.08832394684824034,
82
+ "scr_dir1_threshold_10": 0.28952263621706936,
83
+ "scr_metric_threshold_10": 0.13831941343932047,
84
+ "scr_dir2_threshold_10": 0.13359325367877836,
85
+ "scr_dir1_threshold_20": 0.30289985135474484,
86
+ "scr_metric_threshold_20": 0.1961941945802515,
87
+ "scr_dir2_threshold_20": 0.18955717011659254,
88
+ "scr_dir1_threshold_50": 0.2990525207353826,
89
+ "scr_metric_threshold_50": 0.26139090865747827,
90
+ "scr_dir2_threshold_50": 0.26159527889720674,
91
+ "scr_dir1_threshold_100": 0.3108402066933552,
92
+ "scr_metric_threshold_100": 0.2969487983949683,
93
+ "scr_dir2_threshold_100": 0.30143736513053243,
94
+ "scr_dir1_threshold_500": 0.27165005049679475,
95
+ "scr_metric_threshold_500": 0.33243318043883835,
96
+ "scr_dir2_threshold_500": 0.32957960059341024
97
+ }
98
+ },
99
+ "eval_result_details": [
100
+ {
101
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_professor_nurse_results",
102
+ "scr_dir1_threshold_2": 0.444444654690226,
103
+ "scr_metric_threshold_2": 0.0,
104
+ "scr_dir2_threshold_2": 0.0,
105
+ "scr_dir1_threshold_5": 0.46031747533501616,
106
+ "scr_metric_threshold_5": 0.007371057746493725,
107
+ "scr_dir2_threshold_5": 0.007371057746493725,
108
+ "scr_dir1_threshold_10": 0.444444654690226,
109
+ "scr_metric_threshold_10": 0.01474211549298745,
110
+ "scr_dir2_threshold_10": 0.01474211549298745,
111
+ "scr_dir1_threshold_20": 0.4920631166245965,
112
+ "scr_metric_threshold_20": 0.024570143672067307,
113
+ "scr_dir2_threshold_20": 0.024570143672067307,
114
+ "scr_dir1_threshold_50": 0.46031747533501616,
115
+ "scr_metric_threshold_50": 0.036855142283733294,
116
+ "scr_dir2_threshold_50": 0.036855142283733294,
117
+ "scr_dir1_threshold_100": 0.42857088793941894,
118
+ "scr_metric_threshold_100": 0.061425139507065275,
119
+ "scr_dir2_threshold_100": 0.061425139507065275,
120
+ "scr_dir1_threshold_500": 0.23809514798990317,
121
+ "scr_metric_threshold_500": 0.08599513673039726,
122
+ "scr_dir2_threshold_500": 0.08599513673039726
123
+ },
124
+ {
125
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_architect_journalist_results",
126
+ "scr_dir1_threshold_2": 0.2828283193172076,
127
+ "scr_metric_threshold_2": 0.0679886403335699,
128
+ "scr_dir2_threshold_2": 0.0679886403335699,
129
+ "scr_dir1_threshold_5": 0.30303008409675464,
130
+ "scr_metric_threshold_5": 0.13031163099421397,
131
+ "scr_dir2_threshold_5": 0.13031163099421397,
132
+ "scr_dir1_threshold_10": 0.2626259524704024,
133
+ "scr_metric_threshold_10": 0.15297456738930126,
134
+ "scr_dir2_threshold_10": 0.15297456738930126,
135
+ "scr_dir1_threshold_20": 0.24242418769085533,
136
+ "scr_metric_threshold_20": 0.2096317395253275,
137
+ "scr_dir2_threshold_20": 0.2096317395253275,
138
+ "scr_dir1_threshold_50": 0.2323233053010818,
139
+ "scr_metric_threshold_50": 0.27478755502243446,
140
+ "scr_dir2_threshold_50": 0.27478755502243446,
141
+ "scr_dir1_threshold_100": 0.2121209384542766,
142
+ "scr_metric_threshold_100": 0.3484420138806222,
143
+ "scr_dir2_threshold_100": 0.3484420138806222,
144
+ "scr_dir1_threshold_500": 0.11111091042202506,
145
+ "scr_metric_threshold_500": 0.2521246186273472,
146
+ "scr_dir2_threshold_500": 0.2521246186273472
147
+ },
148
+ {
149
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_surgeon_psychologist_results",
150
+ "scr_dir1_threshold_2": 0.45161302727298075,
151
+ "scr_metric_threshold_2": 0.020202059731676766,
152
+ "scr_dir2_threshold_2": 0.020202059731676766,
153
+ "scr_dir1_threshold_5": 0.48387068863579336,
154
+ "scr_metric_threshold_5": 0.03787889962608624,
155
+ "scr_dir2_threshold_5": 0.03787889962608624,
156
+ "scr_dir1_threshold_10": 0.532258142045613,
157
+ "scr_metric_threshold_10": 0.055555589003726494,
158
+ "scr_dir2_threshold_10": 0.055555589003726494,
159
+ "scr_dir1_threshold_20": 0.516128349998606,
160
+ "scr_metric_threshold_20": 0.08080808840993783,
161
+ "scr_dir2_threshold_20": 0.08080808840993783,
162
+ "scr_dir1_threshold_50": 0.45161302727298075,
163
+ "scr_metric_threshold_50": 0.14646470727950273,
164
+ "scr_dir2_threshold_50": 0.14646470727950273,
165
+ "scr_dir1_threshold_100": 0.532258142045613,
166
+ "scr_metric_threshold_100": 0.17424242652298136,
167
+ "scr_dir2_threshold_100": 0.17424242652298136,
168
+ "scr_dir1_threshold_500": 0.37096695113474787,
169
+ "scr_metric_threshold_500": 0.10606058781614919,
170
+ "scr_dir2_threshold_500": 0.10606058781614919
171
+ },
172
+ {
173
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_attorney_teacher_results",
174
+ "scr_dir1_threshold_2": 0.3089429239609653,
175
+ "scr_metric_threshold_2": 0.06451600498511631,
176
+ "scr_dir2_threshold_2": 0.06451600498511631,
177
+ "scr_dir1_threshold_5": 0.32520335840604614,
178
+ "scr_metric_threshold_5": 0.10850441266694984,
179
+ "scr_dir2_threshold_5": 0.10850441266694984,
180
+ "scr_dir1_threshold_10": 0.3739836925602758,
181
+ "scr_metric_threshold_10": 0.15835767335602155,
182
+ "scr_dir2_threshold_10": 0.15835767335602155,
183
+ "scr_dir1_threshold_20": 0.32520335840604614,
184
+ "scr_metric_threshold_20": 0.19354836454274843,
185
+ "scr_dir2_threshold_20": 0.19354836454274843,
186
+ "scr_dir1_threshold_50": 0.1788618713528507,
187
+ "scr_metric_threshold_50": 0.2903225468141226,
188
+ "scr_dir2_threshold_50": 0.2903225468141226,
189
+ "scr_dir1_threshold_100": 0.13821151212590824,
190
+ "scr_metric_threshold_100": 0.3548387265929387,
191
+ "scr_dir2_threshold_100": 0.3548387265929387,
192
+ "scr_dir1_threshold_500": -0.040650359226942455,
193
+ "scr_metric_threshold_500": 0.1671553898511282,
194
+ "scr_dir2_threshold_500": 0.1671553898511282
195
+ },
196
+ {
197
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Books_CDs_and_Vinyl_results",
198
+ "scr_dir1_threshold_2": 0.04371592886659017,
199
+ "scr_metric_threshold_2": 0.0742187136202179,
200
+ "scr_dir2_threshold_2": 0.0742187136202179,
201
+ "scr_dir1_threshold_5": 0.04918033854781611,
202
+ "scr_metric_threshold_5": 0.1171876018633899,
203
+ "scr_dir2_threshold_5": 0.1171876018633899,
204
+ "scr_dir1_threshold_10": 0.05464474822904205,
205
+ "scr_metric_threshold_10": 0.21875017462295412,
206
+ "scr_dir2_threshold_10": 0.21875017462295412,
207
+ "scr_dir1_threshold_20": 0.06010915791026799,
208
+ "scr_metric_threshold_20": 0.3828126309672156,
209
+ "scr_dir2_threshold_20": 0.3828126309672156,
210
+ "scr_dir1_threshold_50": 0.07103830298111119,
211
+ "scr_metric_threshold_50": 0.542968888243172,
212
+ "scr_dir2_threshold_50": 0.542968888243172,
213
+ "scr_dir1_threshold_100": 0.09289626741440628,
214
+ "scr_metric_threshold_100": 0.628906199068305,
215
+ "scr_dir2_threshold_100": 0.628906199068305,
216
+ "scr_dir1_threshold_500": 0.08196712234356307,
217
+ "scr_metric_threshold_500": 0.7382811699644793,
218
+ "scr_dir2_threshold_500": 0.7382811699644793
219
+ },
220
+ {
221
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Software_Electronics_results",
222
+ "scr_dir1_threshold_2": 0.1692307363130155,
223
+ "scr_metric_threshold_2": 0.04032253800396563,
224
+ "scr_dir2_threshold_2": 0.04032253800396563,
225
+ "scr_dir1_threshold_5": 0.2102561563194528,
226
+ "scr_metric_threshold_5": 0.036290332271825994,
227
+ "scr_dir2_threshold_5": 0.036290332271825994,
228
+ "scr_dir1_threshold_10": 0.2358974641126534,
229
+ "scr_metric_threshold_10": 0.1008065853511987,
230
+ "scr_dir2_threshold_10": 0.1008065853511987,
231
+ "scr_dir1_threshold_20": 0.28205102341178834,
232
+ "scr_metric_threshold_20": 0.1491935348194436,
233
+ "scr_dir2_threshold_20": 0.1491935348194436,
234
+ "scr_dir1_threshold_50": 0.35897433546167773,
235
+ "scr_metric_threshold_50": 0.1733872498948507,
236
+ "scr_dir2_threshold_50": 0.1733872498948507,
237
+ "scr_dir1_threshold_100": 0.38974347688271976,
238
+ "scr_metric_threshold_100": 0.26209673736706124,
239
+ "scr_dir2_threshold_100": 0.26209673736706124,
240
+ "scr_dir1_threshold_500": 0.512820348231744,
241
+ "scr_metric_threshold_500": 0.4072580664543652,
242
+ "scr_dir2_threshold_500": 0.4072580664543652
243
+ },
244
+ {
245
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Pet_Supplies_Office_Products_results",
246
+ "scr_dir1_threshold_2": 0.21171166817289214,
247
+ "scr_metric_threshold_2": 0.09375010809992398,
248
+ "scr_dir2_threshold_2": 0.09375010809992398,
249
+ "scr_dir1_threshold_5": 0.22522527602051473,
250
+ "scr_metric_threshold_5": 0.17857149509445872,
251
+ "scr_dir2_threshold_5": 0.17857149509445872,
252
+ "scr_dir1_threshold_10": 0.27927917043223033,
253
+ "scr_metric_threshold_10": 0.27232133710226214,
254
+ "scr_dir2_threshold_10": 0.27232133710226214,
255
+ "scr_dir1_threshold_20": 0.3378377797860784,
256
+ "scr_metric_threshold_20": 0.36160721769555176,
257
+ "scr_dir2_threshold_20": 0.36160721769555176,
258
+ "scr_dir1_threshold_50": 0.45045055204102946,
259
+ "scr_metric_threshold_50": 0.43749988358469727,
260
+ "scr_dir2_threshold_50": 0.43749988358469727,
261
+ "scr_dir1_threshold_100": 0.49549555354725494,
262
+ "scr_metric_threshold_100": 0.3482142690835282,
263
+ "scr_dir2_threshold_100": 0.3482142690835282,
264
+ "scr_dir1_threshold_500": 0.5855855565597059,
265
+ "scr_metric_threshold_500": 0.5892857475472294,
266
+ "scr_dir2_threshold_500": 0.5892857475472294
267
+ },
268
+ {
269
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Industrial_and_Scientific_Toys_and_Games_results",
270
+ "scr_dir1_threshold_2": 0.08583681325484281,
271
+ "scr_metric_threshold_2": 0.08583681325484281,
272
+ "scr_dir2_threshold_2": 0.05238083614515829,
273
+ "scr_dir1_threshold_5": 0.11587985138236706,
274
+ "scr_metric_threshold_5": 0.11587985138236706,
275
+ "scr_dir2_threshold_5": 0.09047614452250444,
276
+ "scr_dir1_threshold_10": 0.13304726519611204,
277
+ "scr_metric_threshold_10": 0.13304726519611204,
278
+ "scr_dir2_threshold_10": 0.09523798711177515,
279
+ "scr_dir1_threshold_20": 0.1673818370097199,
280
+ "scr_metric_threshold_20": 0.1673818370097199,
281
+ "scr_dir2_threshold_20": 0.11428564130044823,
282
+ "scr_dir1_threshold_50": 0.1888412961373127,
283
+ "scr_metric_threshold_50": 0.1888412961373127,
284
+ "scr_dir2_threshold_50": 0.19047625805514054,
285
+ "scr_dir1_threshold_100": 0.19742487513724416,
286
+ "scr_metric_threshold_100": 0.19742487513724416,
287
+ "scr_dir2_threshold_100": 0.2333334090217574,
288
+ "scr_dir1_threshold_500": 0.3133047265196112,
289
+ "scr_metric_threshold_500": 0.3133047265196112,
290
+ "scr_dir2_threshold_500": 0.2904760877561864
291
+ }
292
+ ],
293
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
294
+ "sae_lens_id": "custom_sae",
295
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_0",
296
+ "sae_lens_version": "5.4.2",
297
+ "sae_cfg_dict": {
298
+ "model_name": "gemma-2-2b",
299
+ "d_in": 2304,
300
+ "d_sae": 65536,
301
+ "hook_layer": 12,
302
+ "hook_name": "blocks.12.hook_resid_post",
303
+ "context_size": null,
304
+ "hook_head_index": null,
305
+ "architecture": "standard_april_update",
306
+ "apply_b_dec_to_input": null,
307
+ "finetuning_scaling_factor": null,
308
+ "activation_fn_str": "",
309
+ "prepend_bos": true,
310
+ "normalize_activations": "none",
311
+ "dtype": "bfloat16",
312
+ "device": "",
313
+ "dataset_path": "",
314
+ "dataset_trust_remote_code": true,
315
+ "seqpos_slice": [
316
+ null
317
+ ],
318
+ "training_tokens": -100000,
319
+ "sae_lens_training_version": null,
320
+ "neuronpedia_id": null
321
+ },
322
+ "eval_result_unstructured": null
323
+ }
eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_1_custom_sae_eval_results.json ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "scr",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "canrager/amazon_reviews_mcauley_1and5"
8
+ ],
9
+ "perform_scr": true,
10
+ "early_stopping_patience": 20,
11
+ "train_set_size": 4000,
12
+ "test_set_size": 1000,
13
+ "context_length": 128,
14
+ "probe_train_batch_size": 16,
15
+ "probe_test_batch_size": 500,
16
+ "probe_epochs": 20,
17
+ "probe_lr": 0.001,
18
+ "probe_l1_penalty": 0.001,
19
+ "sae_batch_size": 125,
20
+ "llm_batch_size": 32,
21
+ "llm_dtype": "bfloat16",
22
+ "lower_vram_usage": false,
23
+ "model_name": "gemma-2-2b",
24
+ "n_values": [
25
+ 2,
26
+ 5,
27
+ 10,
28
+ 20,
29
+ 50,
30
+ 100,
31
+ 500
32
+ ],
33
+ "column1_vals_lookup": {
34
+ "LabHC/bias_in_bios_class_set1": [
35
+ [
36
+ "professor",
37
+ "nurse"
38
+ ],
39
+ [
40
+ "architect",
41
+ "journalist"
42
+ ],
43
+ [
44
+ "surgeon",
45
+ "psychologist"
46
+ ],
47
+ [
48
+ "attorney",
49
+ "teacher"
50
+ ]
51
+ ],
52
+ "canrager/amazon_reviews_mcauley_1and5": [
53
+ [
54
+ "Books",
55
+ "CDs_and_Vinyl"
56
+ ],
57
+ [
58
+ "Software",
59
+ "Electronics"
60
+ ],
61
+ [
62
+ "Pet_Supplies",
63
+ "Office_Products"
64
+ ],
65
+ [
66
+ "Industrial_and_Scientific",
67
+ "Toys_and_Games"
68
+ ]
69
+ ]
70
+ }
71
+ },
72
+ "eval_id": "5a0073cc-a35a-4450-84b3-aa8d13197aa3",
73
+ "datetime_epoch_millis": 1740117859786,
74
+ "eval_result_metrics": {
75
+ "scr_metrics": {
76
+ "scr_dir1_threshold_2": 0.191448233102913,
77
+ "scr_metric_threshold_2": 0.0472308502002999,
78
+ "scr_dir2_threshold_2": 0.04966290665244659,
79
+ "scr_dir1_threshold_5": 0.2255345910565259,
80
+ "scr_metric_threshold_5": 0.07965041205345416,
81
+ "scr_dir2_threshold_5": 0.08452214993861284,
82
+ "scr_dir1_threshold_10": 0.23798183791765248,
83
+ "scr_metric_threshold_10": 0.11666396452576513,
84
+ "scr_dir2_threshold_10": 0.12510711983182563,
85
+ "scr_dir1_threshold_20": 0.25239838964999795,
86
+ "scr_metric_threshold_20": 0.1787346845260973,
87
+ "scr_dir2_threshold_20": 0.18592096559107646,
88
+ "scr_dir1_threshold_50": 0.24878684492088027,
89
+ "scr_metric_threshold_50": 0.26285298415524505,
90
+ "scr_dir2_threshold_50": 0.27229504826166795,
91
+ "scr_dir1_threshold_100": 0.26382247723884844,
92
+ "scr_metric_threshold_100": 0.3158182968041052,
93
+ "scr_dir2_threshold_100": 0.3274573553390736,
94
+ "scr_dir1_threshold_500": 0.23569674509699307,
95
+ "scr_metric_threshold_500": 0.34450320828286896,
96
+ "scr_dir2_threshold_500": 0.34439078548781776
97
+ }
98
+ },
99
+ "eval_result_details": [
100
+ {
101
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_professor_nurse_results",
102
+ "scr_dir1_threshold_2": 0.3809524260050484,
103
+ "scr_metric_threshold_2": 0.009828028179079856,
104
+ "scr_dir2_threshold_2": 0.009828028179079856,
105
+ "scr_dir1_threshold_5": 0.444444654690226,
106
+ "scr_metric_threshold_5": 0.012284998611665989,
107
+ "scr_dir2_threshold_5": 0.012284998611665989,
108
+ "scr_dir1_threshold_10": 0.42857088793941894,
109
+ "scr_metric_threshold_10": 0.019656056358159712,
110
+ "scr_dir2_threshold_10": 0.019656056358159712,
111
+ "scr_dir1_threshold_20": 0.3968252466498386,
112
+ "scr_metric_threshold_20": 0.02948408453723957,
113
+ "scr_dir2_threshold_20": 0.02948408453723957,
114
+ "scr_dir1_threshold_50": 0.41269806729462877,
115
+ "scr_metric_threshold_50": 0.06633908037223754,
116
+ "scr_dir2_threshold_50": 0.06633908037223754,
117
+ "scr_dir1_threshold_100": 0.47619029597980633,
118
+ "scr_metric_threshold_100": 0.08108119586522498,
119
+ "scr_dir2_threshold_100": 0.08108119586522498,
120
+ "scr_dir1_threshold_500": 0.2857136099242737,
121
+ "scr_metric_threshold_500": 0.022113026790745845,
122
+ "scr_dir2_threshold_500": 0.022113026790745845
123
+ },
124
+ {
125
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_architect_journalist_results",
126
+ "scr_dir1_threshold_2": 0.2929292017069811,
127
+ "scr_metric_threshold_2": 0.08498575820403938,
128
+ "scr_dir2_threshold_2": 0.08498575820403938,
129
+ "scr_dir1_threshold_5": 0.2929292017069811,
130
+ "scr_metric_threshold_5": 0.12747880615775103,
131
+ "scr_dir2_threshold_5": 0.12747880615775103,
132
+ "scr_dir1_threshold_10": 0.2626259524704024,
133
+ "scr_metric_threshold_10": 0.17280451009623365,
134
+ "scr_dir2_threshold_10": 0.17280451009623365,
135
+ "scr_dir1_threshold_20": 0.2727268348601759,
136
+ "scr_metric_threshold_20": 0.2096317395253275,
137
+ "scr_dir2_threshold_20": 0.2096317395253275,
138
+ "scr_dir1_threshold_50": 0.2626259524704024,
139
+ "scr_metric_threshold_50": 0.32861190232199783,
140
+ "scr_dir2_threshold_50": 0.32861190232199783,
141
+ "scr_dir1_threshold_100": 0.2323233053010818,
142
+ "scr_metric_threshold_100": 0.36543913175109166,
143
+ "scr_dir2_threshold_100": 0.36543913175109166,
144
+ "scr_dir1_threshold_500": 0.22222182084405012,
145
+ "scr_metric_threshold_500": 0.3201132589609171,
146
+ "scr_dir2_threshold_500": 0.3201132589609171
147
+ },
148
+ {
149
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_surgeon_psychologist_results",
150
+ "scr_dir1_threshold_2": 0.37096695113474787,
151
+ "scr_metric_threshold_2": 0.012626249703105673,
152
+ "scr_dir2_threshold_2": 0.012626249703105673,
153
+ "scr_dir1_threshold_5": 0.46774185795438705,
154
+ "scr_metric_threshold_5": 0.017676839894409477,
155
+ "scr_dir2_threshold_5": 0.017676839894409477,
156
+ "scr_dir1_threshold_10": 0.516128349998606,
157
+ "scr_metric_threshold_10": 0.04040411946335353,
158
+ "scr_dir2_threshold_10": 0.04040411946335353,
159
+ "scr_dir1_threshold_20": 0.48387068863579336,
160
+ "scr_metric_threshold_20": 0.08838389843850893,
161
+ "scr_dir2_threshold_20": 0.08838389843850893,
162
+ "scr_dir1_threshold_50": 0.3870967431817548,
163
+ "scr_metric_threshold_50": 0.11868683751925485,
164
+ "scr_dir2_threshold_50": 0.11868683751925485,
165
+ "scr_dir1_threshold_100": 0.37096695113474787,
166
+ "scr_metric_threshold_100": 0.16414139665714297,
167
+ "scr_dir2_threshold_100": 0.16414139665714297,
168
+ "scr_dir1_threshold_500": 0.17741906022667078,
169
+ "scr_metric_threshold_500": 0.14141411708819893,
170
+ "scr_dir2_threshold_500": 0.14141411708819893
171
+ },
172
+ {
173
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_attorney_teacher_results",
174
+ "scr_dir1_threshold_2": 0.2926829741063909,
175
+ "scr_metric_threshold_2": 0.06451600498511631,
176
+ "scr_dir2_threshold_2": 0.06451600498511631,
177
+ "scr_dir1_threshold_5": 0.3089429239609653,
178
+ "scr_metric_threshold_5": 0.10557172397278113,
179
+ "scr_dir2_threshold_5": 0.10557172397278113,
180
+ "scr_dir1_threshold_10": 0.31707338347875896,
181
+ "scr_metric_threshold_10": 0.15542515945555258,
182
+ "scr_dir2_threshold_10": 0.15542515945555258,
183
+ "scr_dir1_threshold_20": 0.3089429239609653,
184
+ "scr_metric_threshold_20": 0.211143622739262,
185
+ "scr_dir2_threshold_20": 0.211143622739262,
186
+ "scr_dir1_threshold_50": 0.24390263995216122,
187
+ "scr_metric_threshold_50": 0.281524830319016,
188
+ "scr_dir2_threshold_50": 0.281524830319016,
189
+ "scr_dir1_threshold_100": 0.10569112782625298,
190
+ "scr_metric_threshold_100": 0.3519062126924697,
191
+ "scr_dir2_threshold_100": 0.3519062126924697,
192
+ "scr_dir1_threshold_500": -0.11382110275354018,
193
+ "scr_metric_threshold_500": 0.17595310634623484,
194
+ "scr_dir2_threshold_500": 0.17595310634623484
195
+ },
196
+ {
197
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Books_CDs_and_Vinyl_results",
198
+ "scr_dir1_threshold_2": 0.03825119347697291,
199
+ "scr_metric_threshold_2": 0.0585939173469977,
200
+ "scr_dir2_threshold_2": 0.0585939173469977,
201
+ "scr_dir1_threshold_5": 0.04918033854781611,
202
+ "scr_metric_threshold_5": 0.1171876018633899,
203
+ "scr_dir2_threshold_5": 0.1171876018633899,
204
+ "scr_dir1_threshold_10": 0.06010915791026799,
205
+ "scr_metric_threshold_10": 0.18750011641530276,
206
+ "scr_dir2_threshold_10": 0.18750011641530276,
207
+ "scr_dir1_threshold_20": 0.06010915791026799,
208
+ "scr_metric_threshold_20": 0.27734385913934634,
209
+ "scr_dir2_threshold_20": 0.27734385913934634,
210
+ "scr_dir1_threshold_50": 0.01092881936245188,
211
+ "scr_metric_threshold_50": 0.5351562572759564,
212
+ "scr_dir2_threshold_50": 0.5351562572759564,
213
+ "scr_dir1_threshold_100": 0.03278678379574697,
214
+ "scr_metric_threshold_100": 0.6679686554125666,
215
+ "scr_dir2_threshold_100": 0.6679686554125666,
216
+ "scr_dir1_threshold_500": 0.09836067709563222,
217
+ "scr_metric_threshold_500": 0.75781239813661,
218
+ "scr_dir2_threshold_500": 0.75781239813661
219
+ },
220
+ {
221
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Software_Electronics_results",
222
+ "scr_dir1_threshold_2": 0.0410254200064373,
223
+ "scr_metric_threshold_2": 0.024193715075407077,
224
+ "scr_dir2_threshold_2": 0.024193715075407077,
225
+ "scr_dir1_threshold_5": 0.07692300638503319,
226
+ "scr_metric_threshold_5": 0.04032253800396563,
227
+ "scr_dir2_threshold_5": 0.04032253800396563,
228
+ "scr_dir1_threshold_10": 0.12820501064172196,
229
+ "scr_metric_threshold_10": 0.060483807005948444,
230
+ "scr_dir2_threshold_10": 0.060483807005948444,
231
+ "scr_dir1_threshold_20": 0.19487173844135988,
232
+ "scr_metric_threshold_20": 0.15322574055158325,
233
+ "scr_dir2_threshold_20": 0.15322574055158325,
234
+ "scr_dir1_threshold_50": 0.2512818819907463,
235
+ "scr_metric_threshold_50": 0.20161293036111277,
236
+ "scr_dir2_threshold_50": 0.20161293036111277,
237
+ "scr_dir1_threshold_100": 0.3282048883757795,
238
+ "scr_metric_threshold_100": 0.24596767409721804,
239
+ "scr_dir2_threshold_100": 0.24596767409721804,
240
+ "scr_dir1_threshold_500": 0.338461472626031,
241
+ "scr_metric_threshold_500": 0.3951612089166617,
242
+ "scr_dir2_threshold_500": 0.3951612089166617
243
+ },
244
+ {
245
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Pet_Supplies_Office_Products_results",
246
+ "scr_dir1_threshold_2": 0.06756750225933819,
247
+ "scr_metric_threshold_2": 0.07589293198126605,
248
+ "scr_dir2_threshold_2": 0.07589293198126605,
249
+ "scr_dir1_threshold_5": 0.11261250376556366,
250
+ "scr_metric_threshold_5": 0.16517854648243513,
251
+ "scr_dir2_threshold_5": 0.16517854648243513,
252
+ "scr_dir1_threshold_10": 0.13963971946080886,
253
+ "scr_metric_threshold_10": 0.24553570597033553,
254
+ "scr_dir2_threshold_10": 0.24553570597033553,
255
+ "scr_dir1_threshold_20": 0.21171166817289214,
256
+ "scr_metric_threshold_20": 0.37053567270882043,
257
+ "scr_dir2_threshold_20": 0.37053567270882043,
258
+ "scr_dir1_threshold_50": 0.297297224732598,
259
+ "scr_metric_threshold_50": 0.4464286046900865,
260
+ "scr_dir2_threshold_50": 0.4464286046900865,
261
+ "scr_dir1_threshold_100": 0.4099099969875491,
262
+ "scr_metric_threshold_100": 0.4955356394473054,
263
+ "scr_dir2_threshold_100": 0.4955356394473054,
264
+ "scr_dir1_threshold_500": 0.5810811101069608,
265
+ "scr_metric_threshold_500": 0.647321237317717,
266
+ "scr_dir2_threshold_500": 0.647321237317717
267
+ },
268
+ {
269
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Industrial_and_Scientific_Toys_and_Games_results",
270
+ "scr_dir1_threshold_2": 0.047210196127387125,
271
+ "scr_metric_threshold_2": 0.047210196127387125,
272
+ "scr_dir2_threshold_2": 0.06666664774456064,
273
+ "scr_dir1_threshold_5": 0.05150224144123495,
274
+ "scr_metric_threshold_5": 0.05150224144123495,
275
+ "scr_dir2_threshold_5": 0.09047614452250444,
276
+ "scr_dir1_threshold_10": 0.05150224144123495,
277
+ "scr_metric_threshold_10": 0.05150224144123495,
278
+ "scr_dir2_threshold_10": 0.11904748388971893,
279
+ "scr_dir1_threshold_20": 0.09012885856869063,
280
+ "scr_metric_threshold_20": 0.09012885856869063,
281
+ "scr_dir2_threshold_20": 0.14761910708852366,
282
+ "scr_dir1_threshold_50": 0.1244634303822985,
283
+ "scr_metric_threshold_50": 0.1244634303822985,
284
+ "scr_dir2_threshold_50": 0.19999994323368195,
285
+ "scr_dir1_threshold_100": 0.15450646850982275,
286
+ "scr_metric_threshold_100": 0.15450646850982275,
287
+ "scr_dir2_threshold_100": 0.24761893678956953,
288
+ "scr_dir1_threshold_500": 0.29613731270586624,
289
+ "scr_metric_threshold_500": 0.29613731270586624,
290
+ "scr_dir2_threshold_500": 0.2952379303454571
291
+ }
292
+ ],
293
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
294
+ "sae_lens_id": "custom_sae",
295
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_1",
296
+ "sae_lens_version": "5.4.2",
297
+ "sae_cfg_dict": {
298
+ "model_name": "gemma-2-2b",
299
+ "d_in": 2304,
300
+ "d_sae": 65536,
301
+ "hook_layer": 12,
302
+ "hook_name": "blocks.12.hook_resid_post",
303
+ "context_size": null,
304
+ "hook_head_index": null,
305
+ "architecture": "standard_april_update",
306
+ "apply_b_dec_to_input": null,
307
+ "finetuning_scaling_factor": null,
308
+ "activation_fn_str": "",
309
+ "prepend_bos": true,
310
+ "normalize_activations": "none",
311
+ "dtype": "bfloat16",
312
+ "device": "",
313
+ "dataset_path": "",
314
+ "dataset_trust_remote_code": true,
315
+ "seqpos_slice": [
316
+ null
317
+ ],
318
+ "training_tokens": -100000,
319
+ "sae_lens_training_version": null,
320
+ "neuronpedia_id": null
321
+ },
322
+ "eval_result_unstructured": null
323
+ }
eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_2_custom_sae_eval_results.json ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "scr",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "canrager/amazon_reviews_mcauley_1and5"
8
+ ],
9
+ "perform_scr": true,
10
+ "early_stopping_patience": 20,
11
+ "train_set_size": 4000,
12
+ "test_set_size": 1000,
13
+ "context_length": 128,
14
+ "probe_train_batch_size": 16,
15
+ "probe_test_batch_size": 500,
16
+ "probe_epochs": 20,
17
+ "probe_lr": 0.001,
18
+ "probe_l1_penalty": 0.001,
19
+ "sae_batch_size": 125,
20
+ "llm_batch_size": 32,
21
+ "llm_dtype": "bfloat16",
22
+ "lower_vram_usage": false,
23
+ "model_name": "gemma-2-2b",
24
+ "n_values": [
25
+ 2,
26
+ 5,
27
+ 10,
28
+ 20,
29
+ 50,
30
+ 100,
31
+ 500
32
+ ],
33
+ "column1_vals_lookup": {
34
+ "LabHC/bias_in_bios_class_set1": [
35
+ [
36
+ "professor",
37
+ "nurse"
38
+ ],
39
+ [
40
+ "architect",
41
+ "journalist"
42
+ ],
43
+ [
44
+ "surgeon",
45
+ "psychologist"
46
+ ],
47
+ [
48
+ "attorney",
49
+ "teacher"
50
+ ]
51
+ ],
52
+ "canrager/amazon_reviews_mcauley_1and5": [
53
+ [
54
+ "Books",
55
+ "CDs_and_Vinyl"
56
+ ],
57
+ [
58
+ "Software",
59
+ "Electronics"
60
+ ],
61
+ [
62
+ "Pet_Supplies",
63
+ "Office_Products"
64
+ ],
65
+ [
66
+ "Industrial_and_Scientific",
67
+ "Toys_and_Games"
68
+ ]
69
+ ]
70
+ }
71
+ },
72
+ "eval_id": "f57eb63e-2a03-4e6b-aaff-e2b92698d5fe",
73
+ "datetime_epoch_millis": 1740119326137,
74
+ "eval_result_metrics": {
75
+ "scr_metrics": {
76
+ "scr_dir1_threshold_2": 0.20265948821993934,
77
+ "scr_metric_threshold_2": 0.048537309675205614,
78
+ "scr_dir2_threshold_2": 0.045545808662761506,
79
+ "scr_dir1_threshold_5": 0.2488046105832982,
80
+ "scr_metric_threshold_5": 0.0910869476336924,
81
+ "scr_dir2_threshold_5": 0.08505281045129286,
82
+ "scr_dir1_threshold_10": 0.28174835789483105,
83
+ "scr_metric_threshold_10": 0.13036522550176255,
84
+ "scr_dir2_threshold_10": 0.1281375712660796,
85
+ "scr_dir1_threshold_20": 0.2805756361241489,
86
+ "scr_metric_threshold_20": 0.18117000848692164,
87
+ "scr_dir2_threshold_20": 0.1785821184954971,
88
+ "scr_dir1_threshold_50": 0.29662834329464566,
89
+ "scr_metric_threshold_50": 0.25032082969999164,
90
+ "scr_dir2_threshold_50": 0.24444765201309387,
91
+ "scr_dir1_threshold_100": 0.2932320080351788,
92
+ "scr_metric_threshold_100": 0.31924960264000657,
93
+ "scr_dir2_threshold_100": 0.31152428490941975,
94
+ "scr_dir1_threshold_500": 0.21805877454178113,
95
+ "scr_metric_threshold_500": 0.38765785771241235,
96
+ "scr_dir2_threshold_500": 0.382364564457237
97
+ }
98
+ },
99
+ "eval_result_details": [
100
+ {
101
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_professor_nurse_results",
102
+ "scr_dir1_threshold_2": 0.31746019731987085,
103
+ "scr_metric_threshold_2": 0.0,
104
+ "scr_dir2_threshold_2": 0.0,
105
+ "scr_dir1_threshold_5": 0.42857088793941894,
106
+ "scr_metric_threshold_5": 0.009828028179079856,
107
+ "scr_dir2_threshold_5": 0.009828028179079856,
108
+ "scr_dir1_threshold_10": 0.4920631166245965,
109
+ "scr_metric_threshold_10": 0.01474211549298745,
110
+ "scr_dir2_threshold_10": 0.01474211549298745,
111
+ "scr_dir1_threshold_20": 0.3968252466498386,
112
+ "scr_metric_threshold_20": 0.036855142283733294,
113
+ "scr_dir2_threshold_20": 0.036855142283733294,
114
+ "scr_dir1_threshold_50": 0.41269806729462877,
115
+ "scr_metric_threshold_50": 0.07616710855131739,
116
+ "scr_dir2_threshold_50": 0.07616710855131739,
117
+ "scr_dir1_threshold_100": 0.31746019731987085,
118
+ "scr_metric_threshold_100": 0.09090907759556952,
119
+ "scr_dir2_threshold_100": 0.09090907759556952,
120
+ "scr_dir1_threshold_500": 0.19047573994951578,
121
+ "scr_metric_threshold_500": 0.13513513117706122,
122
+ "scr_dir2_threshold_500": 0.13513513117706122
123
+ },
124
+ {
125
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_architect_journalist_results",
126
+ "scr_dir1_threshold_2": 0.24242418769085533,
127
+ "scr_metric_threshold_2": 0.07365445885818771,
128
+ "scr_dir2_threshold_2": 0.07365445885818771,
129
+ "scr_dir1_threshold_5": 0.2828283193172076,
130
+ "scr_metric_threshold_5": 0.1359772806671398,
131
+ "scr_dir2_threshold_5": 0.1359772806671398,
132
+ "scr_dir1_threshold_10": 0.2727268348601759,
133
+ "scr_metric_threshold_10": 0.16147304189869,
134
+ "scr_dir2_threshold_10": 0.16147304189869,
135
+ "scr_dir1_threshold_20": 0.2626259524704024,
136
+ "scr_metric_threshold_20": 0.21813038288640824,
137
+ "scr_dir2_threshold_20": 0.21813038288640824,
138
+ "scr_dir1_threshold_50": 0.2626259524704024,
139
+ "scr_metric_threshold_50": 0.3002833162539847,
140
+ "scr_dir2_threshold_50": 0.3002833162539847,
141
+ "scr_dir1_threshold_100": 0.25252507008062886,
142
+ "scr_metric_threshold_100": 0.38526907445802405,
143
+ "scr_dir2_threshold_100": 0.38526907445802405,
144
+ "scr_dir1_threshold_500": 0.03030264716932058,
145
+ "scr_metric_threshold_500": 0.5240793488133187,
146
+ "scr_dir2_threshold_500": 0.5240793488133187
147
+ },
148
+ {
149
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_surgeon_psychologist_results",
150
+ "scr_dir1_threshold_2": 0.3064516284091226,
151
+ "scr_metric_threshold_2": 0.015151620057142186,
152
+ "scr_dir2_threshold_2": 0.015151620057142186,
153
+ "scr_dir1_threshold_5": 0.37096695113474787,
154
+ "scr_metric_threshold_5": 0.042929339300620824,
155
+ "scr_dir2_threshold_5": 0.042929339300620824,
156
+ "scr_dir1_threshold_10": 0.43548323522597376,
157
+ "scr_metric_threshold_10": 0.058080808840993786,
158
+ "scr_dir2_threshold_10": 0.058080808840993786,
159
+ "scr_dir1_threshold_20": 0.46774185795438705,
160
+ "scr_metric_threshold_20": 0.10606058781614919,
161
+ "scr_dir2_threshold_20": 0.10606058781614919,
162
+ "scr_dir1_threshold_50": 0.43548323522597376,
163
+ "scr_metric_threshold_50": 0.14898992711677,
164
+ "scr_dir2_threshold_50": 0.14898992711677,
165
+ "scr_dir1_threshold_100": 0.4999995193171997,
166
+ "scr_metric_threshold_100": 0.19444448625465813,
167
+ "scr_dir2_threshold_100": 0.19444448625465813,
168
+ "scr_dir1_threshold_500": 0.24193534431789668,
169
+ "scr_metric_threshold_500": 0.16161617681987567,
170
+ "scr_dir2_threshold_500": 0.16161617681987567
171
+ },
172
+ {
173
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_attorney_teacher_results",
174
+ "scr_dir1_threshold_2": 0.23577218043436754,
175
+ "scr_metric_threshold_2": 0.06451600498511631,
176
+ "scr_dir2_threshold_2": 0.06451600498511631,
177
+ "scr_dir1_threshold_5": 0.2764230242518165,
178
+ "scr_metric_threshold_5": 0.10557172397278113,
179
+ "scr_dir2_threshold_5": 0.10557172397278113,
180
+ "scr_dir1_threshold_10": 0.32520335840604614,
181
+ "scr_metric_threshold_10": 0.16422287595065924,
182
+ "scr_dir2_threshold_10": 0.16422287595065924,
183
+ "scr_dir1_threshold_20": 0.32520335840604614,
184
+ "scr_metric_threshold_20": 0.211143622739262,
185
+ "scr_dir2_threshold_20": 0.211143622739262,
186
+ "scr_dir1_threshold_50": 0.22764220550708036,
187
+ "scr_metric_threshold_50": 0.3079178050106362,
188
+ "scr_dir2_threshold_50": 0.3079178050106362,
189
+ "scr_dir1_threshold_100": 0.0894311779716786,
190
+ "scr_metric_threshold_100": 0.40175947338154144,
191
+ "scr_dir2_threshold_100": 0.40175947338154144,
192
+ "scr_dir1_threshold_500": -0.17073141183505702,
193
+ "scr_metric_threshold_500": 0.3137830076052739,
194
+ "scr_dir2_threshold_500": 0.3137830076052739
195
+ },
196
+ {
197
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Books_CDs_and_Vinyl_results",
198
+ "scr_dir1_threshold_2": 0.03825119347697291,
199
+ "scr_metric_threshold_2": 0.05078128637978211,
200
+ "scr_dir2_threshold_2": 0.05078128637978211,
201
+ "scr_dir1_threshold_5": 0.04918033854781611,
202
+ "scr_metric_threshold_5": 0.09765637369125917,
203
+ "scr_dir2_threshold_5": 0.09765637369125917,
204
+ "scr_dir1_threshold_10": 0.04918033854781611,
205
+ "scr_metric_threshold_10": 0.16406245634426148,
206
+ "scr_dir2_threshold_10": 0.16406245634426148,
207
+ "scr_dir1_threshold_20": 0.03278678379574697,
208
+ "scr_metric_threshold_20": 0.26171883003552066,
209
+ "scr_dir2_threshold_20": 0.26171883003552066,
210
+ "scr_dir1_threshold_50": 0.05464474822904205,
211
+ "scr_metric_threshold_50": 0.4765625727595642,
212
+ "scr_dir2_threshold_50": 0.4765625727595642,
213
+ "scr_dir1_threshold_100": 0.05464474822904205,
214
+ "scr_metric_threshold_100": 0.6054687718278693,
215
+ "scr_dir2_threshold_100": 0.6054687718278693,
216
+ "scr_dir1_threshold_500": 0.09836067709563222,
217
+ "scr_metric_threshold_500": 0.7070313445874334,
218
+ "scr_dir2_threshold_500": 0.7070313445874334
219
+ },
220
+ {
221
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Software_Electronics_results",
222
+ "scr_dir1_threshold_2": 0.17435887560571312,
223
+ "scr_metric_threshold_2": 0.04032253800396563,
224
+ "scr_dir2_threshold_2": 0.04032253800396563,
225
+ "scr_dir1_threshold_5": 0.2102561563194528,
226
+ "scr_metric_threshold_5": 0.060483807005948444,
227
+ "scr_dir2_threshold_5": 0.060483807005948444,
228
+ "scr_dir1_threshold_10": 0.2358974641126534,
229
+ "scr_metric_threshold_10": 0.08870972781349516,
230
+ "scr_dir2_threshold_10": 0.08870972781349516,
231
+ "scr_dir1_threshold_20": 0.27179474482639304,
232
+ "scr_metric_threshold_20": 0.11290320254761761,
233
+ "scr_dir2_threshold_20": 0.11290320254761761,
234
+ "scr_dir1_threshold_50": 0.3333333333333333,
235
+ "scr_metric_threshold_50": 0.20161293036111277,
236
+ "scr_dir2_threshold_50": 0.20161293036111277,
237
+ "scr_dir1_threshold_100": 0.338461472626031,
238
+ "scr_metric_threshold_100": 0.2661291834404855,
239
+ "scr_dir2_threshold_100": 0.2661291834404855,
240
+ "scr_dir1_threshold_500": 0.48205120681070207,
241
+ "scr_metric_threshold_500": 0.41935492399206875,
242
+ "scr_dir2_threshold_500": 0.41935492399206875
243
+ },
244
+ {
245
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Pet_Supplies_Office_Products_results",
246
+ "scr_dir1_threshold_2": 0.22072082956776967,
247
+ "scr_metric_threshold_2": 0.058035755862608125,
248
+ "scr_dir2_threshold_2": 0.058035755862608125,
249
+ "scr_dir1_threshold_5": 0.24774777677362747,
250
+ "scr_metric_threshold_5": 0.15178559787041154,
251
+ "scr_dir2_threshold_5": 0.15178559787041154,
252
+ "scr_dir1_threshold_10": 0.30180167118534307,
253
+ "scr_metric_threshold_10": 0.24999993347696986,
254
+ "scr_dir2_threshold_10": 0.24999993347696986,
255
+ "scr_dir1_threshold_20": 0.32882888688058826,
256
+ "scr_metric_threshold_20": 0.3437500415768938,
257
+ "scr_dir2_threshold_20": 0.3437500415768938,
258
+ "scr_dir1_threshold_50": 0.4234233363457843,
259
+ "scr_metric_threshold_50": 0.2678571095956278,
260
+ "scr_dir2_threshold_50": 0.2678571095956278,
261
+ "scr_dir1_threshold_100": 0.5315313936586028,
262
+ "scr_metric_threshold_100": 0.3482142690835282,
263
+ "scr_dir2_threshold_100": 0.3482142690835282,
264
+ "scr_dir1_threshold_500": 0.5630630558065931,
265
+ "scr_metric_threshold_500": 0.5312499916846213,
266
+ "scr_dir2_threshold_500": 0.5312499916846213
267
+ },
268
+ {
269
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Industrial_and_Scientific_Toys_and_Games_results",
270
+ "scr_dir1_threshold_2": 0.08583681325484281,
271
+ "scr_metric_threshold_2": 0.08583681325484281,
272
+ "scr_dir2_threshold_2": 0.06190480515528994,
273
+ "scr_dir1_threshold_5": 0.1244634303822985,
274
+ "scr_metric_threshold_5": 0.1244634303822985,
275
+ "scr_dir2_threshold_5": 0.07619033292310208,
276
+ "scr_dir1_threshold_10": 0.1416308441960435,
277
+ "scr_metric_threshold_10": 0.1416308441960435,
278
+ "scr_dir2_threshold_10": 0.12380961031057988,
279
+ "scr_dir1_threshold_20": 0.15879825800978847,
280
+ "scr_metric_threshold_20": 0.15879825800978847,
281
+ "scr_dir2_threshold_20": 0.13809513807839202,
282
+ "scr_dir1_threshold_50": 0.22317586795092056,
283
+ "scr_metric_threshold_50": 0.22317586795092056,
284
+ "scr_dir2_threshold_50": 0.17619044645573817,
285
+ "scr_dir1_threshold_100": 0.2618024850783763,
286
+ "scr_metric_threshold_100": 0.2618024850783763,
287
+ "scr_dir2_threshold_100": 0.19999994323368195,
288
+ "scr_dir1_threshold_500": 0.3090129370196455,
289
+ "scr_metric_threshold_500": 0.3090129370196455,
290
+ "scr_dir2_threshold_500": 0.2666665909782426
291
+ }
292
+ ],
293
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
294
+ "sae_lens_id": "custom_sae",
295
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_2",
296
+ "sae_lens_version": "5.4.2",
297
+ "sae_cfg_dict": {
298
+ "model_name": "gemma-2-2b",
299
+ "d_in": 2304,
300
+ "d_sae": 65536,
301
+ "hook_layer": 12,
302
+ "hook_name": "blocks.12.hook_resid_post",
303
+ "context_size": null,
304
+ "hook_head_index": null,
305
+ "architecture": "standard_april_update",
306
+ "apply_b_dec_to_input": null,
307
+ "finetuning_scaling_factor": null,
308
+ "activation_fn_str": "",
309
+ "prepend_bos": true,
310
+ "normalize_activations": "none",
311
+ "dtype": "bfloat16",
312
+ "device": "",
313
+ "dataset_path": "",
314
+ "dataset_trust_remote_code": true,
315
+ "seqpos_slice": [
316
+ null
317
+ ],
318
+ "training_tokens": -100000,
319
+ "sae_lens_training_version": null,
320
+ "neuronpedia_id": null
321
+ },
322
+ "eval_result_unstructured": null
323
+ }
eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_3_custom_sae_eval_results.json ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "scr",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "canrager/amazon_reviews_mcauley_1and5"
8
+ ],
9
+ "perform_scr": true,
10
+ "early_stopping_patience": 20,
11
+ "train_set_size": 4000,
12
+ "test_set_size": 1000,
13
+ "context_length": 128,
14
+ "probe_train_batch_size": 16,
15
+ "probe_test_batch_size": 500,
16
+ "probe_epochs": 20,
17
+ "probe_lr": 0.001,
18
+ "probe_l1_penalty": 0.001,
19
+ "sae_batch_size": 125,
20
+ "llm_batch_size": 32,
21
+ "llm_dtype": "bfloat16",
22
+ "lower_vram_usage": false,
23
+ "model_name": "gemma-2-2b",
24
+ "n_values": [
25
+ 2,
26
+ 5,
27
+ 10,
28
+ 20,
29
+ 50,
30
+ 100,
31
+ 500
32
+ ],
33
+ "column1_vals_lookup": {
34
+ "LabHC/bias_in_bios_class_set1": [
35
+ [
36
+ "professor",
37
+ "nurse"
38
+ ],
39
+ [
40
+ "architect",
41
+ "journalist"
42
+ ],
43
+ [
44
+ "surgeon",
45
+ "psychologist"
46
+ ],
47
+ [
48
+ "attorney",
49
+ "teacher"
50
+ ]
51
+ ],
52
+ "canrager/amazon_reviews_mcauley_1and5": [
53
+ [
54
+ "Books",
55
+ "CDs_and_Vinyl"
56
+ ],
57
+ [
58
+ "Software",
59
+ "Electronics"
60
+ ],
61
+ [
62
+ "Pet_Supplies",
63
+ "Office_Products"
64
+ ],
65
+ [
66
+ "Industrial_and_Scientific",
67
+ "Toys_and_Games"
68
+ ]
69
+ ]
70
+ }
71
+ },
72
+ "eval_id": "463b1d9a-89c9-4e91-b0f7-cf689a793955",
73
+ "datetime_epoch_millis": 1740119695192,
74
+ "eval_result_metrics": {
75
+ "scr_metrics": {
76
+ "scr_dir1_threshold_2": 0.1749851308390269,
77
+ "scr_metric_threshold_2": 0.05298956479053978,
78
+ "scr_dir2_threshold_2": 0.05101225451692398,
79
+ "scr_dir1_threshold_5": 0.21609188104421403,
80
+ "scr_metric_threshold_5": 0.08972228769387822,
81
+ "scr_dir2_threshold_5": 0.08654682433639538,
82
+ "scr_dir1_threshold_10": 0.25460017912345434,
83
+ "scr_metric_threshold_10": 0.13078740360380972,
84
+ "scr_dir2_threshold_10": 0.13362307107697596,
85
+ "scr_dir1_threshold_20": 0.24341297951261093,
86
+ "scr_metric_threshold_20": 0.1698132270193075,
87
+ "scr_dir2_threshold_20": 0.17395687191433118,
88
+ "scr_dir1_threshold_50": 0.24169105984427836,
89
+ "scr_metric_threshold_50": 0.25147920390900097,
90
+ "scr_dir2_threshold_50": 0.2532930271879518,
91
+ "scr_dir1_threshold_100": 0.2329185038915579,
92
+ "scr_metric_threshold_100": 0.30587952314765465,
93
+ "scr_dir2_threshold_100": 0.29994758882459377,
94
+ "scr_dir1_threshold_500": 0.19456044542020062,
95
+ "scr_metric_threshold_500": 0.3556247366363928,
96
+ "scr_dir2_threshold_500": 0.3515883531012476
97
+ }
98
+ },
99
+ "eval_result_details": [
100
+ {
101
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_professor_nurse_results",
102
+ "scr_dir1_threshold_2": 0.2857136099242737,
103
+ "scr_metric_threshold_2": 0.007371057746493725,
104
+ "scr_dir2_threshold_2": 0.007371057746493725,
105
+ "scr_dir1_threshold_5": 0.3650786592542414,
106
+ "scr_metric_threshold_5": 0.019656056358159712,
107
+ "scr_dir2_threshold_5": 0.019656056358159712,
108
+ "scr_dir1_threshold_10": 0.42857088793941894,
109
+ "scr_metric_threshold_10": 0.027027114104653437,
110
+ "scr_dir2_threshold_10": 0.027027114104653437,
111
+ "scr_dir1_threshold_20": 0.444444654690226,
112
+ "scr_metric_threshold_20": 0.03439802540241183,
113
+ "scr_dir2_threshold_20": 0.03439802540241183,
114
+ "scr_dir1_threshold_50": 0.3492058386094512,
115
+ "scr_metric_threshold_50": 0.0638821099396514,
116
+ "scr_dir2_threshold_50": 0.0638821099396514,
117
+ "scr_dir1_threshold_100": 0.333333017964661,
118
+ "scr_metric_threshold_100": 0.08108119586522498,
119
+ "scr_dir2_threshold_100": 0.08108119586522498,
120
+ "scr_dir1_threshold_500": 0.25396796863469334,
121
+ "scr_metric_threshold_500": 0.06879605080482366,
122
+ "scr_dir2_threshold_500": 0.06879605080482366
123
+ },
124
+ {
125
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_architect_journalist_results",
126
+ "scr_dir1_threshold_2": 0.22222182084405012,
127
+ "scr_metric_threshold_2": 0.08498575820403938,
128
+ "scr_dir2_threshold_2": 0.08498575820403938,
129
+ "scr_dir1_threshold_5": 0.22222182084405012,
130
+ "scr_metric_threshold_5": 0.14447592402822051,
131
+ "scr_dir2_threshold_5": 0.14447592402822051,
132
+ "scr_dir1_threshold_10": 0.25252507008062886,
133
+ "scr_metric_threshold_10": 0.17280451009623365,
134
+ "scr_dir2_threshold_10": 0.17280451009623365,
135
+ "scr_dir1_threshold_20": 0.2323233053010818,
136
+ "scr_metric_threshold_20": 0.2096317395253275,
137
+ "scr_dir2_threshold_20": 0.2096317395253275,
138
+ "scr_dir1_threshold_50": 0.2323233053010818,
139
+ "scr_metric_threshold_50": 0.322946252649072,
140
+ "scr_dir2_threshold_50": 0.322946252649072,
141
+ "scr_dir1_threshold_100": 0.17171680682792437,
142
+ "scr_metric_threshold_100": 0.3767705999486353,
143
+ "scr_dir2_threshold_100": 0.3767705999486353,
144
+ "scr_dir1_threshold_500": -0.10101063009950968,
145
+ "scr_metric_threshold_500": 0.3597733132264739,
146
+ "scr_dir2_threshold_500": 0.3597733132264739
147
+ },
148
+ {
149
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_surgeon_psychologist_results",
150
+ "scr_dir1_threshold_2": 0.24193534431789668,
151
+ "scr_metric_threshold_2": 0.025252499406211347,
152
+ "scr_dir2_threshold_2": 0.025252499406211347,
153
+ "scr_dir1_threshold_5": 0.33870928977193526,
154
+ "scr_metric_threshold_5": 0.04040411946335353,
155
+ "scr_dir2_threshold_5": 0.04040411946335353,
156
+ "scr_dir1_threshold_10": 0.41935440454456746,
157
+ "scr_metric_threshold_10": 0.07070705854409946,
158
+ "scr_dir2_threshold_10": 0.07070705854409946,
159
+ "scr_dir1_threshold_20": 0.37096695113474787,
160
+ "scr_metric_threshold_20": 0.09343433811304351,
161
+ "scr_dir2_threshold_20": 0.09343433811304351,
162
+ "scr_dir1_threshold_50": 0.37096695113474787,
163
+ "scr_metric_threshold_50": 0.14646470727950273,
164
+ "scr_dir2_threshold_50": 0.14646470727950273,
165
+ "scr_dir1_threshold_100": 0.3064516284091226,
166
+ "scr_metric_threshold_100": 0.2020202962832292,
167
+ "scr_dir2_threshold_100": 0.2020202962832292,
168
+ "scr_dir1_threshold_500": 0.14516043749825752,
169
+ "scr_metric_threshold_500": 0.18686867622608702,
170
+ "scr_dir2_threshold_500": 0.18686867622608702
171
+ },
172
+ {
173
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_attorney_teacher_results",
174
+ "scr_dir1_threshold_2": 0.1869918462801379,
175
+ "scr_metric_threshold_2": 0.070381207579754,
176
+ "scr_dir2_threshold_2": 0.070381207579754,
177
+ "scr_dir1_threshold_5": 0.21138225565250596,
178
+ "scr_metric_threshold_5": 0.12023446826882572,
179
+ "scr_dir2_threshold_5": 0.12023446826882572,
180
+ "scr_dir1_threshold_10": 0.2845529991791037,
181
+ "scr_metric_threshold_10": 0.18475064804764177,
182
+ "scr_dir2_threshold_10": 0.18475064804764177,
183
+ "scr_dir1_threshold_20": 0.22764220550708036,
184
+ "scr_metric_threshold_20": 0.21700882533389967,
185
+ "scr_dir2_threshold_20": 0.21700882533389967,
186
+ "scr_dir1_threshold_50": 0.1869918462801379,
187
+ "scr_metric_threshold_50": 0.30205277720969825,
188
+ "scr_dir2_threshold_50": 0.30205277720969825,
189
+ "scr_dir1_threshold_100": 0.13821151212590824,
190
+ "scr_metric_threshold_100": 0.416422217677586,
191
+ "scr_dir2_threshold_100": 0.416422217677586,
192
+ "scr_dir1_threshold_500": 0.08130071845388491,
193
+ "scr_metric_threshold_500": 0.4516129088643129,
194
+ "scr_dir2_threshold_500": 0.4516129088643129
195
+ },
196
+ {
197
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Books_CDs_and_Vinyl_results",
198
+ "scr_dir1_threshold_2": 0.05464474822904205,
199
+ "scr_metric_threshold_2": 0.05078128637978211,
200
+ "scr_dir2_threshold_2": 0.05078128637978211,
201
+ "scr_dir1_threshold_5": 0.07650271266233713,
202
+ "scr_metric_threshold_5": 0.0742187136202179,
203
+ "scr_dir2_threshold_5": 0.0742187136202179,
204
+ "scr_dir1_threshold_10": 0.06010915791026799,
205
+ "scr_metric_threshold_10": 0.14062502910382568,
206
+ "scr_dir2_threshold_10": 0.14062502910382568,
207
+ "scr_dir1_threshold_20": 0.03825119347697291,
208
+ "scr_metric_threshold_20": 0.22265637369125918,
209
+ "scr_dir2_threshold_20": 0.22265637369125918,
210
+ "scr_dir1_threshold_50": 0.04371592886659017,
211
+ "scr_metric_threshold_50": 0.3945312281721307,
212
+ "scr_dir2_threshold_50": 0.3945312281721307,
213
+ "scr_dir1_threshold_100": 0.027322374114521025,
214
+ "scr_metric_threshold_100": 0.546875087311477,
215
+ "scr_dir2_threshold_100": 0.546875087311477,
216
+ "scr_dir1_threshold_500": 0.06010915791026799,
217
+ "scr_metric_threshold_500": 0.671875087311477,
218
+ "scr_dir2_threshold_500": 0.671875087311477
219
+ },
220
+ {
221
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Software_Electronics_results",
222
+ "scr_dir1_threshold_2": 0.12820501064172196,
223
+ "scr_metric_threshold_2": 0.036290332271825994,
224
+ "scr_dir2_threshold_2": 0.036290332271825994,
225
+ "scr_dir1_threshold_5": 0.1692307363130155,
226
+ "scr_metric_threshold_5": 0.06451625307937271,
227
+ "scr_dir2_threshold_5": 0.06451625307937271,
228
+ "scr_dir1_threshold_10": 0.2102561563194528,
229
+ "scr_metric_threshold_10": 0.08467752208135552,
230
+ "scr_dir2_threshold_10": 0.08467752208135552,
231
+ "scr_dir1_threshold_20": 0.22564087986240192,
232
+ "scr_metric_threshold_20": 0.10887099681547797,
233
+ "scr_dir2_threshold_20": 0.10887099681547797,
234
+ "scr_dir1_threshold_50": 0.24102560340535104,
235
+ "scr_metric_threshold_50": 0.17741945562699032,
236
+ "scr_dir2_threshold_50": 0.17741945562699032,
237
+ "scr_dir1_threshold_100": 0.3025638862474351,
238
+ "scr_metric_threshold_100": 0.2701613891726251,
239
+ "scr_dir2_threshold_100": 0.2701613891726251,
240
+ "scr_dir1_threshold_500": 0.4051282004256689,
241
+ "scr_metric_threshold_500": 0.36693552845039956,
242
+ "scr_dir2_threshold_500": 0.36693552845039956
243
+ },
244
+ {
245
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Pet_Supplies_Office_Products_results",
246
+ "scr_dir1_threshold_2": 0.20720722172014708,
247
+ "scr_metric_threshold_2": 0.07589293198126605,
248
+ "scr_dir2_threshold_2": 0.07589293198126605,
249
+ "scr_dir1_threshold_5": 0.2297297224732598,
250
+ "scr_metric_threshold_5": 0.13839291535050852,
251
+ "scr_dir2_threshold_5": 0.13839291535050852,
252
+ "scr_dir1_threshold_10": 0.26126111613186265,
253
+ "scr_metric_threshold_10": 0.24553570597033553,
254
+ "scr_dir2_threshold_10": 0.24553570597033553,
255
+ "scr_dir1_threshold_20": 0.27927917043223033,
256
+ "scr_metric_threshold_20": 0.3437500415768938,
257
+ "scr_dir2_threshold_20": 0.3437500415768938,
258
+ "scr_dir1_threshold_50": 0.3333333333333333,
259
+ "scr_metric_threshold_50": 0.42857142857142855,
260
+ "scr_dir2_threshold_50": 0.42857142857142855,
261
+ "scr_dir1_threshold_100": 0.3648647269919362,
262
+ "scr_metric_threshold_100": 0.3348213204715046,
263
+ "scr_dir2_threshold_100": 0.3348213204715046,
264
+ "scr_dir1_threshold_500": 0.44144139064615195,
265
+ "scr_metric_threshold_500": 0.4687500083153788,
266
+ "scr_dir2_threshold_500": 0.4687500083153788
267
+ },
268
+ {
269
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Industrial_and_Scientific_Toys_and_Games_results",
270
+ "scr_dir1_threshold_2": 0.07296144475494565,
271
+ "scr_metric_threshold_2": 0.07296144475494565,
272
+ "scr_dir2_threshold_2": 0.05714296256601923,
273
+ "scr_dir1_threshold_5": 0.11587985138236706,
274
+ "scr_metric_threshold_5": 0.11587985138236706,
275
+ "scr_dir2_threshold_5": 0.09047614452250444,
276
+ "scr_dir1_threshold_10": 0.12017164088233277,
277
+ "scr_metric_threshold_10": 0.12017164088233277,
278
+ "scr_dir2_threshold_10": 0.14285698066766273,
279
+ "scr_dir1_threshold_20": 0.12875547569614632,
280
+ "scr_metric_threshold_20": 0.12875547569614632,
281
+ "scr_dir2_threshold_20": 0.1619046348563358,
282
+ "scr_dir1_threshold_50": 0.17596567182353345,
283
+ "scr_metric_threshold_50": 0.17596567182353345,
284
+ "scr_dir2_threshold_50": 0.19047625805514054,
285
+ "scr_dir1_threshold_100": 0.21888407845095484,
286
+ "scr_metric_threshold_100": 0.21888407845095484,
287
+ "scr_dir2_threshold_100": 0.17142860386646747,
288
+ "scr_dir1_threshold_500": 0.2703863198921898,
289
+ "scr_metric_threshold_500": 0.2703863198921898,
290
+ "scr_dir2_threshold_500": 0.2380952516110281
291
+ }
292
+ ],
293
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
294
+ "sae_lens_id": "custom_sae",
295
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_3",
296
+ "sae_lens_version": "5.4.2",
297
+ "sae_cfg_dict": {
298
+ "model_name": "gemma-2-2b",
299
+ "d_in": 2304,
300
+ "d_sae": 65536,
301
+ "hook_layer": 12,
302
+ "hook_name": "blocks.12.hook_resid_post",
303
+ "context_size": null,
304
+ "hook_head_index": null,
305
+ "architecture": "standard_april_update",
306
+ "apply_b_dec_to_input": null,
307
+ "finetuning_scaling_factor": null,
308
+ "activation_fn_str": "",
309
+ "prepend_bos": true,
310
+ "normalize_activations": "none",
311
+ "dtype": "bfloat16",
312
+ "device": "",
313
+ "dataset_path": "",
314
+ "dataset_trust_remote_code": true,
315
+ "seqpos_slice": [
316
+ null
317
+ ],
318
+ "training_tokens": -100000,
319
+ "sae_lens_training_version": null,
320
+ "neuronpedia_id": null
321
+ },
322
+ "eval_result_unstructured": null
323
+ }
eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_4_custom_sae_eval_results.json ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "scr",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "canrager/amazon_reviews_mcauley_1and5"
8
+ ],
9
+ "perform_scr": true,
10
+ "early_stopping_patience": 20,
11
+ "train_set_size": 4000,
12
+ "test_set_size": 1000,
13
+ "context_length": 128,
14
+ "probe_train_batch_size": 16,
15
+ "probe_test_batch_size": 500,
16
+ "probe_epochs": 20,
17
+ "probe_lr": 0.001,
18
+ "probe_l1_penalty": 0.001,
19
+ "sae_batch_size": 125,
20
+ "llm_batch_size": 32,
21
+ "llm_dtype": "bfloat16",
22
+ "lower_vram_usage": false,
23
+ "model_name": "gemma-2-2b",
24
+ "n_values": [
25
+ 2,
26
+ 5,
27
+ 10,
28
+ 20,
29
+ 50,
30
+ 100,
31
+ 500
32
+ ],
33
+ "column1_vals_lookup": {
34
+ "LabHC/bias_in_bios_class_set1": [
35
+ [
36
+ "professor",
37
+ "nurse"
38
+ ],
39
+ [
40
+ "architect",
41
+ "journalist"
42
+ ],
43
+ [
44
+ "surgeon",
45
+ "psychologist"
46
+ ],
47
+ [
48
+ "attorney",
49
+ "teacher"
50
+ ]
51
+ ],
52
+ "canrager/amazon_reviews_mcauley_1and5": [
53
+ [
54
+ "Books",
55
+ "CDs_and_Vinyl"
56
+ ],
57
+ [
58
+ "Software",
59
+ "Electronics"
60
+ ],
61
+ [
62
+ "Pet_Supplies",
63
+ "Office_Products"
64
+ ],
65
+ [
66
+ "Industrial_and_Scientific",
67
+ "Toys_and_Games"
68
+ ]
69
+ ]
70
+ }
71
+ },
72
+ "eval_id": "14980b7b-2b20-47c4-8418-513f2d8cb5b1",
73
+ "datetime_epoch_millis": 1740118592504,
74
+ "eval_result_metrics": {
75
+ "scr_metrics": {
76
+ "scr_dir1_threshold_2": 0.15256890317343705,
77
+ "scr_metric_threshold_2": 0.05629171484651784,
78
+ "scr_dir2_threshold_2": 0.05115428710735561,
79
+ "scr_dir1_threshold_5": 0.20706361311305155,
80
+ "scr_metric_threshold_5": 0.08724149874951825,
81
+ "scr_dir2_threshold_5": 0.08251533898897613,
82
+ "scr_dir1_threshold_10": 0.23392923560013004,
83
+ "scr_metric_threshold_10": 0.11184717485226564,
84
+ "scr_dir2_threshold_10": 0.11164789859202578,
85
+ "scr_dir1_threshold_20": 0.23205642405218066,
86
+ "scr_metric_threshold_20": 0.15966328593381138,
87
+ "scr_dir2_threshold_20": 0.16303542709447333,
88
+ "scr_dir1_threshold_50": 0.18379708219318097,
89
+ "scr_metric_threshold_50": 0.2091638617208231,
90
+ "scr_dir2_threshold_50": 0.21008863251413723,
91
+ "scr_dir1_threshold_100": 0.16134429044091364,
92
+ "scr_metric_threshold_100": 0.26597175232009695,
93
+ "scr_dir2_threshold_100": 0.2573650745404228,
94
+ "scr_dir1_threshold_500": 0.1260506434186448,
95
+ "scr_metric_threshold_500": 0.3003423314915035,
96
+ "scr_dir2_threshold_500": 0.2961960881394191
97
+ }
98
+ },
99
+ "eval_result_details": [
100
+ {
101
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_professor_nurse_results",
102
+ "scr_dir1_threshold_2": 0.1746029193047256,
103
+ "scr_metric_threshold_2": 0.009828028179079856,
104
+ "scr_dir2_threshold_2": 0.009828028179079856,
105
+ "scr_dir1_threshold_5": 0.31746019731987085,
106
+ "scr_metric_threshold_5": 0.012284998611665989,
107
+ "scr_dir2_threshold_5": 0.012284998611665989,
108
+ "scr_dir1_threshold_10": 0.3968252466498386,
109
+ "scr_metric_threshold_10": 0.019656056358159712,
110
+ "scr_dir2_threshold_10": 0.019656056358159712,
111
+ "scr_dir1_threshold_20": 0.3650786592542414,
112
+ "scr_metric_threshold_20": 0.04668317046281315,
113
+ "scr_dir2_threshold_20": 0.04668317046281315,
114
+ "scr_dir1_threshold_50": 0.3492058386094512,
115
+ "scr_metric_threshold_50": 0.07125316768614513,
116
+ "scr_dir2_threshold_50": 0.07125316768614513,
117
+ "scr_dir1_threshold_100": 0.31746019731987085,
118
+ "scr_metric_threshold_100": 0.07616710855131739,
119
+ "scr_dir2_threshold_100": 0.07616710855131739,
120
+ "scr_dir1_threshold_500": 0.20634856059430595,
121
+ "scr_metric_threshold_500": 0.06879605080482366,
122
+ "scr_dir2_threshold_500": 0.06879605080482366
123
+ },
124
+ {
125
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_architect_journalist_results",
126
+ "scr_dir1_threshold_2": 0.17171680682792437,
127
+ "scr_metric_threshold_2": 0.08781875189219428,
128
+ "scr_dir2_threshold_2": 0.08781875189219428,
129
+ "scr_dir1_threshold_5": 0.24242418769085533,
130
+ "scr_metric_threshold_5": 0.1388102743552947,
131
+ "scr_dir2_threshold_5": 0.1388102743552947,
132
+ "scr_dir1_threshold_10": 0.2323233053010818,
133
+ "scr_metric_threshold_10": 0.15580739222576417,
134
+ "scr_dir2_threshold_10": 0.15580739222576417,
135
+ "scr_dir1_threshold_20": 0.2323233053010818,
136
+ "scr_metric_threshold_20": 0.22662885739579697,
137
+ "scr_dir2_threshold_20": 0.22662885739579697,
138
+ "scr_dir1_threshold_50": 0.1414141596586038,
139
+ "scr_metric_threshold_50": 0.3484420138806222,
140
+ "scr_dir2_threshold_50": 0.3484420138806222,
141
+ "scr_dir1_threshold_100": -0.2626265545376605,
142
+ "scr_metric_threshold_100": 0.40509918601664846,
143
+ "scr_dir2_threshold_100": 0.40509918601664846,
144
+ "scr_dir1_threshold_500": -0.24242478975811346,
145
+ "scr_metric_threshold_500": 0.40509918601664846,
146
+ "scr_dir2_threshold_500": 0.40509918601664846
147
+ },
148
+ {
149
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_surgeon_psychologist_results",
150
+ "scr_dir1_threshold_2": 0.1935478909080771,
151
+ "scr_metric_threshold_2": 0.022727279568944055,
152
+ "scr_dir2_threshold_2": 0.022727279568944055,
153
+ "scr_dir1_threshold_5": 0.2741930056807093,
154
+ "scr_metric_threshold_5": 0.025252499406211347,
155
+ "scr_dir2_threshold_5": 0.025252499406211347,
156
+ "scr_dir1_threshold_10": 0.35483812045334157,
157
+ "scr_metric_threshold_10": 0.04040411946335353,
158
+ "scr_dir2_threshold_10": 0.04040411946335353,
159
+ "scr_dir1_threshold_20": 0.3870967431817548,
160
+ "scr_metric_threshold_20": 0.06818183870683217,
161
+ "scr_dir2_threshold_20": 0.06818183870683217,
162
+ "scr_dir1_threshold_50": 0.16129022954526445,
163
+ "scr_metric_threshold_50": 0.1085859581701857,
164
+ "scr_dir2_threshold_50": 0.1085859581701857,
165
+ "scr_dir1_threshold_100": 0.258064174999303,
166
+ "scr_metric_threshold_100": 0.13383845757639704,
167
+ "scr_dir2_threshold_100": 0.13383845757639704,
168
+ "scr_dir1_threshold_500": 0.11290277613544487,
169
+ "scr_metric_threshold_500": 0.21212117563229838,
170
+ "scr_dir2_threshold_500": 0.21212117563229838
171
+ },
172
+ {
173
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_attorney_teacher_results",
174
+ "scr_dir1_threshold_2": 0.13821151212590824,
175
+ "scr_metric_threshold_2": 0.070381207579754,
176
+ "scr_dir2_threshold_2": 0.070381207579754,
177
+ "scr_dir1_threshold_5": 0.1869918462801379,
178
+ "scr_metric_threshold_5": 0.12903218476393236,
179
+ "scr_dir2_threshold_5": 0.12903218476393236,
180
+ "scr_dir1_threshold_10": 0.23577218043436754,
181
+ "scr_metric_threshold_10": 0.17888562024670382,
182
+ "scr_dir2_threshold_10": 0.17888562024670382,
183
+ "scr_dir1_threshold_20": 0.19512182120742508,
184
+ "scr_metric_threshold_20": 0.22873888093577555,
185
+ "scr_dir2_threshold_20": 0.22873888093577555,
186
+ "scr_dir1_threshold_50": 0.10569112782625298,
187
+ "scr_metric_threshold_50": 0.3049852911101672,
188
+ "scr_dir2_threshold_50": 0.3049852911101672,
189
+ "scr_dir1_threshold_100": 0.09756115289896579,
190
+ "scr_metric_threshold_100": 0.3401759822968941,
191
+ "scr_dir2_threshold_100": 0.3401759822968941,
192
+ "scr_dir1_threshold_500": -0.04878033415422965,
193
+ "scr_metric_threshold_500": 0.2932550607145916,
194
+ "scr_dir2_threshold_500": 0.2932550607145916
195
+ },
196
+ {
197
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Books_CDs_and_Vinyl_results",
198
+ "scr_dir1_threshold_2": 0.05464474822904205,
199
+ "scr_metric_threshold_2": 0.03906268917486696,
200
+ "scr_dir2_threshold_2": 0.03906268917486696,
201
+ "scr_dir1_threshold_5": 0.05464474822904205,
202
+ "scr_metric_threshold_5": 0.06250011641530274,
203
+ "scr_dir2_threshold_5": 0.06250011641530274,
204
+ "scr_dir1_threshold_10": 0.05464474822904205,
205
+ "scr_metric_threshold_10": 0.10937497089617432,
206
+ "scr_dir2_threshold_10": 0.10937497089617432,
207
+ "scr_dir1_threshold_20": 0.05464474822904205,
208
+ "scr_metric_threshold_20": 0.16406245634426148,
209
+ "scr_dir2_threshold_20": 0.16406245634426148,
210
+ "scr_dir1_threshold_50": 0.04371592886659017,
211
+ "scr_metric_threshold_50": 0.292968888243172,
212
+ "scr_dir2_threshold_50": 0.292968888243172,
213
+ "scr_dir1_threshold_100": 0.04371592886659017,
214
+ "scr_metric_threshold_100": 0.4335936845163922,
215
+ "scr_dir2_threshold_100": 0.4335936845163922,
216
+ "scr_dir1_threshold_500": 0.07103830298111119,
217
+ "scr_metric_threshold_500": 0.48828116996447934,
218
+ "scr_dir2_threshold_500": 0.48828116996447934
219
+ },
220
+ {
221
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Software_Electronics_results",
222
+ "scr_dir1_threshold_2": 0.16410229135546164,
223
+ "scr_metric_threshold_2": 0.028225920807546715,
224
+ "scr_dir2_threshold_2": 0.028225920807546715,
225
+ "scr_dir1_threshold_5": 0.1999998777340575,
226
+ "scr_metric_threshold_5": 0.036290332271825994,
227
+ "scr_dir2_threshold_5": 0.036290332271825994,
228
+ "scr_dir1_threshold_10": 0.21538460127700665,
229
+ "scr_metric_threshold_10": 0.060483807005948444,
230
+ "scr_dir2_threshold_10": 0.060483807005948444,
231
+ "scr_dir1_threshold_20": 0.1999998777340575,
232
+ "scr_metric_threshold_20": 0.09677413927777444,
233
+ "scr_dir2_threshold_20": 0.09677413927777444,
234
+ "scr_dir1_threshold_50": 0.23076901915509954,
235
+ "scr_metric_threshold_50": 0.16935480382142643,
236
+ "scr_dir2_threshold_50": 0.16935480382142643,
237
+ "scr_dir1_threshold_100": 0.29230760766203984,
238
+ "scr_metric_threshold_100": 0.23790326263293876,
239
+ "scr_dir2_threshold_100": 0.23790326263293876,
240
+ "scr_dir1_threshold_500": 0.3435896119187286,
241
+ "scr_metric_threshold_500": 0.3145161329087304,
242
+ "scr_dir2_threshold_500": 0.3145161329087304
243
+ },
244
+ {
245
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Pet_Supplies_Office_Products_results",
246
+ "scr_dir1_threshold_2": 0.22072082956776967,
247
+ "scr_metric_threshold_2": 0.08928561450116908,
248
+ "scr_dir2_threshold_2": 0.08928561450116908,
249
+ "scr_dir1_threshold_5": 0.24774777677362747,
250
+ "scr_metric_threshold_5": 0.1607143189758008,
251
+ "scr_dir2_threshold_5": 0.1607143189758008,
252
+ "scr_dir1_threshold_10": 0.26576583107399515,
253
+ "scr_metric_threshold_10": 0.214285581239654,
254
+ "scr_dir2_threshold_10": 0.214285581239654,
255
+ "scr_dir1_threshold_20": 0.3063063861274755,
256
+ "scr_metric_threshold_20": 0.33035709296487026,
257
+ "scr_dir2_threshold_20": 0.33035709296487026,
258
+ "scr_dir1_threshold_50": 0.28378388537436283,
259
+ "scr_metric_threshold_50": 0.22321430234504325,
260
+ "scr_dir2_threshold_50": 0.22321430234504325,
261
+ "scr_dir1_threshold_100": 0.34684694118095594,
262
+ "scr_metric_threshold_100": 0.30357146183294365,
263
+ "scr_dir2_threshold_100": 0.30357146183294365,
264
+ "scr_dir1_threshold_500": 0.34684694118095594,
265
+ "scr_metric_threshold_500": 0.40178579743950193,
266
+ "scr_dir2_threshold_500": 0.40178579743950193
267
+ },
268
+ {
269
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Industrial_and_Scientific_Toys_and_Games_results",
270
+ "scr_dir1_threshold_2": 0.10300422706858779,
271
+ "scr_metric_threshold_2": 0.10300422706858779,
272
+ "scr_dir2_threshold_2": 0.06190480515528994,
273
+ "scr_dir1_threshold_5": 0.13304726519611204,
274
+ "scr_metric_threshold_5": 0.13304726519611204,
275
+ "scr_dir2_threshold_5": 0.09523798711177515,
276
+ "scr_dir1_threshold_10": 0.11587985138236706,
277
+ "scr_metric_threshold_10": 0.11587985138236706,
278
+ "scr_dir2_threshold_10": 0.11428564130044823,
279
+ "scr_dir1_threshold_20": 0.11587985138236706,
280
+ "scr_metric_threshold_20": 0.11587985138236706,
281
+ "scr_dir2_threshold_20": 0.14285698066766273,
282
+ "scr_dir1_threshold_50": 0.15450646850982275,
283
+ "scr_metric_threshold_50": 0.15450646850982275,
284
+ "scr_dir2_threshold_50": 0.1619046348563358,
285
+ "scr_dir1_threshold_100": 0.19742487513724416,
286
+ "scr_metric_threshold_100": 0.19742487513724416,
287
+ "scr_dir2_threshold_100": 0.12857145289985059,
288
+ "scr_dir1_threshold_500": 0.21888407845095484,
289
+ "scr_metric_threshold_500": 0.21888407845095484,
290
+ "scr_dir2_threshold_500": 0.18571413163427958
291
+ }
292
+ ],
293
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
294
+ "sae_lens_id": "custom_sae",
295
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_4",
296
+ "sae_lens_version": "5.4.2",
297
+ "sae_cfg_dict": {
298
+ "model_name": "gemma-2-2b",
299
+ "d_in": 2304,
300
+ "d_sae": 65536,
301
+ "hook_layer": 12,
302
+ "hook_name": "blocks.12.hook_resid_post",
303
+ "context_size": null,
304
+ "hook_head_index": null,
305
+ "architecture": "standard_april_update",
306
+ "apply_b_dec_to_input": null,
307
+ "finetuning_scaling_factor": null,
308
+ "activation_fn_str": "",
309
+ "prepend_bos": true,
310
+ "normalize_activations": "none",
311
+ "dtype": "bfloat16",
312
+ "device": "",
313
+ "dataset_path": "",
314
+ "dataset_trust_remote_code": true,
315
+ "seqpos_slice": [
316
+ null
317
+ ],
318
+ "training_tokens": -100000,
319
+ "sae_lens_training_version": null,
320
+ "neuronpedia_id": null
321
+ },
322
+ "eval_result_unstructured": null
323
+ }
eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_5_custom_sae_eval_results.json ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "scr",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "canrager/amazon_reviews_mcauley_1and5"
8
+ ],
9
+ "perform_scr": true,
10
+ "early_stopping_patience": 20,
11
+ "train_set_size": 4000,
12
+ "test_set_size": 1000,
13
+ "context_length": 128,
14
+ "probe_train_batch_size": 16,
15
+ "probe_test_batch_size": 500,
16
+ "probe_epochs": 20,
17
+ "probe_lr": 0.001,
18
+ "probe_l1_penalty": 0.001,
19
+ "sae_batch_size": 125,
20
+ "llm_batch_size": 32,
21
+ "llm_dtype": "bfloat16",
22
+ "lower_vram_usage": false,
23
+ "model_name": "gemma-2-2b",
24
+ "n_values": [
25
+ 2,
26
+ 5,
27
+ 10,
28
+ 20,
29
+ 50,
30
+ 100,
31
+ 500
32
+ ],
33
+ "column1_vals_lookup": {
34
+ "LabHC/bias_in_bios_class_set1": [
35
+ [
36
+ "professor",
37
+ "nurse"
38
+ ],
39
+ [
40
+ "architect",
41
+ "journalist"
42
+ ],
43
+ [
44
+ "surgeon",
45
+ "psychologist"
46
+ ],
47
+ [
48
+ "attorney",
49
+ "teacher"
50
+ ]
51
+ ],
52
+ "canrager/amazon_reviews_mcauley_1and5": [
53
+ [
54
+ "Books",
55
+ "CDs_and_Vinyl"
56
+ ],
57
+ [
58
+ "Software",
59
+ "Electronics"
60
+ ],
61
+ [
62
+ "Pet_Supplies",
63
+ "Office_Products"
64
+ ],
65
+ [
66
+ "Industrial_and_Scientific",
67
+ "Toys_and_Games"
68
+ ]
69
+ ]
70
+ }
71
+ },
72
+ "eval_id": "27ed03bf-60d4-451d-9e57-401f095de7ea",
73
+ "datetime_epoch_millis": 1740118225927,
74
+ "eval_result_metrics": {
75
+ "scr_metrics": {
76
+ "scr_dir1_threshold_2": 0.1142210034909799,
77
+ "scr_metric_threshold_2": 0.05233540228360544,
78
+ "scr_dir2_threshold_2": 0.04809464850873175,
79
+ "scr_dir1_threshold_5": 0.14348341242287968,
80
+ "scr_metric_threshold_5": 0.07776038298153087,
81
+ "scr_dir2_threshold_5": 0.07929573349265376,
82
+ "scr_dir1_threshold_10": 0.14915675437318082,
83
+ "scr_metric_threshold_10": 0.10487540293578126,
84
+ "scr_dir2_threshold_10": 0.10765997073038494,
85
+ "scr_dir1_threshold_20": 0.15564832380130597,
86
+ "scr_metric_threshold_20": 0.142009526688755,
87
+ "scr_dir2_threshold_20": 0.14652106429712086,
88
+ "scr_dir1_threshold_50": 0.11252191600583017,
89
+ "scr_metric_threshold_50": 0.1861256099642392,
90
+ "scr_dir2_threshold_50": 0.194149840334079,
91
+ "scr_dir1_threshold_100": 0.06532253756081764,
92
+ "scr_metric_threshold_100": 0.23192969976931124,
93
+ "scr_dir2_threshold_100": 0.2407841905096759,
94
+ "scr_dir1_threshold_500": -0.03862469553313101,
95
+ "scr_metric_threshold_500": 0.2413952857133858,
96
+ "scr_dir2_threshold_500": 0.24970561383191892
97
+ }
98
+ },
99
+ "eval_result_details": [
100
+ {
101
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_professor_nurse_results",
102
+ "scr_dir1_threshold_2": 0.1746029193047256,
103
+ "scr_metric_threshold_2": 0.007371057746493725,
104
+ "scr_dir2_threshold_2": 0.007371057746493725,
105
+ "scr_dir1_threshold_5": 0.25396796863469334,
106
+ "scr_metric_threshold_5": 0.012284998611665989,
107
+ "scr_dir2_threshold_5": 0.012284998611665989,
108
+ "scr_dir1_threshold_10": 0.25396796863469334,
109
+ "scr_metric_threshold_10": 0.019656056358159712,
110
+ "scr_dir2_threshold_10": 0.019656056358159712,
111
+ "scr_dir1_threshold_20": 0.23809514798990317,
112
+ "scr_metric_threshold_20": 0.036855142283733294,
113
+ "scr_dir2_threshold_20": 0.036855142283733294,
114
+ "scr_dir1_threshold_50": 0.2857136099242737,
115
+ "scr_metric_threshold_50": 0.04914014089539928,
116
+ "scr_dir2_threshold_50": 0.04914014089539928,
117
+ "scr_dir1_threshold_100": 0.25396796863469334,
118
+ "scr_metric_threshold_100": 0.06879605080482366,
119
+ "scr_dir2_threshold_100": 0.06879605080482366,
120
+ "scr_dir1_threshold_500": 0.031745641289580344,
121
+ "scr_metric_threshold_500": 0.06633908037223754,
122
+ "scr_dir2_threshold_500": 0.06633908037223754
123
+ },
124
+ {
125
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_architect_journalist_results",
126
+ "scr_dir1_threshold_2": 0.15151504204837732,
127
+ "scr_metric_threshold_2": 0.0934844015651201,
128
+ "scr_dir2_threshold_2": 0.0934844015651201,
129
+ "scr_dir1_threshold_5": 0.17171680682792437,
130
+ "scr_metric_threshold_5": 0.12747880615775103,
131
+ "scr_dir2_threshold_5": 0.12747880615775103,
132
+ "scr_dir1_threshold_10": 0.20202005606450307,
133
+ "scr_metric_threshold_10": 0.15580739222576417,
134
+ "scr_dir2_threshold_10": 0.15580739222576417,
135
+ "scr_dir1_threshold_20": 0.19191917367472955,
136
+ "scr_metric_threshold_20": 0.21813038288640824,
137
+ "scr_dir2_threshold_20": 0.21813038288640824,
138
+ "scr_dir1_threshold_50": -0.08080826325270449,
139
+ "scr_metric_threshold_50": 0.2974504914175218,
140
+ "scr_dir2_threshold_50": 0.2974504914175218,
141
+ "scr_dir1_threshold_100": -0.343434817790365,
142
+ "scr_metric_threshold_100": 0.36260630691462875,
143
+ "scr_dir2_threshold_100": 0.36260630691462875,
144
+ "scr_dir1_threshold_500": -0.4646466106021636,
145
+ "scr_metric_threshold_500": 0.31728043412445417,
146
+ "scr_dir2_threshold_500": 0.31728043412445417
147
+ },
148
+ {
149
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_surgeon_psychologist_results",
150
+ "scr_dir1_threshold_2": 0.17741906022667078,
151
+ "scr_metric_threshold_2": 0.017676839894409477,
152
+ "scr_dir2_threshold_2": 0.017676839894409477,
153
+ "scr_dir1_threshold_5": 0.24193534431789668,
154
+ "scr_metric_threshold_5": 0.03535352927204973,
155
+ "scr_dir2_threshold_5": 0.03535352927204973,
156
+ "scr_dir1_threshold_10": 0.2741930056807093,
157
+ "scr_metric_threshold_10": 0.06313139903229759,
158
+ "scr_dir2_threshold_10": 0.06313139903229759,
159
+ "scr_dir1_threshold_20": 0.33870928977193526,
160
+ "scr_metric_threshold_20": 0.08333330824720513,
161
+ "scr_dir2_threshold_20": 0.08333330824720513,
162
+ "scr_dir1_threshold_50": 0.20967672158948342,
163
+ "scr_metric_threshold_50": 0.11111117800745299,
164
+ "scr_dir2_threshold_50": 0.11111117800745299,
165
+ "scr_dir1_threshold_100": 0.17741906022667078,
166
+ "scr_metric_threshold_100": 0.13888889725093162,
167
+ "scr_dir2_threshold_100": 0.13888889725093162,
168
+ "scr_dir1_threshold_500": -0.20967768295508404,
169
+ "scr_metric_threshold_500": 0.13888889725093162,
170
+ "scr_dir2_threshold_500": 0.13888889725093162
171
+ },
172
+ {
173
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_attorney_teacher_results",
174
+ "scr_dir1_threshold_2": 0.10569112782625298,
175
+ "scr_metric_threshold_2": 0.06451600498511631,
176
+ "scr_dir2_threshold_2": 0.06451600498511631,
177
+ "scr_dir1_threshold_5": 0.13821151212590824,
178
+ "scr_metric_threshold_5": 0.1260996708634634,
179
+ "scr_dir2_threshold_5": 0.1260996708634634,
180
+ "scr_dir1_threshold_10": 0.0894311779716786,
181
+ "scr_metric_threshold_10": 0.17008790375159719,
182
+ "scr_dir2_threshold_10": 0.17008790375159719,
183
+ "scr_dir1_threshold_20": 0.0894311779716786,
184
+ "scr_metric_threshold_20": 0.21700882533389967,
185
+ "scr_dir2_threshold_20": 0.21700882533389967,
186
+ "scr_dir1_threshold_50": 0.040650359226942455,
187
+ "scr_metric_threshold_50": 0.2697947747171401,
188
+ "scr_dir2_threshold_50": 0.2697947747171401,
189
+ "scr_dir1_threshold_100": -0.032520384299655265,
190
+ "scr_metric_threshold_100": 0.3079178050106362,
191
+ "scr_dir2_threshold_100": 0.3079178050106362,
192
+ "scr_dir1_threshold_500": -0.20325179613471228,
193
+ "scr_metric_threshold_500": 0.1964808784432174,
194
+ "scr_dir2_threshold_500": 0.1964808784432174
195
+ },
196
+ {
197
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Books_CDs_and_Vinyl_results",
198
+ "scr_dir1_threshold_2": 0.04371592886659017,
199
+ "scr_metric_threshold_2": 0.03125005820765137,
200
+ "scr_dir2_threshold_2": 0.03125005820765137,
201
+ "scr_dir1_threshold_5": 0.04918033854781611,
202
+ "scr_metric_threshold_5": 0.03906268917486696,
203
+ "scr_dir2_threshold_5": 0.03906268917486696,
204
+ "scr_dir1_threshold_10": 0.04371592886659017,
205
+ "scr_metric_threshold_10": 0.07812514551912843,
206
+ "scr_dir2_threshold_10": 0.07812514551912843,
207
+ "scr_dir1_threshold_20": 0.03825119347697291,
208
+ "scr_metric_threshold_20": 0.12109380093169494,
209
+ "scr_dir2_threshold_20": 0.12109380093169494,
210
+ "scr_dir1_threshold_50": 0.016393554752069144,
211
+ "scr_metric_threshold_50": 0.24609380093169494,
212
+ "scr_dir2_threshold_50": 0.24609380093169494,
213
+ "scr_dir1_threshold_100": 0.027322374114521025,
214
+ "scr_metric_threshold_100": 0.3320313445874335,
215
+ "scr_dir2_threshold_100": 0.3320313445874335,
216
+ "scr_dir1_threshold_500": 0.016393554752069144,
217
+ "scr_metric_threshold_500": 0.3828126309672156,
218
+ "scr_dir2_threshold_500": 0.3828126309672156
219
+ },
220
+ {
221
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Software_Electronics_results",
222
+ "scr_dir1_threshold_2": 0.06666642213478169,
223
+ "scr_metric_threshold_2": 0.020161269001982816,
224
+ "scr_dir2_threshold_2": 0.020161269001982816,
225
+ "scr_dir1_threshold_5": 0.10256400851337757,
226
+ "scr_metric_threshold_5": 0.05241939554166917,
227
+ "scr_dir2_threshold_5": 0.05241939554166917,
228
+ "scr_dir1_threshold_10": 0.11282028709877284,
229
+ "scr_metric_threshold_10": 0.060483807005948444,
230
+ "scr_dir2_threshold_10": 0.060483807005948444,
231
+ "scr_dir1_threshold_20": 0.1179487320563267,
232
+ "scr_metric_threshold_20": 0.1008065853511987,
233
+ "scr_dir2_threshold_20": 0.1008065853511987,
234
+ "scr_dir1_threshold_50": 0.17948701489841076,
235
+ "scr_metric_threshold_50": 0.1653225980892868,
236
+ "scr_dir2_threshold_50": 0.1653225980892868,
237
+ "scr_dir1_threshold_100": 0.20512801702675515,
238
+ "scr_metric_threshold_100": 0.19758072462897314,
239
+ "scr_dir2_threshold_100": 0.19758072462897314,
240
+ "scr_dir1_threshold_500": 0.22051274056970427,
241
+ "scr_metric_threshold_500": 0.2943548639067476,
242
+ "scr_dir2_threshold_500": 0.2943548639067476
243
+ },
244
+ {
245
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Pet_Supplies_Office_Products_results",
246
+ "scr_dir1_threshold_2": 0.11261250376556366,
247
+ "scr_metric_threshold_2": 0.10267856311319266,
248
+ "scr_dir2_threshold_2": 0.10267856311319266,
249
+ "scr_dir1_threshold_5": 0.1216216651604412,
250
+ "scr_metric_threshold_5": 0.1607143189758008,
251
+ "scr_dir2_threshold_5": 0.1607143189758008,
252
+ "scr_dir1_threshold_10": 0.14414416591355395,
253
+ "scr_metric_threshold_10": 0.21875007483840891,
254
+ "scr_dir2_threshold_10": 0.21875007483840891,
255
+ "scr_dir1_threshold_20": 0.1621622202139216,
256
+ "scr_metric_threshold_20": 0.29017851322092003,
257
+ "scr_dir2_threshold_20": 0.29017851322092003,
258
+ "scr_dir1_threshold_50": 0.18468472096703434,
259
+ "scr_metric_threshold_50": 0.2857142857142857,
260
+ "scr_dir2_threshold_50": 0.2857142857142857,
261
+ "scr_dir1_threshold_100": 0.15315305881904406,
262
+ "scr_metric_threshold_100": 0.3660714452021861,
263
+ "scr_dir2_threshold_100": 0.3660714452021861,
264
+ "scr_dir1_threshold_500": 0.17117111311941172,
265
+ "scr_metric_threshold_500": 0.4062500249461363,
266
+ "scr_dir2_threshold_500": 0.4062500249461363
267
+ },
268
+ {
269
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Industrial_and_Scientific_Toys_and_Games_results",
270
+ "scr_dir1_threshold_2": 0.08154502375487709,
271
+ "scr_metric_threshold_2": 0.08154502375487709,
272
+ "scr_dir2_threshold_2": 0.04761899355588758,
273
+ "scr_dir1_threshold_5": 0.06866965525497992,
274
+ "scr_metric_threshold_5": 0.06866965525497992,
275
+ "scr_dir2_threshold_5": 0.08095245934396302,
276
+ "scr_dir1_threshold_10": 0.07296144475494565,
277
+ "scr_metric_threshold_10": 0.07296144475494565,
278
+ "scr_dir2_threshold_10": 0.09523798711177515,
279
+ "scr_dir1_threshold_20": 0.06866965525497992,
280
+ "scr_metric_threshold_20": 0.06866965525497992,
281
+ "scr_dir2_threshold_20": 0.10476195612190681,
282
+ "scr_dir1_threshold_50": 0.0643776099411321,
283
+ "scr_metric_threshold_50": 0.0643776099411321,
284
+ "scr_dir2_threshold_50": 0.12857145289985059,
285
+ "scr_dir1_threshold_100": 0.08154502375487709,
286
+ "scr_metric_threshold_100": 0.08154502375487709,
287
+ "scr_dir2_threshold_100": 0.1523809496777944,
288
+ "scr_dir1_threshold_500": 0.12875547569614632,
289
+ "scr_metric_threshold_500": 0.12875547569614632,
290
+ "scr_dir2_threshold_500": 0.19523810064441124
291
+ }
292
+ ],
293
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
294
+ "sae_lens_id": "custom_sae",
295
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_5",
296
+ "sae_lens_version": "5.4.2",
297
+ "sae_cfg_dict": {
298
+ "model_name": "gemma-2-2b",
299
+ "d_in": 2304,
300
+ "d_sae": 65536,
301
+ "hook_layer": 12,
302
+ "hook_name": "blocks.12.hook_resid_post",
303
+ "context_size": null,
304
+ "hook_head_index": null,
305
+ "architecture": "standard_april_update",
306
+ "apply_b_dec_to_input": null,
307
+ "finetuning_scaling_factor": null,
308
+ "activation_fn_str": "",
309
+ "prepend_bos": true,
310
+ "normalize_activations": "none",
311
+ "dtype": "bfloat16",
312
+ "device": "",
313
+ "dataset_path": "",
314
+ "dataset_trust_remote_code": true,
315
+ "seqpos_slice": [
316
+ null
317
+ ],
318
+ "training_tokens": -100000,
319
+ "sae_lens_training_version": null,
320
+ "neuronpedia_id": null
321
+ },
322
+ "eval_result_unstructured": null
323
+ }
eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_0_custom_sae_eval_results.json ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "scr",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "canrager/amazon_reviews_mcauley_1and5"
8
+ ],
9
+ "perform_scr": true,
10
+ "early_stopping_patience": 20,
11
+ "train_set_size": 4000,
12
+ "test_set_size": 1000,
13
+ "context_length": 128,
14
+ "probe_train_batch_size": 16,
15
+ "probe_test_batch_size": 500,
16
+ "probe_epochs": 20,
17
+ "probe_lr": 0.001,
18
+ "probe_l1_penalty": 0.001,
19
+ "sae_batch_size": 125,
20
+ "llm_batch_size": 32,
21
+ "llm_dtype": "bfloat16",
22
+ "lower_vram_usage": false,
23
+ "model_name": "gemma-2-2b",
24
+ "n_values": [
25
+ 2,
26
+ 5,
27
+ 10,
28
+ 20,
29
+ 50,
30
+ 100,
31
+ 500
32
+ ],
33
+ "column1_vals_lookup": {
34
+ "LabHC/bias_in_bios_class_set1": [
35
+ [
36
+ "professor",
37
+ "nurse"
38
+ ],
39
+ [
40
+ "architect",
41
+ "journalist"
42
+ ],
43
+ [
44
+ "surgeon",
45
+ "psychologist"
46
+ ],
47
+ [
48
+ "attorney",
49
+ "teacher"
50
+ ]
51
+ ],
52
+ "canrager/amazon_reviews_mcauley_1and5": [
53
+ [
54
+ "Books",
55
+ "CDs_and_Vinyl"
56
+ ],
57
+ [
58
+ "Software",
59
+ "Electronics"
60
+ ],
61
+ [
62
+ "Pet_Supplies",
63
+ "Office_Products"
64
+ ],
65
+ [
66
+ "Industrial_and_Scientific",
67
+ "Toys_and_Games"
68
+ ]
69
+ ]
70
+ }
71
+ },
72
+ "eval_id": "2ec4e509-aed7-47d4-bf32-debfdb4aec28",
73
+ "datetime_epoch_millis": 1740083940679,
74
+ "eval_result_metrics": {
75
+ "scr_metrics": {
76
+ "scr_dir1_threshold_2": 0.16850950289668623,
77
+ "scr_metric_threshold_2": 0.093901679924911,
78
+ "scr_dir2_threshold_2": 0.0968702100645534,
79
+ "scr_dir1_threshold_5": 0.18118138219026625,
80
+ "scr_metric_threshold_5": 0.15506268109458352,
81
+ "scr_dir2_threshold_5": 0.15874395833242452,
82
+ "scr_dir1_threshold_10": 0.16729569936829275,
83
+ "scr_metric_threshold_10": 0.19084407333268408,
84
+ "scr_dir2_threshold_10": 0.1938049465147259,
85
+ "scr_dir1_threshold_20": 0.10218449386610517,
86
+ "scr_metric_threshold_20": 0.24559804992893186,
87
+ "scr_dir2_threshold_20": 0.2509909475863851,
88
+ "scr_dir1_threshold_50": -0.013090734939745156,
89
+ "scr_metric_threshold_50": 0.28144591081930076,
90
+ "scr_dir2_threshold_50": 0.29703193644611364,
91
+ "scr_dir1_threshold_100": -0.03253704541769291,
92
+ "scr_metric_threshold_100": 0.2892820189548879,
93
+ "scr_dir2_threshold_100": 0.30462535757723425,
94
+ "scr_dir1_threshold_500": -0.2050006117583244,
95
+ "scr_metric_threshold_500": 0.2740043108561361,
96
+ "scr_dir2_threshold_500": 0.2991141280983367
97
+ }
98
+ },
99
+ "eval_result_details": [
100
+ {
101
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_professor_nurse_results",
102
+ "scr_dir1_threshold_2": 0.3968252466498386,
103
+ "scr_metric_threshold_2": 0.007371057746493725,
104
+ "scr_dir2_threshold_2": 0.007371057746493725,
105
+ "scr_dir1_threshold_5": 0.3968252466498386,
106
+ "scr_metric_threshold_5": 0.024570143672067307,
107
+ "scr_dir2_threshold_5": 0.024570143672067307,
108
+ "scr_dir1_threshold_10": 0.3968252466498386,
109
+ "scr_metric_threshold_10": 0.05896816907447914,
110
+ "scr_dir2_threshold_10": 0.05896816907447914,
111
+ "scr_dir1_threshold_20": 0.3015873766750807,
112
+ "scr_metric_threshold_20": 0.07371013811873126,
113
+ "scr_dir2_threshold_20": 0.07371013811873126,
114
+ "scr_dir1_threshold_50": 0.15873009865993543,
115
+ "scr_metric_threshold_50": 0.1081081635211431,
116
+ "scr_dir2_threshold_50": 0.1081081635211431,
117
+ "scr_dir1_threshold_100": 0.07936504932996771,
118
+ "scr_metric_threshold_100": 0.13759224805838266,
119
+ "scr_dir2_threshold_100": 0.13759224805838266,
120
+ "scr_dir1_threshold_500": -0.11111163672556491,
121
+ "scr_metric_threshold_500": 0.09828013534206324,
122
+ "scr_dir2_threshold_500": 0.09828013534206324
123
+ },
124
+ {
125
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_architect_journalist_results",
126
+ "scr_dir1_threshold_2": 0.1414141596586038,
127
+ "scr_metric_threshold_2": 0.21813038288640824,
128
+ "scr_dir2_threshold_2": 0.21813038288640824,
129
+ "scr_dir1_threshold_5": 0.12121179281179859,
130
+ "scr_metric_threshold_5": 0.30878195961506544,
131
+ "scr_dir2_threshold_5": 0.30878195961506544,
132
+ "scr_dir1_threshold_10": 0.06060589640589929,
133
+ "scr_metric_threshold_10": 0.32861190232199783,
134
+ "scr_dir2_threshold_10": 0.32861190232199783,
135
+ "scr_dir1_threshold_20": -0.5555557562446416,
136
+ "scr_metric_threshold_20": 0.37960342478509823,
137
+ "scr_dir2_threshold_20": 0.37960342478509823,
138
+ "scr_dir1_threshold_50": -0.6363640194973461,
139
+ "scr_metric_threshold_50": 0.43059494724819863,
140
+ "scr_dir2_threshold_50": 0.43059494724819863,
141
+ "scr_dir1_threshold_100": -0.48484897744896877,
142
+ "scr_metric_threshold_100": 0.23512750075687772,
143
+ "scr_dir2_threshold_100": 0.23512750075687772,
144
+ "scr_dir1_threshold_500": -0.8484855600188809,
145
+ "scr_metric_threshold_500": 0.17563733493269656,
146
+ "scr_dir2_threshold_500": 0.17563733493269656
147
+ },
148
+ {
149
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_surgeon_psychologist_results",
150
+ "scr_dir1_threshold_2": 0.46774185795438705,
151
+ "scr_metric_threshold_2": 0.017676839894409477,
152
+ "scr_dir2_threshold_2": 0.017676839894409477,
153
+ "scr_dir1_threshold_5": 0.4999995193171997,
154
+ "scr_metric_threshold_5": 0.042929339300620824,
155
+ "scr_dir2_threshold_5": 0.042929339300620824,
156
+ "scr_dir1_threshold_10": 0.40322557386316116,
157
+ "scr_metric_threshold_10": 0.06313139903229759,
158
+ "scr_dir2_threshold_10": 0.06313139903229759,
159
+ "scr_dir1_threshold_20": 0.40322557386316116,
160
+ "scr_metric_threshold_20": 0.11111117800745299,
161
+ "scr_dir2_threshold_20": 0.11111117800745299,
162
+ "scr_dir1_threshold_50": -0.09677490681963916,
163
+ "scr_metric_threshold_50": 0.15151514695403728,
164
+ "scr_dir2_threshold_50": 0.15151514695403728,
165
+ "scr_dir1_threshold_100": -0.3870977045473554,
166
+ "scr_metric_threshold_100": 0.20959595579503107,
167
+ "scr_dir2_threshold_100": 0.20959595579503107,
168
+ "scr_dir1_threshold_500": -1.0806460761382328,
169
+ "scr_metric_threshold_500": 0.11616161768198757,
170
+ "scr_dir2_threshold_500": 0.11616161768198757
171
+ },
172
+ {
173
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_attorney_teacher_results",
174
+ "scr_dir1_threshold_2": 0.14634148705319544,
175
+ "scr_metric_threshold_2": 0.18768316194811074,
176
+ "scr_dir2_threshold_2": 0.18768316194811074,
177
+ "scr_dir1_threshold_5": 0.15447146198048264,
178
+ "scr_metric_threshold_5": 0.25513185562739576,
179
+ "scr_dir2_threshold_5": 0.25513185562739576,
180
+ "scr_dir1_threshold_10": 0.12195107768082737,
181
+ "scr_metric_threshold_10": 0.13489738735857004,
182
+ "scr_dir2_threshold_10": 0.13489738735857004,
183
+ "scr_dir1_threshold_20": 0.07317074352659772,
184
+ "scr_metric_threshold_20": 0.20234608103785506,
185
+ "scr_dir2_threshold_20": 0.20234608103785506,
186
+ "scr_dir1_threshold_50": 0.032520384299655265,
187
+ "scr_metric_threshold_50": 0.22580636703530657,
188
+ "scr_dir2_threshold_50": 0.22580636703530657,
189
+ "scr_dir1_threshold_100": 0.016260434445080872,
190
+ "scr_metric_threshold_100": 0.2521993417269268,
191
+ "scr_dir2_threshold_100": 0.2521993417269268,
192
+ "scr_dir1_threshold_500": 0.07317074352659772,
193
+ "scr_metric_threshold_500": 0.18768316194811074,
194
+ "scr_dir2_threshold_500": 0.18768316194811074
195
+ },
196
+ {
197
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Books_CDs_and_Vinyl_results",
198
+ "scr_dir1_threshold_2": 0.021857964433295084,
199
+ "scr_metric_threshold_2": 0.10937497089617432,
200
+ "scr_dir2_threshold_2": 0.10937497089617432,
201
+ "scr_dir1_threshold_5": 0.03278678379574697,
202
+ "scr_metric_threshold_5": 0.1835939173469977,
203
+ "scr_dir2_threshold_5": 0.1835939173469977,
204
+ "scr_dir1_threshold_10": 0.03825119347697291,
205
+ "scr_metric_threshold_10": 0.3515625727595642,
206
+ "scr_dir2_threshold_10": 0.3515625727595642,
207
+ "scr_dir1_threshold_20": 0.04918033854781611,
208
+ "scr_metric_threshold_20": 0.4414063154836078,
209
+ "scr_dir2_threshold_20": 0.4414063154836078,
210
+ "scr_dir1_threshold_50": 0.07103830298111119,
211
+ "scr_metric_threshold_50": 0.542968888243172,
212
+ "scr_dir2_threshold_50": 0.542968888243172,
213
+ "scr_dir1_threshold_100": -0.05464474822904205,
214
+ "scr_metric_threshold_100": 0.621093800931695,
215
+ "scr_dir2_threshold_100": 0.621093800931695,
216
+ "scr_dir1_threshold_500": 0.021857964433295084,
217
+ "scr_metric_threshold_500": 0.621093800931695,
218
+ "scr_dir2_threshold_500": 0.621093800931695
219
+ },
220
+ {
221
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Software_Electronics_results",
222
+ "scr_dir1_threshold_2": 0.07692300638503319,
223
+ "scr_metric_threshold_2": 0.056451601273808806,
224
+ "scr_dir2_threshold_2": 0.056451601273808806,
225
+ "scr_dir1_threshold_5": 0.10256400851337757,
226
+ "scr_metric_threshold_5": 0.0927419335456348,
227
+ "scr_dir2_threshold_5": 0.0927419335456348,
228
+ "scr_dir1_threshold_10": 0.16410229135546164,
229
+ "scr_metric_threshold_10": 0.1733872498948507,
230
+ "scr_dir2_threshold_10": 0.1733872498948507,
231
+ "scr_dir1_threshold_20": 0.24615374269804866,
232
+ "scr_metric_threshold_20": 0.2217741993630956,
233
+ "scr_dir2_threshold_20": 0.2217741993630956,
234
+ "scr_dir1_threshold_50": 0.17948701489841076,
235
+ "scr_metric_threshold_50": 0.2500001201706423,
236
+ "scr_dir2_threshold_50": 0.2500001201706423,
237
+ "scr_dir1_threshold_100": 0.20512801702675515,
238
+ "scr_metric_threshold_100": 0.29032265817460795,
239
+ "scr_dir2_threshold_100": 0.29032265817460795,
240
+ "scr_dir1_threshold_500": 0.12307687134902433,
241
+ "scr_metric_threshold_500": 0.41935492399206875,
242
+ "scr_dir2_threshold_500": 0.41935492399206875
243
+ },
244
+ {
245
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Pet_Supplies_Office_Products_results",
246
+ "scr_dir1_threshold_2": 0.05405389441171559,
247
+ "scr_metric_threshold_2": 0.11160701812646134,
248
+ "scr_dir2_threshold_2": 0.11160701812646134,
249
+ "scr_dir1_threshold_5": 0.09009000301245093,
250
+ "scr_metric_threshold_5": 0.28125005820765137,
251
+ "scr_dir2_threshold_5": 0.28125005820765137,
252
+ "scr_dir1_threshold_10": 0.06756750225933819,
253
+ "scr_metric_threshold_10": 0.33035709296487026,
254
+ "scr_dir2_threshold_10": 0.33035709296487026,
255
+ "scr_dir1_threshold_20": 0.16666666666666666,
256
+ "scr_metric_threshold_20": 0.40178579743950193,
257
+ "scr_dir2_threshold_20": 0.40178579743950193,
258
+ "scr_dir1_threshold_50": 0.13513500451867638,
259
+ "scr_metric_threshold_50": 0.49107141194067105,
260
+ "scr_dir2_threshold_50": 0.49107141194067105,
261
+ "scr_dir1_threshold_100": 0.2882883318271079,
262
+ "scr_metric_threshold_100": 0.49107141194067105,
263
+ "scr_dir2_threshold_100": 0.49107141194067105,
264
+ "scr_dir1_threshold_500": 0.13063055806593132,
265
+ "scr_metric_threshold_500": 0.5223212705792319,
266
+ "scr_dir2_threshold_500": 0.5223212705792319
267
+ },
268
+ {
269
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Industrial_and_Scientific_Toys_and_Games_results",
270
+ "scr_dir1_threshold_2": 0.042918406627421406,
271
+ "scr_metric_threshold_2": 0.042918406627421406,
272
+ "scr_dir2_threshold_2": 0.06666664774456064,
273
+ "scr_dir1_threshold_5": 0.05150224144123495,
274
+ "scr_metric_threshold_5": 0.05150224144123495,
275
+ "scr_dir2_threshold_5": 0.08095245934396302,
276
+ "scr_dir1_threshold_10": 0.08583681325484281,
277
+ "scr_metric_threshold_10": 0.08583681325484281,
278
+ "scr_dir2_threshold_10": 0.10952379871117751,
279
+ "scr_dir1_threshold_20": 0.13304726519611204,
280
+ "scr_metric_threshold_20": 0.13304726519611204,
281
+ "scr_dir2_threshold_20": 0.17619044645573817,
282
+ "scr_dir1_threshold_50": 0.05150224144123495,
283
+ "scr_metric_threshold_50": 0.05150224144123495,
284
+ "scr_dir2_threshold_50": 0.17619044645573817,
285
+ "scr_dir1_threshold_100": 0.07725323425491137,
286
+ "scr_metric_threshold_100": 0.07725323425491137,
287
+ "scr_dir2_threshold_100": 0.19999994323368195,
288
+ "scr_dir1_threshold_500": 0.05150224144123495,
289
+ "scr_metric_threshold_500": 0.05150224144123495,
290
+ "scr_dir2_threshold_500": 0.25238077937884024
291
+ }
292
+ ],
293
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
294
+ "sae_lens_id": "custom_sae",
295
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_0",
296
+ "sae_lens_version": "5.4.2",
297
+ "sae_cfg_dict": {
298
+ "model_name": "gemma-2-2b",
299
+ "d_in": 2304,
300
+ "d_sae": 16384,
301
+ "hook_layer": 12,
302
+ "hook_name": "blocks.12.hook_resid_post",
303
+ "context_size": null,
304
+ "hook_head_index": null,
305
+ "architecture": "topk",
306
+ "apply_b_dec_to_input": null,
307
+ "finetuning_scaling_factor": null,
308
+ "activation_fn_str": "",
309
+ "prepend_bos": true,
310
+ "normalize_activations": "none",
311
+ "dtype": "bfloat16",
312
+ "device": "",
313
+ "dataset_path": "",
314
+ "dataset_trust_remote_code": true,
315
+ "seqpos_slice": [
316
+ null
317
+ ],
318
+ "training_tokens": -100000,
319
+ "sae_lens_training_version": null,
320
+ "neuronpedia_id": null
321
+ },
322
+ "eval_result_unstructured": null
323
+ }
eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_1_custom_sae_eval_results.json ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "scr",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "canrager/amazon_reviews_mcauley_1and5"
8
+ ],
9
+ "perform_scr": true,
10
+ "early_stopping_patience": 20,
11
+ "train_set_size": 4000,
12
+ "test_set_size": 1000,
13
+ "context_length": 128,
14
+ "probe_train_batch_size": 16,
15
+ "probe_test_batch_size": 500,
16
+ "probe_epochs": 20,
17
+ "probe_lr": 0.001,
18
+ "probe_l1_penalty": 0.001,
19
+ "sae_batch_size": 125,
20
+ "llm_batch_size": 32,
21
+ "llm_dtype": "bfloat16",
22
+ "lower_vram_usage": false,
23
+ "model_name": "gemma-2-2b",
24
+ "n_values": [
25
+ 2,
26
+ 5,
27
+ 10,
28
+ 20,
29
+ 50,
30
+ 100,
31
+ 500
32
+ ],
33
+ "column1_vals_lookup": {
34
+ "LabHC/bias_in_bios_class_set1": [
35
+ [
36
+ "professor",
37
+ "nurse"
38
+ ],
39
+ [
40
+ "architect",
41
+ "journalist"
42
+ ],
43
+ [
44
+ "surgeon",
45
+ "psychologist"
46
+ ],
47
+ [
48
+ "attorney",
49
+ "teacher"
50
+ ]
51
+ ],
52
+ "canrager/amazon_reviews_mcauley_1and5": [
53
+ [
54
+ "Books",
55
+ "CDs_and_Vinyl"
56
+ ],
57
+ [
58
+ "Software",
59
+ "Electronics"
60
+ ],
61
+ [
62
+ "Pet_Supplies",
63
+ "Office_Products"
64
+ ],
65
+ [
66
+ "Industrial_and_Scientific",
67
+ "Toys_and_Games"
68
+ ]
69
+ ]
70
+ }
71
+ },
72
+ "eval_id": "aa8a56f3-bca3-4809-8c0f-2debd808c38d",
73
+ "datetime_epoch_millis": 1740083428528,
74
+ "eval_result_metrics": {
75
+ "scr_metrics": {
76
+ "scr_dir1_threshold_2": 0.1699267441709838,
77
+ "scr_metric_threshold_2": 0.11889230522170918,
78
+ "scr_dir2_threshold_2": 0.12370531844744001,
79
+ "scr_dir1_threshold_5": 0.18619048742565383,
80
+ "scr_metric_threshold_5": 0.19779614726768455,
81
+ "scr_dir2_threshold_5": 0.20254274689965168,
82
+ "scr_dir1_threshold_10": 0.19342518870248077,
83
+ "scr_metric_threshold_10": 0.2473679439686653,
84
+ "scr_dir2_threshold_10": 0.25818439909070934,
85
+ "scr_dir1_threshold_20": 0.20336369618692823,
86
+ "scr_metric_threshold_20": 0.28983190251141955,
87
+ "scr_dir2_threshold_20": 0.3000454703522042,
88
+ "scr_dir1_threshold_50": 0.16258389538425,
89
+ "scr_metric_threshold_50": 0.3555756830330066,
90
+ "scr_dir2_threshold_50": 0.36274665018278457,
91
+ "scr_dir1_threshold_100": 0.056663748267019316,
92
+ "scr_metric_threshold_100": 0.3397213764030587,
93
+ "scr_dir2_threshold_100": 0.34593687747343627,
94
+ "scr_dir1_threshold_500": -0.25931553028314963,
95
+ "scr_metric_threshold_500": 0.3191237256612666,
96
+ "scr_dir2_threshold_500": 0.33499588101350813
97
+ }
98
+ },
99
+ "eval_result_details": [
100
+ {
101
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_professor_nurse_results",
102
+ "scr_dir1_threshold_2": 0.3968252466498386,
103
+ "scr_metric_threshold_2": 0.01474211549298745,
104
+ "scr_dir2_threshold_2": 0.01474211549298745,
105
+ "scr_dir1_threshold_5": 0.41269806729462877,
106
+ "scr_metric_threshold_5": 0.04668317046281315,
107
+ "scr_dir2_threshold_5": 0.04668317046281315,
108
+ "scr_dir1_threshold_10": 0.42857088793941894,
109
+ "scr_metric_threshold_10": 0.05896816907447914,
110
+ "scr_dir2_threshold_10": 0.05896816907447914,
111
+ "scr_dir1_threshold_20": 0.3809524260050484,
112
+ "scr_metric_threshold_20": 0.09090907759556952,
113
+ "scr_dir2_threshold_20": 0.09090907759556952,
114
+ "scr_dir1_threshold_50": 0.3809524260050484,
115
+ "scr_metric_threshold_50": 0.13267816074447508,
116
+ "scr_dir2_threshold_50": 0.13267816074447508,
117
+ "scr_dir1_threshold_100": 0.3492058386094512,
118
+ "scr_metric_threshold_100": 0.1719902734607945,
119
+ "scr_dir2_threshold_100": 0.1719902734607945,
120
+ "scr_dir1_threshold_500": -0.5238106501262105,
121
+ "scr_metric_threshold_500": 0.20884526929579247,
122
+ "scr_dir2_threshold_500": 0.20884526929579247
123
+ },
124
+ {
125
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_architect_journalist_results",
126
+ "scr_dir1_threshold_2": 0.10101002803225154,
127
+ "scr_metric_threshold_2": 0.17280451009623365,
128
+ "scr_dir2_threshold_2": 0.17280451009623365,
129
+ "scr_dir1_threshold_5": 0.16161592443815084,
130
+ "scr_metric_threshold_5": 0.26345608682489086,
131
+ "scr_dir2_threshold_5": 0.26345608682489086,
132
+ "scr_dir1_threshold_10": 0.1414141596586038,
133
+ "scr_metric_threshold_10": 0.29461749772936685,
134
+ "scr_dir2_threshold_10": 0.29461749772936685,
135
+ "scr_dir1_threshold_20": 0.040403529559094105,
136
+ "scr_metric_threshold_20": 0.36827195658755457,
137
+ "scr_dir2_threshold_20": 0.36827195658755457,
138
+ "scr_dir1_threshold_50": 0.09090914564247801,
139
+ "scr_metric_threshold_50": 0.4447592402822052,
140
+ "scr_dir2_threshold_50": 0.4447592402822052,
141
+ "scr_dir1_threshold_100": -0.434343963432843,
142
+ "scr_metric_threshold_100": 0.11898016279667031,
143
+ "scr_dir2_threshold_100": 0.11898016279667031,
144
+ "scr_dir1_threshold_500": -0.979798235220453,
145
+ "scr_metric_threshold_500": 0.011331468197543647,
146
+ "scr_dir2_threshold_500": 0.011331468197543647
147
+ },
148
+ {
149
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_surgeon_psychologist_results",
150
+ "scr_dir1_threshold_2": 0.516128349998606,
151
+ "scr_metric_threshold_2": 0.03787889962608624,
152
+ "scr_dir2_threshold_2": 0.03787889962608624,
153
+ "scr_dir1_threshold_5": 0.532258142045613,
154
+ "scr_metric_threshold_5": 0.08333330824720513,
155
+ "scr_dir2_threshold_5": 0.08333330824720513,
156
+ "scr_dir1_threshold_10": 0.46774185795438705,
157
+ "scr_metric_threshold_10": 0.1085859581701857,
158
+ "scr_dir2_threshold_10": 0.1085859581701857,
159
+ "scr_dir1_threshold_20": 0.3870967431817548,
160
+ "scr_metric_threshold_20": 0.19444448625465813,
161
+ "scr_dir2_threshold_20": 0.19444448625465813,
162
+ "scr_dir1_threshold_50": 0.3870967431817548,
163
+ "scr_metric_threshold_50": 0.2550505149329192,
164
+ "scr_dir2_threshold_50": 0.2550505149329192,
165
+ "scr_dir1_threshold_100": -0.5483879340926199,
166
+ "scr_metric_threshold_100": 0.3080808840993784,
167
+ "scr_dir2_threshold_100": 0.3080808840993784,
168
+ "scr_dir1_threshold_500": -1.1290335295480525,
169
+ "scr_metric_threshold_500": 0.08585867860124165,
170
+ "scr_dir2_threshold_500": 0.08585867860124165
171
+ },
172
+ {
173
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_attorney_teacher_results",
174
+ "scr_dir1_threshold_2": 0.0894311779716786,
175
+ "scr_metric_threshold_2": 0.18768316194811074,
176
+ "scr_dir2_threshold_2": 0.18768316194811074,
177
+ "scr_dir1_threshold_5": -0.01625994985457439,
178
+ "scr_metric_threshold_5": 0.28445751901318467,
179
+ "scr_dir2_threshold_5": 0.28445751901318467,
180
+ "scr_dir1_threshold_10": -0.040650359226942455,
181
+ "scr_metric_threshold_10": 0.3401759822968941,
182
+ "scr_dir2_threshold_10": 0.3401759822968941,
183
+ "scr_dir1_threshold_20": 0.10569112782625298,
184
+ "scr_metric_threshold_20": 0.21700882533389967,
185
+ "scr_dir2_threshold_20": 0.21700882533389967,
186
+ "scr_dir1_threshold_50": -0.07317074352659772,
187
+ "scr_metric_threshold_50": 0.2932550607145916,
188
+ "scr_dir2_threshold_50": 0.2932550607145916,
189
+ "scr_dir1_threshold_100": 0.34146330826062055,
190
+ "scr_metric_threshold_100": 0.31671552150574284,
191
+ "scr_dir2_threshold_100": 0.31671552150574284,
192
+ "scr_dir1_threshold_500": 0.032520384299655265,
193
+ "scr_metric_threshold_500": 0.2434018000255199,
194
+ "scr_dir2_threshold_500": 0.2434018000255199
195
+ },
196
+ {
197
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Books_CDs_and_Vinyl_results",
198
+ "scr_dir1_threshold_2": 0.03278678379574697,
199
+ "scr_metric_threshold_2": 0.10156257275956422,
200
+ "scr_dir2_threshold_2": 0.10156257275956422,
201
+ "scr_dir1_threshold_5": 0.0,
202
+ "scr_metric_threshold_5": 0.1992187136202179,
203
+ "scr_dir2_threshold_5": 0.1992187136202179,
204
+ "scr_dir1_threshold_10": 0.01092881936245188,
205
+ "scr_metric_threshold_10": 0.42578128637978213,
206
+ "scr_dir2_threshold_10": 0.42578128637978213,
207
+ "scr_dir1_threshold_20": 0.021857964433295084,
208
+ "scr_metric_threshold_20": 0.5742187136202179,
209
+ "scr_dir2_threshold_20": 0.5742187136202179,
210
+ "scr_dir1_threshold_50": -0.021857964433295084,
211
+ "scr_metric_threshold_50": 0.621093800931695,
212
+ "scr_dir2_threshold_50": 0.621093800931695,
213
+ "scr_dir1_threshold_100": 0.04371592886659017,
214
+ "scr_metric_threshold_100": 0.6875001164153027,
215
+ "scr_dir2_threshold_100": 0.6875001164153027,
216
+ "scr_dir1_threshold_500": 0.06557389329988525,
217
+ "scr_metric_threshold_500": 0.7187499417923486,
218
+ "scr_dir2_threshold_500": 0.7187499417923486
219
+ },
220
+ {
221
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Software_Electronics_results",
222
+ "scr_dir1_threshold_2": 0.07692300638503319,
223
+ "scr_metric_threshold_2": 0.11693564862104187,
224
+ "scr_dir2_threshold_2": 0.11693564862104187,
225
+ "scr_dir1_threshold_5": 0.1692307363130155,
226
+ "scr_metric_threshold_5": 0.17741945562699032,
227
+ "scr_dir2_threshold_5": 0.17741945562699032,
228
+ "scr_dir1_threshold_10": 0.2512818819907463,
229
+ "scr_metric_threshold_10": 0.1653225980892868,
230
+ "scr_dir2_threshold_10": 0.1653225980892868,
231
+ "scr_dir1_threshold_20": 0.29230760766203984,
232
+ "scr_metric_threshold_20": 0.20967734182539205,
233
+ "scr_dir2_threshold_20": 0.20967734182539205,
234
+ "scr_dir1_threshold_50": 0.22564087986240192,
235
+ "scr_metric_threshold_50": 0.35483867091269605,
236
+ "scr_dir2_threshold_50": 0.35483867091269605,
237
+ "scr_dir1_threshold_100": 0.29230760766203984,
238
+ "scr_metric_threshold_100": 0.3588711169861203,
239
+ "scr_dir2_threshold_100": 0.3588711169861203,
240
+ "scr_dir1_threshold_500": 0.1999998777340575,
241
+ "scr_metric_threshold_500": 0.5483871898095295,
242
+ "scr_dir2_threshold_500": 0.5483871898095295
243
+ },
244
+ {
245
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Pet_Supplies_Office_Products_results",
246
+ "scr_dir1_threshold_2": 0.09909916440732847,
247
+ "scr_metric_threshold_2": 0.27232133710226214,
248
+ "scr_dir2_threshold_2": 0.27232133710226214,
249
+ "scr_dir1_threshold_5": 0.14414416591355395,
250
+ "scr_metric_threshold_5": 0.4419641110913316,
251
+ "scr_dir2_threshold_5": 0.4419641110913316,
252
+ "scr_dir1_threshold_10": 0.19369361387252446,
253
+ "scr_metric_threshold_10": 0.49107141194067105,
254
+ "scr_dir2_threshold_10": 0.49107141194067105,
255
+ "scr_dir1_threshold_20": 0.26126111613186265,
256
+ "scr_metric_threshold_20": 0.5267857641779868,
257
+ "scr_dir2_threshold_20": 0.5267857641779868,
258
+ "scr_dir1_threshold_50": 0.13513500451867638,
259
+ "scr_metric_threshold_50": 0.5669643439219371,
260
+ "scr_dir2_threshold_50": 0.5669643439219371,
261
+ "scr_dir1_threshold_100": 0.2162161146256372,
262
+ "scr_metric_threshold_100": 0.5624998503231822,
263
+ "scr_dir2_threshold_100": 0.5624998503231822,
264
+ "scr_dir1_threshold_500": 0.14414416591355395,
265
+ "scr_metric_threshold_500": 0.6205356061857903,
266
+ "scr_dir2_threshold_500": 0.6205356061857903
267
+ },
268
+ {
269
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Industrial_and_Scientific_Toys_and_Games_results",
270
+ "scr_dir1_threshold_2": 0.047210196127387125,
271
+ "scr_metric_threshold_2": 0.047210196127387125,
272
+ "scr_dir2_threshold_2": 0.08571430193323373,
273
+ "scr_dir1_threshold_5": 0.08583681325484281,
274
+ "scr_metric_threshold_5": 0.08583681325484281,
275
+ "scr_dir2_threshold_5": 0.12380961031057988,
276
+ "scr_dir1_threshold_10": 0.09442064806865635,
277
+ "scr_metric_threshold_10": 0.09442064806865635,
278
+ "scr_dir2_threshold_10": 0.18095228904500887,
279
+ "scr_dir1_threshold_20": 0.13733905469607777,
280
+ "scr_metric_threshold_20": 0.13733905469607777,
281
+ "scr_dir2_threshold_20": 0.21904759742235502,
282
+ "scr_dir1_threshold_50": 0.17596567182353345,
283
+ "scr_metric_threshold_50": 0.17596567182353345,
284
+ "scr_dir2_threshold_50": 0.2333334090217574,
285
+ "scr_dir1_threshold_100": 0.19313308563727843,
286
+ "scr_metric_threshold_100": 0.19313308563727843,
287
+ "scr_dir2_threshold_100": 0.24285709420029883,
288
+ "scr_dir1_threshold_500": 0.11587985138236706,
289
+ "scr_metric_threshold_500": 0.11587985138236706,
290
+ "scr_dir2_threshold_500": 0.24285709420029883
291
+ }
292
+ ],
293
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
294
+ "sae_lens_id": "custom_sae",
295
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_1",
296
+ "sae_lens_version": "5.4.2",
297
+ "sae_cfg_dict": {
298
+ "model_name": "gemma-2-2b",
299
+ "d_in": 2304,
300
+ "d_sae": 16384,
301
+ "hook_layer": 12,
302
+ "hook_name": "blocks.12.hook_resid_post",
303
+ "context_size": null,
304
+ "hook_head_index": null,
305
+ "architecture": "topk",
306
+ "apply_b_dec_to_input": null,
307
+ "finetuning_scaling_factor": null,
308
+ "activation_fn_str": "",
309
+ "prepend_bos": true,
310
+ "normalize_activations": "none",
311
+ "dtype": "bfloat16",
312
+ "device": "",
313
+ "dataset_path": "",
314
+ "dataset_trust_remote_code": true,
315
+ "seqpos_slice": [
316
+ null
317
+ ],
318
+ "training_tokens": -100000,
319
+ "sae_lens_training_version": null,
320
+ "neuronpedia_id": null
321
+ },
322
+ "eval_result_unstructured": null
323
+ }
eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_2_custom_sae_eval_results.json ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "scr",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "canrager/amazon_reviews_mcauley_1and5"
8
+ ],
9
+ "perform_scr": true,
10
+ "early_stopping_patience": 20,
11
+ "train_set_size": 4000,
12
+ "test_set_size": 1000,
13
+ "context_length": 128,
14
+ "probe_train_batch_size": 16,
15
+ "probe_test_batch_size": 500,
16
+ "probe_epochs": 20,
17
+ "probe_lr": 0.001,
18
+ "probe_l1_penalty": 0.001,
19
+ "sae_batch_size": 125,
20
+ "llm_batch_size": 32,
21
+ "llm_dtype": "bfloat16",
22
+ "lower_vram_usage": false,
23
+ "model_name": "gemma-2-2b",
24
+ "n_values": [
25
+ 2,
26
+ 5,
27
+ 10,
28
+ 20,
29
+ 50,
30
+ 100,
31
+ 500
32
+ ],
33
+ "column1_vals_lookup": {
34
+ "LabHC/bias_in_bios_class_set1": [
35
+ [
36
+ "professor",
37
+ "nurse"
38
+ ],
39
+ [
40
+ "architect",
41
+ "journalist"
42
+ ],
43
+ [
44
+ "surgeon",
45
+ "psychologist"
46
+ ],
47
+ [
48
+ "attorney",
49
+ "teacher"
50
+ ]
51
+ ],
52
+ "canrager/amazon_reviews_mcauley_1and5": [
53
+ [
54
+ "Books",
55
+ "CDs_and_Vinyl"
56
+ ],
57
+ [
58
+ "Software",
59
+ "Electronics"
60
+ ],
61
+ [
62
+ "Pet_Supplies",
63
+ "Office_Products"
64
+ ],
65
+ [
66
+ "Industrial_and_Scientific",
67
+ "Toys_and_Games"
68
+ ]
69
+ ]
70
+ }
71
+ },
72
+ "eval_id": "e59e15aa-1732-4bcf-ada2-f23059acd302",
73
+ "datetime_epoch_millis": 1740084108689,
74
+ "eval_result_metrics": {
75
+ "scr_metrics": {
76
+ "scr_dir1_threshold_2": 0.23016029609896008,
77
+ "scr_metric_threshold_2": 0.133505232259874,
78
+ "scr_dir2_threshold_2": 0.1334899183446729,
79
+ "scr_dir1_threshold_5": 0.24746817668817958,
80
+ "scr_metric_threshold_5": 0.18929242279049366,
81
+ "scr_dir2_threshold_5": 0.19707393417913163,
82
+ "scr_dir1_threshold_10": 0.24762715808396457,
83
+ "scr_metric_threshold_10": 0.257545207298011,
84
+ "scr_dir2_threshold_10": 0.2646650392902777,
85
+ "scr_dir1_threshold_20": 0.1956337478852235,
86
+ "scr_metric_threshold_20": 0.31836927809173293,
87
+ "scr_dir2_threshold_20": 0.32160859206486747,
88
+ "scr_dir1_threshold_50": 0.19076252750729009,
89
+ "scr_metric_threshold_50": 0.39533970852666916,
90
+ "scr_dir2_threshold_50": 0.3927365046200435,
91
+ "scr_dir1_threshold_100": 0.08035173906669002,
92
+ "scr_metric_threshold_100": 0.4207988205734642,
93
+ "scr_dir2_threshold_100": 0.42021637316362975,
94
+ "scr_dir1_threshold_500": -0.5984937803274676,
95
+ "scr_metric_threshold_500": 0.33555481586389424,
96
+ "scr_dir2_threshold_500": 0.3185253871706358
97
+ }
98
+ },
99
+ "eval_result_details": [
100
+ {
101
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_professor_nurse_results",
102
+ "scr_dir1_threshold_2": 0.46031747533501616,
103
+ "scr_metric_threshold_2": 0.024570143672067307,
104
+ "scr_dir2_threshold_2": 0.024570143672067307,
105
+ "scr_dir1_threshold_5": 0.5396825246649839,
106
+ "scr_metric_threshold_5": 0.04914014089539928,
107
+ "scr_dir2_threshold_5": 0.04914014089539928,
108
+ "scr_dir1_threshold_10": 0.5238097040201937,
109
+ "scr_metric_threshold_10": 0.05896816907447914,
110
+ "scr_dir2_threshold_10": 0.05896816907447914,
111
+ "scr_dir1_threshold_20": 0.47619029597980633,
112
+ "scr_metric_threshold_20": 0.07862407898390353,
113
+ "scr_dir2_threshold_20": 0.07862407898390353,
114
+ "scr_dir1_threshold_50": 0.47619029597980633,
115
+ "scr_metric_threshold_50": 0.1277642198793028,
116
+ "scr_dir2_threshold_50": 0.1277642198793028,
117
+ "scr_dir1_threshold_100": 0.20634856059430595,
118
+ "scr_metric_threshold_100": 0.16461921571430077,
119
+ "scr_dir2_threshold_100": 0.16461921571430077,
120
+ "scr_dir1_threshold_500": -1.1428582241211622,
121
+ "scr_metric_threshold_500": 0.22604420877263073,
122
+ "scr_dir2_threshold_500": 0.22604420877263073
123
+ },
124
+ {
125
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_architect_journalist_results",
126
+ "scr_dir1_threshold_2": 0.2121209384542766,
127
+ "scr_metric_threshold_2": 0.16997168525977072,
128
+ "scr_dir2_threshold_2": 0.16997168525977072,
129
+ "scr_dir1_threshold_5": 0.22222182084405012,
130
+ "scr_metric_threshold_5": 0.18980162796670313,
131
+ "scr_dir2_threshold_5": 0.18980162796670313,
132
+ "scr_dir1_threshold_10": 0.2121209384542766,
133
+ "scr_metric_threshold_10": 0.2974504914175218,
134
+ "scr_dir2_threshold_10": 0.2974504914175218,
135
+ "scr_dir1_threshold_20": -0.12121239487905673,
136
+ "scr_metric_threshold_20": 0.3711047814240175,
137
+ "scr_dir2_threshold_20": 0.3711047814240175,
138
+ "scr_dir1_threshold_50": -0.06060649847315743,
139
+ "scr_metric_threshold_50": 0.42209630388711794,
140
+ "scr_dir2_threshold_50": 0.42209630388711794,
141
+ "scr_dir1_threshold_100": -1.1010106300995097,
142
+ "scr_metric_threshold_100": 0.419263479050655,
143
+ "scr_dir2_threshold_100": 0.419263479050655,
144
+ "scr_dir1_threshold_500": -1.90909145642478,
145
+ "scr_metric_threshold_500": -0.15864021706222708,
146
+ "scr_dir2_threshold_500": -0.15864021706222708
147
+ },
148
+ {
149
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_surgeon_psychologist_results",
150
+ "scr_dir1_threshold_2": 0.532258142045613,
151
+ "scr_metric_threshold_2": 0.042929339300620824,
152
+ "scr_dir2_threshold_2": 0.042929339300620824,
153
+ "scr_dir1_threshold_5": 0.532258142045613,
154
+ "scr_metric_threshold_5": 0.08333330824720513,
155
+ "scr_dir2_threshold_5": 0.08333330824720513,
156
+ "scr_dir1_threshold_10": 0.4999995193171997,
157
+ "scr_metric_threshold_10": 0.13636367741366434,
158
+ "scr_dir2_threshold_10": 0.13636367741366434,
159
+ "scr_dir1_threshold_20": 0.532258142045613,
160
+ "scr_metric_threshold_20": 0.21717176582360218,
161
+ "scr_dir2_threshold_20": 0.21717176582360218,
162
+ "scr_dir1_threshold_50": 0.29032183636211567,
163
+ "scr_metric_threshold_50": 0.25757573477018647,
164
+ "scr_dir2_threshold_50": 0.25757573477018647,
165
+ "scr_dir1_threshold_100": 0.20967672158948342,
166
+ "scr_metric_threshold_100": 0.3080808840993784,
167
+ "scr_dir2_threshold_100": 0.3080808840993784,
168
+ "scr_dir1_threshold_500": -2.967743300002788,
169
+ "scr_metric_threshold_500": -0.010101029865838383,
170
+ "scr_dir2_threshold_500": -0.010101029865838383
171
+ },
172
+ {
173
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_attorney_teacher_results",
174
+ "scr_dir1_threshold_2": 0.2764230242518165,
175
+ "scr_metric_threshold_2": 0.211143622739262,
176
+ "scr_dir2_threshold_2": 0.211143622739262,
177
+ "scr_dir1_threshold_5": 0.1869918462801379,
178
+ "scr_metric_threshold_5": 0.2668620860229714,
179
+ "scr_dir2_threshold_5": 0.2668620860229714,
180
+ "scr_dir1_threshold_10": -0.032520384299655265,
181
+ "scr_metric_threshold_10": 0.33724329360272537,
182
+ "scr_dir2_threshold_10": 0.33724329360272537,
183
+ "scr_dir1_threshold_20": -0.22764220550708036,
184
+ "scr_metric_threshold_20": 0.29618757461506057,
185
+ "scr_dir2_threshold_20": 0.29618757461506057,
186
+ "scr_dir1_threshold_50": -0.14634148705319544,
187
+ "scr_metric_threshold_50": 0.39882695948107244,
188
+ "scr_dir2_threshold_50": 0.39882695948107244,
189
+ "scr_dir1_threshold_100": 0.49593477024110316,
190
+ "scr_metric_threshold_100": 0.3313782658017874,
191
+ "scr_dir2_threshold_100": 0.3313782658017874,
192
+ "scr_dir1_threshold_500": 0.24390263995216122,
193
+ "scr_metric_threshold_500": 0.26099705822203345,
194
+ "scr_dir2_threshold_500": 0.26099705822203345
195
+ },
196
+ {
197
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Books_CDs_and_Vinyl_results",
198
+ "scr_dir1_threshold_2": 0.05464474822904205,
199
+ "scr_metric_threshold_2": 0.1328126309672156,
200
+ "scr_dir2_threshold_2": 0.1328126309672156,
201
+ "scr_dir1_threshold_5": 0.03278678379574697,
202
+ "scr_metric_threshold_5": 0.2421876018633899,
203
+ "scr_dir2_threshold_5": 0.2421876018633899,
204
+ "scr_dir1_threshold_10": 0.07650271266233713,
205
+ "scr_metric_threshold_10": 0.417968888243172,
206
+ "scr_dir2_threshold_10": 0.417968888243172,
207
+ "scr_dir1_threshold_20": 0.03278678379574697,
208
+ "scr_metric_threshold_20": 0.5703125145519129,
209
+ "scr_dir2_threshold_20": 0.5703125145519129,
210
+ "scr_dir1_threshold_50": -0.03825151918536423,
211
+ "scr_metric_threshold_50": 0.6835936845163922,
212
+ "scr_dir2_threshold_50": 0.6835936845163922,
213
+ "scr_dir1_threshold_100": 0.0,
214
+ "scr_metric_threshold_100": 0.7304687718278693,
215
+ "scr_dir2_threshold_100": 0.7304687718278693,
216
+ "scr_dir1_threshold_500": 0.06010915791026799,
217
+ "scr_metric_threshold_500": 0.75781239813661,
218
+ "scr_dir2_threshold_500": 0.75781239813661
219
+ },
220
+ {
221
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Software_Electronics_results",
222
+ "scr_dir1_threshold_2": 0.10256400851337757,
223
+ "scr_metric_threshold_2": 0.0927419335456348,
224
+ "scr_dir2_threshold_2": 0.0927419335456348,
225
+ "scr_dir1_threshold_5": 0.1999998777340575,
226
+ "scr_metric_threshold_5": 0.13306447154960044,
227
+ "scr_dir2_threshold_5": 0.13306447154960044,
228
+ "scr_dir1_threshold_10": 0.2615384662409978,
229
+ "scr_metric_threshold_10": 0.16129039235714715,
230
+ "scr_dir2_threshold_10": 0.16129039235714715,
231
+ "scr_dir1_threshold_20": 0.3282048883757795,
232
+ "scr_metric_threshold_20": 0.21774199363095595,
233
+ "scr_dir2_threshold_20": 0.21774199363095595,
234
+ "scr_dir1_threshold_50": 0.28717946836934216,
235
+ "scr_metric_threshold_50": 0.40322586072222555,
236
+ "scr_dir2_threshold_50": 0.40322586072222555,
237
+ "scr_dir1_threshold_100": 0.35384589050412385,
238
+ "scr_metric_threshold_100": 0.46774187346031365,
239
+ "scr_dir2_threshold_100": 0.46774187346031365,
240
+ "scr_dir1_threshold_500": 0.369230614047073,
241
+ "scr_metric_threshold_500": 0.5604838070059485,
242
+ "scr_dir2_threshold_500": 0.5604838070059485
243
+ },
244
+ {
245
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Pet_Supplies_Office_Products_results",
246
+ "scr_dir1_threshold_2": 0.11711721870769615,
247
+ "scr_metric_threshold_2": 0.308035689339578,
248
+ "scr_dir2_threshold_2": 0.308035689339578,
249
+ "scr_dir1_threshold_5": 0.1756755595721568,
250
+ "scr_metric_threshold_5": 0.45982128720998955,
251
+ "scr_dir2_threshold_5": 0.45982128720998955,
252
+ "scr_dir1_threshold_10": 0.3108108325802206,
253
+ "scr_metric_threshold_10": 0.5223212705792319,
254
+ "scr_dir2_threshold_10": 0.5223212705792319,
255
+ "scr_dir1_threshold_20": 0.351351387633701,
256
+ "scr_metric_threshold_20": 0.6026784300671324,
257
+ "scr_dir2_threshold_20": 0.6026784300671324,
258
+ "scr_dir1_threshold_50": 0.4729730527941422,
259
+ "scr_metric_threshold_50": 0.6249998336924246,
260
+ "scr_dir2_threshold_50": 0.6249998336924246,
261
+ "scr_dir1_threshold_100": 0.2162161146256372,
262
+ "scr_metric_threshold_100": 0.6830355895550327,
263
+ "scr_dir2_threshold_100": 0.6830355895550327,
264
+ "scr_dir1_threshold_500": 0.19369361387252446,
265
+ "scr_metric_threshold_500": 0.6830355895550327,
266
+ "scr_dir2_threshold_500": 0.6830355895550327
267
+ },
268
+ {
269
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Industrial_and_Scientific_Toys_and_Games_results",
270
+ "scr_dir1_threshold_2": 0.08583681325484281,
271
+ "scr_metric_threshold_2": 0.08583681325484281,
272
+ "scr_dir2_threshold_2": 0.08571430193323373,
273
+ "scr_dir1_threshold_5": 0.09012885856869063,
274
+ "scr_metric_threshold_5": 0.09012885856869063,
275
+ "scr_dir2_threshold_5": 0.1523809496777944,
276
+ "scr_dir1_threshold_10": 0.12875547569614632,
277
+ "scr_metric_threshold_10": 0.12875547569614632,
278
+ "scr_dir2_threshold_10": 0.18571413163427958,
279
+ "scr_dir1_threshold_20": 0.19313308563727843,
280
+ "scr_metric_threshold_20": 0.19313308563727843,
281
+ "scr_dir2_threshold_20": 0.21904759742235502,
282
+ "scr_dir1_threshold_50": 0.24463507126463127,
283
+ "scr_metric_threshold_50": 0.24463507126463127,
284
+ "scr_dir2_threshold_50": 0.22380944001162575,
285
+ "scr_dir1_threshold_100": 0.2618024850783763,
286
+ "scr_metric_threshold_100": 0.2618024850783763,
287
+ "scr_dir2_threshold_100": 0.25714290579970117,
288
+ "scr_dir1_threshold_500": 0.36480671214696403,
289
+ "scr_metric_threshold_500": 0.36480671214696403,
290
+ "scr_dir2_threshold_500": 0.22857128260089646
291
+ }
292
+ ],
293
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
294
+ "sae_lens_id": "custom_sae",
295
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_2",
296
+ "sae_lens_version": "5.4.2",
297
+ "sae_cfg_dict": {
298
+ "model_name": "gemma-2-2b",
299
+ "d_in": 2304,
300
+ "d_sae": 16384,
301
+ "hook_layer": 12,
302
+ "hook_name": "blocks.12.hook_resid_post",
303
+ "context_size": null,
304
+ "hook_head_index": null,
305
+ "architecture": "topk",
306
+ "apply_b_dec_to_input": null,
307
+ "finetuning_scaling_factor": null,
308
+ "activation_fn_str": "",
309
+ "prepend_bos": true,
310
+ "normalize_activations": "none",
311
+ "dtype": "bfloat16",
312
+ "device": "",
313
+ "dataset_path": "",
314
+ "dataset_trust_remote_code": true,
315
+ "seqpos_slice": [
316
+ null
317
+ ],
318
+ "training_tokens": -100000,
319
+ "sae_lens_training_version": null,
320
+ "neuronpedia_id": null
321
+ },
322
+ "eval_result_unstructured": null
323
+ }
eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_3_custom_sae_eval_results.json ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "scr",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "canrager/amazon_reviews_mcauley_1and5"
8
+ ],
9
+ "perform_scr": true,
10
+ "early_stopping_patience": 20,
11
+ "train_set_size": 4000,
12
+ "test_set_size": 1000,
13
+ "context_length": 128,
14
+ "probe_train_batch_size": 16,
15
+ "probe_test_batch_size": 500,
16
+ "probe_epochs": 20,
17
+ "probe_lr": 0.001,
18
+ "probe_l1_penalty": 0.001,
19
+ "sae_batch_size": 125,
20
+ "llm_batch_size": 32,
21
+ "llm_dtype": "bfloat16",
22
+ "lower_vram_usage": false,
23
+ "model_name": "gemma-2-2b",
24
+ "n_values": [
25
+ 2,
26
+ 5,
27
+ 10,
28
+ 20,
29
+ 50,
30
+ 100,
31
+ 500
32
+ ],
33
+ "column1_vals_lookup": {
34
+ "LabHC/bias_in_bios_class_set1": [
35
+ [
36
+ "professor",
37
+ "nurse"
38
+ ],
39
+ [
40
+ "architect",
41
+ "journalist"
42
+ ],
43
+ [
44
+ "surgeon",
45
+ "psychologist"
46
+ ],
47
+ [
48
+ "attorney",
49
+ "teacher"
50
+ ]
51
+ ],
52
+ "canrager/amazon_reviews_mcauley_1and5": [
53
+ [
54
+ "Books",
55
+ "CDs_and_Vinyl"
56
+ ],
57
+ [
58
+ "Software",
59
+ "Electronics"
60
+ ],
61
+ [
62
+ "Pet_Supplies",
63
+ "Office_Products"
64
+ ],
65
+ [
66
+ "Industrial_and_Scientific",
67
+ "Toys_and_Games"
68
+ ]
69
+ ]
70
+ }
71
+ },
72
+ "eval_id": "68a533fb-737f-40a8-9ef5-6258e8e4f74d",
73
+ "datetime_epoch_millis": 1740084278342,
74
+ "eval_result_metrics": {
75
+ "scr_metrics": {
76
+ "scr_dir1_threshold_2": 0.2787146899762559,
77
+ "scr_metric_threshold_2": 0.1141391158653483,
78
+ "scr_dir2_threshold_2": 0.10745099172312679,
79
+ "scr_dir1_threshold_5": 0.32062371097063297,
80
+ "scr_metric_threshold_5": 0.20177285177522555,
81
+ "scr_dir2_threshold_5": 0.1984722181878159,
82
+ "scr_dir1_threshold_10": 0.2892944163300857,
83
+ "scr_metric_threshold_10": 0.26738657101098257,
84
+ "scr_dir2_threshold_10": 0.2653862543856163,
85
+ "scr_dir1_threshold_20": 0.35023429904325515,
86
+ "scr_metric_threshold_20": 0.29224445802298504,
87
+ "scr_dir2_threshold_20": 0.29119191854268334,
88
+ "scr_dir1_threshold_50": 0.24298437994866598,
89
+ "scr_metric_threshold_50": 0.35309283101166244,
90
+ "scr_dir2_threshold_50": 0.34845356019525786,
91
+ "scr_dir1_threshold_100": 0.2623086148330749,
92
+ "scr_metric_threshold_100": 0.33536733606642893,
93
+ "scr_dir2_threshold_100": 0.32007253414453324,
94
+ "scr_dir1_threshold_500": -0.15362099418655276,
95
+ "scr_metric_threshold_500": 0.3069312131012075,
96
+ "scr_dir2_threshold_500": 0.27089761192140027
97
+ }
98
+ },
99
+ "eval_result_details": [
100
+ {
101
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_professor_nurse_results",
102
+ "scr_dir1_threshold_2": 0.47619029597980633,
103
+ "scr_metric_threshold_2": 0.024570143672067307,
104
+ "scr_dir2_threshold_2": 0.024570143672067307,
105
+ "scr_dir1_threshold_5": 0.5714281659545642,
106
+ "scr_metric_threshold_5": 0.04668317046281315,
107
+ "scr_dir2_threshold_5": 0.04668317046281315,
108
+ "scr_dir1_threshold_10": 0.6031747533501614,
109
+ "scr_metric_threshold_10": 0.07616710855131739,
110
+ "scr_dir2_threshold_10": 0.07616710855131739,
111
+ "scr_dir1_threshold_20": 0.5873019327053712,
112
+ "scr_metric_threshold_20": 0.09828013534206324,
113
+ "scr_dir2_threshold_20": 0.09828013534206324,
114
+ "scr_dir1_threshold_50": 0.3492058386094512,
115
+ "scr_metric_threshold_50": 0.16953315657947304,
116
+ "scr_dir2_threshold_50": 0.16953315657947304,
117
+ "scr_dir1_threshold_100": 0.3968252466498386,
118
+ "scr_metric_threshold_100": 0.21375921016096475,
119
+ "scr_dir2_threshold_100": 0.21375921016096475,
120
+ "scr_dir1_threshold_500": -0.5714291120605811,
121
+ "scr_metric_threshold_500": 0.11302210438631537,
122
+ "scr_dir2_threshold_500": 0.11302210438631537
123
+ },
124
+ {
125
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_architect_journalist_results",
126
+ "scr_dir1_threshold_2": 0.2323233053010818,
127
+ "scr_metric_threshold_2": 0.15297456738930126,
128
+ "scr_dir2_threshold_2": 0.15297456738930126,
129
+ "scr_dir1_threshold_5": 0.2828283193172076,
130
+ "scr_metric_threshold_5": 0.21529738919825334,
131
+ "scr_dir2_threshold_5": 0.21529738919825334,
132
+ "scr_dir1_threshold_10": 0.07070677879567282,
133
+ "scr_metric_threshold_10": 0.27478755502243446,
134
+ "scr_dir2_threshold_10": 0.27478755502243446,
135
+ "scr_dir1_threshold_20": 0.05050501401612577,
136
+ "scr_metric_threshold_20": 0.33427755199492365,
137
+ "scr_dir2_threshold_20": 0.33427755199492365,
138
+ "scr_dir1_threshold_50": 0.17171680682792437,
139
+ "scr_metric_threshold_50": 0.3427761953560044,
140
+ "scr_dir2_threshold_50": 0.3427761953560044,
141
+ "scr_dir1_threshold_100": 0.42424247897581135,
142
+ "scr_metric_threshold_100": 0.42209630388711794,
143
+ "scr_dir2_threshold_100": 0.42209630388711794,
144
+ "scr_dir1_threshold_500": -0.4040407141962643,
145
+ "scr_metric_threshold_500": 0.07648728369465063,
146
+ "scr_dir2_threshold_500": 0.07648728369465063
147
+ },
148
+ {
149
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_surgeon_psychologist_results",
150
+ "scr_dir1_threshold_2": 0.5483869727270193,
151
+ "scr_metric_threshold_2": 0.03535352927204973,
152
+ "scr_dir2_threshold_2": 0.03535352927204973,
153
+ "scr_dir1_threshold_5": 0.5645158034084256,
154
+ "scr_metric_threshold_5": 0.06818183870683217,
155
+ "scr_dir2_threshold_5": 0.06818183870683217,
156
+ "scr_dir1_threshold_10": 0.5806446340898319,
157
+ "scr_metric_threshold_10": 0.14898992711677,
158
+ "scr_dir2_threshold_10": 0.14898992711677,
159
+ "scr_dir1_threshold_20": 0.5483869727270193,
160
+ "scr_metric_threshold_20": 0.2070707359577638,
161
+ "scr_dir2_threshold_20": 0.2070707359577638,
162
+ "scr_dir1_threshold_50": 0.37096695113474787,
163
+ "scr_metric_threshold_50": 0.2651515447987576,
164
+ "scr_dir2_threshold_50": 0.2651515447987576,
165
+ "scr_dir1_threshold_100": 0.35483812045334157,
166
+ "scr_metric_threshold_100": 0.06565661886956488,
167
+ "scr_dir2_threshold_100": 0.06565661886956488,
168
+ "scr_dir1_threshold_500": -2.1451623602294587,
169
+ "scr_metric_threshold_500": 0.005050590191303803,
170
+ "scr_dir2_threshold_500": 0.005050590191303803
171
+ },
172
+ {
173
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_attorney_teacher_results",
174
+ "scr_dir1_threshold_2": 0.3333333333333333,
175
+ "scr_metric_threshold_2": 0.14369492905997697,
176
+ "scr_dir2_threshold_2": 0.14369492905997697,
177
+ "scr_dir1_threshold_5": 0.32520335840604614,
178
+ "scr_metric_threshold_5": 0.21994133923436865,
179
+ "scr_dir2_threshold_5": 0.21994133923436865,
180
+ "scr_dir1_threshold_10": 0.05691079367202333,
181
+ "scr_metric_threshold_10": 0.28445751901318467,
182
+ "scr_dir2_threshold_10": 0.28445751901318467,
183
+ "scr_dir1_threshold_20": 0.4390244611595863,
184
+ "scr_metric_threshold_20": 0.12316715696299442,
185
+ "scr_dir2_threshold_20": 0.12316715696299442,
186
+ "scr_dir1_threshold_50": -0.24390215536165474,
187
+ "scr_metric_threshold_50": 0.13196469866440133,
188
+ "scr_dir2_threshold_50": 0.13196469866440133,
189
+ "scr_dir1_threshold_100": -0.39024364241485016,
190
+ "scr_metric_threshold_100": 0.1260996708634634,
191
+ "scr_dir2_threshold_100": 0.1260996708634634,
192
+ "scr_dir1_threshold_500": 0.39837410193264383,
193
+ "scr_metric_threshold_500": -0.10850441266694984,
194
+ "scr_dir2_threshold_500": -0.10850441266694984
195
+ },
196
+ {
197
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Books_CDs_and_Vinyl_results",
198
+ "scr_dir1_threshold_2": 0.04918033854781611,
199
+ "scr_metric_threshold_2": 0.21875017462295412,
200
+ "scr_dir2_threshold_2": 0.21875017462295412,
201
+ "scr_dir1_threshold_5": 0.08743153202478901,
202
+ "scr_metric_threshold_5": 0.4335936845163922,
203
+ "scr_dir2_threshold_5": 0.4335936845163922,
204
+ "scr_dir1_threshold_10": 0.09836067709563222,
205
+ "scr_metric_threshold_10": 0.5390624563442614,
206
+ "scr_dir2_threshold_10": 0.5390624563442614,
207
+ "scr_dir1_threshold_20": 0.08196712234356307,
208
+ "scr_metric_threshold_20": 0.6054687718278693,
209
+ "scr_dir2_threshold_20": 0.6054687718278693,
210
+ "scr_dir1_threshold_50": 0.04918033854781611,
211
+ "scr_metric_threshold_50": 0.6992187136202179,
212
+ "scr_dir2_threshold_50": 0.6992187136202179,
213
+ "scr_dir1_threshold_100": -0.03278678379574697,
214
+ "scr_metric_threshold_100": 0.75,
215
+ "scr_dir2_threshold_100": 0.75,
216
+ "scr_dir1_threshold_500": 0.1311474608913792,
217
+ "scr_metric_threshold_500": 0.75,
218
+ "scr_dir2_threshold_500": 0.75
219
+ },
220
+ {
221
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Software_Electronics_results",
222
+ "scr_dir1_threshold_2": 0.2358974641126534,
223
+ "scr_metric_threshold_2": 0.05241939554166917,
224
+ "scr_dir2_threshold_2": 0.05241939554166917,
225
+ "scr_dir1_threshold_5": 0.24102560340535104,
226
+ "scr_metric_threshold_5": 0.1008065853511987,
227
+ "scr_dir2_threshold_5": 0.1008065853511987,
228
+ "scr_dir1_threshold_10": 0.29743574695473746,
229
+ "scr_metric_threshold_10": 0.14516132908730398,
230
+ "scr_dir2_threshold_10": 0.14516132908730398,
231
+ "scr_dir1_threshold_20": 0.38974347688271976,
232
+ "scr_metric_threshold_20": 0.19758072462897314,
233
+ "scr_dir2_threshold_20": 0.19758072462897314,
234
+ "scr_dir1_threshold_50": 0.4153844790110642,
235
+ "scr_metric_threshold_50": 0.282258006369044,
236
+ "scr_dir2_threshold_50": 0.282258006369044,
237
+ "scr_dir1_threshold_100": 0.4153844790110642,
238
+ "scr_metric_threshold_100": 0.3145161329087304,
239
+ "scr_dir2_threshold_100": 0.3145161329087304,
240
+ "scr_dir1_threshold_500": 0.5333332110673908,
241
+ "scr_metric_threshold_500": 0.5483871898095295,
242
+ "scr_dir2_threshold_500": 0.5483871898095295
243
+ },
244
+ {
245
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Pet_Supplies_Office_Products_results",
246
+ "scr_dir1_threshold_2": 0.23423416892600485,
247
+ "scr_metric_threshold_2": 0.16517854648243513,
248
+ "scr_dir2_threshold_2": 0.16517854648243513,
249
+ "scr_dir1_threshold_5": 0.34234222623882343,
250
+ "scr_metric_threshold_5": 0.37946412772208915,
251
+ "scr_dir2_threshold_5": 0.37946412772208915,
252
+ "scr_dir1_threshold_10": 0.4054052820454166,
253
+ "scr_metric_threshold_10": 0.4687500083153788,
254
+ "scr_dir2_threshold_10": 0.4687500083153788,
255
+ "scr_dir1_threshold_20": 0.47747749924688726,
256
+ "scr_metric_threshold_20": 0.5446426742045243,
257
+ "scr_dir2_threshold_20": 0.5446426742045243,
258
+ "scr_dir1_threshold_50": 0.5180180543003676,
259
+ "scr_metric_threshold_50": 0.6205356061857903,
260
+ "scr_dir2_threshold_50": 0.6205356061857903,
261
+ "scr_dir1_threshold_100": 0.6126125037655636,
262
+ "scr_metric_threshold_100": 0.4732142358220131,
263
+ "scr_dir2_threshold_100": 0.4732142358220131,
264
+ "scr_dir1_threshold_500": 0.37387388838681374,
265
+ "scr_metric_threshold_500": 0.616071378679156,
266
+ "scr_dir2_threshold_500": 0.616071378679156
267
+ },
268
+ {
269
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Industrial_and_Scientific_Toys_and_Games_results",
270
+ "scr_dir1_threshold_2": 0.12017164088233277,
271
+ "scr_metric_threshold_2": 0.12017164088233277,
272
+ "scr_dir2_threshold_2": 0.06666664774456064,
273
+ "scr_dir1_threshold_5": 0.15021467900985702,
274
+ "scr_metric_threshold_5": 0.15021467900985702,
275
+ "scr_dir2_threshold_5": 0.12380961031057988,
276
+ "scr_dir1_threshold_10": 0.20171666463720986,
277
+ "scr_metric_threshold_10": 0.20171666463720986,
278
+ "scr_dir2_threshold_10": 0.18571413163427958,
279
+ "scr_dir1_threshold_20": 0.2274679132647684,
280
+ "scr_metric_threshold_20": 0.2274679132647684,
281
+ "scr_dir2_threshold_20": 0.21904759742235502,
282
+ "scr_dir1_threshold_50": 0.3133047265196112,
283
+ "scr_metric_threshold_50": 0.3133047265196112,
284
+ "scr_dir2_threshold_50": 0.2761905599883743,
285
+ "scr_dir1_threshold_100": 0.31759651601957695,
286
+ "scr_metric_threshold_100": 0.31759651601957695,
287
+ "scr_dir2_threshold_100": 0.19523810064441124,
288
+ "scr_dir1_threshold_500": 0.45493557071565466,
289
+ "scr_metric_threshold_500": 0.45493557071565466,
290
+ "scr_dir2_threshold_500": 0.16666676127719673
291
+ }
292
+ ],
293
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
294
+ "sae_lens_id": "custom_sae",
295
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_3",
296
+ "sae_lens_version": "5.4.2",
297
+ "sae_cfg_dict": {
298
+ "model_name": "gemma-2-2b",
299
+ "d_in": 2304,
300
+ "d_sae": 16384,
301
+ "hook_layer": 12,
302
+ "hook_name": "blocks.12.hook_resid_post",
303
+ "context_size": null,
304
+ "hook_head_index": null,
305
+ "architecture": "topk",
306
+ "apply_b_dec_to_input": null,
307
+ "finetuning_scaling_factor": null,
308
+ "activation_fn_str": "",
309
+ "prepend_bos": true,
310
+ "normalize_activations": "none",
311
+ "dtype": "bfloat16",
312
+ "device": "",
313
+ "dataset_path": "",
314
+ "dataset_trust_remote_code": true,
315
+ "seqpos_slice": [
316
+ null
317
+ ],
318
+ "training_tokens": -100000,
319
+ "sae_lens_training_version": null,
320
+ "neuronpedia_id": null
321
+ },
322
+ "eval_result_unstructured": null
323
+ }
eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_4_custom_sae_eval_results.json ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "scr",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "canrager/amazon_reviews_mcauley_1and5"
8
+ ],
9
+ "perform_scr": true,
10
+ "early_stopping_patience": 20,
11
+ "train_set_size": 4000,
12
+ "test_set_size": 1000,
13
+ "context_length": 128,
14
+ "probe_train_batch_size": 16,
15
+ "probe_test_batch_size": 500,
16
+ "probe_epochs": 20,
17
+ "probe_lr": 0.001,
18
+ "probe_l1_penalty": 0.001,
19
+ "sae_batch_size": 125,
20
+ "llm_batch_size": 32,
21
+ "llm_dtype": "bfloat16",
22
+ "lower_vram_usage": false,
23
+ "model_name": "gemma-2-2b",
24
+ "n_values": [
25
+ 2,
26
+ 5,
27
+ 10,
28
+ 20,
29
+ 50,
30
+ 100,
31
+ 500
32
+ ],
33
+ "column1_vals_lookup": {
34
+ "LabHC/bias_in_bios_class_set1": [
35
+ [
36
+ "professor",
37
+ "nurse"
38
+ ],
39
+ [
40
+ "architect",
41
+ "journalist"
42
+ ],
43
+ [
44
+ "surgeon",
45
+ "psychologist"
46
+ ],
47
+ [
48
+ "attorney",
49
+ "teacher"
50
+ ]
51
+ ],
52
+ "canrager/amazon_reviews_mcauley_1and5": [
53
+ [
54
+ "Books",
55
+ "CDs_and_Vinyl"
56
+ ],
57
+ [
58
+ "Software",
59
+ "Electronics"
60
+ ],
61
+ [
62
+ "Pet_Supplies",
63
+ "Office_Products"
64
+ ],
65
+ [
66
+ "Industrial_and_Scientific",
67
+ "Toys_and_Games"
68
+ ]
69
+ ]
70
+ }
71
+ },
72
+ "eval_id": "a4371d8b-7e2d-4c09-87f9-8211ea50be20",
73
+ "datetime_epoch_millis": 1740083772572,
74
+ "eval_result_metrics": {
75
+ "scr_metrics": {
76
+ "scr_dir1_threshold_2": 0.17525184744564948,
77
+ "scr_metric_threshold_2": 0.13172952570784896,
78
+ "scr_dir2_threshold_2": 0.12647454921741172,
79
+ "scr_dir1_threshold_5": 0.18994166469241808,
80
+ "scr_metric_threshold_5": 0.2032884449620494,
81
+ "scr_dir2_threshold_5": 0.20213373810135787,
82
+ "scr_dir1_threshold_10": 0.3466043196434342,
83
+ "scr_metric_threshold_10": 0.23569176308087691,
84
+ "scr_dir2_threshold_10": 0.22792300262932813,
85
+ "scr_dir1_threshold_20": 0.1285310890951475,
86
+ "scr_metric_threshold_20": 0.27956601624605387,
87
+ "scr_dir2_threshold_20": 0.2754785650090814,
88
+ "scr_dir1_threshold_50": 0.41817032949362426,
89
+ "scr_metric_threshold_50": 0.2929689074559695,
90
+ "scr_dir2_threshold_50": 0.25184385225933864,
91
+ "scr_dir1_threshold_100": 0.1952709066044856,
92
+ "scr_metric_threshold_100": 0.2848449242504194,
93
+ "scr_dir2_threshold_100": 0.26633124822681414,
94
+ "scr_dir1_threshold_500": 0.17126963234106926,
95
+ "scr_metric_threshold_500": 0.267981388553453,
96
+ "scr_dir2_threshold_500": 0.26486211885402133
97
+ }
98
+ },
99
+ "eval_result_details": [
100
+ {
101
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_professor_nurse_results",
102
+ "scr_dir1_threshold_2": 0.4920631166245965,
103
+ "scr_metric_threshold_2": 0.024570143672067307,
104
+ "scr_dir2_threshold_2": 0.024570143672067307,
105
+ "scr_dir1_threshold_5": 0.5079359372693867,
106
+ "scr_metric_threshold_5": 0.11056513395372923,
107
+ "scr_dir2_threshold_5": 0.11056513395372923,
108
+ "scr_dir1_threshold_10": 0.5396825246649839,
109
+ "scr_metric_threshold_10": 0.17690421432596676,
110
+ "scr_dir2_threshold_10": 0.17690421432596676,
111
+ "scr_dir1_threshold_20": 0.42857088793941894,
112
+ "scr_metric_threshold_20": 0.22850132565395218,
113
+ "scr_dir2_threshold_20": 0.22850132565395218,
114
+ "scr_dir1_threshold_50": 0.333333017964661,
115
+ "scr_metric_threshold_50": 0.21621618059355086,
116
+ "scr_dir2_threshold_50": 0.21621618059355086,
117
+ "scr_dir1_threshold_100": -1.5079378294814203,
118
+ "scr_metric_threshold_100": 0.2800982905332023,
119
+ "scr_dir2_threshold_100": 0.2800982905332023,
120
+ "scr_dir1_threshold_500": -0.1269844573703551,
121
+ "scr_metric_threshold_500": 0.22604420877263073,
122
+ "scr_dir2_threshold_500": 0.22604420877263073
123
+ },
124
+ {
125
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_architect_journalist_results",
126
+ "scr_dir1_threshold_2": -0.10101063009950968,
127
+ "scr_metric_threshold_2": 0.11898016279667031,
128
+ "scr_dir2_threshold_2": 0.11898016279667031,
129
+ "scr_dir1_threshold_5": -0.2323233053010818,
130
+ "scr_metric_threshold_5": 0.1643058667351529,
131
+ "scr_dir2_threshold_5": 0.1643058667351529,
132
+ "scr_dir1_threshold_10": 0.42424247897581135,
133
+ "scr_metric_threshold_10": 0.16147304189869,
134
+ "scr_dir2_threshold_10": 0.16147304189869,
135
+ "scr_dir1_threshold_20": 0.5252525070080629,
136
+ "scr_metric_threshold_20": 0.21813038288640824,
137
+ "scr_dir2_threshold_20": 0.21813038288640824,
138
+ "scr_dir1_threshold_50": 0.42424247897581135,
139
+ "scr_metric_threshold_50": 0.3031161410904476,
140
+ "scr_dir2_threshold_50": 0.3031161410904476,
141
+ "scr_dir1_threshold_100": 0.6969699159032454,
142
+ "scr_metric_threshold_100": 0.33144472715846074,
143
+ "scr_dir2_threshold_100": 0.33144472715846074,
144
+ "scr_dir1_threshold_500": 0.2323233053010818,
145
+ "scr_metric_threshold_500": 0.19546744649132095,
146
+ "scr_dir2_threshold_500": 0.19546744649132095
147
+ },
148
+ {
149
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_surgeon_psychologist_results",
150
+ "scr_dir1_threshold_2": 0.3870967431817548,
151
+ "scr_metric_threshold_2": 0.03030308959751515,
152
+ "scr_dir2_threshold_2": 0.03030308959751515,
153
+ "scr_dir1_threshold_5": 0.46774185795438705,
154
+ "scr_metric_threshold_5": 0.1035353679788819,
155
+ "scr_dir2_threshold_5": 0.1035353679788819,
156
+ "scr_dir1_threshold_10": 0.6129032568182452,
157
+ "scr_metric_threshold_10": 0.16161617681987567,
158
+ "scr_dir2_threshold_10": 0.16161617681987567,
159
+ "scr_dir1_threshold_20": -1.9193558465929683,
160
+ "scr_metric_threshold_20": 0.29545463439627273,
161
+ "scr_dir2_threshold_20": 0.29545463439627273,
162
+ "scr_dir1_threshold_50": 0.1290316068168512,
163
+ "scr_metric_threshold_50": 0.3535354432372665,
164
+ "scr_dir2_threshold_50": 0.3535354432372665,
165
+ "scr_dir1_threshold_100": 0.24193534431789668,
166
+ "scr_metric_threshold_100": 0.07323242889813597,
167
+ "scr_dir2_threshold_100": 0.07323242889813597,
168
+ "scr_dir1_threshold_500": 0.0,
169
+ "scr_metric_threshold_500": -0.06313124851552836,
170
+ "scr_dir2_threshold_500": -0.06313124851552836
171
+ },
172
+ {
173
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_attorney_teacher_results",
174
+ "scr_dir1_threshold_2": 0.05691079367202333,
175
+ "scr_metric_threshold_2": 0.0791787492811609,
176
+ "scr_dir2_threshold_2": 0.0791787492811609,
177
+ "scr_dir1_threshold_5": 0.13821151212590824,
178
+ "scr_metric_threshold_5": 0.21407613663973096,
179
+ "scr_dir2_threshold_5": 0.21407613663973096,
180
+ "scr_dir1_threshold_10": 0.08130071845388491,
181
+ "scr_metric_threshold_10": 0.04105571898766481,
182
+ "scr_dir2_threshold_10": 0.04105571898766481,
183
+ "scr_dir1_threshold_20": 0.617886332512437,
184
+ "scr_metric_threshold_20": -0.011730230395575622,
185
+ "scr_dir2_threshold_20": -0.011730230395575622,
186
+ "scr_dir1_threshold_50": 0.7317074352659771,
187
+ "scr_metric_threshold_50": -0.29325523550829136,
188
+ "scr_dir2_threshold_50": -0.29325523550829136,
189
+ "scr_dir1_threshold_100": 0.6991870509663219,
190
+ "scr_metric_threshold_100": -0.20527876973202377,
191
+ "scr_dir2_threshold_100": -0.20527876973202377,
192
+ "scr_dir1_threshold_500": -0.13821102753540176,
193
+ "scr_metric_threshold_500": -0.37536667348362096,
194
+ "scr_dir2_threshold_500": -0.37536667348362096
195
+ },
196
+ {
197
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Books_CDs_and_Vinyl_results",
198
+ "scr_dir1_threshold_2": 0.04918033854781611,
199
+ "scr_metric_threshold_2": 0.3164063154836078,
200
+ "scr_dir2_threshold_2": 0.3164063154836078,
201
+ "scr_dir1_threshold_5": 0.0,
202
+ "scr_metric_threshold_5": 0.40625005820765137,
203
+ "scr_dir2_threshold_5": 0.40625005820765137,
204
+ "scr_dir1_threshold_10": 0.22404372830578545,
205
+ "scr_metric_threshold_10": 0.5312500582076514,
206
+ "scr_dir2_threshold_10": 0.5312500582076514,
207
+ "scr_dir1_threshold_20": 0.3060108506493485,
208
+ "scr_metric_threshold_20": 0.5976563736912591,
209
+ "scr_dir2_threshold_20": 0.5976563736912591,
210
+ "scr_dir1_threshold_50": 0.3661200085596165,
211
+ "scr_metric_threshold_50": 0.7187499417923486,
212
+ "scr_dir2_threshold_50": 0.7187499417923486,
213
+ "scr_dir1_threshold_100": -0.016393554752069144,
214
+ "scr_metric_threshold_100": 0.746093800931695,
215
+ "scr_dir2_threshold_100": 0.746093800931695,
216
+ "scr_dir1_threshold_500": 0.10382508677685816,
217
+ "scr_metric_threshold_500": 0.8242187136202179,
218
+ "scr_dir2_threshold_500": 0.8242187136202179
219
+ },
220
+ {
221
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Software_Electronics_results",
222
+ "scr_dir1_threshold_2": 0.18461515419110838,
223
+ "scr_metric_threshold_2": 0.06854845881151235,
224
+ "scr_dir2_threshold_2": 0.06854845881151235,
225
+ "scr_dir1_threshold_5": 0.22564087986240192,
226
+ "scr_metric_threshold_5": 0.0927419335456348,
227
+ "scr_dir2_threshold_5": 0.0927419335456348,
228
+ "scr_dir1_threshold_10": 0.28205102341178834,
229
+ "scr_metric_threshold_10": 0.1370969176230247,
230
+ "scr_dir2_threshold_10": 0.1370969176230247,
231
+ "scr_dir1_threshold_20": 0.369230614047073,
232
+ "scr_metric_threshold_20": 0.14516132908730398,
233
+ "scr_dir2_threshold_20": 0.14516132908730398,
234
+ "scr_dir1_threshold_50": 0.4974359303536512,
235
+ "scr_metric_threshold_50": 0.08870972781349516,
236
+ "scr_dir2_threshold_50": 0.08870972781349516,
237
+ "scr_dir1_threshold_100": 0.45641020468235766,
238
+ "scr_metric_threshold_100": 0.2500001201706423,
239
+ "scr_dir2_threshold_100": 0.2500001201706423,
240
+ "scr_dir1_threshold_500": 0.5025640696463488,
241
+ "scr_metric_threshold_500": 0.39112900318452204,
242
+ "scr_dir2_threshold_500": 0.39112900318452204
243
+ },
244
+ {
245
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Pet_Supplies_Office_Products_results",
246
+ "scr_dir1_threshold_2": 0.23873861537874994,
247
+ "scr_metric_threshold_2": 0.3214286379516016,
248
+ "scr_dir2_threshold_2": 0.3214286379516016,
249
+ "scr_dir1_threshold_5": 0.27927917043223033,
250
+ "scr_metric_threshold_5": 0.40178579743950193,
251
+ "scr_dir2_threshold_5": 0.40178579743950193,
252
+ "scr_dir1_threshold_10": 0.43693694419340684,
253
+ "scr_metric_threshold_10": 0.504464094460574,
254
+ "scr_dir2_threshold_10": 0.504464094460574,
255
+ "scr_dir1_threshold_20": 0.47747749924688726,
256
+ "scr_metric_threshold_20": 0.54017844669789,
257
+ "scr_dir2_threshold_20": 0.54017844669789,
258
+ "scr_dir1_threshold_50": 0.5630630558065931,
259
+ "scr_metric_threshold_50": 0.6562499584231062,
260
+ "scr_dir2_threshold_50": 0.6562499584231062,
261
+ "scr_dir1_threshold_100": 0.648648612366299,
262
+ "scr_metric_threshold_100": 0.45982128720998955,
263
+ "scr_dir2_threshold_100": 0.45982128720998955,
264
+ "scr_dir1_threshold_500": 0.32882888688058826,
265
+ "scr_metric_threshold_500": 0.47767846332864744,
266
+ "scr_dir2_threshold_500": 0.47767846332864744
267
+ },
268
+ {
269
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Industrial_and_Scientific_Toys_and_Games_results",
270
+ "scr_dir1_threshold_2": 0.09442064806865635,
271
+ "scr_metric_threshold_2": 0.09442064806865635,
272
+ "scr_dir2_threshold_2": 0.05238083614515829,
273
+ "scr_dir1_threshold_5": 0.13304726519611204,
274
+ "scr_metric_threshold_5": 0.13304726519611204,
275
+ "scr_dir2_threshold_5": 0.12380961031057988,
276
+ "scr_dir1_threshold_10": 0.17167388232356773,
277
+ "scr_metric_threshold_10": 0.17167388232356773,
278
+ "scr_dir2_threshold_10": 0.10952379871117751,
279
+ "scr_dir1_threshold_20": 0.22317586795092056,
280
+ "scr_metric_threshold_20": 0.22317586795092056,
281
+ "scr_dir2_threshold_20": 0.19047625805514054,
282
+ "scr_dir1_threshold_50": 0.30042910220583197,
283
+ "scr_metric_threshold_50": 0.30042910220583197,
284
+ "scr_dir2_threshold_50": -0.0285713393672145,
285
+ "scr_dir1_threshold_100": 0.3433475088332533,
286
+ "scr_metric_threshold_100": 0.3433475088332533,
287
+ "scr_dir2_threshold_100": 0.19523810064441124,
288
+ "scr_dir1_threshold_500": 0.46781119502943397,
289
+ "scr_metric_threshold_500": 0.46781119502943397,
290
+ "scr_dir2_threshold_500": 0.44285703743398075
291
+ }
292
+ ],
293
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
294
+ "sae_lens_id": "custom_sae",
295
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_4",
296
+ "sae_lens_version": "5.4.2",
297
+ "sae_cfg_dict": {
298
+ "model_name": "gemma-2-2b",
299
+ "d_in": 2304,
300
+ "d_sae": 16384,
301
+ "hook_layer": 12,
302
+ "hook_name": "blocks.12.hook_resid_post",
303
+ "context_size": null,
304
+ "hook_head_index": null,
305
+ "architecture": "topk",
306
+ "apply_b_dec_to_input": null,
307
+ "finetuning_scaling_factor": null,
308
+ "activation_fn_str": "",
309
+ "prepend_bos": true,
310
+ "normalize_activations": "none",
311
+ "dtype": "bfloat16",
312
+ "device": "",
313
+ "dataset_path": "",
314
+ "dataset_trust_remote_code": true,
315
+ "seqpos_slice": [
316
+ null
317
+ ],
318
+ "training_tokens": -100000,
319
+ "sae_lens_training_version": null,
320
+ "neuronpedia_id": null
321
+ },
322
+ "eval_result_unstructured": null
323
+ }
eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_5_custom_sae_eval_results.json ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "scr",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "canrager/amazon_reviews_mcauley_1and5"
8
+ ],
9
+ "perform_scr": true,
10
+ "early_stopping_patience": 20,
11
+ "train_set_size": 4000,
12
+ "test_set_size": 1000,
13
+ "context_length": 128,
14
+ "probe_train_batch_size": 16,
15
+ "probe_test_batch_size": 500,
16
+ "probe_epochs": 20,
17
+ "probe_lr": 0.001,
18
+ "probe_l1_penalty": 0.001,
19
+ "sae_batch_size": 125,
20
+ "llm_batch_size": 32,
21
+ "llm_dtype": "bfloat16",
22
+ "lower_vram_usage": false,
23
+ "model_name": "gemma-2-2b",
24
+ "n_values": [
25
+ 2,
26
+ 5,
27
+ 10,
28
+ 20,
29
+ 50,
30
+ 100,
31
+ 500
32
+ ],
33
+ "column1_vals_lookup": {
34
+ "LabHC/bias_in_bios_class_set1": [
35
+ [
36
+ "professor",
37
+ "nurse"
38
+ ],
39
+ [
40
+ "architect",
41
+ "journalist"
42
+ ],
43
+ [
44
+ "surgeon",
45
+ "psychologist"
46
+ ],
47
+ [
48
+ "attorney",
49
+ "teacher"
50
+ ]
51
+ ],
52
+ "canrager/amazon_reviews_mcauley_1and5": [
53
+ [
54
+ "Books",
55
+ "CDs_and_Vinyl"
56
+ ],
57
+ [
58
+ "Software",
59
+ "Electronics"
60
+ ],
61
+ [
62
+ "Pet_Supplies",
63
+ "Office_Products"
64
+ ],
65
+ [
66
+ "Industrial_and_Scientific",
67
+ "Toys_and_Games"
68
+ ]
69
+ ]
70
+ }
71
+ },
72
+ "eval_id": "8d2c6670-e453-457b-985d-3ee452d1e856",
73
+ "datetime_epoch_millis": 1740083601328,
74
+ "eval_result_metrics": {
75
+ "scr_metrics": {
76
+ "scr_dir1_threshold_2": 0.23624787905729394,
77
+ "scr_metric_threshold_2": 0.13649217290777038,
78
+ "scr_dir2_threshold_2": 0.13474985720207183,
79
+ "scr_dir1_threshold_5": 0.37506985772073476,
80
+ "scr_metric_threshold_5": 0.18671736939211342,
81
+ "scr_dir2_threshold_5": 0.16109145087815802,
82
+ "scr_dir1_threshold_10": 0.17323610656862803,
83
+ "scr_metric_threshold_10": 0.23143625754838917,
84
+ "scr_dir2_threshold_10": 0.17812028875840913,
85
+ "scr_dir1_threshold_20": -0.3660628022019214,
86
+ "scr_metric_threshold_20": 0.2951802904574344,
87
+ "scr_dir2_threshold_20": 0.22965047268025215,
88
+ "scr_dir1_threshold_50": 0.046392766008983566,
89
+ "scr_metric_threshold_50": 0.26712707522625806,
90
+ "scr_dir2_threshold_50": 0.18778157690130304,
91
+ "scr_dir1_threshold_100": -0.11794641428207871,
92
+ "scr_metric_threshold_100": 0.30861422383793685,
93
+ "scr_dir2_threshold_100": 0.24045307334836807,
94
+ "scr_dir1_threshold_500": -1.100767720766611,
95
+ "scr_metric_threshold_500": 0.36043783602846824,
96
+ "scr_dir2_threshold_500": 0.28383353023283897
97
+ }
98
+ },
99
+ "eval_result_details": [
100
+ {
101
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_professor_nurse_results",
102
+ "scr_dir1_threshold_2": 0.5714281659545642,
103
+ "scr_metric_threshold_2": 0.009828028179079856,
104
+ "scr_dir2_threshold_2": 0.009828028179079856,
105
+ "scr_dir1_threshold_5": 0.25396796863469334,
106
+ "scr_metric_threshold_5": 0.039312112716319424,
107
+ "scr_dir2_threshold_5": 0.039312112716319424,
108
+ "scr_dir1_threshold_10": 0.42857088793941894,
109
+ "scr_metric_threshold_10": 0.09336619447689097,
110
+ "scr_dir2_threshold_10": 0.09336619447689097,
111
+ "scr_dir1_threshold_20": -0.11111163672556491,
112
+ "scr_metric_threshold_20": 0.012284998611665989,
113
+ "scr_dir2_threshold_20": 0.012284998611665989,
114
+ "scr_dir1_threshold_50": 0.47619029597980633,
115
+ "scr_metric_threshold_50": 0.2825552609657884,
116
+ "scr_dir2_threshold_50": 0.2825552609657884,
117
+ "scr_dir1_threshold_100": -2.365081497572292,
118
+ "scr_metric_threshold_100": 0.27518434966803,
119
+ "scr_dir2_threshold_100": 0.27518434966803,
120
+ "scr_dir1_threshold_500": -5.444448439114294,
121
+ "scr_metric_threshold_500": 0.07371013811873126,
122
+ "scr_dir2_threshold_500": 0.07371013811873126
123
+ },
124
+ {
125
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_architect_journalist_results",
126
+ "scr_dir1_threshold_2": 0.1414141596586038,
127
+ "scr_metric_threshold_2": 0.1359772806671398,
128
+ "scr_dir2_threshold_2": 0.1359772806671398,
129
+ "scr_dir1_threshold_5": 0.636363417430088,
130
+ "scr_metric_threshold_5": 0.1359772806671398,
131
+ "scr_dir2_threshold_5": 0.1359772806671398,
132
+ "scr_dir1_threshold_10": -0.797980546002755,
133
+ "scr_metric_threshold_10": 0.12747880615775103,
134
+ "scr_dir2_threshold_10": 0.12747880615775103,
135
+ "scr_dir1_threshold_20": 0.54545427178761,
136
+ "scr_metric_threshold_20": 0.23796032559334063,
137
+ "scr_dir2_threshold_20": 0.23796032559334063,
138
+ "scr_dir1_threshold_50": 0.47474749299193714,
139
+ "scr_metric_threshold_50": -0.14447592402822051,
140
+ "scr_dir2_threshold_50": -0.14447592402822051,
141
+ "scr_dir1_threshold_100": 0.30303008409675464,
142
+ "scr_metric_threshold_100": 0.10481586976266374,
143
+ "scr_dir2_threshold_100": 0.10481586976266374,
144
+ "scr_dir1_threshold_500": 0.5151516246182893,
145
+ "scr_metric_threshold_500": 0.67705391620262,
146
+ "scr_dir2_threshold_500": 0.67705391620262
147
+ },
148
+ {
149
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_surgeon_psychologist_results",
150
+ "scr_dir1_threshold_2": 0.532258142045613,
151
+ "scr_metric_threshold_2": 0.08080808840993783,
152
+ "scr_dir2_threshold_2": 0.08080808840993783,
153
+ "scr_dir1_threshold_5": 0.516128349998606,
154
+ "scr_metric_threshold_5": 0.08080808840993783,
155
+ "scr_dir2_threshold_5": 0.08080808840993783,
156
+ "scr_dir1_threshold_10": -0.14516139886385812,
157
+ "scr_metric_threshold_10": 0.11363639784472028,
158
+ "scr_dir2_threshold_10": 0.11363639784472028,
159
+ "scr_dir1_threshold_20": -3.790324239776117,
160
+ "scr_metric_threshold_20": 0.23989904539254622,
161
+ "scr_dir2_threshold_20": 0.23989904539254622,
162
+ "scr_dir1_threshold_50": -1.967743300002788,
163
+ "scr_metric_threshold_50": 0.19949492592919268,
164
+ "scr_dir2_threshold_50": 0.19949492592919268,
165
+ "scr_dir1_threshold_100": -0.4193553659101681,
166
+ "scr_metric_threshold_100": -0.050504998812422694,
167
+ "scr_dir2_threshold_100": -0.050504998812422694,
168
+ "scr_dir1_threshold_500": -4.887099146595756,
169
+ "scr_metric_threshold_500": -0.1439393369254662,
170
+ "scr_dir2_threshold_500": -0.1439393369254662
171
+ },
172
+ {
173
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_attorney_teacher_results",
174
+ "scr_dir1_threshold_2": 0.21138225565250596,
175
+ "scr_metric_threshold_2": 0.04398823288813378,
176
+ "scr_dir2_threshold_2": 0.04398823288813378,
177
+ "scr_dir1_threshold_5": 0.7479673851205516,
178
+ "scr_metric_threshold_5": 0.17302041765206616,
179
+ "scr_dir2_threshold_5": 0.17302041765206616,
180
+ "scr_dir1_threshold_10": 0.617886332512437,
181
+ "scr_metric_threshold_10": 0.20234608103785506,
182
+ "scr_dir2_threshold_10": 0.20234608103785506,
183
+ "scr_dir1_threshold_20": -0.9512191812552638,
184
+ "scr_metric_threshold_20": 0.22873888093577555,
185
+ "scr_dir2_threshold_20": 0.22873888093577555,
186
+ "scr_dir1_threshold_50": 0.2845529991791037,
187
+ "scr_metric_threshold_50": 0.038123030293496105,
188
+ "scr_dir2_threshold_50": 0.038123030293496105,
189
+ "scr_dir1_threshold_100": 0.02439040937236807,
190
+ "scr_metric_threshold_100": 0.005865027800937943,
191
+ "scr_dir2_threshold_100": 0.005865027800937943,
192
+ "scr_dir1_threshold_500": -0.06504076859931053,
193
+ "scr_metric_threshold_500": -0.1583578481497213,
194
+ "scr_dir2_threshold_500": -0.1583578481497213
195
+ },
196
+ {
197
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Books_CDs_and_Vinyl_results",
198
+ "scr_dir1_threshold_2": 0.00546440968122594,
199
+ "scr_metric_threshold_2": 0.4531251455191284,
200
+ "scr_dir2_threshold_2": 0.4531251455191284,
201
+ "scr_dir1_threshold_5": 0.16939898007674342,
202
+ "scr_metric_threshold_5": 0.5507812863797821,
203
+ "scr_dir2_threshold_5": 0.5507812863797821,
204
+ "scr_dir1_threshold_10": 0.46994542104486603,
205
+ "scr_metric_threshold_10": 0.621093800931695,
206
+ "scr_dir2_threshold_10": 0.621093800931695,
207
+ "scr_dir1_threshold_20": 0.502732204840613,
208
+ "scr_metric_threshold_20": 0.6796874854480871,
209
+ "scr_dir2_threshold_20": 0.6796874854480871,
210
+ "scr_dir1_threshold_50": 0.09836067709563222,
211
+ "scr_metric_threshold_50": 0.6875001164153027,
212
+ "scr_dir2_threshold_50": 0.6875001164153027,
213
+ "scr_dir1_threshold_100": -0.07103830298111119,
214
+ "scr_metric_threshold_100": 0.7304687718278693,
215
+ "scr_dir2_threshold_100": 0.7304687718278693,
216
+ "scr_dir1_threshold_500": 0.3114752603305745,
217
+ "scr_metric_threshold_500": 0.6093749708961743,
218
+ "scr_dir2_threshold_500": 0.6093749708961743
219
+ },
220
+ {
221
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Software_Electronics_results",
222
+ "scr_dir1_threshold_2": 0.14871787347736873,
223
+ "scr_metric_threshold_2": 0.11290320254761761,
224
+ "scr_dir2_threshold_2": 0.11290320254761761,
225
+ "scr_dir1_threshold_5": 0.18974359914866223,
226
+ "scr_metric_threshold_5": 0.056451601273808806,
227
+ "scr_dir2_threshold_5": 0.056451601273808806,
228
+ "scr_dir1_threshold_10": 0.3333333333333333,
229
+ "scr_metric_threshold_10": 0.10483879108333834,
230
+ "scr_dir2_threshold_10": 0.10483879108333834,
231
+ "scr_dir1_threshold_20": 0.4358973418467109,
232
+ "scr_metric_threshold_20": 0.23387105690079912,
233
+ "scr_dir2_threshold_20": 0.23387105690079912,
234
+ "scr_dir1_threshold_50": 0.399999755468115,
235
+ "scr_metric_threshold_50": 0.1854838670912696,
236
+ "scr_dir2_threshold_50": 0.1854838670912696,
237
+ "scr_dir1_threshold_100": 0.54871793461034,
238
+ "scr_metric_threshold_100": 0.39112900318452204,
239
+ "scr_dir2_threshold_100": 0.39112900318452204,
240
+ "scr_dir1_threshold_500": 0.5948717995743311,
241
+ "scr_metric_threshold_500": 0.7580645316349216,
242
+ "scr_dir2_threshold_500": 0.7580645316349216
243
+ },
244
+ {
245
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Pet_Supplies_Office_Products_results",
246
+ "scr_dir1_threshold_2": 0.1891891674197794,
247
+ "scr_metric_threshold_2": 0.16517854648243513,
248
+ "scr_dir2_threshold_2": 0.16517854648243513,
249
+ "scr_dir1_threshold_5": 0.3153152790329657,
250
+ "scr_metric_threshold_5": 0.2857142857142857,
251
+ "scr_dir2_threshold_5": 0.2857142857142857,
252
+ "scr_dir1_threshold_10": 0.2432433303208824,
253
+ "scr_metric_threshold_10": 0.35267849659016254,
254
+ "scr_dir2_threshold_10": 0.35267849659016254,
255
+ "scr_dir1_threshold_20": 0.13963971946080886,
256
+ "scr_metric_threshold_20": 0.42857142857142855,
257
+ "scr_dir2_threshold_20": 0.42857142857142855,
258
+ "scr_dir1_threshold_50": 0.2702702775267402,
259
+ "scr_metric_threshold_50": 0.5535713953099135,
260
+ "scr_dir2_threshold_50": 0.5535713953099135,
261
+ "scr_dir1_threshold_100": 0.6666666666666666,
262
+ "scr_metric_threshold_100": 0.6428570098110826,
263
+ "scr_dir2_threshold_100": 0.6428570098110826,
264
+ "scr_dir1_threshold_500": -0.148648612366299,
265
+ "scr_metric_threshold_500": 0.7499998004309095,
266
+ "scr_dir2_threshold_500": 0.7499998004309095
267
+ },
268
+ {
269
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Industrial_and_Scientific_Toys_and_Games_results",
270
+ "scr_dir1_threshold_2": 0.09012885856869063,
271
+ "scr_metric_threshold_2": 0.09012885856869063,
272
+ "scr_dir2_threshold_2": 0.07619033292310208,
273
+ "scr_dir1_threshold_5": 0.17167388232356773,
274
+ "scr_metric_threshold_5": 0.17167388232356773,
275
+ "scr_dir2_threshold_5": -0.03333346578807544,
276
+ "scr_dir1_threshold_10": 0.23605149226469982,
277
+ "scr_metric_threshold_10": 0.23605149226469982,
278
+ "scr_dir2_threshold_10": -0.19047625805514054,
279
+ "scr_dir1_threshold_20": 0.30042910220583197,
280
+ "scr_metric_threshold_20": 0.30042910220583197,
281
+ "scr_dir2_threshold_20": -0.22380944001162575,
282
+ "scr_dir1_threshold_50": 0.3347639298333219,
283
+ "scr_metric_threshold_50": 0.3347639298333219,
284
+ "scr_dir2_threshold_50": -0.300000056766318,
285
+ "scr_dir1_threshold_100": 0.3690987574608119,
286
+ "scr_metric_threshold_100": 0.3690987574608119,
287
+ "scr_dir2_threshold_100": -0.17619044645573817,
288
+ "scr_dir1_threshold_500": 0.31759651601957695,
289
+ "scr_metric_threshold_500": 0.31759651601957695,
290
+ "scr_dir2_threshold_500": -0.2952379303454571
291
+ }
292
+ ],
293
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
294
+ "sae_lens_id": "custom_sae",
295
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_5",
296
+ "sae_lens_version": "5.4.2",
297
+ "sae_cfg_dict": {
298
+ "model_name": "gemma-2-2b",
299
+ "d_in": 2304,
300
+ "d_sae": 16384,
301
+ "hook_layer": 12,
302
+ "hook_name": "blocks.12.hook_resid_post",
303
+ "context_size": null,
304
+ "hook_head_index": null,
305
+ "architecture": "topk",
306
+ "apply_b_dec_to_input": null,
307
+ "finetuning_scaling_factor": null,
308
+ "activation_fn_str": "",
309
+ "prepend_bos": true,
310
+ "normalize_activations": "none",
311
+ "dtype": "bfloat16",
312
+ "device": "",
313
+ "dataset_path": "",
314
+ "dataset_trust_remote_code": true,
315
+ "seqpos_slice": [
316
+ null
317
+ ],
318
+ "training_tokens": -100000,
319
+ "sae_lens_training_version": null,
320
+ "neuronpedia_id": null
321
+ },
322
+ "eval_result_unstructured": null
323
+ }
eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_0_custom_sae_eval_results.json ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "scr",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "canrager/amazon_reviews_mcauley_1and5"
8
+ ],
9
+ "perform_scr": true,
10
+ "early_stopping_patience": 20,
11
+ "train_set_size": 4000,
12
+ "test_set_size": 1000,
13
+ "context_length": 128,
14
+ "probe_train_batch_size": 16,
15
+ "probe_test_batch_size": 500,
16
+ "probe_epochs": 20,
17
+ "probe_lr": 0.001,
18
+ "probe_l1_penalty": 0.001,
19
+ "sae_batch_size": 125,
20
+ "llm_batch_size": 32,
21
+ "llm_dtype": "bfloat16",
22
+ "lower_vram_usage": false,
23
+ "model_name": "gemma-2-2b",
24
+ "n_values": [
25
+ 2,
26
+ 5,
27
+ 10,
28
+ 20,
29
+ 50,
30
+ 100,
31
+ 500
32
+ ],
33
+ "column1_vals_lookup": {
34
+ "LabHC/bias_in_bios_class_set1": [
35
+ [
36
+ "professor",
37
+ "nurse"
38
+ ],
39
+ [
40
+ "architect",
41
+ "journalist"
42
+ ],
43
+ [
44
+ "surgeon",
45
+ "psychologist"
46
+ ],
47
+ [
48
+ "attorney",
49
+ "teacher"
50
+ ]
51
+ ],
52
+ "canrager/amazon_reviews_mcauley_1and5": [
53
+ [
54
+ "Books",
55
+ "CDs_and_Vinyl"
56
+ ],
57
+ [
58
+ "Software",
59
+ "Electronics"
60
+ ],
61
+ [
62
+ "Pet_Supplies",
63
+ "Office_Products"
64
+ ],
65
+ [
66
+ "Industrial_and_Scientific",
67
+ "Toys_and_Games"
68
+ ]
69
+ ]
70
+ }
71
+ },
72
+ "eval_id": "cea9d7be-9e73-4798-843e-6d8c516a9214",
73
+ "datetime_epoch_millis": 1740121474301,
74
+ "eval_result_metrics": {
75
+ "scr_metrics": {
76
+ "scr_dir1_threshold_2": 0.22437969541106126,
77
+ "scr_metric_threshold_2": 0.0664077256897066,
78
+ "scr_dir2_threshold_2": 0.054296044082894276,
79
+ "scr_dir1_threshold_5": 0.26069761525928214,
80
+ "scr_metric_threshold_5": 0.11660904868495596,
81
+ "scr_dir2_threshold_5": 0.10252767828317963,
82
+ "scr_dir1_threshold_10": 0.21854631401593982,
83
+ "scr_metric_threshold_10": 0.15495424347712866,
84
+ "scr_dir2_threshold_10": 0.1445618392475292,
85
+ "scr_dir1_threshold_20": 0.20196367515212205,
86
+ "scr_metric_threshold_20": 0.20970272301165202,
87
+ "scr_dir2_threshold_20": 0.19728194080660943,
88
+ "scr_dir1_threshold_50": 0.21156297310732708,
89
+ "scr_metric_threshold_50": 0.2246714082589436,
90
+ "scr_dir2_threshold_50": 0.20439497665821477,
91
+ "scr_dir1_threshold_100": 0.042267191675846104,
92
+ "scr_metric_threshold_100": 0.24865649137972923,
93
+ "scr_dir2_threshold_100": 0.22646916309914827,
94
+ "scr_dir1_threshold_500": -0.10066329040273446,
95
+ "scr_metric_threshold_500": 0.25778452325621154,
96
+ "scr_dir2_threshold_500": 0.22450739304198242
97
+ }
98
+ },
99
+ "eval_result_details": [
100
+ {
101
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_professor_nurse_results",
102
+ "scr_dir1_threshold_2": 0.3809524260050484,
103
+ "scr_metric_threshold_2": 0.0024571168813214595,
104
+ "scr_dir2_threshold_2": 0.0024571168813214595,
105
+ "scr_dir1_threshold_5": 0.42857088793941894,
106
+ "scr_metric_threshold_5": 0.027027114104653437,
107
+ "scr_dir2_threshold_5": 0.027027114104653437,
108
+ "scr_dir1_threshold_10": 0.3809524260050484,
109
+ "scr_metric_threshold_10": 0.04668317046281315,
110
+ "scr_dir2_threshold_10": 0.04668317046281315,
111
+ "scr_dir1_threshold_20": 0.3492058386094512,
112
+ "scr_metric_threshold_20": 0.06879605080482366,
113
+ "scr_dir2_threshold_20": 0.06879605080482366,
114
+ "scr_dir1_threshold_50": 0.3015873766750807,
115
+ "scr_metric_threshold_50": 0.12039316213280908,
116
+ "scr_dir2_threshold_50": 0.12039316213280908,
117
+ "scr_dir1_threshold_100": 0.23809514798990317,
118
+ "scr_metric_threshold_100": 0.15233421710263478,
119
+ "scr_dir2_threshold_100": 0.15233421710263478,
120
+ "scr_dir1_threshold_500": -0.2539689147407102,
121
+ "scr_metric_threshold_500": 0.11302210438631537,
122
+ "scr_dir2_threshold_500": 0.11302210438631537
123
+ },
124
+ {
125
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_architect_journalist_results",
126
+ "scr_dir1_threshold_2": 0.09090914564247801,
127
+ "scr_metric_threshold_2": 0.0679886403335699,
128
+ "scr_dir2_threshold_2": 0.0679886403335699,
129
+ "scr_dir1_threshold_5": 0.11111091042202506,
130
+ "scr_metric_threshold_5": 0.12181298763313322,
131
+ "scr_dir2_threshold_5": 0.12181298763313322,
132
+ "scr_dir1_threshold_10": 0.06060589640589929,
133
+ "scr_metric_threshold_10": 0.16147304189869,
134
+ "scr_dir2_threshold_10": 0.16147304189869,
135
+ "scr_dir1_threshold_20": 0.06060589640589929,
136
+ "scr_metric_threshold_20": 0.2549574434638101,
137
+ "scr_dir2_threshold_20": 0.2549574434638101,
138
+ "scr_dir1_threshold_50": 0.020201764779547052,
139
+ "scr_metric_threshold_50": 0.3456090201924673,
140
+ "scr_dir2_threshold_50": 0.3456090201924673,
141
+ "scr_dir1_threshold_100": -0.24242478975811346,
142
+ "scr_metric_threshold_100": 0.18980162796670313,
143
+ "scr_dir2_threshold_100": 0.18980162796670313,
144
+ "scr_dir1_threshold_500": -0.6767681511236984,
145
+ "scr_metric_threshold_500": 0.2096317395253275,
146
+ "scr_dir2_threshold_500": 0.2096317395253275
147
+ },
148
+ {
149
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_surgeon_psychologist_results",
150
+ "scr_dir1_threshold_2": 0.48387068863579336,
151
+ "scr_metric_threshold_2": 0.03535352927204973,
152
+ "scr_dir2_threshold_2": 0.03535352927204973,
153
+ "scr_dir1_threshold_5": 0.532258142045613,
154
+ "scr_metric_threshold_5": 0.07575764873540326,
155
+ "scr_dir2_threshold_5": 0.07575764873540326,
156
+ "scr_dir1_threshold_10": 0.22580651363649037,
157
+ "scr_metric_threshold_10": 0.1010101481416146,
158
+ "scr_dir2_threshold_10": 0.1010101481416146,
159
+ "scr_dir1_threshold_20": 0.22580651363649037,
160
+ "scr_metric_threshold_20": 0.1666667670111795,
161
+ "scr_dir2_threshold_20": 0.1666667670111795,
162
+ "scr_dir1_threshold_50": 0.22580651363649037,
163
+ "scr_metric_threshold_50": 0.2045455161204965,
164
+ "scr_dir2_threshold_50": 0.2045455161204965,
165
+ "scr_dir1_threshold_100": -0.8387107318203362,
166
+ "scr_metric_threshold_100": 0.24747485542111733,
167
+ "scr_dir2_threshold_100": 0.24747485542111733,
168
+ "scr_dir1_threshold_500": -1.435485157957175,
169
+ "scr_metric_threshold_500": 0.09090911827577622,
170
+ "scr_dir2_threshold_500": 0.09090911827577622
171
+ },
172
+ {
173
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_attorney_teacher_results",
174
+ "scr_dir1_threshold_2": 0.12195107768082737,
175
+ "scr_metric_threshold_2": 0.07331372148022297,
176
+ "scr_dir2_threshold_2": 0.07331372148022297,
177
+ "scr_dir1_threshold_5": 0.13008153719862106,
178
+ "scr_metric_threshold_5": 0.17595310634623484,
179
+ "scr_dir2_threshold_5": 0.17595310634623484,
180
+ "scr_dir1_threshold_10": 0.11382110275354018,
181
+ "scr_metric_threshold_10": 0.22580636703530657,
182
+ "scr_dir2_threshold_10": 0.22580636703530657,
183
+ "scr_dir1_threshold_20": 0.040650359226942455,
184
+ "scr_metric_threshold_20": 0.14369492905997697,
185
+ "scr_dir2_threshold_20": 0.14369492905997697,
186
+ "scr_dir1_threshold_50": 0.016260434445080872,
187
+ "scr_metric_threshold_50": 0.24926682782645782,
188
+ "scr_dir2_threshold_50": 0.24926682782645782,
189
+ "scr_dir1_threshold_100": -0.040650359226942455,
190
+ "scr_metric_threshold_100": 0.2434018000255199,
191
+ "scr_dir2_threshold_100": 0.2434018000255199,
192
+ "scr_dir1_threshold_500": 0.25203261487944845,
193
+ "scr_metric_threshold_500": 0.1671553898511282,
194
+ "scr_dir2_threshold_500": 0.1671553898511282
195
+ },
196
+ {
197
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Books_CDs_and_Vinyl_results",
198
+ "scr_dir1_threshold_2": 0.03825119347697291,
199
+ "scr_metric_threshold_2": 0.08203134458743348,
200
+ "scr_dir2_threshold_2": 0.08203134458743348,
201
+ "scr_dir1_threshold_5": 0.08196712234356307,
202
+ "scr_metric_threshold_5": 0.09765637369125917,
203
+ "scr_dir2_threshold_5": 0.09765637369125917,
204
+ "scr_dir1_threshold_10": 0.08743153202478901,
205
+ "scr_metric_threshold_10": 0.16406245634426148,
206
+ "scr_dir2_threshold_10": 0.16406245634426148,
207
+ "scr_dir1_threshold_20": 0.01092881936245188,
208
+ "scr_metric_threshold_20": 0.292968888243172,
209
+ "scr_dir2_threshold_20": 0.292968888243172,
210
+ "scr_dir1_threshold_50": 0.04371592886659017,
211
+ "scr_metric_threshold_50": 0.4570313445874335,
212
+ "scr_dir2_threshold_50": 0.4570313445874335,
213
+ "scr_dir1_threshold_100": 0.03825119347697291,
214
+ "scr_metric_threshold_100": 0.542968888243172,
215
+ "scr_dir2_threshold_100": 0.542968888243172,
216
+ "scr_dir1_threshold_500": 0.04371592886659017,
217
+ "scr_metric_threshold_500": 0.5976563736912591,
218
+ "scr_dir2_threshold_500": 0.5976563736912591
219
+ },
220
+ {
221
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Software_Electronics_results",
222
+ "scr_dir1_threshold_2": 0.24102560340535104,
223
+ "scr_metric_threshold_2": 0.04435498407738989,
224
+ "scr_dir2_threshold_2": 0.04435498407738989,
225
+ "scr_dir1_threshold_5": 0.29743574695473746,
226
+ "scr_metric_threshold_5": 0.08064531634921589,
227
+ "scr_dir2_threshold_5": 0.08064531634921589,
228
+ "scr_dir1_threshold_10": 0.2769228841190907,
229
+ "scr_metric_threshold_10": 0.08870972781349516,
230
+ "scr_dir2_threshold_10": 0.08870972781349516,
231
+ "scr_dir1_threshold_20": 0.3179486097903842,
232
+ "scr_metric_threshold_20": 0.16129039235714715,
233
+ "scr_dir2_threshold_20": 0.16129039235714715,
234
+ "scr_dir1_threshold_50": 0.4153844790110642,
235
+ "scr_metric_threshold_50": 0.036290332271825994,
236
+ "scr_dir2_threshold_50": 0.036290332271825994,
237
+ "scr_dir1_threshold_100": 0.4256410632613156,
238
+ "scr_metric_threshold_100": 0.14112912335516434,
239
+ "scr_dir2_threshold_100": 0.14112912335516434,
240
+ "scr_dir1_threshold_500": 0.47179462256045057,
241
+ "scr_metric_threshold_500": 0.32661299044643394,
242
+ "scr_dir2_threshold_500": 0.32661299044643394
243
+ },
244
+ {
245
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Pet_Supplies_Office_Products_results",
246
+ "scr_dir1_threshold_2": 0.27927917043223033,
247
+ "scr_metric_threshold_2": 0.06696421087587681,
248
+ "scr_dir2_threshold_2": 0.06696421087587681,
249
+ "scr_dir1_threshold_5": 0.3153152790329657,
250
+ "scr_metric_threshold_5": 0.16517854648243513,
251
+ "scr_dir2_threshold_5": 0.16517854648243513,
252
+ "scr_dir1_threshold_10": 0.4054052820454166,
253
+ "scr_metric_threshold_10": 0.2544641609836042,
254
+ "scr_dir2_threshold_10": 0.2544641609836042,
255
+ "scr_dir1_threshold_20": 0.38738749623443636,
256
+ "scr_metric_threshold_20": 0.3660714452021861,
257
+ "scr_dir2_threshold_20": 0.3660714452021861,
258
+ "scr_dir1_threshold_50": 0.4549549984937745,
259
+ "scr_metric_threshold_50": 0.16964277398906946,
260
+ "scr_dir2_threshold_50": 0.16964277398906946,
261
+ "scr_dir1_threshold_100": 0.5090088929054901,
262
+ "scr_metric_threshold_100": 0.22321430234504325,
263
+ "scr_dir2_threshold_100": 0.22321430234504325,
264
+ "scr_dir1_threshold_500": 0.44144139064615195,
265
+ "scr_metric_threshold_500": 0.20535712622638533,
266
+ "scr_dir2_threshold_500": 0.20535712622638533
267
+ },
268
+ {
269
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Industrial_and_Scientific_Toys_and_Games_results",
270
+ "scr_dir1_threshold_2": 0.15879825800978847,
271
+ "scr_metric_threshold_2": 0.15879825800978847,
272
+ "scr_dir2_threshold_2": 0.06190480515528994,
273
+ "scr_dir1_threshold_5": 0.1888412961373127,
274
+ "scr_metric_threshold_5": 0.1888412961373127,
275
+ "scr_dir2_threshold_5": 0.07619033292310208,
276
+ "scr_dir1_threshold_10": 0.19742487513724416,
277
+ "scr_metric_threshold_10": 0.19742487513724416,
278
+ "scr_dir2_threshold_10": 0.11428564130044823,
279
+ "scr_dir1_threshold_20": 0.22317586795092056,
280
+ "scr_metric_threshold_20": 0.22317586795092056,
281
+ "scr_dir2_threshold_20": 0.12380961031057988,
282
+ "scr_dir1_threshold_50": 0.21459228895098914,
283
+ "scr_metric_threshold_50": 0.21459228895098914,
284
+ "scr_dir2_threshold_50": 0.05238083614515829,
285
+ "scr_dir1_threshold_100": 0.2489271165784791,
286
+ "scr_metric_threshold_100": 0.2489271165784791,
287
+ "scr_dir2_threshold_100": 0.07142849033383136,
288
+ "scr_dir1_threshold_500": 0.3519313436470669,
289
+ "scr_metric_threshold_500": 0.3519313436470669,
290
+ "scr_dir2_threshold_500": 0.08571430193323373
291
+ }
292
+ ],
293
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
294
+ "sae_lens_id": "custom_sae",
295
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_0",
296
+ "sae_lens_version": "5.4.2",
297
+ "sae_cfg_dict": {
298
+ "model_name": "gemma-2-2b",
299
+ "d_in": 2304,
300
+ "d_sae": 65536,
301
+ "hook_layer": 12,
302
+ "hook_name": "blocks.12.hook_resid_post",
303
+ "context_size": null,
304
+ "hook_head_index": null,
305
+ "architecture": "topk",
306
+ "apply_b_dec_to_input": null,
307
+ "finetuning_scaling_factor": null,
308
+ "activation_fn_str": "",
309
+ "prepend_bos": true,
310
+ "normalize_activations": "none",
311
+ "dtype": "bfloat16",
312
+ "device": "",
313
+ "dataset_path": "",
314
+ "dataset_trust_remote_code": true,
315
+ "seqpos_slice": [
316
+ null
317
+ ],
318
+ "training_tokens": -100000,
319
+ "sae_lens_training_version": null,
320
+ "neuronpedia_id": null
321
+ },
322
+ "eval_result_unstructured": null
323
+ }
eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_1_custom_sae_eval_results.json ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "scr",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "canrager/amazon_reviews_mcauley_1and5"
8
+ ],
9
+ "perform_scr": true,
10
+ "early_stopping_patience": 20,
11
+ "train_set_size": 4000,
12
+ "test_set_size": 1000,
13
+ "context_length": 128,
14
+ "probe_train_batch_size": 16,
15
+ "probe_test_batch_size": 500,
16
+ "probe_epochs": 20,
17
+ "probe_lr": 0.001,
18
+ "probe_l1_penalty": 0.001,
19
+ "sae_batch_size": 125,
20
+ "llm_batch_size": 32,
21
+ "llm_dtype": "bfloat16",
22
+ "lower_vram_usage": false,
23
+ "model_name": "gemma-2-2b",
24
+ "n_values": [
25
+ 2,
26
+ 5,
27
+ 10,
28
+ 20,
29
+ 50,
30
+ 100,
31
+ 500
32
+ ],
33
+ "column1_vals_lookup": {
34
+ "LabHC/bias_in_bios_class_set1": [
35
+ [
36
+ "professor",
37
+ "nurse"
38
+ ],
39
+ [
40
+ "architect",
41
+ "journalist"
42
+ ],
43
+ [
44
+ "surgeon",
45
+ "psychologist"
46
+ ],
47
+ [
48
+ "attorney",
49
+ "teacher"
50
+ ]
51
+ ],
52
+ "canrager/amazon_reviews_mcauley_1and5": [
53
+ [
54
+ "Books",
55
+ "CDs_and_Vinyl"
56
+ ],
57
+ [
58
+ "Software",
59
+ "Electronics"
60
+ ],
61
+ [
62
+ "Pet_Supplies",
63
+ "Office_Products"
64
+ ],
65
+ [
66
+ "Industrial_and_Scientific",
67
+ "Toys_and_Games"
68
+ ]
69
+ ]
70
+ }
71
+ },
72
+ "eval_id": "fbfefc4f-778f-48c7-b700-f51940f8f077",
73
+ "datetime_epoch_millis": 1740120136936,
74
+ "eval_result_metrics": {
75
+ "scr_metrics": {
76
+ "scr_dir1_threshold_2": 0.24201844358607494,
77
+ "scr_metric_threshold_2": 0.07482945290019065,
78
+ "scr_dir2_threshold_2": 0.06295280134024435,
79
+ "scr_dir1_threshold_5": 0.27005249015663546,
80
+ "scr_metric_threshold_5": 0.132060589774245,
81
+ "scr_dir2_threshold_5": 0.12637134245565068,
82
+ "scr_dir1_threshold_10": 0.27573126492637157,
83
+ "scr_metric_threshold_10": 0.1891209995436452,
84
+ "scr_dir2_threshold_10": 0.18485724291923455,
85
+ "scr_dir1_threshold_20": 0.29557757920145794,
86
+ "scr_metric_threshold_20": 0.23562467676574025,
87
+ "scr_dir2_threshold_20": 0.2364830339652907,
88
+ "scr_dir1_threshold_50": 0.28019139479663957,
89
+ "scr_metric_threshold_50": 0.2703382108254873,
90
+ "scr_dir2_threshold_50": 0.25803490991603567,
91
+ "scr_dir1_threshold_100": 0.07507968743503643,
92
+ "scr_metric_threshold_100": 0.2883350028638063,
93
+ "scr_dir2_threshold_100": 0.2769795110761546,
94
+ "scr_dir1_threshold_500": -0.0796332869221493,
95
+ "scr_metric_threshold_500": 0.27625912454574264,
96
+ "scr_dir2_threshold_500": 0.25583455184228526
97
+ }
98
+ },
99
+ "eval_result_details": [
100
+ {
101
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_professor_nurse_results",
102
+ "scr_dir1_threshold_2": 0.3650786592542414,
103
+ "scr_metric_threshold_2": 0.0,
104
+ "scr_dir2_threshold_2": 0.0,
105
+ "scr_dir1_threshold_5": 0.3968252466498386,
106
+ "scr_metric_threshold_5": 0.024570143672067307,
107
+ "scr_dir2_threshold_5": 0.024570143672067307,
108
+ "scr_dir1_threshold_10": 0.42857088793941894,
109
+ "scr_metric_threshold_10": 0.04176908314890556,
110
+ "scr_dir2_threshold_10": 0.04176908314890556,
111
+ "scr_dir1_threshold_20": 0.41269806729462877,
112
+ "scr_metric_threshold_20": 0.05896816907447914,
113
+ "scr_dir2_threshold_20": 0.05896816907447914,
114
+ "scr_dir1_threshold_50": 0.3650786592542414,
115
+ "scr_metric_threshold_50": 0.07371013811873126,
116
+ "scr_dir2_threshold_50": 0.07371013811873126,
117
+ "scr_dir1_threshold_100": -0.17460386541074246,
118
+ "scr_metric_threshold_100": 0.09828013534206324,
119
+ "scr_dir2_threshold_100": 0.09828013534206324,
120
+ "scr_dir1_threshold_500": -0.3174611434258877,
121
+ "scr_metric_threshold_500": 0.036855142283733294,
122
+ "scr_dir2_threshold_500": 0.036855142283733294
123
+ },
124
+ {
125
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_architect_journalist_results",
126
+ "scr_dir1_threshold_2": 0.11111091042202506,
127
+ "scr_metric_threshold_2": 0.08215293336757645,
128
+ "scr_dir2_threshold_2": 0.08215293336757645,
129
+ "scr_dir1_threshold_5": 0.24242418769085533,
130
+ "scr_metric_threshold_5": 0.18980162796670313,
131
+ "scr_dir2_threshold_5": 0.18980162796670313,
132
+ "scr_dir1_threshold_10": 0.25252507008062886,
133
+ "scr_metric_threshold_10": 0.23512750075687772,
134
+ "scr_dir2_threshold_10": 0.23512750075687772,
135
+ "scr_dir1_threshold_20": 0.20202005606450307,
136
+ "scr_metric_threshold_20": 0.3257790774855349,
137
+ "scr_dir2_threshold_20": 0.3257790774855349,
138
+ "scr_dir1_threshold_50": 0.20202005606450307,
139
+ "scr_metric_threshold_50": 0.40509918601664846,
140
+ "scr_dir2_threshold_50": 0.40509918601664846,
141
+ "scr_dir1_threshold_100": -0.010101484457031663,
142
+ "scr_metric_threshold_100": 0.10764869459912667,
143
+ "scr_dir2_threshold_100": 0.10764869459912667,
144
+ "scr_dir1_threshold_500": -0.2828283193172076,
145
+ "scr_metric_threshold_500": 0.06515581549710699,
146
+ "scr_dir2_threshold_500": 0.06515581549710699
147
+ },
148
+ {
149
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_surgeon_psychologist_results",
150
+ "scr_dir1_threshold_2": 0.516128349998606,
151
+ "scr_metric_threshold_2": 0.0530303691664592,
152
+ "scr_dir2_threshold_2": 0.0530303691664592,
153
+ "scr_dir1_threshold_5": 0.5483869727270193,
154
+ "scr_metric_threshold_5": 0.1010101481416146,
155
+ "scr_dir2_threshold_5": 0.1010101481416146,
156
+ "scr_dir1_threshold_10": 0.45161302727298075,
157
+ "scr_metric_threshold_10": 0.1565657371453411,
158
+ "scr_dir2_threshold_10": 0.1565657371453411,
159
+ "scr_dir1_threshold_20": 0.4999995193171997,
160
+ "scr_metric_threshold_20": 0.21969698566086945,
161
+ "scr_dir2_threshold_20": 0.21969698566086945,
162
+ "scr_dir1_threshold_50": 0.3870967431817548,
163
+ "scr_metric_threshold_50": 0.25000007525838464,
164
+ "scr_dir2_threshold_50": 0.25000007525838464,
165
+ "scr_dir1_threshold_100": -0.9354846772743747,
166
+ "scr_metric_threshold_100": 0.2904040442049689,
167
+ "scr_dir2_threshold_100": 0.2904040442049689,
168
+ "scr_dir1_threshold_500": -1.7258079556848913,
169
+ "scr_metric_threshold_500": 0.012626249703105673,
170
+ "scr_dir2_threshold_500": 0.012626249703105673
171
+ },
172
+ {
173
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_attorney_teacher_results",
174
+ "scr_dir1_threshold_2": 0.19512182120742508,
175
+ "scr_metric_threshold_2": 0.08211143797532962,
176
+ "scr_dir2_threshold_2": 0.08211143797532962,
177
+ "scr_dir1_threshold_5": 0.10569112782625298,
178
+ "scr_metric_threshold_5": 0.19941339234368635,
179
+ "scr_dir2_threshold_5": 0.19941339234368635,
180
+ "scr_dir1_threshold_10": 0.10569112782625298,
181
+ "scr_metric_threshold_10": 0.25513185562739576,
182
+ "scr_dir2_threshold_10": 0.25513185562739576,
183
+ "scr_dir1_threshold_20": 0.20325228072521878,
184
+ "scr_metric_threshold_20": 0.21407613663973096,
185
+ "scr_dir2_threshold_20": 0.21407613663973096,
186
+ "scr_dir1_threshold_50": 0.11382110275354018,
187
+ "scr_metric_threshold_50": 0.28739003291365367,
188
+ "scr_dir2_threshold_50": 0.28739003291365367,
189
+ "scr_dir1_threshold_100": 0.41463405178721824,
190
+ "scr_metric_threshold_100": 0.348973523998301,
191
+ "scr_dir2_threshold_100": 0.348973523998301,
192
+ "scr_dir1_threshold_500": 0.25203261487944845,
193
+ "scr_metric_threshold_500": 0.140762415159508,
194
+ "scr_dir2_threshold_500": 0.140762415159508
195
+ },
196
+ {
197
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Books_CDs_and_Vinyl_results",
198
+ "scr_dir1_threshold_2": 0.04918033854781611,
199
+ "scr_metric_threshold_2": 0.0664063154836078,
200
+ "scr_dir2_threshold_2": 0.0664063154836078,
201
+ "scr_dir1_threshold_5": 0.06557389329988525,
202
+ "scr_metric_threshold_5": 0.0742187136202179,
203
+ "scr_dir2_threshold_5": 0.0742187136202179,
204
+ "scr_dir1_threshold_10": 0.04918033854781611,
205
+ "scr_metric_threshold_10": 0.1914063154836078,
206
+ "scr_dir2_threshold_10": 0.1914063154836078,
207
+ "scr_dir1_threshold_20": 0.027322374114521025,
208
+ "scr_metric_threshold_20": 0.29687508731147705,
209
+ "scr_dir2_threshold_20": 0.29687508731147705,
210
+ "scr_dir1_threshold_50": 0.05464474822904205,
211
+ "scr_metric_threshold_50": 0.5,
212
+ "scr_dir2_threshold_50": 0.5,
213
+ "scr_dir1_threshold_100": 0.09289626741440628,
214
+ "scr_metric_threshold_100": 0.6132811699644793,
215
+ "scr_dir2_threshold_100": 0.6132811699644793,
216
+ "scr_dir1_threshold_500": 0.07650271266233713,
217
+ "scr_metric_threshold_500": 0.7070313445874334,
218
+ "scr_dir2_threshold_500": 0.7070313445874334
219
+ },
220
+ {
221
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Software_Electronics_results",
222
+ "scr_dir1_threshold_2": 0.23076901915509954,
223
+ "scr_metric_threshold_2": 0.036290332271825994,
224
+ "scr_dir2_threshold_2": 0.036290332271825994,
225
+ "scr_dir1_threshold_5": 0.2615384662409978,
226
+ "scr_metric_threshold_5": 0.06451625307937271,
227
+ "scr_dir2_threshold_5": 0.06451625307937271,
228
+ "scr_dir1_threshold_10": 0.3025638862474351,
229
+ "scr_metric_threshold_10": 0.09677413927777444,
230
+ "scr_dir2_threshold_10": 0.09677413927777444,
231
+ "scr_dir1_threshold_20": 0.34871775121142623,
232
+ "scr_metric_threshold_20": 0.16129039235714715,
233
+ "scr_dir2_threshold_20": 0.16129039235714715,
234
+ "scr_dir1_threshold_50": 0.4051282004256689,
235
+ "scr_metric_threshold_50": 0.1290322658174608,
236
+ "scr_dir2_threshold_50": 0.1290322658174608,
237
+ "scr_dir1_threshold_100": 0.45128206538966004,
238
+ "scr_metric_threshold_100": 0.23790326263293876,
239
+ "scr_dir2_threshold_100": 0.23790326263293876,
240
+ "scr_dir1_threshold_500": 0.45128206538966004,
241
+ "scr_metric_threshold_500": 0.463709667728174,
242
+ "scr_dir2_threshold_500": 0.463709667728174
243
+ },
244
+ {
245
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Pet_Supplies_Office_Products_results",
246
+ "scr_dir1_threshold_2": 0.29279277827985295,
247
+ "scr_metric_threshold_2": 0.10267856311319266,
248
+ "scr_dir2_threshold_2": 0.10267856311319266,
249
+ "scr_dir1_threshold_5": 0.34684694118095594,
250
+ "scr_metric_threshold_5": 0.20982135373301966,
251
+ "scr_dir2_threshold_5": 0.20982135373301966,
252
+ "scr_dir1_threshold_10": 0.4054052820454166,
253
+ "scr_metric_threshold_10": 0.3258928654582359,
254
+ "scr_dir2_threshold_10": 0.3258928654582359,
255
+ "scr_dir1_threshold_20": 0.47747749924688726,
256
+ "scr_metric_threshold_20": 0.415178479959405,
257
+ "scr_dir2_threshold_20": 0.415178479959405,
258
+ "scr_dir1_threshold_50": 0.4819819456996323,
259
+ "scr_metric_threshold_50": 0.2857142857142857,
260
+ "scr_dir2_threshold_50": 0.2857142857142857,
261
+ "scr_dir1_threshold_100": 0.5045044464527451,
262
+ "scr_metric_threshold_100": 0.35267849659016254,
263
+ "scr_dir2_threshold_100": 0.35267849659016254,
264
+ "scr_dir1_threshold_500": 0.5315313936586028,
265
+ "scr_metric_threshold_500": 0.4062500249461363,
266
+ "scr_dir2_threshold_500": 0.4062500249461363
267
+ },
268
+ {
269
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Industrial_and_Scientific_Toys_and_Games_results",
270
+ "scr_dir1_threshold_2": 0.17596567182353345,
271
+ "scr_metric_threshold_2": 0.17596567182353345,
272
+ "scr_dir2_threshold_2": 0.08095245934396302,
273
+ "scr_dir1_threshold_5": 0.19313308563727843,
274
+ "scr_metric_threshold_5": 0.19313308563727843,
275
+ "scr_dir2_threshold_5": 0.14761910708852366,
276
+ "scr_dir1_threshold_10": 0.21030049945102341,
277
+ "scr_metric_threshold_10": 0.21030049945102341,
278
+ "scr_dir2_threshold_10": 0.17619044645573817,
279
+ "scr_dir1_threshold_20": 0.19313308563727843,
280
+ "scr_metric_threshold_20": 0.19313308563727843,
281
+ "scr_dir2_threshold_20": 0.19999994323368195,
282
+ "scr_dir1_threshold_50": 0.23175970276473412,
283
+ "scr_metric_threshold_50": 0.23175970276473412,
284
+ "scr_dir2_threshold_50": 0.1333332954891213,
285
+ "scr_dir1_threshold_100": 0.25751069557841055,
286
+ "scr_metric_threshold_100": 0.25751069557841055,
287
+ "scr_dir2_threshold_100": 0.16666676127719673,
288
+ "scr_dir1_threshold_500": 0.37768233646074334,
289
+ "scr_metric_threshold_500": 0.37768233646074334,
290
+ "scr_dir2_threshold_500": 0.21428575483308432
291
+ }
292
+ ],
293
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
294
+ "sae_lens_id": "custom_sae",
295
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_1",
296
+ "sae_lens_version": "5.4.2",
297
+ "sae_cfg_dict": {
298
+ "model_name": "gemma-2-2b",
299
+ "d_in": 2304,
300
+ "d_sae": 65536,
301
+ "hook_layer": 12,
302
+ "hook_name": "blocks.12.hook_resid_post",
303
+ "context_size": null,
304
+ "hook_head_index": null,
305
+ "architecture": "topk",
306
+ "apply_b_dec_to_input": null,
307
+ "finetuning_scaling_factor": null,
308
+ "activation_fn_str": "",
309
+ "prepend_bos": true,
310
+ "normalize_activations": "none",
311
+ "dtype": "bfloat16",
312
+ "device": "",
313
+ "dataset_path": "",
314
+ "dataset_trust_remote_code": true,
315
+ "seqpos_slice": [
316
+ null
317
+ ],
318
+ "training_tokens": -100000,
319
+ "sae_lens_training_version": null,
320
+ "neuronpedia_id": null
321
+ },
322
+ "eval_result_unstructured": null
323
+ }
eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_2_custom_sae_eval_results.json ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "scr",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "canrager/amazon_reviews_mcauley_1and5"
8
+ ],
9
+ "perform_scr": true,
10
+ "early_stopping_patience": 20,
11
+ "train_set_size": 4000,
12
+ "test_set_size": 1000,
13
+ "context_length": 128,
14
+ "probe_train_batch_size": 16,
15
+ "probe_test_batch_size": 500,
16
+ "probe_epochs": 20,
17
+ "probe_lr": 0.001,
18
+ "probe_l1_penalty": 0.001,
19
+ "sae_batch_size": 125,
20
+ "llm_batch_size": 32,
21
+ "llm_dtype": "bfloat16",
22
+ "lower_vram_usage": false,
23
+ "model_name": "gemma-2-2b",
24
+ "n_values": [
25
+ 2,
26
+ 5,
27
+ 10,
28
+ 20,
29
+ 50,
30
+ 100,
31
+ 500
32
+ ],
33
+ "column1_vals_lookup": {
34
+ "LabHC/bias_in_bios_class_set1": [
35
+ [
36
+ "professor",
37
+ "nurse"
38
+ ],
39
+ [
40
+ "architect",
41
+ "journalist"
42
+ ],
43
+ [
44
+ "surgeon",
45
+ "psychologist"
46
+ ],
47
+ [
48
+ "attorney",
49
+ "teacher"
50
+ ]
51
+ ],
52
+ "canrager/amazon_reviews_mcauley_1and5": [
53
+ [
54
+ "Books",
55
+ "CDs_and_Vinyl"
56
+ ],
57
+ [
58
+ "Software",
59
+ "Electronics"
60
+ ],
61
+ [
62
+ "Pet_Supplies",
63
+ "Office_Products"
64
+ ],
65
+ [
66
+ "Industrial_and_Scientific",
67
+ "Toys_and_Games"
68
+ ]
69
+ ]
70
+ }
71
+ },
72
+ "eval_id": "daaada01-6214-4b0d-b44d-fef49863bea4",
73
+ "datetime_epoch_millis": 1740121916806,
74
+ "eval_result_metrics": {
75
+ "scr_metrics": {
76
+ "scr_dir1_threshold_2": 0.2713980040415791,
77
+ "scr_metric_threshold_2": 0.11336947725755878,
78
+ "scr_dir2_threshold_2": 0.10787181376265494,
79
+ "scr_dir1_threshold_5": 0.2850142555951747,
80
+ "scr_metric_threshold_5": 0.17270425294655634,
81
+ "scr_dir2_threshold_5": 0.16916858931228068,
82
+ "scr_dir1_threshold_10": 0.2803922807345098,
83
+ "scr_metric_threshold_10": 0.21243680830920622,
84
+ "scr_dir2_threshold_10": 0.2081807406191314,
85
+ "scr_dir1_threshold_20": 0.202750902939444,
86
+ "scr_metric_threshold_20": 0.272277784180776,
87
+ "scr_dir2_threshold_20": 0.27188688861789684,
88
+ "scr_dir1_threshold_50": 0.28935335382768046,
89
+ "scr_metric_threshold_50": 0.31743658205899067,
90
+ "scr_dir2_threshold_50": 0.31274620806180997,
91
+ "scr_dir1_threshold_100": 0.26987852616648605,
92
+ "scr_metric_threshold_100": 0.37235005842033236,
93
+ "scr_dir2_threshold_100": 0.35467429972485254,
94
+ "scr_dir1_threshold_500": -0.0350965380491828,
95
+ "scr_metric_threshold_500": 0.34443889697026253,
96
+ "scr_dir2_threshold_500": 0.3179290874058431
97
+ }
98
+ },
99
+ "eval_result_details": [
100
+ {
101
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_professor_nurse_results",
102
+ "scr_dir1_threshold_2": 0.47619029597980633,
103
+ "scr_metric_threshold_2": 0.0024571168813214595,
104
+ "scr_dir2_threshold_2": 0.0024571168813214595,
105
+ "scr_dir1_threshold_5": 0.47619029597980633,
106
+ "scr_metric_threshold_5": 0.012284998611665989,
107
+ "scr_dir2_threshold_5": 0.012284998611665989,
108
+ "scr_dir1_threshold_10": 0.46031747533501616,
109
+ "scr_metric_threshold_10": 0.06633908037223754,
110
+ "scr_dir2_threshold_10": 0.06633908037223754,
111
+ "scr_dir1_threshold_20": 0.444444654690226,
112
+ "scr_metric_threshold_20": 0.09582316490947711,
113
+ "scr_dir2_threshold_20": 0.09582316490947711,
114
+ "scr_dir1_threshold_50": 0.444444654690226,
115
+ "scr_metric_threshold_50": 0.14496315935614107,
116
+ "scr_dir2_threshold_50": 0.14496315935614107,
117
+ "scr_dir1_threshold_100": 0.3809524260050484,
118
+ "scr_metric_threshold_100": 0.3218673736821078,
119
+ "scr_dir2_threshold_100": 0.3218673736821078,
120
+ "scr_dir1_threshold_500": -1.031747533501614,
121
+ "scr_metric_threshold_500": 0.35626539908451965,
122
+ "scr_dir2_threshold_500": 0.35626539908451965
123
+ },
124
+ {
125
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_architect_journalist_results",
126
+ "scr_dir1_threshold_2": 0.16161592443815084,
127
+ "scr_metric_threshold_2": 0.09065157672865719,
128
+ "scr_dir2_threshold_2": 0.09065157672865719,
129
+ "scr_dir1_threshold_5": 0.20202005606450307,
130
+ "scr_metric_threshold_5": 0.22379603255933406,
131
+ "scr_dir2_threshold_5": 0.22379603255933406,
132
+ "scr_dir1_threshold_10": 0.1818176892176979,
133
+ "scr_metric_threshold_10": 0.2521246186273472,
134
+ "scr_dir2_threshold_10": 0.2521246186273472,
135
+ "scr_dir1_threshold_20": 0.06060589640589929,
136
+ "scr_metric_threshold_20": 0.3031161410904476,
137
+ "scr_dir2_threshold_20": 0.3031161410904476,
138
+ "scr_dir1_threshold_50": 0.17171680682792437,
139
+ "scr_metric_threshold_50": 0.40509918601664846,
140
+ "scr_dir2_threshold_50": 0.40509918601664846,
141
+ "scr_dir1_threshold_100": 0.040403529559094105,
142
+ "scr_metric_threshold_100": 0.4419264154457423,
143
+ "scr_dir2_threshold_100": 0.4419264154457423,
144
+ "scr_dir1_threshold_500": -0.707071400360277,
145
+ "scr_metric_threshold_500": -0.04532587279017459,
146
+ "scr_dir2_threshold_500": -0.04532587279017459
147
+ },
148
+ {
149
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_surgeon_psychologist_results",
150
+ "scr_dir1_threshold_2": 0.5645158034084256,
151
+ "scr_metric_threshold_2": 0.03535352927204973,
152
+ "scr_dir2_threshold_2": 0.03535352927204973,
153
+ "scr_dir1_threshold_5": 0.5483869727270193,
154
+ "scr_metric_threshold_5": 0.08585867860124165,
155
+ "scr_dir2_threshold_5": 0.08585867860124165,
156
+ "scr_dir1_threshold_10": 0.4999995193171997,
157
+ "scr_metric_threshold_10": 0.1085859581701857,
158
+ "scr_dir2_threshold_10": 0.1085859581701857,
159
+ "scr_dir1_threshold_20": 0.24193534431789668,
160
+ "scr_metric_threshold_20": 0.16919198684844677,
161
+ "scr_dir2_threshold_20": 0.16919198684844677,
162
+ "scr_dir1_threshold_50": 0.3064516284091226,
163
+ "scr_metric_threshold_50": 0.21212117563229838,
164
+ "scr_dir2_threshold_50": 0.21212117563229838,
165
+ "scr_dir1_threshold_100": 0.1935478909080771,
166
+ "scr_metric_threshold_100": 0.30303029390807457,
167
+ "scr_dir2_threshold_100": 0.30303029390807457,
168
+ "scr_dir1_threshold_500": -0.11290373750104549,
169
+ "scr_metric_threshold_500": -0.04545455913788811,
170
+ "scr_dir2_threshold_500": -0.04545455913788811
171
+ },
172
+ {
173
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_attorney_teacher_results",
174
+ "scr_dir1_threshold_2": 0.2764230242518165,
175
+ "scr_metric_threshold_2": 0.1260996708634634,
176
+ "scr_dir2_threshold_2": 0.1260996708634634,
177
+ "scr_dir1_threshold_5": 0.2601625898067356,
178
+ "scr_metric_threshold_5": 0.19061585064227946,
179
+ "scr_dir2_threshold_5": 0.19061585064227946,
180
+ "scr_dir1_threshold_10": 0.20325228072521878,
181
+ "scr_metric_threshold_10": 0.2434018000255199,
182
+ "scr_dir2_threshold_10": 0.2434018000255199,
183
+ "scr_dir1_threshold_20": -0.13821102753540176,
184
+ "scr_metric_threshold_20": 0.3137830076052739,
185
+ "scr_dir2_threshold_20": 0.3137830076052739,
186
+ "scr_dir1_threshold_50": 0.40650407685993106,
187
+ "scr_metric_threshold_50": 0.3225805493066808,
188
+ "scr_dir2_threshold_50": 0.3225805493066808,
189
+ "scr_dir1_threshold_100": 0.49593477024110316,
190
+ "scr_metric_threshold_100": 0.12903218476393236,
191
+ "scr_dir2_threshold_100": 0.12903218476393236,
192
+ "scr_dir1_threshold_500": 0.4471544360868735,
193
+ "scr_metric_threshold_500": 0.21407613663973096,
194
+ "scr_dir2_threshold_500": 0.21407613663973096
195
+ },
196
+ {
197
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Books_CDs_and_Vinyl_results",
198
+ "scr_dir1_threshold_2": 0.04371592886659017,
199
+ "scr_metric_threshold_2": 0.3671876018633899,
200
+ "scr_dir2_threshold_2": 0.3671876018633899,
201
+ "scr_dir1_threshold_5": 0.06557389329988525,
202
+ "scr_metric_threshold_5": 0.43750011641530273,
203
+ "scr_dir2_threshold_5": 0.43750011641530273,
204
+ "scr_dir1_threshold_10": 0.06557389329988525,
205
+ "scr_metric_threshold_10": 0.44921871362021787,
206
+ "scr_dir2_threshold_10": 0.44921871362021787,
207
+ "scr_dir1_threshold_20": 0.03825119347697291,
208
+ "scr_metric_threshold_20": 0.5390624563442614,
209
+ "scr_dir2_threshold_20": 0.5390624563442614,
210
+ "scr_dir1_threshold_50": -0.22404372830578545,
211
+ "scr_metric_threshold_50": 0.621093800931695,
212
+ "scr_dir2_threshold_50": 0.621093800931695,
213
+ "scr_dir1_threshold_100": -0.20218576387249038,
214
+ "scr_metric_threshold_100": 0.6679686554125666,
215
+ "scr_dir2_threshold_100": 0.6679686554125666,
216
+ "scr_dir1_threshold_500": -0.09836067709563222,
217
+ "scr_metric_threshold_500": 0.753906199068305,
218
+ "scr_dir2_threshold_500": 0.753906199068305
219
+ },
220
+ {
221
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Software_Electronics_results",
222
+ "scr_dir1_threshold_2": 0.27179474482639304,
223
+ "scr_metric_threshold_2": 0.08467752208135552,
224
+ "scr_dir2_threshold_2": 0.08467752208135552,
225
+ "scr_dir1_threshold_5": 0.29743574695473746,
226
+ "scr_metric_threshold_5": 0.08870972781349516,
227
+ "scr_dir2_threshold_5": 0.08870972781349516,
228
+ "scr_dir1_threshold_10": 0.3179486097903842,
229
+ "scr_metric_threshold_10": 0.10887099681547797,
230
+ "scr_dir2_threshold_10": 0.10887099681547797,
231
+ "scr_dir1_threshold_20": 0.38974347688271976,
232
+ "scr_metric_threshold_20": 0.15322574055158325,
233
+ "scr_dir2_threshold_20": 0.15322574055158325,
234
+ "scr_dir1_threshold_50": 0.4256410632613156,
235
+ "scr_metric_threshold_50": 0.2862904524424683,
236
+ "scr_dir2_threshold_50": 0.2862904524424683,
237
+ "scr_dir1_threshold_100": 0.4102563397183665,
238
+ "scr_metric_threshold_100": 0.39112900318452204,
239
+ "scr_dir2_threshold_100": 0.39112900318452204,
240
+ "scr_dir1_threshold_500": 0.45641020468235766,
241
+ "scr_metric_threshold_500": 0.5403225380039657,
242
+ "scr_dir2_threshold_500": 0.5403225380039657
243
+ },
244
+ {
245
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Pet_Supplies_Office_Products_results",
246
+ "scr_dir1_threshold_2": 0.2567566696791176,
247
+ "scr_metric_threshold_2": 0.0803571594879004,
248
+ "scr_dir2_threshold_2": 0.0803571594879004,
249
+ "scr_dir1_threshold_5": 0.297297224732598,
250
+ "scr_metric_threshold_5": 0.20982135373301966,
251
+ "scr_dir2_threshold_5": 0.20982135373301966,
252
+ "scr_dir1_threshold_10": 0.34684694118095594,
253
+ "scr_metric_threshold_10": 0.30357146183294365,
254
+ "scr_dir2_threshold_10": 0.30357146183294365,
255
+ "scr_dir1_threshold_20": 0.3963963891399265,
256
+ "scr_metric_threshold_20": 0.415178479959405,
257
+ "scr_dir2_threshold_20": 0.415178479959405,
258
+ "scr_dir1_threshold_50": 0.5180180543003676,
259
+ "scr_metric_threshold_50": 0.28125005820765137,
260
+ "scr_dir2_threshold_50": 0.28125005820765137,
261
+ "scr_dir1_threshold_100": 0.5225225007531127,
262
+ "scr_metric_threshold_100": 0.4062500249461363,
263
+ "scr_dir2_threshold_100": 0.4062500249461363,
264
+ "scr_dir1_threshold_500": 0.3108108325802206,
265
+ "scr_metric_threshold_500": 0.5267857641779868,
266
+ "scr_dir2_threshold_500": 0.5267857641779868
267
+ },
268
+ {
269
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Industrial_and_Scientific_Toys_and_Games_results",
270
+ "scr_dir1_threshold_2": 0.12017164088233277,
271
+ "scr_metric_threshold_2": 0.12017164088233277,
272
+ "scr_dir2_threshold_2": 0.07619033292310208,
273
+ "scr_dir1_threshold_5": 0.13304726519611204,
274
+ "scr_metric_threshold_5": 0.13304726519611204,
275
+ "scr_dir2_threshold_5": 0.10476195612190681,
276
+ "scr_dir1_threshold_10": 0.1673818370097199,
277
+ "scr_metric_threshold_10": 0.1673818370097199,
278
+ "scr_dir2_threshold_10": 0.1333332954891213,
279
+ "scr_dir1_threshold_20": 0.1888412961373127,
280
+ "scr_metric_threshold_20": 0.1888412961373127,
281
+ "scr_dir2_threshold_20": 0.18571413163427958,
282
+ "scr_dir1_threshold_50": 0.266094274578342,
283
+ "scr_metric_threshold_50": 0.266094274578342,
284
+ "scr_dir2_threshold_50": 0.22857128260089646,
285
+ "scr_dir1_threshold_100": 0.31759651601957695,
286
+ "scr_metric_threshold_100": 0.31759651601957695,
287
+ "scr_dir2_threshold_100": 0.17619044645573817,
288
+ "scr_dir1_threshold_500": 0.45493557071565466,
289
+ "scr_metric_threshold_500": 0.45493557071565466,
290
+ "scr_dir2_threshold_500": 0.24285709420029883
291
+ }
292
+ ],
293
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
294
+ "sae_lens_id": "custom_sae",
295
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_2",
296
+ "sae_lens_version": "5.4.2",
297
+ "sae_cfg_dict": {
298
+ "model_name": "gemma-2-2b",
299
+ "d_in": 2304,
300
+ "d_sae": 65536,
301
+ "hook_layer": 12,
302
+ "hook_name": "blocks.12.hook_resid_post",
303
+ "context_size": null,
304
+ "hook_head_index": null,
305
+ "architecture": "topk",
306
+ "apply_b_dec_to_input": null,
307
+ "finetuning_scaling_factor": null,
308
+ "activation_fn_str": "",
309
+ "prepend_bos": true,
310
+ "normalize_activations": "none",
311
+ "dtype": "bfloat16",
312
+ "device": "",
313
+ "dataset_path": "",
314
+ "dataset_trust_remote_code": true,
315
+ "seqpos_slice": [
316
+ null
317
+ ],
318
+ "training_tokens": -100000,
319
+ "sae_lens_training_version": null,
320
+ "neuronpedia_id": null
321
+ },
322
+ "eval_result_unstructured": null
323
+ }
eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_3_custom_sae_eval_results.json ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "scr",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "canrager/amazon_reviews_mcauley_1and5"
8
+ ],
9
+ "perform_scr": true,
10
+ "early_stopping_patience": 20,
11
+ "train_set_size": 4000,
12
+ "test_set_size": 1000,
13
+ "context_length": 128,
14
+ "probe_train_batch_size": 16,
15
+ "probe_test_batch_size": 500,
16
+ "probe_epochs": 20,
17
+ "probe_lr": 0.001,
18
+ "probe_l1_penalty": 0.001,
19
+ "sae_batch_size": 125,
20
+ "llm_batch_size": 32,
21
+ "llm_dtype": "bfloat16",
22
+ "lower_vram_usage": false,
23
+ "model_name": "gemma-2-2b",
24
+ "n_values": [
25
+ 2,
26
+ 5,
27
+ 10,
28
+ 20,
29
+ 50,
30
+ 100,
31
+ 500
32
+ ],
33
+ "column1_vals_lookup": {
34
+ "LabHC/bias_in_bios_class_set1": [
35
+ [
36
+ "professor",
37
+ "nurse"
38
+ ],
39
+ [
40
+ "architect",
41
+ "journalist"
42
+ ],
43
+ [
44
+ "surgeon",
45
+ "psychologist"
46
+ ],
47
+ [
48
+ "attorney",
49
+ "teacher"
50
+ ]
51
+ ],
52
+ "canrager/amazon_reviews_mcauley_1and5": [
53
+ [
54
+ "Books",
55
+ "CDs_and_Vinyl"
56
+ ],
57
+ [
58
+ "Software",
59
+ "Electronics"
60
+ ],
61
+ [
62
+ "Pet_Supplies",
63
+ "Office_Products"
64
+ ],
65
+ [
66
+ "Industrial_and_Scientific",
67
+ "Toys_and_Games"
68
+ ]
69
+ ]
70
+ }
71
+ },
72
+ "eval_id": "cc88f5a4-1780-4191-979a-21c0cab5d36b",
73
+ "datetime_epoch_millis": 1740122360677,
74
+ "eval_result_metrics": {
75
+ "scr_metrics": {
76
+ "scr_dir1_threshold_2": 0.2701845188928173,
77
+ "scr_metric_threshold_2": 0.12994588847821195,
78
+ "scr_dir2_threshold_2": 0.1173641467776676,
79
+ "scr_dir1_threshold_5": 0.2945725141177775,
80
+ "scr_metric_threshold_5": 0.17511282755823263,
81
+ "scr_dir2_threshold_5": 0.16705028042365472,
82
+ "scr_dir1_threshold_10": 0.3182844393441291,
83
+ "scr_metric_threshold_10": 0.21966858279274512,
84
+ "scr_dir2_threshold_10": 0.21952040621106783,
85
+ "scr_dir1_threshold_20": 0.30558760619036257,
86
+ "scr_metric_threshold_20": 0.26512977827962403,
87
+ "scr_dir2_threshold_20": 0.2618214877346138,
88
+ "scr_dir1_threshold_50": 0.3319916049223971,
89
+ "scr_metric_threshold_50": 0.370513973715845,
90
+ "scr_dir2_threshold_50": 0.3638028467240865,
91
+ "scr_dir1_threshold_100": 0.21622144005118285,
92
+ "scr_metric_threshold_100": 0.4050907277779767,
93
+ "scr_dir2_threshold_100": 0.3906925998203695,
94
+ "scr_dir1_threshold_500": -0.25841149964580884,
95
+ "scr_metric_threshold_500": 0.3396409889933258,
96
+ "scr_dir2_threshold_500": 0.3101038926531008
97
+ }
98
+ },
99
+ "eval_result_details": [
100
+ {
101
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_professor_nurse_results",
102
+ "scr_dir1_threshold_2": 0.4920631166245965,
103
+ "scr_metric_threshold_2": 0.009828028179079856,
104
+ "scr_dir2_threshold_2": 0.009828028179079856,
105
+ "scr_dir1_threshold_5": 0.4920631166245965,
106
+ "scr_metric_threshold_5": 0.022113026790745845,
107
+ "scr_dir2_threshold_5": 0.022113026790745845,
108
+ "scr_dir1_threshold_10": 0.5396825246649839,
109
+ "scr_metric_threshold_10": 0.061425139507065275,
110
+ "scr_dir2_threshold_10": 0.061425139507065275,
111
+ "scr_dir1_threshold_20": 0.4920631166245965,
112
+ "scr_metric_threshold_20": 0.07616710855131739,
113
+ "scr_dir2_threshold_20": 0.07616710855131739,
114
+ "scr_dir1_threshold_50": 0.5396825246649839,
115
+ "scr_metric_threshold_50": 0.11056513395372923,
116
+ "scr_dir2_threshold_50": 0.11056513395372923,
117
+ "scr_dir1_threshold_100": -0.5079368833754035,
118
+ "scr_metric_threshold_100": 0.18918921293763274,
119
+ "scr_dir2_threshold_100": 0.18918921293763274,
120
+ "scr_dir1_threshold_500": -1.6507951074965657,
121
+ "scr_metric_threshold_500": 0.2309582960865383,
122
+ "scr_dir2_threshold_500": 0.2309582960865383
123
+ },
124
+ {
125
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_architect_journalist_results",
126
+ "scr_dir1_threshold_2": 0.1818176892176979,
127
+ "scr_metric_threshold_2": 0.11898016279667031,
128
+ "scr_dir2_threshold_2": 0.11898016279667031,
129
+ "scr_dir1_threshold_5": 0.22222182084405012,
130
+ "scr_metric_threshold_5": 0.1671388604233078,
131
+ "scr_dir2_threshold_5": 0.1671388604233078,
132
+ "scr_dir1_threshold_10": 0.2626259524704024,
133
+ "scr_metric_threshold_10": 0.20396592100070968,
134
+ "scr_dir2_threshold_10": 0.20396592100070968,
135
+ "scr_dir1_threshold_20": 0.20202005606450307,
136
+ "scr_metric_threshold_20": 0.2322946759204148,
137
+ "scr_dir2_threshold_20": 0.2322946759204148,
138
+ "scr_dir1_threshold_50": 0.3737374649596856,
139
+ "scr_metric_threshold_50": 0.38526907445802405,
140
+ "scr_dir2_threshold_50": 0.38526907445802405,
141
+ "scr_dir1_threshold_100": 0.5252525070080629,
142
+ "scr_metric_threshold_100": 0.47308782635021834,
143
+ "scr_dir2_threshold_100": 0.47308782635021834,
144
+ "scr_dir1_threshold_500": -0.5454548738548681,
145
+ "scr_metric_threshold_500": 0.0481586976266375,
146
+ "scr_dir2_threshold_500": 0.0481586976266375
147
+ },
148
+ {
149
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_surgeon_psychologist_results",
150
+ "scr_dir1_threshold_2": 0.5483869727270193,
151
+ "scr_metric_threshold_2": 0.027777869760247858,
152
+ "scr_dir2_threshold_2": 0.027777869760247858,
153
+ "scr_dir1_threshold_5": 0.5483869727270193,
154
+ "scr_metric_threshold_5": 0.03787889962608624,
155
+ "scr_dir2_threshold_5": 0.03787889962608624,
156
+ "scr_dir1_threshold_10": 0.5483869727270193,
157
+ "scr_metric_threshold_10": 0.06060602867826107,
158
+ "scr_dir2_threshold_10": 0.06060602867826107,
159
+ "scr_dir1_threshold_20": 0.5645158034084256,
160
+ "scr_metric_threshold_20": 0.12121220787329137,
161
+ "scr_dir2_threshold_20": 0.12121220787329137,
162
+ "scr_dir1_threshold_50": 0.5806446340898319,
163
+ "scr_metric_threshold_50": 0.22474757585217325,
164
+ "scr_dir2_threshold_50": 0.22474757585217325,
165
+ "scr_dir1_threshold_100": 0.43548323522597376,
166
+ "scr_metric_threshold_100": 0.2904040442049689,
167
+ "scr_dir2_threshold_100": 0.2904040442049689,
168
+ "scr_dir1_threshold_500": -1.9354856386399752,
169
+ "scr_metric_threshold_500": -0.007575659511801869,
170
+ "scr_dir2_threshold_500": -0.007575659511801869
171
+ },
172
+ {
173
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_attorney_teacher_results",
174
+ "scr_dir1_threshold_2": 0.2845529991791037,
175
+ "scr_metric_threshold_2": 0.058650977184178374,
176
+ "scr_dir2_threshold_2": 0.058650977184178374,
177
+ "scr_dir1_threshold_5": 0.2926829741063909,
178
+ "scr_metric_threshold_5": 0.140762415159508,
179
+ "scr_dir2_threshold_5": 0.140762415159508,
180
+ "scr_dir1_threshold_10": 0.2764230242518165,
181
+ "scr_metric_threshold_10": 0.2434018000255199,
182
+ "scr_dir2_threshold_10": 0.2434018000255199,
183
+ "scr_dir1_threshold_20": 0.19512182120742508,
184
+ "scr_metric_threshold_20": 0.2991202633092293,
185
+ "scr_dir2_threshold_20": 0.2991202633092293,
186
+ "scr_dir1_threshold_50": 0.24390263995216122,
187
+ "scr_metric_threshold_50": 0.3812317012845589,
188
+ "scr_dir2_threshold_50": 0.3812317012845589,
189
+ "scr_dir1_threshold_100": 0.1869918462801379,
190
+ "scr_metric_threshold_100": 0.3401759822968941,
191
+ "scr_dir2_threshold_100": 0.3401759822968941,
192
+ "scr_dir1_threshold_500": 0.47967482038652876,
193
+ "scr_metric_threshold_500": 0.023460285997451508,
194
+ "scr_dir2_threshold_500": 0.023460285997451508
195
+ },
196
+ {
197
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Books_CDs_and_Vinyl_results",
198
+ "scr_dir1_threshold_2": 0.04918033854781611,
199
+ "scr_metric_threshold_2": 0.4335936845163922,
200
+ "scr_dir2_threshold_2": 0.4335936845163922,
201
+ "scr_dir1_threshold_5": 0.07103830298111119,
202
+ "scr_metric_threshold_5": 0.4726563736912592,
203
+ "scr_dir2_threshold_5": 0.4726563736912592,
204
+ "scr_dir1_threshold_10": 0.1092894964580841,
205
+ "scr_metric_threshold_10": 0.5195312281721307,
206
+ "scr_dir2_threshold_10": 0.5195312281721307,
207
+ "scr_dir1_threshold_20": 0.07650271266233713,
208
+ "scr_metric_threshold_20": 0.5507812863797821,
209
+ "scr_dir2_threshold_20": 0.5507812863797821,
210
+ "scr_dir1_threshold_50": -0.26775965717237565,
211
+ "scr_metric_threshold_50": 0.6640624563442614,
212
+ "scr_dir2_threshold_50": 0.6640624563442614,
213
+ "scr_dir1_threshold_100": -0.1912569445100385,
214
+ "scr_metric_threshold_100": 0.7382811699644793,
215
+ "scr_dir2_threshold_100": 0.7382811699644793,
216
+ "scr_dir1_threshold_500": -0.027322374114521025,
217
+ "scr_metric_threshold_500": 0.8124998835846973,
218
+ "scr_dir2_threshold_500": 0.8124998835846973
219
+ },
220
+ {
221
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Software_Electronics_results",
222
+ "scr_dir1_threshold_2": 0.2512818819907463,
223
+ "scr_metric_threshold_2": 0.056451601273808806,
224
+ "scr_dir2_threshold_2": 0.056451601273808806,
225
+ "scr_dir1_threshold_5": 0.28717946836934216,
226
+ "scr_metric_threshold_5": 0.08870972781349516,
227
+ "scr_dir2_threshold_5": 0.08870972781349516,
228
+ "scr_dir1_threshold_10": 0.3179486097903842,
229
+ "scr_metric_threshold_10": 0.11693564862104187,
230
+ "scr_dir2_threshold_10": 0.11693564862104187,
231
+ "scr_dir1_threshold_20": 0.338461472626031,
232
+ "scr_metric_threshold_20": 0.1572581866250075,
233
+ "scr_dir2_threshold_20": 0.1572581866250075,
234
+ "scr_dir1_threshold_50": 0.369230614047073,
235
+ "scr_metric_threshold_50": 0.3064517214444511,
236
+ "scr_dir2_threshold_50": 0.3064517214444511,
237
+ "scr_dir1_threshold_100": 0.35897433546167773,
238
+ "scr_metric_threshold_100": 0.39112900318452204,
239
+ "scr_dir2_threshold_100": 0.39112900318452204,
240
+ "scr_dir1_threshold_500": 0.5692307974459867,
241
+ "scr_metric_threshold_500": 0.5322581265396864,
242
+ "scr_dir2_threshold_500": 0.5322581265396864
243
+ },
244
+ {
245
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Pet_Supplies_Office_Products_results",
246
+ "scr_dir1_threshold_2": 0.2297297224732598,
247
+ "scr_metric_threshold_2": 0.20982135373301966,
248
+ "scr_dir2_threshold_2": 0.20982135373301966,
249
+ "scr_dir1_threshold_5": 0.29279277827985295,
250
+ "scr_metric_threshold_5": 0.3214286379516016,
251
+ "scr_dir2_threshold_5": 0.3214286379516016,
252
+ "scr_dir1_threshold_10": 0.32882888688058826,
253
+ "scr_metric_threshold_10": 0.3883928488274784,
254
+ "scr_dir2_threshold_10": 0.3883928488274784,
255
+ "scr_dir1_threshold_20": 0.38288278129230385,
256
+ "scr_metric_threshold_20": 0.49107141194067105,
257
+ "scr_dir2_threshold_20": 0.49107141194067105,
258
+ "scr_dir1_threshold_50": 0.5675675022593382,
259
+ "scr_metric_threshold_50": 0.6428570098110826,
260
+ "scr_dir2_threshold_50": 0.6428570098110826,
261
+ "scr_dir1_threshold_100": 0.6261261116131863,
262
+ "scr_metric_threshold_100": 0.5223212705792319,
263
+ "scr_dir2_threshold_100": 0.5223212705792319,
264
+ "scr_dir1_threshold_500": 0.6351350045186764,
265
+ "scr_metric_threshold_500": 0.6696429070351297,
266
+ "scr_dir2_threshold_500": 0.6696429070351297
267
+ },
268
+ {
269
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Industrial_and_Scientific_Toys_and_Games_results",
270
+ "scr_dir1_threshold_2": 0.1244634303822985,
271
+ "scr_metric_threshold_2": 0.1244634303822985,
272
+ "scr_dir2_threshold_2": 0.02380949677794379,
273
+ "scr_dir1_threshold_5": 0.15021467900985702,
274
+ "scr_metric_threshold_5": 0.15021467900985702,
275
+ "scr_dir2_threshold_5": 0.08571430193323373,
276
+ "scr_dir1_threshold_10": 0.16309004750975417,
277
+ "scr_metric_threshold_10": 0.16309004750975417,
278
+ "scr_dir2_threshold_10": 0.1619046348563358,
279
+ "scr_dir1_threshold_20": 0.19313308563727843,
280
+ "scr_metric_threshold_20": 0.19313308563727843,
281
+ "scr_dir2_threshold_20": 0.16666676127719673,
282
+ "scr_dir1_threshold_50": 0.2489271165784791,
283
+ "scr_metric_threshold_50": 0.2489271165784791,
284
+ "scr_dir2_threshold_50": 0.19523810064441124,
285
+ "scr_dir1_threshold_100": 0.29613731270586624,
286
+ "scr_metric_threshold_100": 0.29613731270586624,
287
+ "scr_dir2_threshold_100": 0.18095228904500887,
288
+ "scr_dir1_threshold_500": 0.4077253745882676,
289
+ "scr_metric_threshold_500": 0.4077253745882676,
290
+ "scr_dir2_threshold_500": 0.17142860386646747
291
+ }
292
+ ],
293
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
294
+ "sae_lens_id": "custom_sae",
295
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_3",
296
+ "sae_lens_version": "5.4.2",
297
+ "sae_cfg_dict": {
298
+ "model_name": "gemma-2-2b",
299
+ "d_in": 2304,
300
+ "d_sae": 65536,
301
+ "hook_layer": 12,
302
+ "hook_name": "blocks.12.hook_resid_post",
303
+ "context_size": null,
304
+ "hook_head_index": null,
305
+ "architecture": "topk",
306
+ "apply_b_dec_to_input": null,
307
+ "finetuning_scaling_factor": null,
308
+ "activation_fn_str": "",
309
+ "prepend_bos": true,
310
+ "normalize_activations": "none",
311
+ "dtype": "bfloat16",
312
+ "device": "",
313
+ "dataset_path": "",
314
+ "dataset_trust_remote_code": true,
315
+ "seqpos_slice": [
316
+ null
317
+ ],
318
+ "training_tokens": -100000,
319
+ "sae_lens_training_version": null,
320
+ "neuronpedia_id": null
321
+ },
322
+ "eval_result_unstructured": null
323
+ }
eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_4_custom_sae_eval_results.json ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "scr",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "canrager/amazon_reviews_mcauley_1and5"
8
+ ],
9
+ "perform_scr": true,
10
+ "early_stopping_patience": 20,
11
+ "train_set_size": 4000,
12
+ "test_set_size": 1000,
13
+ "context_length": 128,
14
+ "probe_train_batch_size": 16,
15
+ "probe_test_batch_size": 500,
16
+ "probe_epochs": 20,
17
+ "probe_lr": 0.001,
18
+ "probe_l1_penalty": 0.001,
19
+ "sae_batch_size": 125,
20
+ "llm_batch_size": 32,
21
+ "llm_dtype": "bfloat16",
22
+ "lower_vram_usage": false,
23
+ "model_name": "gemma-2-2b",
24
+ "n_values": [
25
+ 2,
26
+ 5,
27
+ 10,
28
+ 20,
29
+ 50,
30
+ 100,
31
+ 500
32
+ ],
33
+ "column1_vals_lookup": {
34
+ "LabHC/bias_in_bios_class_set1": [
35
+ [
36
+ "professor",
37
+ "nurse"
38
+ ],
39
+ [
40
+ "architect",
41
+ "journalist"
42
+ ],
43
+ [
44
+ "surgeon",
45
+ "psychologist"
46
+ ],
47
+ [
48
+ "attorney",
49
+ "teacher"
50
+ ]
51
+ ],
52
+ "canrager/amazon_reviews_mcauley_1and5": [
53
+ [
54
+ "Books",
55
+ "CDs_and_Vinyl"
56
+ ],
57
+ [
58
+ "Software",
59
+ "Electronics"
60
+ ],
61
+ [
62
+ "Pet_Supplies",
63
+ "Office_Products"
64
+ ],
65
+ [
66
+ "Industrial_and_Scientific",
67
+ "Toys_and_Games"
68
+ ]
69
+ ]
70
+ }
71
+ },
72
+ "eval_id": "e8426775-6429-4477-a0fd-3fb3792766b3",
73
+ "datetime_epoch_millis": 1740121033098,
74
+ "eval_result_metrics": {
75
+ "scr_metrics": {
76
+ "scr_dir1_threshold_2": 0.27531980516861043,
77
+ "scr_metric_threshold_2": 0.11625656598210435,
78
+ "scr_dir2_threshold_2": 0.11004615889121543,
79
+ "scr_dir1_threshold_5": 0.2505735568665822,
80
+ "scr_metric_threshold_5": 0.14549762572304692,
81
+ "scr_dir2_threshold_5": 0.14351265849183054,
82
+ "scr_dir1_threshold_10": 0.2756888819063589,
83
+ "scr_metric_threshold_10": 0.23586375851638186,
84
+ "scr_dir2_threshold_10": 0.2320266512414765,
85
+ "scr_dir1_threshold_20": -0.032264118106704306,
86
+ "scr_metric_threshold_20": 0.2742611620063659,
87
+ "scr_dir2_threshold_20": 0.27201816187874656,
88
+ "scr_dir1_threshold_50": -0.2643479797829179,
89
+ "scr_metric_threshold_50": 0.3475836590498552,
90
+ "scr_dir2_threshold_50": 0.32598390413533257,
91
+ "scr_dir1_threshold_100": -0.2446569626537838,
92
+ "scr_metric_threshold_100": 0.2594261901493591,
93
+ "scr_dir2_threshold_100": 0.23948699145483487,
94
+ "scr_dir1_threshold_500": -0.3111348856302424,
95
+ "scr_metric_threshold_500": 0.30072637389446877,
96
+ "scr_dir2_threshold_500": 0.2734756884260898
97
+ }
98
+ },
99
+ "eval_result_details": [
100
+ {
101
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_professor_nurse_results",
102
+ "scr_dir1_threshold_2": 0.5079359372693867,
103
+ "scr_metric_threshold_2": 0.007371057746493725,
104
+ "scr_dir2_threshold_2": 0.007371057746493725,
105
+ "scr_dir1_threshold_5": 0.5396825246649839,
106
+ "scr_metric_threshold_5": 0.05405408176057155,
107
+ "scr_dir2_threshold_5": 0.05405408176057155,
108
+ "scr_dir1_threshold_10": 0.5714281659545642,
109
+ "scr_metric_threshold_10": 0.12039316213280908,
110
+ "scr_dir2_threshold_10": 0.12039316213280908,
111
+ "scr_dir1_threshold_20": 0.6031747533501614,
112
+ "scr_metric_threshold_20": 0.1719902734607945,
113
+ "scr_dir2_threshold_20": 0.1719902734607945,
114
+ "scr_dir1_threshold_50": 0.42857088793941894,
115
+ "scr_metric_threshold_50": 0.2555282933098703,
116
+ "scr_dir2_threshold_50": 0.2555282933098703,
117
+ "scr_dir1_threshold_100": 0.6031747533501614,
118
+ "scr_metric_threshold_100": 0.24078632426561816,
119
+ "scr_dir2_threshold_100": 0.24078632426561816,
120
+ "scr_dir1_threshold_500": 0.5079359372693867,
121
+ "scr_metric_threshold_500": 0.0638821099396514,
122
+ "scr_dir2_threshold_500": 0.0638821099396514
123
+ },
124
+ {
125
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_architect_journalist_results",
126
+ "scr_dir1_threshold_2": 0.2323233053010818,
127
+ "scr_metric_threshold_2": 0.10764869459912667,
128
+ "scr_dir2_threshold_2": 0.10764869459912667,
129
+ "scr_dir1_threshold_5": 0.16161592443815084,
130
+ "scr_metric_threshold_5": 0.06515581549710699,
131
+ "scr_dir2_threshold_5": 0.06515581549710699,
132
+ "scr_dir1_threshold_10": 0.20202005606450307,
133
+ "scr_metric_threshold_10": 0.17847032862085146,
134
+ "scr_dir2_threshold_10": 0.17847032862085146,
135
+ "scr_dir1_threshold_20": 0.2828283193172076,
136
+ "scr_metric_threshold_20": 0.24362614411795844,
137
+ "scr_dir2_threshold_20": 0.24362614411795844,
138
+ "scr_dir1_threshold_50": -1.0202023668468052,
139
+ "scr_metric_threshold_50": 0.21246456436179043,
140
+ "scr_dir2_threshold_50": 0.21246456436179043,
141
+ "scr_dir1_threshold_100": -0.7878790615457234,
142
+ "scr_metric_threshold_100": 0.31444760928799126,
143
+ "scr_dir2_threshold_100": 0.31444760928799126,
144
+ "scr_dir1_threshold_500": -0.27272743692743406,
145
+ "scr_metric_threshold_500": 0.6855523907120088,
146
+ "scr_dir2_threshold_500": 0.6855523907120088
147
+ },
148
+ {
149
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_surgeon_psychologist_results",
150
+ "scr_dir1_threshold_2": 0.532258142045613,
151
+ "scr_metric_threshold_2": 0.03282830943478244,
152
+ "scr_dir2_threshold_2": 0.03282830943478244,
153
+ "scr_dir1_threshold_5": 0.46774185795438705,
154
+ "scr_metric_threshold_5": 0.058080808840993786,
155
+ "scr_dir2_threshold_5": 0.058080808840993786,
156
+ "scr_dir1_threshold_10": 0.5806446340898319,
157
+ "scr_metric_threshold_10": 0.23232323536397514,
158
+ "scr_dir2_threshold_10": 0.23232323536397514,
159
+ "scr_dir1_threshold_20": -2.290323759093317,
160
+ "scr_metric_threshold_20": 0.24242426522981353,
161
+ "scr_dir2_threshold_20": 0.24242426522981353,
162
+ "scr_dir1_threshold_50": -2.629034010230853,
163
+ "scr_metric_threshold_50": 0.3434344133714281,
164
+ "scr_dir2_threshold_50": 0.3434344133714281,
165
+ "scr_dir1_threshold_100": -3.4516149500041817,
166
+ "scr_metric_threshold_100": 0.09343433811304351,
167
+ "scr_dir2_threshold_100": 0.09343433811304351,
168
+ "scr_dir1_threshold_500": -4.290324720458917,
169
+ "scr_metric_threshold_500": -0.06818183870683217,
170
+ "scr_dir2_threshold_500": -0.06818183870683217
171
+ },
172
+ {
173
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_attorney_teacher_results",
174
+ "scr_dir1_threshold_2": 0.34146330826062055,
175
+ "scr_metric_threshold_2": 0.06158349108464734,
176
+ "scr_dir2_threshold_2": 0.06158349108464734,
177
+ "scr_dir1_threshold_5": 0.20325228072521878,
178
+ "scr_metric_threshold_5": 0.12023446826882572,
179
+ "scr_dir2_threshold_5": 0.12023446826882572,
180
+ "scr_dir1_threshold_10": 0.23577218043436754,
181
+ "scr_metric_threshold_10": 0.22580636703530657,
182
+ "scr_dir2_threshold_10": 0.22580636703530657,
183
+ "scr_dir1_threshold_20": 0.3658537176329886,
184
+ "scr_metric_threshold_20": 0.281524830319016,
185
+ "scr_dir2_threshold_20": 0.281524830319016,
186
+ "scr_dir1_threshold_50": 0.07317074352659772,
187
+ "scr_metric_threshold_50": 0.46041062535941957,
188
+ "scr_dir2_threshold_50": 0.46041062535941957,
189
+ "scr_dir1_threshold_100": 0.4390244611595863,
190
+ "scr_metric_threshold_100": -0.04398840768183351,
191
+ "scr_dir2_threshold_100": -0.04398840768183351,
192
+ "scr_dir1_threshold_500": 0.39837410193264383,
193
+ "scr_metric_threshold_500": -0.26392974691620213,
194
+ "scr_dir2_threshold_500": -0.26392974691620213
195
+ },
196
+ {
197
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Books_CDs_and_Vinyl_results",
198
+ "scr_dir1_threshold_2": 0.03825119347697291,
199
+ "scr_metric_threshold_2": 0.4414063154836078,
200
+ "scr_dir2_threshold_2": 0.4414063154836078,
201
+ "scr_dir1_threshold_5": -0.03278678379574697,
202
+ "scr_metric_threshold_5": 0.46874994179234863,
203
+ "scr_dir2_threshold_5": 0.46874994179234863,
204
+ "scr_dir1_threshold_10": -0.1202186415289273,
205
+ "scr_metric_threshold_10": 0.5078126309672156,
206
+ "scr_dir2_threshold_10": 0.5078126309672156,
207
+ "scr_dir1_threshold_20": -0.00546440968122594,
208
+ "scr_metric_threshold_20": 0.5585936845163922,
209
+ "scr_dir2_threshold_20": 0.5585936845163922,
210
+ "scr_dir1_threshold_50": -0.13661187057260513,
211
+ "scr_metric_threshold_50": 0.5859375436557386,
212
+ "scr_dir2_threshold_50": 0.5859375436557386,
213
+ "scr_dir1_threshold_100": -0.14754101564344832,
214
+ "scr_metric_threshold_100": 0.5546874854480871,
215
+ "scr_dir2_threshold_100": 0.5546874854480871,
216
+ "scr_dir1_threshold_500": 0.01092881936245188,
217
+ "scr_metric_threshold_500": 0.8593749708961743,
218
+ "scr_dir2_threshold_500": 0.8593749708961743
219
+ },
220
+ {
221
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Software_Electronics_results",
222
+ "scr_dir1_threshold_2": 0.1999998777340575,
223
+ "scr_metric_threshold_2": 0.060483807005948444,
224
+ "scr_dir2_threshold_2": 0.060483807005948444,
225
+ "scr_dir1_threshold_5": 0.2564100212834439,
226
+ "scr_metric_threshold_5": 0.036290332271825994,
227
+ "scr_dir2_threshold_5": 0.036290332271825994,
228
+ "scr_dir1_threshold_10": 0.2615384662409978,
229
+ "scr_metric_threshold_10": 0.05241939554166917,
230
+ "scr_dir2_threshold_10": 0.05241939554166917,
231
+ "scr_dir1_threshold_20": 0.22051274056970427,
232
+ "scr_metric_threshold_20": 0.04435498407738989,
233
+ "scr_dir2_threshold_20": 0.04435498407738989,
234
+ "scr_dir1_threshold_50": 0.399999755468115,
235
+ "scr_metric_threshold_50": 0.06854845881151235,
236
+ "scr_dir2_threshold_50": 0.06854845881151235,
237
+ "scr_dir1_threshold_100": 0.4307692025540133,
238
+ "scr_metric_threshold_100": 0.16129039235714715,
239
+ "scr_dir2_threshold_100": 0.16129039235714715,
240
+ "scr_dir1_threshold_500": 0.4358973418467109,
241
+ "scr_metric_threshold_500": 0.31854833864087,
242
+ "scr_dir2_threshold_500": 0.31854833864087
243
+ },
244
+ {
245
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Pet_Supplies_Office_Products_results",
246
+ "scr_dir1_threshold_2": 0.23873861537874994,
247
+ "scr_metric_threshold_2": 0.107142790619827,
248
+ "scr_dir2_threshold_2": 0.107142790619827,
249
+ "scr_dir1_threshold_5": 0.29279277827985295,
250
+ "scr_metric_threshold_5": 0.24553570597033553,
251
+ "scr_dir2_threshold_5": 0.24553570597033553,
252
+ "scr_dir1_threshold_10": 0.31981972548571075,
253
+ "scr_metric_threshold_10": 0.415178479959405,
254
+ "scr_dir2_threshold_10": 0.415178479959405,
255
+ "scr_dir1_threshold_20": 0.3378377797860784,
256
+ "scr_metric_threshold_20": 0.4241072010647942,
257
+ "scr_dir2_threshold_20": 0.4241072010647942,
258
+ "scr_dir1_threshold_50": 0.47747749924688726,
259
+ "scr_metric_threshold_50": 0.5624998503231822,
260
+ "scr_dir2_threshold_50": 0.5624998503231822,
261
+ "scr_dir1_threshold_100": 0.6306305580659314,
262
+ "scr_metric_threshold_100": 0.42857142857142855,
263
+ "scr_dir2_threshold_100": 0.42857142857142855,
264
+ "scr_dir1_threshold_500": 0.45045055204102946,
265
+ "scr_metric_threshold_500": 0.54017844669789,
266
+ "scr_dir2_threshold_500": 0.54017844669789
267
+ },
268
+ {
269
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Industrial_and_Scientific_Toys_and_Games_results",
270
+ "scr_dir1_threshold_2": 0.11158806188240133,
271
+ "scr_metric_threshold_2": 0.11158806188240133,
272
+ "scr_dir2_threshold_2": 0.06190480515528994,
273
+ "scr_dir1_threshold_5": 0.11587985138236706,
274
+ "scr_metric_threshold_5": 0.11587985138236706,
275
+ "scr_dir2_threshold_5": 0.10000011353263609,
276
+ "scr_dir1_threshold_10": 0.15450646850982275,
277
+ "scr_metric_threshold_10": 0.15450646850982275,
278
+ "scr_dir2_threshold_10": 0.12380961031057988,
279
+ "scr_dir1_threshold_20": 0.2274679132647684,
280
+ "scr_metric_threshold_20": 0.2274679132647684,
281
+ "scr_dir2_threshold_20": 0.20952391224381361,
282
+ "scr_dir1_threshold_50": 0.2918455232059005,
283
+ "scr_metric_threshold_50": 0.2918455232059005,
284
+ "scr_dir2_threshold_50": 0.11904748388971893,
285
+ "scr_dir1_threshold_100": 0.3261803508333905,
286
+ "scr_metric_threshold_100": 0.3261803508333905,
287
+ "scr_dir2_threshold_100": 0.16666676127719673,
288
+ "scr_dir1_threshold_500": 0.2703863198921898,
289
+ "scr_metric_threshold_500": 0.2703863198921898,
290
+ "scr_dir2_threshold_500": 0.05238083614515829
291
+ }
292
+ ],
293
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
294
+ "sae_lens_id": "custom_sae",
295
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_4",
296
+ "sae_lens_version": "5.4.2",
297
+ "sae_cfg_dict": {
298
+ "model_name": "gemma-2-2b",
299
+ "d_in": 2304,
300
+ "d_sae": 65536,
301
+ "hook_layer": 12,
302
+ "hook_name": "blocks.12.hook_resid_post",
303
+ "context_size": null,
304
+ "hook_head_index": null,
305
+ "architecture": "topk",
306
+ "apply_b_dec_to_input": null,
307
+ "finetuning_scaling_factor": null,
308
+ "activation_fn_str": "",
309
+ "prepend_bos": true,
310
+ "normalize_activations": "none",
311
+ "dtype": "bfloat16",
312
+ "device": "",
313
+ "dataset_path": "",
314
+ "dataset_trust_remote_code": true,
315
+ "seqpos_slice": [
316
+ null
317
+ ],
318
+ "training_tokens": -100000,
319
+ "sae_lens_training_version": null,
320
+ "neuronpedia_id": null
321
+ },
322
+ "eval_result_unstructured": null
323
+ }
eval_results_finetunes/scr/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_5_custom_sae_eval_results.json ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "scr",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "canrager/amazon_reviews_mcauley_1and5"
8
+ ],
9
+ "perform_scr": true,
10
+ "early_stopping_patience": 20,
11
+ "train_set_size": 4000,
12
+ "test_set_size": 1000,
13
+ "context_length": 128,
14
+ "probe_train_batch_size": 16,
15
+ "probe_test_batch_size": 500,
16
+ "probe_epochs": 20,
17
+ "probe_lr": 0.001,
18
+ "probe_l1_penalty": 0.001,
19
+ "sae_batch_size": 125,
20
+ "llm_batch_size": 32,
21
+ "llm_dtype": "bfloat16",
22
+ "lower_vram_usage": false,
23
+ "model_name": "gemma-2-2b",
24
+ "n_values": [
25
+ 2,
26
+ 5,
27
+ 10,
28
+ 20,
29
+ 50,
30
+ 100,
31
+ 500
32
+ ],
33
+ "column1_vals_lookup": {
34
+ "LabHC/bias_in_bios_class_set1": [
35
+ [
36
+ "professor",
37
+ "nurse"
38
+ ],
39
+ [
40
+ "architect",
41
+ "journalist"
42
+ ],
43
+ [
44
+ "surgeon",
45
+ "psychologist"
46
+ ],
47
+ [
48
+ "attorney",
49
+ "teacher"
50
+ ]
51
+ ],
52
+ "canrager/amazon_reviews_mcauley_1and5": [
53
+ [
54
+ "Books",
55
+ "CDs_and_Vinyl"
56
+ ],
57
+ [
58
+ "Software",
59
+ "Electronics"
60
+ ],
61
+ [
62
+ "Pet_Supplies",
63
+ "Office_Products"
64
+ ],
65
+ [
66
+ "Industrial_and_Scientific",
67
+ "Toys_and_Games"
68
+ ]
69
+ ]
70
+ }
71
+ },
72
+ "eval_id": "83470215-4900-437c-9356-3fa662b83aa0",
73
+ "datetime_epoch_millis": 1740120587099,
74
+ "eval_result_metrics": {
75
+ "scr_metrics": {
76
+ "scr_dir1_threshold_2": 0.2743706757570637,
77
+ "scr_metric_threshold_2": 0.1170832512694581,
78
+ "scr_dir2_threshold_2": 0.11230602380708869,
79
+ "scr_dir1_threshold_5": -0.026983470340403576,
80
+ "scr_metric_threshold_5": 0.2017416551001056,
81
+ "scr_dir2_threshold_5": 0.19218716819863155,
82
+ "scr_dir1_threshold_10": 0.06657564793496801,
83
+ "scr_metric_threshold_10": 0.16606669980864144,
84
+ "scr_dir2_threshold_10": 0.15346195525856013,
85
+ "scr_dir1_threshold_20": 0.2669406447863606,
86
+ "scr_metric_threshold_20": 0.2100337791782751,
87
+ "scr_dir2_threshold_20": 0.20033880813167307,
88
+ "scr_dir1_threshold_50": -0.40881801592369577,
89
+ "scr_metric_threshold_50": 0.3539019004315113,
90
+ "scr_dir2_threshold_50": 0.35480370035202374,
91
+ "scr_dir1_threshold_100": -1.0461834504326797,
92
+ "scr_metric_threshold_100": 0.2949556138670396,
93
+ "scr_dir2_threshold_100": 0.26846877517542184,
94
+ "scr_dir1_threshold_500": -1.2810619734502477,
95
+ "scr_metric_threshold_500": 0.45101377863826764,
96
+ "scr_dir2_threshold_500": 0.34383774589216004
97
+ }
98
+ },
99
+ "eval_result_details": [
100
+ {
101
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_professor_nurse_results",
102
+ "scr_dir1_threshold_2": 0.5079359372693867,
103
+ "scr_metric_threshold_2": 0.0638821099396514,
104
+ "scr_dir2_threshold_2": 0.0638821099396514,
105
+ "scr_dir1_threshold_5": 0.46031747533501616,
106
+ "scr_metric_threshold_5": 0.13759224805838266,
107
+ "scr_dir2_threshold_5": 0.13759224805838266,
108
+ "scr_dir1_threshold_10": -0.2539689147407102,
109
+ "scr_metric_threshold_10": 0.13022119031188895,
110
+ "scr_dir2_threshold_10": 0.13022119031188895,
111
+ "scr_dir1_threshold_20": -0.14285727801514528,
112
+ "scr_metric_threshold_20": -0.16461906926556544,
113
+ "scr_dir2_threshold_20": -0.16461906926556544,
114
+ "scr_dir1_threshold_50": -0.30158832278109754,
115
+ "scr_metric_threshold_50": 0.21375921016096475,
116
+ "scr_dir2_threshold_50": 0.21375921016096475,
117
+ "scr_dir1_threshold_100": -0.3174611434258877,
118
+ "scr_metric_threshold_100": 0.08108119586522498,
119
+ "scr_dir2_threshold_100": 0.08108119586522498,
120
+ "scr_dir1_threshold_500": -1.2539698608467271,
121
+ "scr_metric_threshold_500": -0.041768936700170234,
122
+ "scr_dir2_threshold_500": -0.041768936700170234
123
+ },
124
+ {
125
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_architect_journalist_results",
126
+ "scr_dir1_threshold_2": 0.31313096648652816,
127
+ "scr_metric_threshold_2": 0.09915005123804593,
128
+ "scr_dir2_threshold_2": 0.09915005123804593,
129
+ "scr_dir1_threshold_5": -1.9191929408818118,
130
+ "scr_metric_threshold_5": 0.11331451312374448,
131
+ "scr_dir2_threshold_5": 0.11331451312374448,
132
+ "scr_dir1_threshold_10": -1.1212123948790567,
133
+ "scr_metric_threshold_10": 0.06232299066064406,
134
+ "scr_dir2_threshold_10": 0.06232299066064406,
135
+ "scr_dir1_threshold_20": 0.40404011212900615,
136
+ "scr_metric_threshold_20": 0.15297456738930126,
137
+ "scr_dir2_threshold_20": 0.15297456738930126,
138
+ "scr_dir1_threshold_50": -0.21212154052153473,
139
+ "scr_metric_threshold_50": 0.3937677178191048,
140
+ "scr_dir2_threshold_50": 0.3937677178191048,
141
+ "scr_dir1_threshold_100": -1.0606064984731574,
142
+ "scr_metric_threshold_100": 0.4277621224117357,
143
+ "scr_dir2_threshold_100": 0.4277621224117357,
144
+ "scr_dir1_threshold_500": -0.7373740475295977,
145
+ "scr_metric_threshold_500": 0.6402266867735261,
146
+ "scr_dir2_threshold_500": 0.6402266867735261
147
+ },
148
+ {
149
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_surgeon_psychologist_results",
150
+ "scr_dir1_threshold_2": 0.4999995193171997,
151
+ "scr_metric_threshold_2": 0.03535352927204973,
152
+ "scr_dir2_threshold_2": 0.03535352927204973,
153
+ "scr_dir1_threshold_5": 0.3225804590905289,
154
+ "scr_metric_threshold_5": 0.20959595579503107,
155
+ "scr_dir2_threshold_5": 0.20959595579503107,
156
+ "scr_dir1_threshold_10": -0.11290373750104549,
157
+ "scr_metric_threshold_10": 0.18939404658012354,
158
+ "scr_dir2_threshold_10": 0.18939404658012354,
159
+ "scr_dir1_threshold_20": 0.40322557386316116,
160
+ "scr_metric_threshold_20": 0.2045455161204965,
161
+ "scr_dir2_threshold_20": 0.2045455161204965,
162
+ "scr_dir1_threshold_50": -3.032259584094014,
163
+ "scr_metric_threshold_50": 0.2803030143391305,
164
+ "scr_dir2_threshold_50": 0.2803030143391305,
165
+ "scr_dir1_threshold_100": -6.096776829550841,
166
+ "scr_metric_threshold_100": 0.03030308959751515,
167
+ "scr_dir2_threshold_100": 0.03030308959751515,
168
+ "scr_dir1_threshold_500": -6.354841965915744,
169
+ "scr_metric_threshold_500": -0.010101029865838383,
170
+ "scr_dir2_threshold_500": -0.010101029865838383
171
+ },
172
+ {
173
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_attorney_teacher_results",
174
+ "scr_dir1_threshold_2": 0.40650407685993106,
175
+ "scr_metric_threshold_2": 0.04692074678860275,
176
+ "scr_dir2_threshold_2": 0.04692074678860275,
177
+ "scr_dir1_threshold_5": 0.008129974927287195,
178
+ "scr_metric_threshold_5": 0.1114369265674188,
179
+ "scr_dir2_threshold_5": 0.1114369265674188,
180
+ "scr_dir1_threshold_10": 0.6747966415939539,
181
+ "scr_metric_threshold_10": 0.08211143797532962,
182
+ "scr_dir2_threshold_10": 0.08211143797532962,
183
+ "scr_dir1_threshold_20": 0.3333333333333333,
184
+ "scr_metric_threshold_20": 0.23460408353041323,
185
+ "scr_dir2_threshold_20": 0.23460408353041323,
186
+ "scr_dir1_threshold_50": 0.6504067168120923,
187
+ "scr_metric_threshold_50": 0.12316715696299442,
188
+ "scr_dir2_threshold_50": 0.12316715696299442,
189
+ "scr_dir1_threshold_100": 0.6260163074397243,
190
+ "scr_metric_threshold_100": -0.33431095449595616,
191
+ "scr_dir2_threshold_100": -0.33431095449595616,
192
+ "scr_dir1_threshold_500": 0.23577218043436754,
193
+ "scr_metric_threshold_500": 0.27565980251807803,
194
+ "scr_dir2_threshold_500": 0.27565980251807803
195
+ },
196
+ {
197
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Books_CDs_and_Vinyl_results",
198
+ "scr_dir1_threshold_2": -0.03278678379574697,
199
+ "scr_metric_threshold_2": 0.3984374272404358,
200
+ "scr_dir2_threshold_2": 0.3984374272404358,
201
+ "scr_dir1_threshold_5": 0.10382508677685816,
202
+ "scr_metric_threshold_5": 0.4843749708961743,
203
+ "scr_dir2_threshold_5": 0.4843749708961743,
204
+ "scr_dir1_threshold_10": 0.3224044054014177,
205
+ "scr_metric_threshold_10": 0.30078128637978213,
206
+ "scr_dir2_threshold_10": 0.30078128637978213,
207
+ "scr_dir1_threshold_20": -0.03278678379574697,
208
+ "scr_metric_threshold_20": 0.48828116996447934,
209
+ "scr_dir2_threshold_20": 0.48828116996447934,
210
+ "scr_dir1_threshold_50": -0.6502732204840613,
211
+ "scr_metric_threshold_50": 0.7070313445874334,
212
+ "scr_dir2_threshold_50": 0.7070313445874334,
213
+ "scr_dir1_threshold_100": -1.1256830512101532,
214
+ "scr_metric_threshold_100": 0.6875001164153027,
215
+ "scr_dir2_threshold_100": 0.6875001164153027,
216
+ "scr_dir1_threshold_500": -1.584699327184176,
217
+ "scr_metric_threshold_500": 0.871093800931695,
218
+ "scr_dir2_threshold_500": 0.871093800931695
219
+ },
220
+ {
221
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Software_Electronics_results",
222
+ "scr_dir1_threshold_2": 0.18461515419110838,
223
+ "scr_metric_threshold_2": 0.10887099681547797,
224
+ "scr_dir2_threshold_2": 0.10887099681547797,
225
+ "scr_dir1_threshold_5": 0.2358974641126534,
226
+ "scr_metric_threshold_5": 0.18951607282340924,
227
+ "scr_dir2_threshold_5": 0.18951607282340924,
228
+ "scr_dir1_threshold_10": 0.3333333333333333,
229
+ "scr_metric_threshold_10": 0.060483807005948444,
230
+ "scr_dir2_threshold_10": 0.060483807005948444,
231
+ "scr_dir1_threshold_20": 0.4102563397183665,
232
+ "scr_metric_threshold_20": 0.2137097878988163,
233
+ "scr_dir2_threshold_20": 0.2137097878988163,
234
+ "scr_dir1_threshold_50": -0.4358976475115671,
235
+ "scr_metric_threshold_50": 0.5040322057321397,
236
+ "scr_dir2_threshold_50": 0.5040322057321397,
237
+ "scr_dir1_threshold_100": -1.2256411855272582,
238
+ "scr_metric_threshold_100": 0.6411291233551644,
239
+ "scr_dir2_threshold_100": 0.6411291233551644,
240
+ "scr_dir1_threshold_500": -1.3384617782908872,
241
+ "scr_metric_threshold_500": 0.6572581866250075,
242
+ "scr_dir2_threshold_500": 0.6572581866250075
243
+ },
244
+ {
245
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Pet_Supplies_Office_Products_results",
246
+ "scr_dir1_threshold_2": 0.2297297224732598,
247
+ "scr_metric_threshold_2": 0.09821433560655832,
248
+ "scr_dir2_threshold_2": 0.09821433560655832,
249
+ "scr_dir1_threshold_5": 0.40090083559267153,
250
+ "scr_metric_threshold_5": 0.19642867121311663,
251
+ "scr_dir2_threshold_5": 0.19642867121311663,
252
+ "scr_dir1_threshold_10": 0.43693694419340684,
253
+ "scr_metric_threshold_10": 0.24999993347696986,
254
+ "scr_dir2_threshold_10": 0.24999993347696986,
255
+ "scr_dir1_threshold_20": 0.4684683378520097,
256
+ "scr_metric_threshold_20": 0.2589286545823591,
257
+ "scr_dir2_threshold_20": 0.2589286545823591,
258
+ "scr_dir1_threshold_50": 0.42792778279852933,
259
+ "scr_metric_threshold_50": 0.3258928654582359,
260
+ "scr_dir2_threshold_50": 0.3258928654582359,
261
+ "scr_dir1_threshold_100": 0.5045044464527451,
262
+ "scr_metric_threshold_100": 0.4999998669539397,
263
+ "scr_dir2_threshold_100": 0.4999998669539397,
264
+ "scr_dir1_threshold_500": 0.26576583107399515,
265
+ "scr_metric_threshold_500": 0.6964285381670564,
266
+ "scr_dir2_threshold_500": 0.6964285381670564
267
+ },
268
+ {
269
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Industrial_and_Scientific_Toys_and_Games_results",
270
+ "scr_dir1_threshold_2": 0.08583681325484281,
271
+ "scr_metric_threshold_2": 0.08583681325484281,
272
+ "scr_dir2_threshold_2": 0.04761899355588758,
273
+ "scr_dir1_threshold_5": 0.17167388232356773,
274
+ "scr_metric_threshold_5": 0.17167388232356773,
275
+ "scr_dir2_threshold_5": 0.09523798711177515,
276
+ "scr_dir1_threshold_10": 0.2532189060784448,
277
+ "scr_metric_threshold_10": 0.2532189060784448,
278
+ "scr_dir2_threshold_10": 0.1523809496777944,
279
+ "scr_dir1_threshold_20": 0.2918455232059005,
280
+ "scr_metric_threshold_20": 0.2918455232059005,
281
+ "scr_dir2_threshold_20": 0.21428575483308432,
282
+ "scr_dir1_threshold_50": 0.283261688392087,
283
+ "scr_metric_threshold_50": 0.283261688392087,
284
+ "scr_dir2_threshold_50": 0.2904760877561864,
285
+ "scr_dir1_threshold_100": 0.3261803508333905,
286
+ "scr_metric_threshold_100": 0.3261803508333905,
287
+ "scr_dir2_threshold_100": 0.11428564130044823,
288
+ "scr_dir1_threshold_500": 0.5193131806567868,
289
+ "scr_metric_threshold_500": 0.5193131806567868,
290
+ "scr_dir2_threshold_500": -0.33809508131207394
291
+ }
292
+ ],
293
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
294
+ "sae_lens_id": "custom_sae",
295
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_5",
296
+ "sae_lens_version": "5.4.2",
297
+ "sae_cfg_dict": {
298
+ "model_name": "gemma-2-2b",
299
+ "d_in": 2304,
300
+ "d_sae": 65536,
301
+ "hook_layer": 12,
302
+ "hook_name": "blocks.12.hook_resid_post",
303
+ "context_size": null,
304
+ "hook_head_index": null,
305
+ "architecture": "topk",
306
+ "apply_b_dec_to_input": null,
307
+ "finetuning_scaling_factor": null,
308
+ "activation_fn_str": "",
309
+ "prepend_bos": true,
310
+ "normalize_activations": "none",
311
+ "dtype": "bfloat16",
312
+ "device": "",
313
+ "dataset_path": "",
314
+ "dataset_trust_remote_code": true,
315
+ "seqpos_slice": [
316
+ null
317
+ ],
318
+ "training_tokens": -100000,
319
+ "sae_lens_training_version": null,
320
+ "neuronpedia_id": null
321
+ },
322
+ "eval_result_unstructured": null
323
+ }
eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_0_custom_sae_eval_results.json ADDED
@@ -0,0 +1,670 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "sparse_probing",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "LabHC/bias_in_bios_class_set2",
8
+ "LabHC/bias_in_bios_class_set3",
9
+ "canrager/amazon_reviews_mcauley_1and5",
10
+ "canrager/amazon_reviews_mcauley_1and5_sentiment",
11
+ "codeparrot/github-code",
12
+ "fancyzhx/ag_news",
13
+ "Helsinki-NLP/europarl"
14
+ ],
15
+ "probe_train_set_size": 4000,
16
+ "probe_test_set_size": 1000,
17
+ "context_length": 128,
18
+ "sae_batch_size": 125,
19
+ "llm_batch_size": 32,
20
+ "llm_dtype": "bfloat16",
21
+ "model_name": "gemma-2-2b",
22
+ "k_values": [
23
+ 1,
24
+ 2,
25
+ 5
26
+ ],
27
+ "lower_vram_usage": false
28
+ },
29
+ "eval_id": "e0adcf36-8676-40fc-8840-fd7c2720822d",
30
+ "datetime_epoch_millis": 1740086005480,
31
+ "eval_result_metrics": {
32
+ "llm": {
33
+ "llm_test_accuracy": 0.9588312957435847,
34
+ "llm_top_1_test_accuracy": 0.6504687499999999,
35
+ "llm_top_2_test_accuracy": 0.7214187500000001,
36
+ "llm_top_5_test_accuracy": 0.7812625,
37
+ "llm_top_10_test_accuracy": null,
38
+ "llm_top_20_test_accuracy": null,
39
+ "llm_top_50_test_accuracy": null,
40
+ "llm_top_100_test_accuracy": null
41
+ },
42
+ "sae": {
43
+ "sae_test_accuracy": 0.9600562900304794,
44
+ "sae_top_1_test_accuracy": 0.82594375,
45
+ "sae_top_2_test_accuracy": 0.8600562499999999,
46
+ "sae_top_5_test_accuracy": 0.8994500000000001,
47
+ "sae_top_10_test_accuracy": null,
48
+ "sae_top_20_test_accuracy": null,
49
+ "sae_top_50_test_accuracy": null,
50
+ "sae_top_100_test_accuracy": null
51
+ }
52
+ },
53
+ "eval_result_details": [
54
+ {
55
+ "dataset_name": "LabHC/bias_in_bios_class_set1_results",
56
+ "llm_test_accuracy": 0.966800057888031,
57
+ "llm_top_1_test_accuracy": 0.6397999999999999,
58
+ "llm_top_2_test_accuracy": 0.6954,
59
+ "llm_top_5_test_accuracy": 0.7869999999999999,
60
+ "llm_top_10_test_accuracy": null,
61
+ "llm_top_20_test_accuracy": null,
62
+ "llm_top_50_test_accuracy": null,
63
+ "llm_top_100_test_accuracy": null,
64
+ "sae_test_accuracy": 0.9684000372886657,
65
+ "sae_top_1_test_accuracy": 0.8145999999999999,
66
+ "sae_top_2_test_accuracy": 0.8417999999999999,
67
+ "sae_top_5_test_accuracy": 0.9124000000000001,
68
+ "sae_top_10_test_accuracy": null,
69
+ "sae_top_20_test_accuracy": null,
70
+ "sae_top_50_test_accuracy": null,
71
+ "sae_top_100_test_accuracy": null
72
+ },
73
+ {
74
+ "dataset_name": "LabHC/bias_in_bios_class_set2_results",
75
+ "llm_test_accuracy": 0.9542000532150269,
76
+ "llm_top_1_test_accuracy": 0.6686,
77
+ "llm_top_2_test_accuracy": 0.7194,
78
+ "llm_top_5_test_accuracy": 0.763,
79
+ "llm_top_10_test_accuracy": null,
80
+ "llm_top_20_test_accuracy": null,
81
+ "llm_top_50_test_accuracy": null,
82
+ "llm_top_100_test_accuracy": null,
83
+ "sae_test_accuracy": 0.9542000412940979,
84
+ "sae_top_1_test_accuracy": 0.7876,
85
+ "sae_top_2_test_accuracy": 0.8343999999999999,
86
+ "sae_top_5_test_accuracy": 0.8808,
87
+ "sae_top_10_test_accuracy": null,
88
+ "sae_top_20_test_accuracy": null,
89
+ "sae_top_50_test_accuracy": null,
90
+ "sae_top_100_test_accuracy": null
91
+ },
92
+ {
93
+ "dataset_name": "LabHC/bias_in_bios_class_set3_results",
94
+ "llm_test_accuracy": 0.9332000374794006,
95
+ "llm_top_1_test_accuracy": 0.6826000000000001,
96
+ "llm_top_2_test_accuracy": 0.7456,
97
+ "llm_top_5_test_accuracy": 0.7732,
98
+ "llm_top_10_test_accuracy": null,
99
+ "llm_top_20_test_accuracy": null,
100
+ "llm_top_50_test_accuracy": null,
101
+ "llm_top_100_test_accuracy": null,
102
+ "sae_test_accuracy": 0.9312000513076782,
103
+ "sae_top_1_test_accuracy": 0.8114000000000001,
104
+ "sae_top_2_test_accuracy": 0.8432000000000001,
105
+ "sae_top_5_test_accuracy": 0.8654,
106
+ "sae_top_10_test_accuracy": null,
107
+ "sae_top_20_test_accuracy": null,
108
+ "sae_top_50_test_accuracy": null,
109
+ "sae_top_100_test_accuracy": null
110
+ },
111
+ {
112
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results",
113
+ "llm_test_accuracy": 0.9140000343322754,
114
+ "llm_top_1_test_accuracy": 0.6006,
115
+ "llm_top_2_test_accuracy": 0.6432,
116
+ "llm_top_5_test_accuracy": 0.6728000000000001,
117
+ "llm_top_10_test_accuracy": null,
118
+ "llm_top_20_test_accuracy": null,
119
+ "llm_top_50_test_accuracy": null,
120
+ "llm_top_100_test_accuracy": null,
121
+ "sae_test_accuracy": 0.9210000514984131,
122
+ "sae_top_1_test_accuracy": 0.7072,
123
+ "sae_top_2_test_accuracy": 0.8009999999999999,
124
+ "sae_top_5_test_accuracy": 0.8416,
125
+ "sae_top_10_test_accuracy": null,
126
+ "sae_top_20_test_accuracy": null,
127
+ "sae_top_50_test_accuracy": null,
128
+ "sae_top_100_test_accuracy": null
129
+ },
130
+ {
131
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results",
132
+ "llm_test_accuracy": 0.9810000360012054,
133
+ "llm_top_1_test_accuracy": 0.673,
134
+ "llm_top_2_test_accuracy": 0.724,
135
+ "llm_top_5_test_accuracy": 0.766,
136
+ "llm_top_10_test_accuracy": null,
137
+ "llm_top_20_test_accuracy": null,
138
+ "llm_top_50_test_accuracy": null,
139
+ "llm_top_100_test_accuracy": null,
140
+ "sae_test_accuracy": 0.9800000190734863,
141
+ "sae_top_1_test_accuracy": 0.935,
142
+ "sae_top_2_test_accuracy": 0.94,
143
+ "sae_top_5_test_accuracy": 0.951,
144
+ "sae_top_10_test_accuracy": null,
145
+ "sae_top_20_test_accuracy": null,
146
+ "sae_top_50_test_accuracy": null,
147
+ "sae_top_100_test_accuracy": null
148
+ },
149
+ {
150
+ "dataset_name": "codeparrot/github-code_results",
151
+ "llm_test_accuracy": 0.9708000421524048,
152
+ "llm_top_1_test_accuracy": 0.6612,
153
+ "llm_top_2_test_accuracy": 0.6961999999999999,
154
+ "llm_top_5_test_accuracy": 0.7626,
155
+ "llm_top_10_test_accuracy": null,
156
+ "llm_top_20_test_accuracy": null,
157
+ "llm_top_50_test_accuracy": null,
158
+ "llm_top_100_test_accuracy": null,
159
+ "sae_test_accuracy": 0.9730000495910645,
160
+ "sae_top_1_test_accuracy": 0.7988,
161
+ "sae_top_2_test_accuracy": 0.808,
162
+ "sae_top_5_test_accuracy": 0.8657999999999999,
163
+ "sae_top_10_test_accuracy": null,
164
+ "sae_top_20_test_accuracy": null,
165
+ "sae_top_50_test_accuracy": null,
166
+ "sae_top_100_test_accuracy": null
167
+ },
168
+ {
169
+ "dataset_name": "fancyzhx/ag_news_results",
170
+ "llm_test_accuracy": 0.9512500613927841,
171
+ "llm_top_1_test_accuracy": 0.6367499999999999,
172
+ "llm_top_2_test_accuracy": 0.76075,
173
+ "llm_top_5_test_accuracy": 0.8255,
174
+ "llm_top_10_test_accuracy": null,
175
+ "llm_top_20_test_accuracy": null,
176
+ "llm_top_50_test_accuracy": null,
177
+ "llm_top_100_test_accuracy": null,
178
+ "sae_test_accuracy": 0.9532500505447388,
179
+ "sae_top_1_test_accuracy": 0.7927500000000001,
180
+ "sae_top_2_test_accuracy": 0.8422499999999999,
181
+ "sae_top_5_test_accuracy": 0.881,
182
+ "sae_top_10_test_accuracy": null,
183
+ "sae_top_20_test_accuracy": null,
184
+ "sae_top_50_test_accuracy": null,
185
+ "sae_top_100_test_accuracy": null
186
+ },
187
+ {
188
+ "dataset_name": "Helsinki-NLP/europarl_results",
189
+ "llm_test_accuracy": 0.9994000434875489,
190
+ "llm_top_1_test_accuracy": 0.6411999999999999,
191
+ "llm_top_2_test_accuracy": 0.7868,
192
+ "llm_top_5_test_accuracy": 0.9,
193
+ "llm_top_10_test_accuracy": null,
194
+ "llm_top_20_test_accuracy": null,
195
+ "llm_top_50_test_accuracy": null,
196
+ "llm_top_100_test_accuracy": null,
197
+ "sae_test_accuracy": 0.9994000196456909,
198
+ "sae_top_1_test_accuracy": 0.9602,
199
+ "sae_top_2_test_accuracy": 0.9698,
200
+ "sae_top_5_test_accuracy": 0.9975999999999999,
201
+ "sae_top_10_test_accuracy": null,
202
+ "sae_top_20_test_accuracy": null,
203
+ "sae_top_50_test_accuracy": null,
204
+ "sae_top_100_test_accuracy": null
205
+ }
206
+ ],
207
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
208
+ "sae_lens_id": "custom_sae",
209
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_0",
210
+ "sae_lens_version": "5.4.2",
211
+ "sae_cfg_dict": {
212
+ "model_name": "gemma-2-2b",
213
+ "d_in": 2304,
214
+ "d_sae": 16384,
215
+ "hook_layer": 12,
216
+ "hook_name": "blocks.12.hook_resid_post",
217
+ "context_size": null,
218
+ "hook_head_index": null,
219
+ "architecture": "standard_april_update",
220
+ "apply_b_dec_to_input": null,
221
+ "finetuning_scaling_factor": null,
222
+ "activation_fn_str": "",
223
+ "prepend_bos": true,
224
+ "normalize_activations": "none",
225
+ "dtype": "bfloat16",
226
+ "device": "",
227
+ "dataset_path": "",
228
+ "dataset_trust_remote_code": true,
229
+ "seqpos_slice": [
230
+ null
231
+ ],
232
+ "training_tokens": -100000,
233
+ "sae_lens_training_version": null,
234
+ "neuronpedia_id": null
235
+ },
236
+ "eval_result_unstructured": {
237
+ "LabHC/bias_in_bios_class_set1_results": {
238
+ "sae_test_accuracy": {
239
+ "0": 0.9550000429153442,
240
+ "1": 0.9610000252723694,
241
+ "2": 0.9520000219345093,
242
+ "6": 0.9920000433921814,
243
+ "9": 0.9820000529289246
244
+ },
245
+ "llm_test_accuracy": {
246
+ "0": 0.9510000348091125,
247
+ "1": 0.9670000672340393,
248
+ "2": 0.9530000686645508,
249
+ "6": 0.987000048160553,
250
+ "9": 0.9760000705718994
251
+ },
252
+ "llm_top_1_test_accuracy": {
253
+ "0": 0.577,
254
+ "1": 0.613,
255
+ "2": 0.662,
256
+ "6": 0.787,
257
+ "9": 0.56
258
+ },
259
+ "llm_top_2_test_accuracy": {
260
+ "0": 0.574,
261
+ "1": 0.66,
262
+ "2": 0.718,
263
+ "6": 0.811,
264
+ "9": 0.714
265
+ },
266
+ "llm_top_5_test_accuracy": {
267
+ "0": 0.713,
268
+ "1": 0.711,
269
+ "2": 0.755,
270
+ "6": 0.895,
271
+ "9": 0.861
272
+ },
273
+ "sae_top_1_test_accuracy": {
274
+ "0": 0.618,
275
+ "1": 0.665,
276
+ "2": 0.864,
277
+ "6": 0.983,
278
+ "9": 0.943
279
+ },
280
+ "sae_top_2_test_accuracy": {
281
+ "0": 0.585,
282
+ "1": 0.819,
283
+ "2": 0.883,
284
+ "6": 0.983,
285
+ "9": 0.939
286
+ },
287
+ "sae_top_5_test_accuracy": {
288
+ "0": 0.856,
289
+ "1": 0.88,
290
+ "2": 0.884,
291
+ "6": 0.985,
292
+ "9": 0.957
293
+ }
294
+ },
295
+ "LabHC/bias_in_bios_class_set2_results": {
296
+ "sae_test_accuracy": {
297
+ "11": 0.9630000591278076,
298
+ "13": 0.9550000429153442,
299
+ "14": 0.9520000219345093,
300
+ "18": 0.9330000281333923,
301
+ "19": 0.968000054359436
302
+ },
303
+ "llm_test_accuracy": {
304
+ "11": 0.968000054359436,
305
+ "13": 0.9500000476837158,
306
+ "14": 0.956000030040741,
307
+ "18": 0.9350000619888306,
308
+ "19": 0.9620000720024109
309
+ },
310
+ "llm_top_1_test_accuracy": {
311
+ "11": 0.545,
312
+ "13": 0.666,
313
+ "14": 0.649,
314
+ "18": 0.693,
315
+ "19": 0.79
316
+ },
317
+ "llm_top_2_test_accuracy": {
318
+ "11": 0.692,
319
+ "13": 0.724,
320
+ "14": 0.68,
321
+ "18": 0.732,
322
+ "19": 0.769
323
+ },
324
+ "llm_top_5_test_accuracy": {
325
+ "11": 0.793,
326
+ "13": 0.751,
327
+ "14": 0.718,
328
+ "18": 0.723,
329
+ "19": 0.83
330
+ },
331
+ "sae_top_1_test_accuracy": {
332
+ "11": 0.731,
333
+ "13": 0.764,
334
+ "14": 0.869,
335
+ "18": 0.727,
336
+ "19": 0.847
337
+ },
338
+ "sae_top_2_test_accuracy": {
339
+ "11": 0.872,
340
+ "13": 0.701,
341
+ "14": 0.878,
342
+ "18": 0.876,
343
+ "19": 0.845
344
+ },
345
+ "sae_top_5_test_accuracy": {
346
+ "11": 0.937,
347
+ "13": 0.865,
348
+ "14": 0.882,
349
+ "18": 0.87,
350
+ "19": 0.85
351
+ }
352
+ },
353
+ "LabHC/bias_in_bios_class_set3_results": {
354
+ "sae_test_accuracy": {
355
+ "20": 0.956000030040741,
356
+ "21": 0.9270000457763672,
357
+ "22": 0.9180000424385071,
358
+ "25": 0.9620000720024109,
359
+ "26": 0.893000066280365
360
+ },
361
+ "llm_test_accuracy": {
362
+ "20": 0.9580000638961792,
363
+ "21": 0.9240000247955322,
364
+ "22": 0.9200000166893005,
365
+ "25": 0.9630000591278076,
366
+ "26": 0.9010000228881836
367
+ },
368
+ "llm_top_1_test_accuracy": {
369
+ "20": 0.696,
370
+ "21": 0.757,
371
+ "22": 0.637,
372
+ "25": 0.692,
373
+ "26": 0.631
374
+ },
375
+ "llm_top_2_test_accuracy": {
376
+ "20": 0.818,
377
+ "21": 0.774,
378
+ "22": 0.688,
379
+ "25": 0.762,
380
+ "26": 0.686
381
+ },
382
+ "llm_top_5_test_accuracy": {
383
+ "20": 0.862,
384
+ "21": 0.792,
385
+ "22": 0.748,
386
+ "25": 0.791,
387
+ "26": 0.673
388
+ },
389
+ "sae_top_1_test_accuracy": {
390
+ "20": 0.858,
391
+ "21": 0.728,
392
+ "22": 0.871,
393
+ "25": 0.898,
394
+ "26": 0.702
395
+ },
396
+ "sae_top_2_test_accuracy": {
397
+ "20": 0.887,
398
+ "21": 0.789,
399
+ "22": 0.883,
400
+ "25": 0.895,
401
+ "26": 0.762
402
+ },
403
+ "sae_top_5_test_accuracy": {
404
+ "20": 0.923,
405
+ "21": 0.844,
406
+ "22": 0.892,
407
+ "25": 0.882,
408
+ "26": 0.786
409
+ }
410
+ },
411
+ "canrager/amazon_reviews_mcauley_1and5_results": {
412
+ "sae_test_accuracy": {
413
+ "1": 0.9490000605583191,
414
+ "2": 0.9320000410079956,
415
+ "3": 0.9230000376701355,
416
+ "5": 0.9260000586509705,
417
+ "6": 0.8750000596046448
418
+ },
419
+ "llm_test_accuracy": {
420
+ "1": 0.940000057220459,
421
+ "2": 0.9340000152587891,
422
+ "3": 0.9200000166893005,
423
+ "5": 0.9150000214576721,
424
+ "6": 0.8610000610351562
425
+ },
426
+ "llm_top_1_test_accuracy": {
427
+ "1": 0.662,
428
+ "2": 0.599,
429
+ "3": 0.592,
430
+ "5": 0.57,
431
+ "6": 0.58
432
+ },
433
+ "llm_top_2_test_accuracy": {
434
+ "1": 0.748,
435
+ "2": 0.642,
436
+ "3": 0.6,
437
+ "5": 0.625,
438
+ "6": 0.601
439
+ },
440
+ "llm_top_5_test_accuracy": {
441
+ "1": 0.764,
442
+ "2": 0.646,
443
+ "3": 0.639,
444
+ "5": 0.638,
445
+ "6": 0.677
446
+ },
447
+ "sae_top_1_test_accuracy": {
448
+ "1": 0.86,
449
+ "2": 0.619,
450
+ "3": 0.656,
451
+ "5": 0.646,
452
+ "6": 0.755
453
+ },
454
+ "sae_top_2_test_accuracy": {
455
+ "1": 0.873,
456
+ "2": 0.859,
457
+ "3": 0.709,
458
+ "5": 0.796,
459
+ "6": 0.768
460
+ },
461
+ "sae_top_5_test_accuracy": {
462
+ "1": 0.917,
463
+ "2": 0.873,
464
+ "3": 0.771,
465
+ "5": 0.873,
466
+ "6": 0.774
467
+ }
468
+ },
469
+ "canrager/amazon_reviews_mcauley_1and5_sentiment_results": {
470
+ "sae_test_accuracy": {
471
+ "1.0": 0.9800000190734863,
472
+ "5.0": 0.9800000190734863
473
+ },
474
+ "llm_test_accuracy": {
475
+ "1.0": 0.9800000190734863,
476
+ "5.0": 0.9820000529289246
477
+ },
478
+ "llm_top_1_test_accuracy": {
479
+ "1.0": 0.673,
480
+ "5.0": 0.673
481
+ },
482
+ "llm_top_2_test_accuracy": {
483
+ "1.0": 0.724,
484
+ "5.0": 0.724
485
+ },
486
+ "llm_top_5_test_accuracy": {
487
+ "1.0": 0.766,
488
+ "5.0": 0.766
489
+ },
490
+ "sae_top_1_test_accuracy": {
491
+ "1.0": 0.935,
492
+ "5.0": 0.935
493
+ },
494
+ "sae_top_2_test_accuracy": {
495
+ "1.0": 0.94,
496
+ "5.0": 0.94
497
+ },
498
+ "sae_top_5_test_accuracy": {
499
+ "1.0": 0.951,
500
+ "5.0": 0.951
501
+ }
502
+ },
503
+ "codeparrot/github-code_results": {
504
+ "sae_test_accuracy": {
505
+ "C": 0.9580000638961792,
506
+ "Python": 0.9910000562667847,
507
+ "HTML": 0.9910000562667847,
508
+ "Java": 0.9700000286102295,
509
+ "PHP": 0.9550000429153442
510
+ },
511
+ "llm_test_accuracy": {
512
+ "C": 0.9580000638961792,
513
+ "Python": 0.9850000739097595,
514
+ "HTML": 0.9890000224113464,
515
+ "Java": 0.9660000205039978,
516
+ "PHP": 0.956000030040741
517
+ },
518
+ "llm_top_1_test_accuracy": {
519
+ "C": 0.672,
520
+ "Python": 0.643,
521
+ "HTML": 0.788,
522
+ "Java": 0.616,
523
+ "PHP": 0.587
524
+ },
525
+ "llm_top_2_test_accuracy": {
526
+ "C": 0.653,
527
+ "Python": 0.675,
528
+ "HTML": 0.826,
529
+ "Java": 0.685,
530
+ "PHP": 0.642
531
+ },
532
+ "llm_top_5_test_accuracy": {
533
+ "C": 0.758,
534
+ "Python": 0.731,
535
+ "HTML": 0.898,
536
+ "Java": 0.722,
537
+ "PHP": 0.704
538
+ },
539
+ "sae_top_1_test_accuracy": {
540
+ "C": 0.631,
541
+ "Python": 0.92,
542
+ "HTML": 0.893,
543
+ "Java": 0.657,
544
+ "PHP": 0.893
545
+ },
546
+ "sae_top_2_test_accuracy": {
547
+ "C": 0.629,
548
+ "Python": 0.935,
549
+ "HTML": 0.922,
550
+ "Java": 0.644,
551
+ "PHP": 0.91
552
+ },
553
+ "sae_top_5_test_accuracy": {
554
+ "C": 0.73,
555
+ "Python": 0.947,
556
+ "HTML": 0.935,
557
+ "Java": 0.811,
558
+ "PHP": 0.906
559
+ }
560
+ },
561
+ "fancyzhx/ag_news_results": {
562
+ "sae_test_accuracy": {
563
+ "0": 0.9470000267028809,
564
+ "1": 0.9900000691413879,
565
+ "2": 0.9300000667572021,
566
+ "3": 0.9460000395774841
567
+ },
568
+ "llm_test_accuracy": {
569
+ "0": 0.940000057220459,
570
+ "1": 0.9850000739097595,
571
+ "2": 0.9300000667572021,
572
+ "3": 0.9500000476837158
573
+ },
574
+ "llm_top_1_test_accuracy": {
575
+ "0": 0.568,
576
+ "1": 0.671,
577
+ "2": 0.667,
578
+ "3": 0.641
579
+ },
580
+ "llm_top_2_test_accuracy": {
581
+ "0": 0.802,
582
+ "1": 0.802,
583
+ "2": 0.701,
584
+ "3": 0.738
585
+ },
586
+ "llm_top_5_test_accuracy": {
587
+ "0": 0.813,
588
+ "1": 0.884,
589
+ "2": 0.762,
590
+ "3": 0.843
591
+ },
592
+ "sae_top_1_test_accuracy": {
593
+ "0": 0.718,
594
+ "1": 0.971,
595
+ "2": 0.793,
596
+ "3": 0.689
597
+ },
598
+ "sae_top_2_test_accuracy": {
599
+ "0": 0.845,
600
+ "1": 0.971,
601
+ "2": 0.842,
602
+ "3": 0.711
603
+ },
604
+ "sae_top_5_test_accuracy": {
605
+ "0": 0.866,
606
+ "1": 0.981,
607
+ "2": 0.848,
608
+ "3": 0.829
609
+ }
610
+ },
611
+ "Helsinki-NLP/europarl_results": {
612
+ "sae_test_accuracy": {
613
+ "en": 0.9980000257492065,
614
+ "fr": 1.0,
615
+ "de": 1.0,
616
+ "es": 0.999000072479248,
617
+ "nl": 1.0
618
+ },
619
+ "llm_test_accuracy": {
620
+ "en": 0.999000072479248,
621
+ "fr": 0.999000072479248,
622
+ "de": 1.0,
623
+ "es": 1.0,
624
+ "nl": 0.999000072479248
625
+ },
626
+ "llm_top_1_test_accuracy": {
627
+ "en": 0.732,
628
+ "fr": 0.587,
629
+ "de": 0.759,
630
+ "es": 0.489,
631
+ "nl": 0.639
632
+ },
633
+ "llm_top_2_test_accuracy": {
634
+ "en": 0.834,
635
+ "fr": 0.604,
636
+ "de": 0.84,
637
+ "es": 0.907,
638
+ "nl": 0.749
639
+ },
640
+ "llm_top_5_test_accuracy": {
641
+ "en": 0.889,
642
+ "fr": 0.927,
643
+ "de": 0.834,
644
+ "es": 0.977,
645
+ "nl": 0.873
646
+ },
647
+ "sae_top_1_test_accuracy": {
648
+ "en": 0.999,
649
+ "fr": 0.992,
650
+ "de": 0.91,
651
+ "es": 0.903,
652
+ "nl": 0.997
653
+ },
654
+ "sae_top_2_test_accuracy": {
655
+ "en": 0.999,
656
+ "fr": 0.993,
657
+ "de": 0.902,
658
+ "es": 0.957,
659
+ "nl": 0.998
660
+ },
661
+ "sae_top_5_test_accuracy": {
662
+ "en": 0.998,
663
+ "fr": 0.998,
664
+ "de": 0.995,
665
+ "es": 0.998,
666
+ "nl": 0.999
667
+ }
668
+ }
669
+ }
670
+ }
eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_1_custom_sae_eval_results.json ADDED
@@ -0,0 +1,670 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "sparse_probing",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "LabHC/bias_in_bios_class_set2",
8
+ "LabHC/bias_in_bios_class_set3",
9
+ "canrager/amazon_reviews_mcauley_1and5",
10
+ "canrager/amazon_reviews_mcauley_1and5_sentiment",
11
+ "codeparrot/github-code",
12
+ "fancyzhx/ag_news",
13
+ "Helsinki-NLP/europarl"
14
+ ],
15
+ "probe_train_set_size": 4000,
16
+ "probe_test_set_size": 1000,
17
+ "context_length": 128,
18
+ "sae_batch_size": 125,
19
+ "llm_batch_size": 32,
20
+ "llm_dtype": "bfloat16",
21
+ "model_name": "gemma-2-2b",
22
+ "k_values": [
23
+ 1,
24
+ 2,
25
+ 5
26
+ ],
27
+ "lower_vram_usage": false
28
+ },
29
+ "eval_id": "895ffd8f-dfef-4e0f-9629-e2b5c78981a4",
30
+ "datetime_epoch_millis": 1740085746659,
31
+ "eval_result_metrics": {
32
+ "llm": {
33
+ "llm_test_accuracy": 0.9588312957435847,
34
+ "llm_top_1_test_accuracy": 0.6504687499999999,
35
+ "llm_top_2_test_accuracy": 0.7214187500000001,
36
+ "llm_top_5_test_accuracy": 0.7812625,
37
+ "llm_top_10_test_accuracy": null,
38
+ "llm_top_20_test_accuracy": null,
39
+ "llm_top_50_test_accuracy": null,
40
+ "llm_top_100_test_accuracy": null
41
+ },
42
+ "sae": {
43
+ "sae_test_accuracy": 0.960212542116642,
44
+ "sae_top_1_test_accuracy": 0.8354937499999999,
45
+ "sae_top_2_test_accuracy": 0.8662187500000001,
46
+ "sae_top_5_test_accuracy": 0.9031,
47
+ "sae_top_10_test_accuracy": null,
48
+ "sae_top_20_test_accuracy": null,
49
+ "sae_top_50_test_accuracy": null,
50
+ "sae_top_100_test_accuracy": null
51
+ }
52
+ },
53
+ "eval_result_details": [
54
+ {
55
+ "dataset_name": "LabHC/bias_in_bios_class_set1_results",
56
+ "llm_test_accuracy": 0.966800057888031,
57
+ "llm_top_1_test_accuracy": 0.6397999999999999,
58
+ "llm_top_2_test_accuracy": 0.6954,
59
+ "llm_top_5_test_accuracy": 0.7869999999999999,
60
+ "llm_top_10_test_accuracy": null,
61
+ "llm_top_20_test_accuracy": null,
62
+ "llm_top_50_test_accuracy": null,
63
+ "llm_top_100_test_accuracy": null,
64
+ "sae_test_accuracy": 0.9698000431060791,
65
+ "sae_top_1_test_accuracy": 0.8148,
66
+ "sae_top_2_test_accuracy": 0.8844,
67
+ "sae_top_5_test_accuracy": 0.9256,
68
+ "sae_top_10_test_accuracy": null,
69
+ "sae_top_20_test_accuracy": null,
70
+ "sae_top_50_test_accuracy": null,
71
+ "sae_top_100_test_accuracy": null
72
+ },
73
+ {
74
+ "dataset_name": "LabHC/bias_in_bios_class_set2_results",
75
+ "llm_test_accuracy": 0.9542000532150269,
76
+ "llm_top_1_test_accuracy": 0.6686,
77
+ "llm_top_2_test_accuracy": 0.7194,
78
+ "llm_top_5_test_accuracy": 0.763,
79
+ "llm_top_10_test_accuracy": null,
80
+ "llm_top_20_test_accuracy": null,
81
+ "llm_top_50_test_accuracy": null,
82
+ "llm_top_100_test_accuracy": null,
83
+ "sae_test_accuracy": 0.9566000461578369,
84
+ "sae_top_1_test_accuracy": 0.7938,
85
+ "sae_top_2_test_accuracy": 0.8140000000000001,
86
+ "sae_top_5_test_accuracy": 0.8708,
87
+ "sae_top_10_test_accuracy": null,
88
+ "sae_top_20_test_accuracy": null,
89
+ "sae_top_50_test_accuracy": null,
90
+ "sae_top_100_test_accuracy": null
91
+ },
92
+ {
93
+ "dataset_name": "LabHC/bias_in_bios_class_set3_results",
94
+ "llm_test_accuracy": 0.9332000374794006,
95
+ "llm_top_1_test_accuracy": 0.6826000000000001,
96
+ "llm_top_2_test_accuracy": 0.7456,
97
+ "llm_top_5_test_accuracy": 0.7732,
98
+ "llm_top_10_test_accuracy": null,
99
+ "llm_top_20_test_accuracy": null,
100
+ "llm_top_50_test_accuracy": null,
101
+ "llm_top_100_test_accuracy": null,
102
+ "sae_test_accuracy": 0.9334000468254089,
103
+ "sae_top_1_test_accuracy": 0.8116,
104
+ "sae_top_2_test_accuracy": 0.8458,
105
+ "sae_top_5_test_accuracy": 0.8672000000000001,
106
+ "sae_top_10_test_accuracy": null,
107
+ "sae_top_20_test_accuracy": null,
108
+ "sae_top_50_test_accuracy": null,
109
+ "sae_top_100_test_accuracy": null
110
+ },
111
+ {
112
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results",
113
+ "llm_test_accuracy": 0.9140000343322754,
114
+ "llm_top_1_test_accuracy": 0.6006,
115
+ "llm_top_2_test_accuracy": 0.6432,
116
+ "llm_top_5_test_accuracy": 0.6728000000000001,
117
+ "llm_top_10_test_accuracy": null,
118
+ "llm_top_20_test_accuracy": null,
119
+ "llm_top_50_test_accuracy": null,
120
+ "llm_top_100_test_accuracy": null,
121
+ "sae_test_accuracy": 0.9208000540733338,
122
+ "sae_top_1_test_accuracy": 0.7614,
123
+ "sae_top_2_test_accuracy": 0.8004000000000001,
124
+ "sae_top_5_test_accuracy": 0.8416,
125
+ "sae_top_10_test_accuracy": null,
126
+ "sae_top_20_test_accuracy": null,
127
+ "sae_top_50_test_accuracy": null,
128
+ "sae_top_100_test_accuracy": null
129
+ },
130
+ {
131
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results",
132
+ "llm_test_accuracy": 0.9810000360012054,
133
+ "llm_top_1_test_accuracy": 0.673,
134
+ "llm_top_2_test_accuracy": 0.724,
135
+ "llm_top_5_test_accuracy": 0.766,
136
+ "llm_top_10_test_accuracy": null,
137
+ "llm_top_20_test_accuracy": null,
138
+ "llm_top_50_test_accuracy": null,
139
+ "llm_top_100_test_accuracy": null,
140
+ "sae_test_accuracy": 0.979500025510788,
141
+ "sae_top_1_test_accuracy": 0.949,
142
+ "sae_top_2_test_accuracy": 0.949,
143
+ "sae_top_5_test_accuracy": 0.951,
144
+ "sae_top_10_test_accuracy": null,
145
+ "sae_top_20_test_accuracy": null,
146
+ "sae_top_50_test_accuracy": null,
147
+ "sae_top_100_test_accuracy": null
148
+ },
149
+ {
150
+ "dataset_name": "codeparrot/github-code_results",
151
+ "llm_test_accuracy": 0.9708000421524048,
152
+ "llm_top_1_test_accuracy": 0.6612,
153
+ "llm_top_2_test_accuracy": 0.6961999999999999,
154
+ "llm_top_5_test_accuracy": 0.7626,
155
+ "llm_top_10_test_accuracy": null,
156
+ "llm_top_20_test_accuracy": null,
157
+ "llm_top_50_test_accuracy": null,
158
+ "llm_top_100_test_accuracy": null,
159
+ "sae_test_accuracy": 0.9690000534057617,
160
+ "sae_top_1_test_accuracy": 0.8091999999999999,
161
+ "sae_top_2_test_accuracy": 0.8448,
162
+ "sae_top_5_test_accuracy": 0.8916000000000001,
163
+ "sae_top_10_test_accuracy": null,
164
+ "sae_top_20_test_accuracy": null,
165
+ "sae_top_50_test_accuracy": null,
166
+ "sae_top_100_test_accuracy": null
167
+ },
168
+ {
169
+ "dataset_name": "fancyzhx/ag_news_results",
170
+ "llm_test_accuracy": 0.9512500613927841,
171
+ "llm_top_1_test_accuracy": 0.6367499999999999,
172
+ "llm_top_2_test_accuracy": 0.76075,
173
+ "llm_top_5_test_accuracy": 0.8255,
174
+ "llm_top_10_test_accuracy": null,
175
+ "llm_top_20_test_accuracy": null,
176
+ "llm_top_50_test_accuracy": null,
177
+ "llm_top_100_test_accuracy": null,
178
+ "sae_test_accuracy": 0.9530000388622284,
179
+ "sae_top_1_test_accuracy": 0.78975,
180
+ "sae_top_2_test_accuracy": 0.83675,
181
+ "sae_top_5_test_accuracy": 0.882,
182
+ "sae_top_10_test_accuracy": null,
183
+ "sae_top_20_test_accuracy": null,
184
+ "sae_top_50_test_accuracy": null,
185
+ "sae_top_100_test_accuracy": null
186
+ },
187
+ {
188
+ "dataset_name": "Helsinki-NLP/europarl_results",
189
+ "llm_test_accuracy": 0.9994000434875489,
190
+ "llm_top_1_test_accuracy": 0.6411999999999999,
191
+ "llm_top_2_test_accuracy": 0.7868,
192
+ "llm_top_5_test_accuracy": 0.9,
193
+ "llm_top_10_test_accuracy": null,
194
+ "llm_top_20_test_accuracy": null,
195
+ "llm_top_50_test_accuracy": null,
196
+ "llm_top_100_test_accuracy": null,
197
+ "sae_test_accuracy": 0.9996000289916992,
198
+ "sae_top_1_test_accuracy": 0.9544,
199
+ "sae_top_2_test_accuracy": 0.9545999999999999,
200
+ "sae_top_5_test_accuracy": 0.9949999999999999,
201
+ "sae_top_10_test_accuracy": null,
202
+ "sae_top_20_test_accuracy": null,
203
+ "sae_top_50_test_accuracy": null,
204
+ "sae_top_100_test_accuracy": null
205
+ }
206
+ ],
207
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
208
+ "sae_lens_id": "custom_sae",
209
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_1",
210
+ "sae_lens_version": "5.4.2",
211
+ "sae_cfg_dict": {
212
+ "model_name": "gemma-2-2b",
213
+ "d_in": 2304,
214
+ "d_sae": 16384,
215
+ "hook_layer": 12,
216
+ "hook_name": "blocks.12.hook_resid_post",
217
+ "context_size": null,
218
+ "hook_head_index": null,
219
+ "architecture": "standard_april_update",
220
+ "apply_b_dec_to_input": null,
221
+ "finetuning_scaling_factor": null,
222
+ "activation_fn_str": "",
223
+ "prepend_bos": true,
224
+ "normalize_activations": "none",
225
+ "dtype": "bfloat16",
226
+ "device": "",
227
+ "dataset_path": "",
228
+ "dataset_trust_remote_code": true,
229
+ "seqpos_slice": [
230
+ null
231
+ ],
232
+ "training_tokens": -100000,
233
+ "sae_lens_training_version": null,
234
+ "neuronpedia_id": null
235
+ },
236
+ "eval_result_unstructured": {
237
+ "LabHC/bias_in_bios_class_set1_results": {
238
+ "sae_test_accuracy": {
239
+ "0": 0.9500000476837158,
240
+ "1": 0.9700000286102295,
241
+ "2": 0.9540000557899475,
242
+ "6": 0.9890000224113464,
243
+ "9": 0.9860000610351562
244
+ },
245
+ "llm_test_accuracy": {
246
+ "0": 0.9510000348091125,
247
+ "1": 0.9670000672340393,
248
+ "2": 0.9530000686645508,
249
+ "6": 0.987000048160553,
250
+ "9": 0.9760000705718994
251
+ },
252
+ "llm_top_1_test_accuracy": {
253
+ "0": 0.577,
254
+ "1": 0.613,
255
+ "2": 0.662,
256
+ "6": 0.787,
257
+ "9": 0.56
258
+ },
259
+ "llm_top_2_test_accuracy": {
260
+ "0": 0.574,
261
+ "1": 0.66,
262
+ "2": 0.718,
263
+ "6": 0.811,
264
+ "9": 0.714
265
+ },
266
+ "llm_top_5_test_accuracy": {
267
+ "0": 0.713,
268
+ "1": 0.711,
269
+ "2": 0.755,
270
+ "6": 0.895,
271
+ "9": 0.861
272
+ },
273
+ "sae_top_1_test_accuracy": {
274
+ "0": 0.653,
275
+ "1": 0.65,
276
+ "2": 0.868,
277
+ "6": 0.977,
278
+ "9": 0.926
279
+ },
280
+ "sae_top_2_test_accuracy": {
281
+ "0": 0.876,
282
+ "1": 0.756,
283
+ "2": 0.879,
284
+ "6": 0.977,
285
+ "9": 0.934
286
+ },
287
+ "sae_top_5_test_accuracy": {
288
+ "0": 0.891,
289
+ "1": 0.9,
290
+ "2": 0.896,
291
+ "6": 0.987,
292
+ "9": 0.954
293
+ }
294
+ },
295
+ "LabHC/bias_in_bios_class_set2_results": {
296
+ "sae_test_accuracy": {
297
+ "11": 0.9620000720024109,
298
+ "13": 0.9600000381469727,
299
+ "14": 0.9540000557899475,
300
+ "18": 0.9330000281333923,
301
+ "19": 0.9740000367164612
302
+ },
303
+ "llm_test_accuracy": {
304
+ "11": 0.968000054359436,
305
+ "13": 0.9500000476837158,
306
+ "14": 0.956000030040741,
307
+ "18": 0.9350000619888306,
308
+ "19": 0.9620000720024109
309
+ },
310
+ "llm_top_1_test_accuracy": {
311
+ "11": 0.545,
312
+ "13": 0.666,
313
+ "14": 0.649,
314
+ "18": 0.693,
315
+ "19": 0.79
316
+ },
317
+ "llm_top_2_test_accuracy": {
318
+ "11": 0.692,
319
+ "13": 0.724,
320
+ "14": 0.68,
321
+ "18": 0.732,
322
+ "19": 0.769
323
+ },
324
+ "llm_top_5_test_accuracy": {
325
+ "11": 0.793,
326
+ "13": 0.751,
327
+ "14": 0.718,
328
+ "18": 0.723,
329
+ "19": 0.83
330
+ },
331
+ "sae_top_1_test_accuracy": {
332
+ "11": 0.858,
333
+ "13": 0.691,
334
+ "14": 0.856,
335
+ "18": 0.726,
336
+ "19": 0.838
337
+ },
338
+ "sae_top_2_test_accuracy": {
339
+ "11": 0.863,
340
+ "13": 0.735,
341
+ "14": 0.895,
342
+ "18": 0.728,
343
+ "19": 0.849
344
+ },
345
+ "sae_top_5_test_accuracy": {
346
+ "11": 0.946,
347
+ "13": 0.768,
348
+ "14": 0.893,
349
+ "18": 0.873,
350
+ "19": 0.874
351
+ }
352
+ },
353
+ "LabHC/bias_in_bios_class_set3_results": {
354
+ "sae_test_accuracy": {
355
+ "20": 0.9610000252723694,
356
+ "21": 0.9250000715255737,
357
+ "22": 0.9180000424385071,
358
+ "25": 0.968000054359436,
359
+ "26": 0.8950000405311584
360
+ },
361
+ "llm_test_accuracy": {
362
+ "20": 0.9580000638961792,
363
+ "21": 0.9240000247955322,
364
+ "22": 0.9200000166893005,
365
+ "25": 0.9630000591278076,
366
+ "26": 0.9010000228881836
367
+ },
368
+ "llm_top_1_test_accuracy": {
369
+ "20": 0.696,
370
+ "21": 0.757,
371
+ "22": 0.637,
372
+ "25": 0.692,
373
+ "26": 0.631
374
+ },
375
+ "llm_top_2_test_accuracy": {
376
+ "20": 0.818,
377
+ "21": 0.774,
378
+ "22": 0.688,
379
+ "25": 0.762,
380
+ "26": 0.686
381
+ },
382
+ "llm_top_5_test_accuracy": {
383
+ "20": 0.862,
384
+ "21": 0.792,
385
+ "22": 0.748,
386
+ "25": 0.791,
387
+ "26": 0.673
388
+ },
389
+ "sae_top_1_test_accuracy": {
390
+ "20": 0.86,
391
+ "21": 0.773,
392
+ "22": 0.86,
393
+ "25": 0.876,
394
+ "26": 0.689
395
+ },
396
+ "sae_top_2_test_accuracy": {
397
+ "20": 0.904,
398
+ "21": 0.807,
399
+ "22": 0.895,
400
+ "25": 0.886,
401
+ "26": 0.737
402
+ },
403
+ "sae_top_5_test_accuracy": {
404
+ "20": 0.92,
405
+ "21": 0.856,
406
+ "22": 0.889,
407
+ "25": 0.896,
408
+ "26": 0.775
409
+ }
410
+ },
411
+ "canrager/amazon_reviews_mcauley_1and5_results": {
412
+ "sae_test_accuracy": {
413
+ "1": 0.9540000557899475,
414
+ "2": 0.937000036239624,
415
+ "3": 0.9270000457763672,
416
+ "5": 0.921000063419342,
417
+ "6": 0.8650000691413879
418
+ },
419
+ "llm_test_accuracy": {
420
+ "1": 0.940000057220459,
421
+ "2": 0.9340000152587891,
422
+ "3": 0.9200000166893005,
423
+ "5": 0.9150000214576721,
424
+ "6": 0.8610000610351562
425
+ },
426
+ "llm_top_1_test_accuracy": {
427
+ "1": 0.662,
428
+ "2": 0.599,
429
+ "3": 0.592,
430
+ "5": 0.57,
431
+ "6": 0.58
432
+ },
433
+ "llm_top_2_test_accuracy": {
434
+ "1": 0.748,
435
+ "2": 0.642,
436
+ "3": 0.6,
437
+ "5": 0.625,
438
+ "6": 0.601
439
+ },
440
+ "llm_top_5_test_accuracy": {
441
+ "1": 0.764,
442
+ "2": 0.646,
443
+ "3": 0.639,
444
+ "5": 0.638,
445
+ "6": 0.677
446
+ },
447
+ "sae_top_1_test_accuracy": {
448
+ "1": 0.916,
449
+ "2": 0.773,
450
+ "3": 0.61,
451
+ "5": 0.759,
452
+ "6": 0.749
453
+ },
454
+ "sae_top_2_test_accuracy": {
455
+ "1": 0.923,
456
+ "2": 0.846,
457
+ "3": 0.689,
458
+ "5": 0.773,
459
+ "6": 0.771
460
+ },
461
+ "sae_top_5_test_accuracy": {
462
+ "1": 0.931,
463
+ "2": 0.861,
464
+ "3": 0.758,
465
+ "5": 0.865,
466
+ "6": 0.793
467
+ }
468
+ },
469
+ "canrager/amazon_reviews_mcauley_1and5_sentiment_results": {
470
+ "sae_test_accuracy": {
471
+ "1.0": 0.9790000319480896,
472
+ "5.0": 0.9800000190734863
473
+ },
474
+ "llm_test_accuracy": {
475
+ "1.0": 0.9800000190734863,
476
+ "5.0": 0.9820000529289246
477
+ },
478
+ "llm_top_1_test_accuracy": {
479
+ "1.0": 0.673,
480
+ "5.0": 0.673
481
+ },
482
+ "llm_top_2_test_accuracy": {
483
+ "1.0": 0.724,
484
+ "5.0": 0.724
485
+ },
486
+ "llm_top_5_test_accuracy": {
487
+ "1.0": 0.766,
488
+ "5.0": 0.766
489
+ },
490
+ "sae_top_1_test_accuracy": {
491
+ "1.0": 0.949,
492
+ "5.0": 0.949
493
+ },
494
+ "sae_top_2_test_accuracy": {
495
+ "1.0": 0.949,
496
+ "5.0": 0.949
497
+ },
498
+ "sae_top_5_test_accuracy": {
499
+ "1.0": 0.951,
500
+ "5.0": 0.951
501
+ }
502
+ },
503
+ "codeparrot/github-code_results": {
504
+ "sae_test_accuracy": {
505
+ "C": 0.9580000638961792,
506
+ "Python": 0.9830000400543213,
507
+ "HTML": 0.9860000610351562,
508
+ "Java": 0.9670000672340393,
509
+ "PHP": 0.9510000348091125
510
+ },
511
+ "llm_test_accuracy": {
512
+ "C": 0.9580000638961792,
513
+ "Python": 0.9850000739097595,
514
+ "HTML": 0.9890000224113464,
515
+ "Java": 0.9660000205039978,
516
+ "PHP": 0.956000030040741
517
+ },
518
+ "llm_top_1_test_accuracy": {
519
+ "C": 0.672,
520
+ "Python": 0.643,
521
+ "HTML": 0.788,
522
+ "Java": 0.616,
523
+ "PHP": 0.587
524
+ },
525
+ "llm_top_2_test_accuracy": {
526
+ "C": 0.653,
527
+ "Python": 0.675,
528
+ "HTML": 0.826,
529
+ "Java": 0.685,
530
+ "PHP": 0.642
531
+ },
532
+ "llm_top_5_test_accuracy": {
533
+ "C": 0.758,
534
+ "Python": 0.731,
535
+ "HTML": 0.898,
536
+ "Java": 0.722,
537
+ "PHP": 0.704
538
+ },
539
+ "sae_top_1_test_accuracy": {
540
+ "C": 0.636,
541
+ "Python": 0.917,
542
+ "HTML": 0.876,
543
+ "Java": 0.706,
544
+ "PHP": 0.911
545
+ },
546
+ "sae_top_2_test_accuracy": {
547
+ "C": 0.726,
548
+ "Python": 0.917,
549
+ "HTML": 0.887,
550
+ "Java": 0.783,
551
+ "PHP": 0.911
552
+ },
553
+ "sae_top_5_test_accuracy": {
554
+ "C": 0.757,
555
+ "Python": 0.967,
556
+ "HTML": 0.953,
557
+ "Java": 0.865,
558
+ "PHP": 0.916
559
+ }
560
+ },
561
+ "fancyzhx/ag_news_results": {
562
+ "sae_test_accuracy": {
563
+ "0": 0.9380000233650208,
564
+ "1": 0.9890000224113464,
565
+ "2": 0.9310000538825989,
566
+ "3": 0.9540000557899475
567
+ },
568
+ "llm_test_accuracy": {
569
+ "0": 0.940000057220459,
570
+ "1": 0.9850000739097595,
571
+ "2": 0.9300000667572021,
572
+ "3": 0.9500000476837158
573
+ },
574
+ "llm_top_1_test_accuracy": {
575
+ "0": 0.568,
576
+ "1": 0.671,
577
+ "2": 0.667,
578
+ "3": 0.641
579
+ },
580
+ "llm_top_2_test_accuracy": {
581
+ "0": 0.802,
582
+ "1": 0.802,
583
+ "2": 0.701,
584
+ "3": 0.738
585
+ },
586
+ "llm_top_5_test_accuracy": {
587
+ "0": 0.813,
588
+ "1": 0.884,
589
+ "2": 0.762,
590
+ "3": 0.843
591
+ },
592
+ "sae_top_1_test_accuracy": {
593
+ "0": 0.853,
594
+ "1": 0.862,
595
+ "2": 0.726,
596
+ "3": 0.718
597
+ },
598
+ "sae_top_2_test_accuracy": {
599
+ "0": 0.868,
600
+ "1": 0.942,
601
+ "2": 0.834,
602
+ "3": 0.703
603
+ },
604
+ "sae_top_5_test_accuracy": {
605
+ "0": 0.864,
606
+ "1": 0.96,
607
+ "2": 0.842,
608
+ "3": 0.862
609
+ }
610
+ },
611
+ "Helsinki-NLP/europarl_results": {
612
+ "sae_test_accuracy": {
613
+ "en": 0.999000072479248,
614
+ "fr": 1.0,
615
+ "de": 1.0,
616
+ "es": 0.999000072479248,
617
+ "nl": 1.0
618
+ },
619
+ "llm_test_accuracy": {
620
+ "en": 0.999000072479248,
621
+ "fr": 0.999000072479248,
622
+ "de": 1.0,
623
+ "es": 1.0,
624
+ "nl": 0.999000072479248
625
+ },
626
+ "llm_top_1_test_accuracy": {
627
+ "en": 0.732,
628
+ "fr": 0.587,
629
+ "de": 0.759,
630
+ "es": 0.489,
631
+ "nl": 0.639
632
+ },
633
+ "llm_top_2_test_accuracy": {
634
+ "en": 0.834,
635
+ "fr": 0.604,
636
+ "de": 0.84,
637
+ "es": 0.907,
638
+ "nl": 0.749
639
+ },
640
+ "llm_top_5_test_accuracy": {
641
+ "en": 0.889,
642
+ "fr": 0.927,
643
+ "de": 0.834,
644
+ "es": 0.977,
645
+ "nl": 0.873
646
+ },
647
+ "sae_top_1_test_accuracy": {
648
+ "en": 0.999,
649
+ "fr": 0.99,
650
+ "de": 0.918,
651
+ "es": 0.946,
652
+ "nl": 0.919
653
+ },
654
+ "sae_top_2_test_accuracy": {
655
+ "en": 0.999,
656
+ "fr": 0.991,
657
+ "de": 0.917,
658
+ "es": 0.96,
659
+ "nl": 0.906
660
+ },
661
+ "sae_top_5_test_accuracy": {
662
+ "en": 0.995,
663
+ "fr": 0.991,
664
+ "de": 0.995,
665
+ "es": 0.994,
666
+ "nl": 1.0
667
+ }
668
+ }
669
+ }
670
+ }
eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_2_custom_sae_eval_results.json ADDED
@@ -0,0 +1,670 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "sparse_probing",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "LabHC/bias_in_bios_class_set2",
8
+ "LabHC/bias_in_bios_class_set3",
9
+ "canrager/amazon_reviews_mcauley_1and5",
10
+ "canrager/amazon_reviews_mcauley_1and5_sentiment",
11
+ "codeparrot/github-code",
12
+ "fancyzhx/ag_news",
13
+ "Helsinki-NLP/europarl"
14
+ ],
15
+ "probe_train_set_size": 4000,
16
+ "probe_test_set_size": 1000,
17
+ "context_length": 128,
18
+ "sae_batch_size": 125,
19
+ "llm_batch_size": 32,
20
+ "llm_dtype": "bfloat16",
21
+ "model_name": "gemma-2-2b",
22
+ "k_values": [
23
+ 1,
24
+ 2,
25
+ 5
26
+ ],
27
+ "lower_vram_usage": false
28
+ },
29
+ "eval_id": "77eb61f6-a3ac-4e04-b23f-0f16bdfcb296",
30
+ "datetime_epoch_millis": 1740086099814,
31
+ "eval_result_metrics": {
32
+ "llm": {
33
+ "llm_test_accuracy": 0.9588312957435847,
34
+ "llm_top_1_test_accuracy": 0.6504687499999999,
35
+ "llm_top_2_test_accuracy": 0.7214187500000001,
36
+ "llm_top_5_test_accuracy": 0.7812625,
37
+ "llm_top_10_test_accuracy": null,
38
+ "llm_top_20_test_accuracy": null,
39
+ "llm_top_50_test_accuracy": null,
40
+ "llm_top_100_test_accuracy": null
41
+ },
42
+ "sae": {
43
+ "sae_test_accuracy": 0.9590937919914722,
44
+ "sae_top_1_test_accuracy": 0.7994374999999999,
45
+ "sae_top_2_test_accuracy": 0.8531625,
46
+ "sae_top_5_test_accuracy": 0.8944437500000001,
47
+ "sae_top_10_test_accuracy": null,
48
+ "sae_top_20_test_accuracy": null,
49
+ "sae_top_50_test_accuracy": null,
50
+ "sae_top_100_test_accuracy": null
51
+ }
52
+ },
53
+ "eval_result_details": [
54
+ {
55
+ "dataset_name": "LabHC/bias_in_bios_class_set1_results",
56
+ "llm_test_accuracy": 0.966800057888031,
57
+ "llm_top_1_test_accuracy": 0.6397999999999999,
58
+ "llm_top_2_test_accuracy": 0.6954,
59
+ "llm_top_5_test_accuracy": 0.7869999999999999,
60
+ "llm_top_10_test_accuracy": null,
61
+ "llm_top_20_test_accuracy": null,
62
+ "llm_top_50_test_accuracy": null,
63
+ "llm_top_100_test_accuracy": null,
64
+ "sae_test_accuracy": 0.966800045967102,
65
+ "sae_top_1_test_accuracy": 0.8177999999999999,
66
+ "sae_top_2_test_accuracy": 0.8380000000000001,
67
+ "sae_top_5_test_accuracy": 0.9085999999999999,
68
+ "sae_top_10_test_accuracy": null,
69
+ "sae_top_20_test_accuracy": null,
70
+ "sae_top_50_test_accuracy": null,
71
+ "sae_top_100_test_accuracy": null
72
+ },
73
+ {
74
+ "dataset_name": "LabHC/bias_in_bios_class_set2_results",
75
+ "llm_test_accuracy": 0.9542000532150269,
76
+ "llm_top_1_test_accuracy": 0.6686,
77
+ "llm_top_2_test_accuracy": 0.7194,
78
+ "llm_top_5_test_accuracy": 0.763,
79
+ "llm_top_10_test_accuracy": null,
80
+ "llm_top_20_test_accuracy": null,
81
+ "llm_top_50_test_accuracy": null,
82
+ "llm_top_100_test_accuracy": null,
83
+ "sae_test_accuracy": 0.955400037765503,
84
+ "sae_top_1_test_accuracy": 0.7726,
85
+ "sae_top_2_test_accuracy": 0.8,
86
+ "sae_top_5_test_accuracy": 0.8831999999999999,
87
+ "sae_top_10_test_accuracy": null,
88
+ "sae_top_20_test_accuracy": null,
89
+ "sae_top_50_test_accuracy": null,
90
+ "sae_top_100_test_accuracy": null
91
+ },
92
+ {
93
+ "dataset_name": "LabHC/bias_in_bios_class_set3_results",
94
+ "llm_test_accuracy": 0.9332000374794006,
95
+ "llm_top_1_test_accuracy": 0.6826000000000001,
96
+ "llm_top_2_test_accuracy": 0.7456,
97
+ "llm_top_5_test_accuracy": 0.7732,
98
+ "llm_top_10_test_accuracy": null,
99
+ "llm_top_20_test_accuracy": null,
100
+ "llm_top_50_test_accuracy": null,
101
+ "llm_top_100_test_accuracy": null,
102
+ "sae_test_accuracy": 0.9342000484466553,
103
+ "sae_top_1_test_accuracy": 0.7976,
104
+ "sae_top_2_test_accuracy": 0.8497999999999999,
105
+ "sae_top_5_test_accuracy": 0.8697999999999999,
106
+ "sae_top_10_test_accuracy": null,
107
+ "sae_top_20_test_accuracy": null,
108
+ "sae_top_50_test_accuracy": null,
109
+ "sae_top_100_test_accuracy": null
110
+ },
111
+ {
112
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results",
113
+ "llm_test_accuracy": 0.9140000343322754,
114
+ "llm_top_1_test_accuracy": 0.6006,
115
+ "llm_top_2_test_accuracy": 0.6432,
116
+ "llm_top_5_test_accuracy": 0.6728000000000001,
117
+ "llm_top_10_test_accuracy": null,
118
+ "llm_top_20_test_accuracy": null,
119
+ "llm_top_50_test_accuracy": null,
120
+ "llm_top_100_test_accuracy": null,
121
+ "sae_test_accuracy": 0.9176000356674194,
122
+ "sae_top_1_test_accuracy": 0.7346,
123
+ "sae_top_2_test_accuracy": 0.748,
124
+ "sae_top_5_test_accuracy": 0.8295999999999999,
125
+ "sae_top_10_test_accuracy": null,
126
+ "sae_top_20_test_accuracy": null,
127
+ "sae_top_50_test_accuracy": null,
128
+ "sae_top_100_test_accuracy": null
129
+ },
130
+ {
131
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results",
132
+ "llm_test_accuracy": 0.9810000360012054,
133
+ "llm_top_1_test_accuracy": 0.673,
134
+ "llm_top_2_test_accuracy": 0.724,
135
+ "llm_top_5_test_accuracy": 0.766,
136
+ "llm_top_10_test_accuracy": null,
137
+ "llm_top_20_test_accuracy": null,
138
+ "llm_top_50_test_accuracy": null,
139
+ "llm_top_100_test_accuracy": null,
140
+ "sae_test_accuracy": 0.9770000278949738,
141
+ "sae_top_1_test_accuracy": 0.899,
142
+ "sae_top_2_test_accuracy": 0.922,
143
+ "sae_top_5_test_accuracy": 0.934,
144
+ "sae_top_10_test_accuracy": null,
145
+ "sae_top_20_test_accuracy": null,
146
+ "sae_top_50_test_accuracy": null,
147
+ "sae_top_100_test_accuracy": null
148
+ },
149
+ {
150
+ "dataset_name": "codeparrot/github-code_results",
151
+ "llm_test_accuracy": 0.9708000421524048,
152
+ "llm_top_1_test_accuracy": 0.6612,
153
+ "llm_top_2_test_accuracy": 0.6961999999999999,
154
+ "llm_top_5_test_accuracy": 0.7626,
155
+ "llm_top_10_test_accuracy": null,
156
+ "llm_top_20_test_accuracy": null,
157
+ "llm_top_50_test_accuracy": null,
158
+ "llm_top_100_test_accuracy": null,
159
+ "sae_test_accuracy": 0.9714000582695007,
160
+ "sae_top_1_test_accuracy": 0.8156000000000001,
161
+ "sae_top_2_test_accuracy": 0.849,
162
+ "sae_top_5_test_accuracy": 0.8626000000000001,
163
+ "sae_top_10_test_accuracy": null,
164
+ "sae_top_20_test_accuracy": null,
165
+ "sae_top_50_test_accuracy": null,
166
+ "sae_top_100_test_accuracy": null
167
+ },
168
+ {
169
+ "dataset_name": "fancyzhx/ag_news_results",
170
+ "llm_test_accuracy": 0.9512500613927841,
171
+ "llm_top_1_test_accuracy": 0.6367499999999999,
172
+ "llm_top_2_test_accuracy": 0.76075,
173
+ "llm_top_5_test_accuracy": 0.8255,
174
+ "llm_top_10_test_accuracy": null,
175
+ "llm_top_20_test_accuracy": null,
176
+ "llm_top_50_test_accuracy": null,
177
+ "llm_top_100_test_accuracy": null,
178
+ "sae_test_accuracy": 0.9507500529289246,
179
+ "sae_top_1_test_accuracy": 0.6825,
180
+ "sae_top_2_test_accuracy": 0.8315,
181
+ "sae_top_5_test_accuracy": 0.87275,
182
+ "sae_top_10_test_accuracy": null,
183
+ "sae_top_20_test_accuracy": null,
184
+ "sae_top_50_test_accuracy": null,
185
+ "sae_top_100_test_accuracy": null
186
+ },
187
+ {
188
+ "dataset_name": "Helsinki-NLP/europarl_results",
189
+ "llm_test_accuracy": 0.9994000434875489,
190
+ "llm_top_1_test_accuracy": 0.6411999999999999,
191
+ "llm_top_2_test_accuracy": 0.7868,
192
+ "llm_top_5_test_accuracy": 0.9,
193
+ "llm_top_10_test_accuracy": null,
194
+ "llm_top_20_test_accuracy": null,
195
+ "llm_top_50_test_accuracy": null,
196
+ "llm_top_100_test_accuracy": null,
197
+ "sae_test_accuracy": 0.9996000289916992,
198
+ "sae_top_1_test_accuracy": 0.8757999999999999,
199
+ "sae_top_2_test_accuracy": 0.9869999999999999,
200
+ "sae_top_5_test_accuracy": 0.9949999999999999,
201
+ "sae_top_10_test_accuracy": null,
202
+ "sae_top_20_test_accuracy": null,
203
+ "sae_top_50_test_accuracy": null,
204
+ "sae_top_100_test_accuracy": null
205
+ }
206
+ ],
207
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
208
+ "sae_lens_id": "custom_sae",
209
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_2",
210
+ "sae_lens_version": "5.4.2",
211
+ "sae_cfg_dict": {
212
+ "model_name": "gemma-2-2b",
213
+ "d_in": 2304,
214
+ "d_sae": 16384,
215
+ "hook_layer": 12,
216
+ "hook_name": "blocks.12.hook_resid_post",
217
+ "context_size": null,
218
+ "hook_head_index": null,
219
+ "architecture": "standard_april_update",
220
+ "apply_b_dec_to_input": null,
221
+ "finetuning_scaling_factor": null,
222
+ "activation_fn_str": "",
223
+ "prepend_bos": true,
224
+ "normalize_activations": "none",
225
+ "dtype": "bfloat16",
226
+ "device": "",
227
+ "dataset_path": "",
228
+ "dataset_trust_remote_code": true,
229
+ "seqpos_slice": [
230
+ null
231
+ ],
232
+ "training_tokens": -100000,
233
+ "sae_lens_training_version": null,
234
+ "neuronpedia_id": null
235
+ },
236
+ "eval_result_unstructured": {
237
+ "LabHC/bias_in_bios_class_set1_results": {
238
+ "sae_test_accuracy": {
239
+ "0": 0.9500000476837158,
240
+ "1": 0.9650000333786011,
241
+ "2": 0.9530000686645508,
242
+ "6": 0.9860000610351562,
243
+ "9": 0.9800000190734863
244
+ },
245
+ "llm_test_accuracy": {
246
+ "0": 0.9510000348091125,
247
+ "1": 0.9670000672340393,
248
+ "2": 0.9530000686645508,
249
+ "6": 0.987000048160553,
250
+ "9": 0.9760000705718994
251
+ },
252
+ "llm_top_1_test_accuracy": {
253
+ "0": 0.577,
254
+ "1": 0.613,
255
+ "2": 0.662,
256
+ "6": 0.787,
257
+ "9": 0.56
258
+ },
259
+ "llm_top_2_test_accuracy": {
260
+ "0": 0.574,
261
+ "1": 0.66,
262
+ "2": 0.718,
263
+ "6": 0.811,
264
+ "9": 0.714
265
+ },
266
+ "llm_top_5_test_accuracy": {
267
+ "0": 0.713,
268
+ "1": 0.711,
269
+ "2": 0.755,
270
+ "6": 0.895,
271
+ "9": 0.861
272
+ },
273
+ "sae_top_1_test_accuracy": {
274
+ "0": 0.62,
275
+ "1": 0.688,
276
+ "2": 0.877,
277
+ "6": 0.982,
278
+ "9": 0.922
279
+ },
280
+ "sae_top_2_test_accuracy": {
281
+ "0": 0.645,
282
+ "1": 0.764,
283
+ "2": 0.878,
284
+ "6": 0.982,
285
+ "9": 0.921
286
+ },
287
+ "sae_top_5_test_accuracy": {
288
+ "0": 0.847,
289
+ "1": 0.872,
290
+ "2": 0.897,
291
+ "6": 0.986,
292
+ "9": 0.941
293
+ }
294
+ },
295
+ "LabHC/bias_in_bios_class_set2_results": {
296
+ "sae_test_accuracy": {
297
+ "11": 0.9660000205039978,
298
+ "13": 0.956000030040741,
299
+ "14": 0.9530000686645508,
300
+ "18": 0.9420000314712524,
301
+ "19": 0.9600000381469727
302
+ },
303
+ "llm_test_accuracy": {
304
+ "11": 0.968000054359436,
305
+ "13": 0.9500000476837158,
306
+ "14": 0.956000030040741,
307
+ "18": 0.9350000619888306,
308
+ "19": 0.9620000720024109
309
+ },
310
+ "llm_top_1_test_accuracy": {
311
+ "11": 0.545,
312
+ "13": 0.666,
313
+ "14": 0.649,
314
+ "18": 0.693,
315
+ "19": 0.79
316
+ },
317
+ "llm_top_2_test_accuracy": {
318
+ "11": 0.692,
319
+ "13": 0.724,
320
+ "14": 0.68,
321
+ "18": 0.732,
322
+ "19": 0.769
323
+ },
324
+ "llm_top_5_test_accuracy": {
325
+ "11": 0.793,
326
+ "13": 0.751,
327
+ "14": 0.718,
328
+ "18": 0.723,
329
+ "19": 0.83
330
+ },
331
+ "sae_top_1_test_accuracy": {
332
+ "11": 0.734,
333
+ "13": 0.691,
334
+ "14": 0.855,
335
+ "18": 0.736,
336
+ "19": 0.847
337
+ },
338
+ "sae_top_2_test_accuracy": {
339
+ "11": 0.865,
340
+ "13": 0.698,
341
+ "14": 0.859,
342
+ "18": 0.729,
343
+ "19": 0.849
344
+ },
345
+ "sae_top_5_test_accuracy": {
346
+ "11": 0.952,
347
+ "13": 0.848,
348
+ "14": 0.879,
349
+ "18": 0.887,
350
+ "19": 0.85
351
+ }
352
+ },
353
+ "LabHC/bias_in_bios_class_set3_results": {
354
+ "sae_test_accuracy": {
355
+ "20": 0.9610000252723694,
356
+ "21": 0.9140000343322754,
357
+ "22": 0.9270000457763672,
358
+ "25": 0.971000075340271,
359
+ "26": 0.8980000615119934
360
+ },
361
+ "llm_test_accuracy": {
362
+ "20": 0.9580000638961792,
363
+ "21": 0.9240000247955322,
364
+ "22": 0.9200000166893005,
365
+ "25": 0.9630000591278076,
366
+ "26": 0.9010000228881836
367
+ },
368
+ "llm_top_1_test_accuracy": {
369
+ "20": 0.696,
370
+ "21": 0.757,
371
+ "22": 0.637,
372
+ "25": 0.692,
373
+ "26": 0.631
374
+ },
375
+ "llm_top_2_test_accuracy": {
376
+ "20": 0.818,
377
+ "21": 0.774,
378
+ "22": 0.688,
379
+ "25": 0.762,
380
+ "26": 0.686
381
+ },
382
+ "llm_top_5_test_accuracy": {
383
+ "20": 0.862,
384
+ "21": 0.792,
385
+ "22": 0.748,
386
+ "25": 0.791,
387
+ "26": 0.673
388
+ },
389
+ "sae_top_1_test_accuracy": {
390
+ "20": 0.88,
391
+ "21": 0.753,
392
+ "22": 0.76,
393
+ "25": 0.89,
394
+ "26": 0.705
395
+ },
396
+ "sae_top_2_test_accuracy": {
397
+ "20": 0.911,
398
+ "21": 0.816,
399
+ "22": 0.884,
400
+ "25": 0.893,
401
+ "26": 0.745
402
+ },
403
+ "sae_top_5_test_accuracy": {
404
+ "20": 0.941,
405
+ "21": 0.847,
406
+ "22": 0.892,
407
+ "25": 0.896,
408
+ "26": 0.773
409
+ }
410
+ },
411
+ "canrager/amazon_reviews_mcauley_1and5_results": {
412
+ "sae_test_accuracy": {
413
+ "1": 0.9470000267028809,
414
+ "2": 0.9320000410079956,
415
+ "3": 0.9230000376701355,
416
+ "5": 0.9180000424385071,
417
+ "6": 0.8680000305175781
418
+ },
419
+ "llm_test_accuracy": {
420
+ "1": 0.940000057220459,
421
+ "2": 0.9340000152587891,
422
+ "3": 0.9200000166893005,
423
+ "5": 0.9150000214576721,
424
+ "6": 0.8610000610351562
425
+ },
426
+ "llm_top_1_test_accuracy": {
427
+ "1": 0.662,
428
+ "2": 0.599,
429
+ "3": 0.592,
430
+ "5": 0.57,
431
+ "6": 0.58
432
+ },
433
+ "llm_top_2_test_accuracy": {
434
+ "1": 0.748,
435
+ "2": 0.642,
436
+ "3": 0.6,
437
+ "5": 0.625,
438
+ "6": 0.601
439
+ },
440
+ "llm_top_5_test_accuracy": {
441
+ "1": 0.764,
442
+ "2": 0.646,
443
+ "3": 0.639,
444
+ "5": 0.638,
445
+ "6": 0.677
446
+ },
447
+ "sae_top_1_test_accuracy": {
448
+ "1": 0.872,
449
+ "2": 0.605,
450
+ "3": 0.686,
451
+ "5": 0.784,
452
+ "6": 0.726
453
+ },
454
+ "sae_top_2_test_accuracy": {
455
+ "1": 0.892,
456
+ "2": 0.63,
457
+ "3": 0.695,
458
+ "5": 0.787,
459
+ "6": 0.736
460
+ },
461
+ "sae_top_5_test_accuracy": {
462
+ "1": 0.909,
463
+ "2": 0.827,
464
+ "3": 0.782,
465
+ "5": 0.861,
466
+ "6": 0.769
467
+ }
468
+ },
469
+ "canrager/amazon_reviews_mcauley_1and5_sentiment_results": {
470
+ "sae_test_accuracy": {
471
+ "1.0": 0.9790000319480896,
472
+ "5.0": 0.9750000238418579
473
+ },
474
+ "llm_test_accuracy": {
475
+ "1.0": 0.9800000190734863,
476
+ "5.0": 0.9820000529289246
477
+ },
478
+ "llm_top_1_test_accuracy": {
479
+ "1.0": 0.673,
480
+ "5.0": 0.673
481
+ },
482
+ "llm_top_2_test_accuracy": {
483
+ "1.0": 0.724,
484
+ "5.0": 0.724
485
+ },
486
+ "llm_top_5_test_accuracy": {
487
+ "1.0": 0.766,
488
+ "5.0": 0.766
489
+ },
490
+ "sae_top_1_test_accuracy": {
491
+ "1.0": 0.899,
492
+ "5.0": 0.899
493
+ },
494
+ "sae_top_2_test_accuracy": {
495
+ "1.0": 0.922,
496
+ "5.0": 0.922
497
+ },
498
+ "sae_top_5_test_accuracy": {
499
+ "1.0": 0.934,
500
+ "5.0": 0.934
501
+ }
502
+ },
503
+ "codeparrot/github-code_results": {
504
+ "sae_test_accuracy": {
505
+ "C": 0.9570000171661377,
506
+ "Python": 0.9850000739097595,
507
+ "HTML": 0.9850000739097595,
508
+ "Java": 0.9720000624656677,
509
+ "PHP": 0.9580000638961792
510
+ },
511
+ "llm_test_accuracy": {
512
+ "C": 0.9580000638961792,
513
+ "Python": 0.9850000739097595,
514
+ "HTML": 0.9890000224113464,
515
+ "Java": 0.9660000205039978,
516
+ "PHP": 0.956000030040741
517
+ },
518
+ "llm_top_1_test_accuracy": {
519
+ "C": 0.672,
520
+ "Python": 0.643,
521
+ "HTML": 0.788,
522
+ "Java": 0.616,
523
+ "PHP": 0.587
524
+ },
525
+ "llm_top_2_test_accuracy": {
526
+ "C": 0.653,
527
+ "Python": 0.675,
528
+ "HTML": 0.826,
529
+ "Java": 0.685,
530
+ "PHP": 0.642
531
+ },
532
+ "llm_top_5_test_accuracy": {
533
+ "C": 0.758,
534
+ "Python": 0.731,
535
+ "HTML": 0.898,
536
+ "Java": 0.722,
537
+ "PHP": 0.704
538
+ },
539
+ "sae_top_1_test_accuracy": {
540
+ "C": 0.617,
541
+ "Python": 0.931,
542
+ "HTML": 0.891,
543
+ "Java": 0.717,
544
+ "PHP": 0.922
545
+ },
546
+ "sae_top_2_test_accuracy": {
547
+ "C": 0.69,
548
+ "Python": 0.932,
549
+ "HTML": 0.905,
550
+ "Java": 0.798,
551
+ "PHP": 0.92
552
+ },
553
+ "sae_top_5_test_accuracy": {
554
+ "C": 0.732,
555
+ "Python": 0.957,
556
+ "HTML": 0.912,
557
+ "Java": 0.789,
558
+ "PHP": 0.923
559
+ }
560
+ },
561
+ "fancyzhx/ag_news_results": {
562
+ "sae_test_accuracy": {
563
+ "0": 0.937000036239624,
564
+ "1": 0.987000048160553,
565
+ "2": 0.9260000586509705,
566
+ "3": 0.9530000686645508
567
+ },
568
+ "llm_test_accuracy": {
569
+ "0": 0.940000057220459,
570
+ "1": 0.9850000739097595,
571
+ "2": 0.9300000667572021,
572
+ "3": 0.9500000476837158
573
+ },
574
+ "llm_top_1_test_accuracy": {
575
+ "0": 0.568,
576
+ "1": 0.671,
577
+ "2": 0.667,
578
+ "3": 0.641
579
+ },
580
+ "llm_top_2_test_accuracy": {
581
+ "0": 0.802,
582
+ "1": 0.802,
583
+ "2": 0.701,
584
+ "3": 0.738
585
+ },
586
+ "llm_top_5_test_accuracy": {
587
+ "0": 0.813,
588
+ "1": 0.884,
589
+ "2": 0.762,
590
+ "3": 0.843
591
+ },
592
+ "sae_top_1_test_accuracy": {
593
+ "0": 0.719,
594
+ "1": 0.637,
595
+ "2": 0.785,
596
+ "3": 0.589
597
+ },
598
+ "sae_top_2_test_accuracy": {
599
+ "0": 0.847,
600
+ "1": 0.842,
601
+ "2": 0.835,
602
+ "3": 0.802
603
+ },
604
+ "sae_top_5_test_accuracy": {
605
+ "0": 0.86,
606
+ "1": 0.95,
607
+ "2": 0.829,
608
+ "3": 0.852
609
+ }
610
+ },
611
+ "Helsinki-NLP/europarl_results": {
612
+ "sae_test_accuracy": {
613
+ "en": 0.999000072479248,
614
+ "fr": 1.0,
615
+ "de": 1.0,
616
+ "es": 0.999000072479248,
617
+ "nl": 1.0
618
+ },
619
+ "llm_test_accuracy": {
620
+ "en": 0.999000072479248,
621
+ "fr": 0.999000072479248,
622
+ "de": 1.0,
623
+ "es": 1.0,
624
+ "nl": 0.999000072479248
625
+ },
626
+ "llm_top_1_test_accuracy": {
627
+ "en": 0.732,
628
+ "fr": 0.587,
629
+ "de": 0.759,
630
+ "es": 0.489,
631
+ "nl": 0.639
632
+ },
633
+ "llm_top_2_test_accuracy": {
634
+ "en": 0.834,
635
+ "fr": 0.604,
636
+ "de": 0.84,
637
+ "es": 0.907,
638
+ "nl": 0.749
639
+ },
640
+ "llm_top_5_test_accuracy": {
641
+ "en": 0.889,
642
+ "fr": 0.927,
643
+ "de": 0.834,
644
+ "es": 0.977,
645
+ "nl": 0.873
646
+ },
647
+ "sae_top_1_test_accuracy": {
648
+ "en": 0.599,
649
+ "fr": 0.991,
650
+ "de": 0.914,
651
+ "es": 0.961,
652
+ "nl": 0.914
653
+ },
654
+ "sae_top_2_test_accuracy": {
655
+ "en": 0.982,
656
+ "fr": 0.99,
657
+ "de": 0.992,
658
+ "es": 0.973,
659
+ "nl": 0.998
660
+ },
661
+ "sae_top_5_test_accuracy": {
662
+ "en": 0.999,
663
+ "fr": 0.989,
664
+ "de": 0.992,
665
+ "es": 0.995,
666
+ "nl": 1.0
667
+ }
668
+ }
669
+ }
670
+ }
eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_3_custom_sae_eval_results.json ADDED
@@ -0,0 +1,670 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "sparse_probing",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "LabHC/bias_in_bios_class_set2",
8
+ "LabHC/bias_in_bios_class_set3",
9
+ "canrager/amazon_reviews_mcauley_1and5",
10
+ "canrager/amazon_reviews_mcauley_1and5_sentiment",
11
+ "codeparrot/github-code",
12
+ "fancyzhx/ag_news",
13
+ "Helsinki-NLP/europarl"
14
+ ],
15
+ "probe_train_set_size": 4000,
16
+ "probe_test_set_size": 1000,
17
+ "context_length": 128,
18
+ "sae_batch_size": 125,
19
+ "llm_batch_size": 32,
20
+ "llm_dtype": "bfloat16",
21
+ "model_name": "gemma-2-2b",
22
+ "k_values": [
23
+ 1,
24
+ 2,
25
+ 5
26
+ ],
27
+ "lower_vram_usage": false
28
+ },
29
+ "eval_id": "7262e7f4-4f8b-4e3a-9ec7-d12b95ffd7d7",
30
+ "datetime_epoch_millis": 1740086196772,
31
+ "eval_result_metrics": {
32
+ "llm": {
33
+ "llm_test_accuracy": 0.9588312957435847,
34
+ "llm_top_1_test_accuracy": 0.6504687499999999,
35
+ "llm_top_2_test_accuracy": 0.7214187500000001,
36
+ "llm_top_5_test_accuracy": 0.7812625,
37
+ "llm_top_10_test_accuracy": null,
38
+ "llm_top_20_test_accuracy": null,
39
+ "llm_top_50_test_accuracy": null,
40
+ "llm_top_100_test_accuracy": null
41
+ },
42
+ "sae": {
43
+ "sae_test_accuracy": 0.9566500425338744,
44
+ "sae_top_1_test_accuracy": 0.78530625,
45
+ "sae_top_2_test_accuracy": 0.8511187499999999,
46
+ "sae_top_5_test_accuracy": 0.8921937500000001,
47
+ "sae_top_10_test_accuracy": null,
48
+ "sae_top_20_test_accuracy": null,
49
+ "sae_top_50_test_accuracy": null,
50
+ "sae_top_100_test_accuracy": null
51
+ }
52
+ },
53
+ "eval_result_details": [
54
+ {
55
+ "dataset_name": "LabHC/bias_in_bios_class_set1_results",
56
+ "llm_test_accuracy": 0.966800057888031,
57
+ "llm_top_1_test_accuracy": 0.6397999999999999,
58
+ "llm_top_2_test_accuracy": 0.6954,
59
+ "llm_top_5_test_accuracy": 0.7869999999999999,
60
+ "llm_top_10_test_accuracy": null,
61
+ "llm_top_20_test_accuracy": null,
62
+ "llm_top_50_test_accuracy": null,
63
+ "llm_top_100_test_accuracy": null,
64
+ "sae_test_accuracy": 0.9648000597953796,
65
+ "sae_top_1_test_accuracy": 0.8404,
66
+ "sae_top_2_test_accuracy": 0.8585999999999998,
67
+ "sae_top_5_test_accuracy": 0.9042,
68
+ "sae_top_10_test_accuracy": null,
69
+ "sae_top_20_test_accuracy": null,
70
+ "sae_top_50_test_accuracy": null,
71
+ "sae_top_100_test_accuracy": null
72
+ },
73
+ {
74
+ "dataset_name": "LabHC/bias_in_bios_class_set2_results",
75
+ "llm_test_accuracy": 0.9542000532150269,
76
+ "llm_top_1_test_accuracy": 0.6686,
77
+ "llm_top_2_test_accuracy": 0.7194,
78
+ "llm_top_5_test_accuracy": 0.763,
79
+ "llm_top_10_test_accuracy": null,
80
+ "llm_top_20_test_accuracy": null,
81
+ "llm_top_50_test_accuracy": null,
82
+ "llm_top_100_test_accuracy": null,
83
+ "sae_test_accuracy": 0.9496000528335571,
84
+ "sae_top_1_test_accuracy": 0.7438,
85
+ "sae_top_2_test_accuracy": 0.7909999999999999,
86
+ "sae_top_5_test_accuracy": 0.8672000000000001,
87
+ "sae_top_10_test_accuracy": null,
88
+ "sae_top_20_test_accuracy": null,
89
+ "sae_top_50_test_accuracy": null,
90
+ "sae_top_100_test_accuracy": null
91
+ },
92
+ {
93
+ "dataset_name": "LabHC/bias_in_bios_class_set3_results",
94
+ "llm_test_accuracy": 0.9332000374794006,
95
+ "llm_top_1_test_accuracy": 0.6826000000000001,
96
+ "llm_top_2_test_accuracy": 0.7456,
97
+ "llm_top_5_test_accuracy": 0.7732,
98
+ "llm_top_10_test_accuracy": null,
99
+ "llm_top_20_test_accuracy": null,
100
+ "llm_top_50_test_accuracy": null,
101
+ "llm_top_100_test_accuracy": null,
102
+ "sae_test_accuracy": 0.9284000396728516,
103
+ "sae_top_1_test_accuracy": 0.8187999999999999,
104
+ "sae_top_2_test_accuracy": 0.8392,
105
+ "sae_top_5_test_accuracy": 0.8692,
106
+ "sae_top_10_test_accuracy": null,
107
+ "sae_top_20_test_accuracy": null,
108
+ "sae_top_50_test_accuracy": null,
109
+ "sae_top_100_test_accuracy": null
110
+ },
111
+ {
112
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results",
113
+ "llm_test_accuracy": 0.9140000343322754,
114
+ "llm_top_1_test_accuracy": 0.6006,
115
+ "llm_top_2_test_accuracy": 0.6432,
116
+ "llm_top_5_test_accuracy": 0.6728000000000001,
117
+ "llm_top_10_test_accuracy": null,
118
+ "llm_top_20_test_accuracy": null,
119
+ "llm_top_50_test_accuracy": null,
120
+ "llm_top_100_test_accuracy": null,
121
+ "sae_test_accuracy": 0.9182000398635864,
122
+ "sae_top_1_test_accuracy": 0.692,
123
+ "sae_top_2_test_accuracy": 0.7906000000000001,
124
+ "sae_top_5_test_accuracy": 0.826,
125
+ "sae_top_10_test_accuracy": null,
126
+ "sae_top_20_test_accuracy": null,
127
+ "sae_top_50_test_accuracy": null,
128
+ "sae_top_100_test_accuracy": null
129
+ },
130
+ {
131
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results",
132
+ "llm_test_accuracy": 0.9810000360012054,
133
+ "llm_top_1_test_accuracy": 0.673,
134
+ "llm_top_2_test_accuracy": 0.724,
135
+ "llm_top_5_test_accuracy": 0.766,
136
+ "llm_top_10_test_accuracy": null,
137
+ "llm_top_20_test_accuracy": null,
138
+ "llm_top_50_test_accuracy": null,
139
+ "llm_top_100_test_accuracy": null,
140
+ "sae_test_accuracy": 0.9735000431537628,
141
+ "sae_top_1_test_accuracy": 0.831,
142
+ "sae_top_2_test_accuracy": 0.926,
143
+ "sae_top_5_test_accuracy": 0.939,
144
+ "sae_top_10_test_accuracy": null,
145
+ "sae_top_20_test_accuracy": null,
146
+ "sae_top_50_test_accuracy": null,
147
+ "sae_top_100_test_accuracy": null
148
+ },
149
+ {
150
+ "dataset_name": "codeparrot/github-code_results",
151
+ "llm_test_accuracy": 0.9708000421524048,
152
+ "llm_top_1_test_accuracy": 0.6612,
153
+ "llm_top_2_test_accuracy": 0.6961999999999999,
154
+ "llm_top_5_test_accuracy": 0.7626,
155
+ "llm_top_10_test_accuracy": null,
156
+ "llm_top_20_test_accuracy": null,
157
+ "llm_top_50_test_accuracy": null,
158
+ "llm_top_100_test_accuracy": null,
159
+ "sae_test_accuracy": 0.969200050830841,
160
+ "sae_top_1_test_accuracy": 0.7878000000000001,
161
+ "sae_top_2_test_accuracy": 0.8324,
162
+ "sae_top_5_test_accuracy": 0.8664000000000002,
163
+ "sae_top_10_test_accuracy": null,
164
+ "sae_top_20_test_accuracy": null,
165
+ "sae_top_50_test_accuracy": null,
166
+ "sae_top_100_test_accuracy": null
167
+ },
168
+ {
169
+ "dataset_name": "fancyzhx/ag_news_results",
170
+ "llm_test_accuracy": 0.9512500613927841,
171
+ "llm_top_1_test_accuracy": 0.6367499999999999,
172
+ "llm_top_2_test_accuracy": 0.76075,
173
+ "llm_top_5_test_accuracy": 0.8255,
174
+ "llm_top_10_test_accuracy": null,
175
+ "llm_top_20_test_accuracy": null,
176
+ "llm_top_50_test_accuracy": null,
177
+ "llm_top_100_test_accuracy": null,
178
+ "sae_test_accuracy": 0.9505000412464142,
179
+ "sae_top_1_test_accuracy": 0.67225,
180
+ "sae_top_2_test_accuracy": 0.79175,
181
+ "sae_top_5_test_accuracy": 0.86875,
182
+ "sae_top_10_test_accuracy": null,
183
+ "sae_top_20_test_accuracy": null,
184
+ "sae_top_50_test_accuracy": null,
185
+ "sae_top_100_test_accuracy": null
186
+ },
187
+ {
188
+ "dataset_name": "Helsinki-NLP/europarl_results",
189
+ "llm_test_accuracy": 0.9994000434875489,
190
+ "llm_top_1_test_accuracy": 0.6411999999999999,
191
+ "llm_top_2_test_accuracy": 0.7868,
192
+ "llm_top_5_test_accuracy": 0.9,
193
+ "llm_top_10_test_accuracy": null,
194
+ "llm_top_20_test_accuracy": null,
195
+ "llm_top_50_test_accuracy": null,
196
+ "llm_top_100_test_accuracy": null,
197
+ "sae_test_accuracy": 0.9990000128746033,
198
+ "sae_top_1_test_accuracy": 0.8964000000000001,
199
+ "sae_top_2_test_accuracy": 0.9794,
200
+ "sae_top_5_test_accuracy": 0.9968,
201
+ "sae_top_10_test_accuracy": null,
202
+ "sae_top_20_test_accuracy": null,
203
+ "sae_top_50_test_accuracy": null,
204
+ "sae_top_100_test_accuracy": null
205
+ }
206
+ ],
207
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
208
+ "sae_lens_id": "custom_sae",
209
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_3",
210
+ "sae_lens_version": "5.4.2",
211
+ "sae_cfg_dict": {
212
+ "model_name": "gemma-2-2b",
213
+ "d_in": 2304,
214
+ "d_sae": 16384,
215
+ "hook_layer": 12,
216
+ "hook_name": "blocks.12.hook_resid_post",
217
+ "context_size": null,
218
+ "hook_head_index": null,
219
+ "architecture": "standard_april_update",
220
+ "apply_b_dec_to_input": null,
221
+ "finetuning_scaling_factor": null,
222
+ "activation_fn_str": "",
223
+ "prepend_bos": true,
224
+ "normalize_activations": "none",
225
+ "dtype": "bfloat16",
226
+ "device": "",
227
+ "dataset_path": "",
228
+ "dataset_trust_remote_code": true,
229
+ "seqpos_slice": [
230
+ null
231
+ ],
232
+ "training_tokens": -100000,
233
+ "sae_lens_training_version": null,
234
+ "neuronpedia_id": null
235
+ },
236
+ "eval_result_unstructured": {
237
+ "LabHC/bias_in_bios_class_set1_results": {
238
+ "sae_test_accuracy": {
239
+ "0": 0.9480000734329224,
240
+ "1": 0.9670000672340393,
241
+ "2": 0.9450000524520874,
242
+ "6": 0.9850000739097595,
243
+ "9": 0.9790000319480896
244
+ },
245
+ "llm_test_accuracy": {
246
+ "0": 0.9510000348091125,
247
+ "1": 0.9670000672340393,
248
+ "2": 0.9530000686645508,
249
+ "6": 0.987000048160553,
250
+ "9": 0.9760000705718994
251
+ },
252
+ "llm_top_1_test_accuracy": {
253
+ "0": 0.577,
254
+ "1": 0.613,
255
+ "2": 0.662,
256
+ "6": 0.787,
257
+ "9": 0.56
258
+ },
259
+ "llm_top_2_test_accuracy": {
260
+ "0": 0.574,
261
+ "1": 0.66,
262
+ "2": 0.718,
263
+ "6": 0.811,
264
+ "9": 0.714
265
+ },
266
+ "llm_top_5_test_accuracy": {
267
+ "0": 0.713,
268
+ "1": 0.711,
269
+ "2": 0.755,
270
+ "6": 0.895,
271
+ "9": 0.861
272
+ },
273
+ "sae_top_1_test_accuracy": {
274
+ "0": 0.616,
275
+ "1": 0.816,
276
+ "2": 0.881,
277
+ "6": 0.977,
278
+ "9": 0.912
279
+ },
280
+ "sae_top_2_test_accuracy": {
281
+ "0": 0.641,
282
+ "1": 0.853,
283
+ "2": 0.891,
284
+ "6": 0.978,
285
+ "9": 0.93
286
+ },
287
+ "sae_top_5_test_accuracy": {
288
+ "0": 0.846,
289
+ "1": 0.86,
290
+ "2": 0.894,
291
+ "6": 0.991,
292
+ "9": 0.93
293
+ }
294
+ },
295
+ "LabHC/bias_in_bios_class_set2_results": {
296
+ "sae_test_accuracy": {
297
+ "11": 0.9580000638961792,
298
+ "13": 0.9520000219345093,
299
+ "14": 0.9490000605583191,
300
+ "18": 0.9310000538825989,
301
+ "19": 0.9580000638961792
302
+ },
303
+ "llm_test_accuracy": {
304
+ "11": 0.968000054359436,
305
+ "13": 0.9500000476837158,
306
+ "14": 0.956000030040741,
307
+ "18": 0.9350000619888306,
308
+ "19": 0.9620000720024109
309
+ },
310
+ "llm_top_1_test_accuracy": {
311
+ "11": 0.545,
312
+ "13": 0.666,
313
+ "14": 0.649,
314
+ "18": 0.693,
315
+ "19": 0.79
316
+ },
317
+ "llm_top_2_test_accuracy": {
318
+ "11": 0.692,
319
+ "13": 0.724,
320
+ "14": 0.68,
321
+ "18": 0.732,
322
+ "19": 0.769
323
+ },
324
+ "llm_top_5_test_accuracy": {
325
+ "11": 0.793,
326
+ "13": 0.751,
327
+ "14": 0.718,
328
+ "18": 0.723,
329
+ "19": 0.83
330
+ },
331
+ "sae_top_1_test_accuracy": {
332
+ "11": 0.726,
333
+ "13": 0.683,
334
+ "14": 0.749,
335
+ "18": 0.723,
336
+ "19": 0.838
337
+ },
338
+ "sae_top_2_test_accuracy": {
339
+ "11": 0.847,
340
+ "13": 0.697,
341
+ "14": 0.86,
342
+ "18": 0.72,
343
+ "19": 0.831
344
+ },
345
+ "sae_top_5_test_accuracy": {
346
+ "11": 0.942,
347
+ "13": 0.756,
348
+ "14": 0.894,
349
+ "18": 0.903,
350
+ "19": 0.841
351
+ }
352
+ },
353
+ "LabHC/bias_in_bios_class_set3_results": {
354
+ "sae_test_accuracy": {
355
+ "20": 0.9690000414848328,
356
+ "21": 0.9240000247955322,
357
+ "22": 0.9130000472068787,
358
+ "25": 0.9620000720024109,
359
+ "26": 0.8740000128746033
360
+ },
361
+ "llm_test_accuracy": {
362
+ "20": 0.9580000638961792,
363
+ "21": 0.9240000247955322,
364
+ "22": 0.9200000166893005,
365
+ "25": 0.9630000591278076,
366
+ "26": 0.9010000228881836
367
+ },
368
+ "llm_top_1_test_accuracy": {
369
+ "20": 0.696,
370
+ "21": 0.757,
371
+ "22": 0.637,
372
+ "25": 0.692,
373
+ "26": 0.631
374
+ },
375
+ "llm_top_2_test_accuracy": {
376
+ "20": 0.818,
377
+ "21": 0.774,
378
+ "22": 0.688,
379
+ "25": 0.762,
380
+ "26": 0.686
381
+ },
382
+ "llm_top_5_test_accuracy": {
383
+ "20": 0.862,
384
+ "21": 0.792,
385
+ "22": 0.748,
386
+ "25": 0.791,
387
+ "26": 0.673
388
+ },
389
+ "sae_top_1_test_accuracy": {
390
+ "20": 0.878,
391
+ "21": 0.745,
392
+ "22": 0.882,
393
+ "25": 0.884,
394
+ "26": 0.705
395
+ },
396
+ "sae_top_2_test_accuracy": {
397
+ "20": 0.907,
398
+ "21": 0.75,
399
+ "22": 0.896,
400
+ "25": 0.892,
401
+ "26": 0.751
402
+ },
403
+ "sae_top_5_test_accuracy": {
404
+ "20": 0.939,
405
+ "21": 0.844,
406
+ "22": 0.897,
407
+ "25": 0.903,
408
+ "26": 0.763
409
+ }
410
+ },
411
+ "canrager/amazon_reviews_mcauley_1and5_results": {
412
+ "sae_test_accuracy": {
413
+ "1": 0.9430000185966492,
414
+ "2": 0.9390000700950623,
415
+ "3": 0.9240000247955322,
416
+ "5": 0.9190000295639038,
417
+ "6": 0.8660000562667847
418
+ },
419
+ "llm_test_accuracy": {
420
+ "1": 0.940000057220459,
421
+ "2": 0.9340000152587891,
422
+ "3": 0.9200000166893005,
423
+ "5": 0.9150000214576721,
424
+ "6": 0.8610000610351562
425
+ },
426
+ "llm_top_1_test_accuracy": {
427
+ "1": 0.662,
428
+ "2": 0.599,
429
+ "3": 0.592,
430
+ "5": 0.57,
431
+ "6": 0.58
432
+ },
433
+ "llm_top_2_test_accuracy": {
434
+ "1": 0.748,
435
+ "2": 0.642,
436
+ "3": 0.6,
437
+ "5": 0.625,
438
+ "6": 0.601
439
+ },
440
+ "llm_top_5_test_accuracy": {
441
+ "1": 0.764,
442
+ "2": 0.646,
443
+ "3": 0.639,
444
+ "5": 0.638,
445
+ "6": 0.677
446
+ },
447
+ "sae_top_1_test_accuracy": {
448
+ "1": 0.89,
449
+ "2": 0.605,
450
+ "3": 0.606,
451
+ "5": 0.657,
452
+ "6": 0.702
453
+ },
454
+ "sae_top_2_test_accuracy": {
455
+ "1": 0.921,
456
+ "2": 0.8,
457
+ "3": 0.64,
458
+ "5": 0.854,
459
+ "6": 0.738
460
+ },
461
+ "sae_top_5_test_accuracy": {
462
+ "1": 0.925,
463
+ "2": 0.823,
464
+ "3": 0.744,
465
+ "5": 0.878,
466
+ "6": 0.76
467
+ }
468
+ },
469
+ "canrager/amazon_reviews_mcauley_1and5_sentiment_results": {
470
+ "sae_test_accuracy": {
471
+ "1.0": 0.9730000495910645,
472
+ "5.0": 0.9740000367164612
473
+ },
474
+ "llm_test_accuracy": {
475
+ "1.0": 0.9800000190734863,
476
+ "5.0": 0.9820000529289246
477
+ },
478
+ "llm_top_1_test_accuracy": {
479
+ "1.0": 0.673,
480
+ "5.0": 0.673
481
+ },
482
+ "llm_top_2_test_accuracy": {
483
+ "1.0": 0.724,
484
+ "5.0": 0.724
485
+ },
486
+ "llm_top_5_test_accuracy": {
487
+ "1.0": 0.766,
488
+ "5.0": 0.766
489
+ },
490
+ "sae_top_1_test_accuracy": {
491
+ "1.0": 0.831,
492
+ "5.0": 0.831
493
+ },
494
+ "sae_top_2_test_accuracy": {
495
+ "1.0": 0.926,
496
+ "5.0": 0.926
497
+ },
498
+ "sae_top_5_test_accuracy": {
499
+ "1.0": 0.939,
500
+ "5.0": 0.939
501
+ }
502
+ },
503
+ "codeparrot/github-code_results": {
504
+ "sae_test_accuracy": {
505
+ "C": 0.9550000429153442,
506
+ "Python": 0.9860000610351562,
507
+ "HTML": 0.9890000224113464,
508
+ "Java": 0.9630000591278076,
509
+ "PHP": 0.9530000686645508
510
+ },
511
+ "llm_test_accuracy": {
512
+ "C": 0.9580000638961792,
513
+ "Python": 0.9850000739097595,
514
+ "HTML": 0.9890000224113464,
515
+ "Java": 0.9660000205039978,
516
+ "PHP": 0.956000030040741
517
+ },
518
+ "llm_top_1_test_accuracy": {
519
+ "C": 0.672,
520
+ "Python": 0.643,
521
+ "HTML": 0.788,
522
+ "Java": 0.616,
523
+ "PHP": 0.587
524
+ },
525
+ "llm_top_2_test_accuracy": {
526
+ "C": 0.653,
527
+ "Python": 0.675,
528
+ "HTML": 0.826,
529
+ "Java": 0.685,
530
+ "PHP": 0.642
531
+ },
532
+ "llm_top_5_test_accuracy": {
533
+ "C": 0.758,
534
+ "Python": 0.731,
535
+ "HTML": 0.898,
536
+ "Java": 0.722,
537
+ "PHP": 0.704
538
+ },
539
+ "sae_top_1_test_accuracy": {
540
+ "C": 0.603,
541
+ "Python": 0.919,
542
+ "HTML": 0.832,
543
+ "Java": 0.707,
544
+ "PHP": 0.878
545
+ },
546
+ "sae_top_2_test_accuracy": {
547
+ "C": 0.698,
548
+ "Python": 0.941,
549
+ "HTML": 0.904,
550
+ "Java": 0.706,
551
+ "PHP": 0.913
552
+ },
553
+ "sae_top_5_test_accuracy": {
554
+ "C": 0.788,
555
+ "Python": 0.946,
556
+ "HTML": 0.903,
557
+ "Java": 0.784,
558
+ "PHP": 0.911
559
+ }
560
+ },
561
+ "fancyzhx/ag_news_results": {
562
+ "sae_test_accuracy": {
563
+ "0": 0.9320000410079956,
564
+ "1": 0.9890000224113464,
565
+ "2": 0.9310000538825989,
566
+ "3": 0.9500000476837158
567
+ },
568
+ "llm_test_accuracy": {
569
+ "0": 0.940000057220459,
570
+ "1": 0.9850000739097595,
571
+ "2": 0.9300000667572021,
572
+ "3": 0.9500000476837158
573
+ },
574
+ "llm_top_1_test_accuracy": {
575
+ "0": 0.568,
576
+ "1": 0.671,
577
+ "2": 0.667,
578
+ "3": 0.641
579
+ },
580
+ "llm_top_2_test_accuracy": {
581
+ "0": 0.802,
582
+ "1": 0.802,
583
+ "2": 0.701,
584
+ "3": 0.738
585
+ },
586
+ "llm_top_5_test_accuracy": {
587
+ "0": 0.813,
588
+ "1": 0.884,
589
+ "2": 0.762,
590
+ "3": 0.843
591
+ },
592
+ "sae_top_1_test_accuracy": {
593
+ "0": 0.715,
594
+ "1": 0.624,
595
+ "2": 0.742,
596
+ "3": 0.608
597
+ },
598
+ "sae_top_2_test_accuracy": {
599
+ "0": 0.829,
600
+ "1": 0.751,
601
+ "2": 0.813,
602
+ "3": 0.774
603
+ },
604
+ "sae_top_5_test_accuracy": {
605
+ "0": 0.855,
606
+ "1": 0.974,
607
+ "2": 0.823,
608
+ "3": 0.823
609
+ }
610
+ },
611
+ "Helsinki-NLP/europarl_results": {
612
+ "sae_test_accuracy": {
613
+ "en": 0.9970000386238098,
614
+ "fr": 1.0,
615
+ "de": 1.0,
616
+ "es": 1.0,
617
+ "nl": 0.9980000257492065
618
+ },
619
+ "llm_test_accuracy": {
620
+ "en": 0.999000072479248,
621
+ "fr": 0.999000072479248,
622
+ "de": 1.0,
623
+ "es": 1.0,
624
+ "nl": 0.999000072479248
625
+ },
626
+ "llm_top_1_test_accuracy": {
627
+ "en": 0.732,
628
+ "fr": 0.587,
629
+ "de": 0.759,
630
+ "es": 0.489,
631
+ "nl": 0.639
632
+ },
633
+ "llm_top_2_test_accuracy": {
634
+ "en": 0.834,
635
+ "fr": 0.604,
636
+ "de": 0.84,
637
+ "es": 0.907,
638
+ "nl": 0.749
639
+ },
640
+ "llm_top_5_test_accuracy": {
641
+ "en": 0.889,
642
+ "fr": 0.927,
643
+ "de": 0.834,
644
+ "es": 0.977,
645
+ "nl": 0.873
646
+ },
647
+ "sae_top_1_test_accuracy": {
648
+ "en": 0.602,
649
+ "fr": 0.99,
650
+ "de": 0.923,
651
+ "es": 0.971,
652
+ "nl": 0.996
653
+ },
654
+ "sae_top_2_test_accuracy": {
655
+ "en": 0.992,
656
+ "fr": 0.99,
657
+ "de": 0.926,
658
+ "es": 0.994,
659
+ "nl": 0.995
660
+ },
661
+ "sae_top_5_test_accuracy": {
662
+ "en": 0.997,
663
+ "fr": 0.995,
664
+ "de": 0.996,
665
+ "es": 0.996,
666
+ "nl": 1.0
667
+ }
668
+ }
669
+ }
670
+ }
eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_4_custom_sae_eval_results.json ADDED
@@ -0,0 +1,670 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "sparse_probing",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "LabHC/bias_in_bios_class_set2",
8
+ "LabHC/bias_in_bios_class_set3",
9
+ "canrager/amazon_reviews_mcauley_1and5",
10
+ "canrager/amazon_reviews_mcauley_1and5_sentiment",
11
+ "codeparrot/github-code",
12
+ "fancyzhx/ag_news",
13
+ "Helsinki-NLP/europarl"
14
+ ],
15
+ "probe_train_set_size": 4000,
16
+ "probe_test_set_size": 1000,
17
+ "context_length": 128,
18
+ "sae_batch_size": 125,
19
+ "llm_batch_size": 32,
20
+ "llm_dtype": "bfloat16",
21
+ "model_name": "gemma-2-2b",
22
+ "k_values": [
23
+ 1,
24
+ 2,
25
+ 5
26
+ ],
27
+ "lower_vram_usage": false
28
+ },
29
+ "eval_id": "9d5c53b8-e2df-4697-8b9c-ace7c3a76589",
30
+ "datetime_epoch_millis": 1740085921105,
31
+ "eval_result_metrics": {
32
+ "llm": {
33
+ "llm_test_accuracy": 0.9588312957435847,
34
+ "llm_top_1_test_accuracy": 0.6504687499999999,
35
+ "llm_top_2_test_accuracy": 0.7214187500000001,
36
+ "llm_top_5_test_accuracy": 0.7812625,
37
+ "llm_top_10_test_accuracy": null,
38
+ "llm_top_20_test_accuracy": null,
39
+ "llm_top_50_test_accuracy": null,
40
+ "llm_top_100_test_accuracy": null
41
+ },
42
+ "sae": {
43
+ "sae_test_accuracy": 0.9546750392764807,
44
+ "sae_top_1_test_accuracy": 0.776225,
45
+ "sae_top_2_test_accuracy": 0.8350624999999998,
46
+ "sae_top_5_test_accuracy": 0.889525,
47
+ "sae_top_10_test_accuracy": null,
48
+ "sae_top_20_test_accuracy": null,
49
+ "sae_top_50_test_accuracy": null,
50
+ "sae_top_100_test_accuracy": null
51
+ }
52
+ },
53
+ "eval_result_details": [
54
+ {
55
+ "dataset_name": "LabHC/bias_in_bios_class_set1_results",
56
+ "llm_test_accuracy": 0.966800057888031,
57
+ "llm_top_1_test_accuracy": 0.6397999999999999,
58
+ "llm_top_2_test_accuracy": 0.6954,
59
+ "llm_top_5_test_accuracy": 0.7869999999999999,
60
+ "llm_top_10_test_accuracy": null,
61
+ "llm_top_20_test_accuracy": null,
62
+ "llm_top_50_test_accuracy": null,
63
+ "llm_top_100_test_accuracy": null,
64
+ "sae_test_accuracy": 0.9640000462532043,
65
+ "sae_top_1_test_accuracy": 0.787,
66
+ "sae_top_2_test_accuracy": 0.8835999999999998,
67
+ "sae_top_5_test_accuracy": 0.9074,
68
+ "sae_top_10_test_accuracy": null,
69
+ "sae_top_20_test_accuracy": null,
70
+ "sae_top_50_test_accuracy": null,
71
+ "sae_top_100_test_accuracy": null
72
+ },
73
+ {
74
+ "dataset_name": "LabHC/bias_in_bios_class_set2_results",
75
+ "llm_test_accuracy": 0.9542000532150269,
76
+ "llm_top_1_test_accuracy": 0.6686,
77
+ "llm_top_2_test_accuracy": 0.7194,
78
+ "llm_top_5_test_accuracy": 0.763,
79
+ "llm_top_10_test_accuracy": null,
80
+ "llm_top_20_test_accuracy": null,
81
+ "llm_top_50_test_accuracy": null,
82
+ "llm_top_100_test_accuracy": null,
83
+ "sae_test_accuracy": 0.9454000353813171,
84
+ "sae_top_1_test_accuracy": 0.7412000000000001,
85
+ "sae_top_2_test_accuracy": 0.7724,
86
+ "sae_top_5_test_accuracy": 0.8852,
87
+ "sae_top_10_test_accuracy": null,
88
+ "sae_top_20_test_accuracy": null,
89
+ "sae_top_50_test_accuracy": null,
90
+ "sae_top_100_test_accuracy": null
91
+ },
92
+ {
93
+ "dataset_name": "LabHC/bias_in_bios_class_set3_results",
94
+ "llm_test_accuracy": 0.9332000374794006,
95
+ "llm_top_1_test_accuracy": 0.6826000000000001,
96
+ "llm_top_2_test_accuracy": 0.7456,
97
+ "llm_top_5_test_accuracy": 0.7732,
98
+ "llm_top_10_test_accuracy": null,
99
+ "llm_top_20_test_accuracy": null,
100
+ "llm_top_50_test_accuracy": null,
101
+ "llm_top_100_test_accuracy": null,
102
+ "sae_test_accuracy": 0.9232000470161438,
103
+ "sae_top_1_test_accuracy": 0.8214,
104
+ "sae_top_2_test_accuracy": 0.8386000000000001,
105
+ "sae_top_5_test_accuracy": 0.8676,
106
+ "sae_top_10_test_accuracy": null,
107
+ "sae_top_20_test_accuracy": null,
108
+ "sae_top_50_test_accuracy": null,
109
+ "sae_top_100_test_accuracy": null
110
+ },
111
+ {
112
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results",
113
+ "llm_test_accuracy": 0.9140000343322754,
114
+ "llm_top_1_test_accuracy": 0.6006,
115
+ "llm_top_2_test_accuracy": 0.6432,
116
+ "llm_top_5_test_accuracy": 0.6728000000000001,
117
+ "llm_top_10_test_accuracy": null,
118
+ "llm_top_20_test_accuracy": null,
119
+ "llm_top_50_test_accuracy": null,
120
+ "llm_top_100_test_accuracy": null,
121
+ "sae_test_accuracy": 0.9134000420570374,
122
+ "sae_top_1_test_accuracy": 0.6828000000000001,
123
+ "sae_top_2_test_accuracy": 0.7731999999999999,
124
+ "sae_top_5_test_accuracy": 0.8162,
125
+ "sae_top_10_test_accuracy": null,
126
+ "sae_top_20_test_accuracy": null,
127
+ "sae_top_50_test_accuracy": null,
128
+ "sae_top_100_test_accuracy": null
129
+ },
130
+ {
131
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results",
132
+ "llm_test_accuracy": 0.9810000360012054,
133
+ "llm_top_1_test_accuracy": 0.673,
134
+ "llm_top_2_test_accuracy": 0.724,
135
+ "llm_top_5_test_accuracy": 0.766,
136
+ "llm_top_10_test_accuracy": null,
137
+ "llm_top_20_test_accuracy": null,
138
+ "llm_top_50_test_accuracy": null,
139
+ "llm_top_100_test_accuracy": null,
140
+ "sae_test_accuracy": 0.9695000350475311,
141
+ "sae_top_1_test_accuracy": 0.81,
142
+ "sae_top_2_test_accuracy": 0.821,
143
+ "sae_top_5_test_accuracy": 0.922,
144
+ "sae_top_10_test_accuracy": null,
145
+ "sae_top_20_test_accuracy": null,
146
+ "sae_top_50_test_accuracy": null,
147
+ "sae_top_100_test_accuracy": null
148
+ },
149
+ {
150
+ "dataset_name": "codeparrot/github-code_results",
151
+ "llm_test_accuracy": 0.9708000421524048,
152
+ "llm_top_1_test_accuracy": 0.6612,
153
+ "llm_top_2_test_accuracy": 0.6961999999999999,
154
+ "llm_top_5_test_accuracy": 0.7626,
155
+ "llm_top_10_test_accuracy": null,
156
+ "llm_top_20_test_accuracy": null,
157
+ "llm_top_50_test_accuracy": null,
158
+ "llm_top_100_test_accuracy": null,
159
+ "sae_test_accuracy": 0.968600046634674,
160
+ "sae_top_1_test_accuracy": 0.8032,
161
+ "sae_top_2_test_accuracy": 0.8301999999999999,
162
+ "sae_top_5_test_accuracy": 0.8710000000000001,
163
+ "sae_top_10_test_accuracy": null,
164
+ "sae_top_20_test_accuracy": null,
165
+ "sae_top_50_test_accuracy": null,
166
+ "sae_top_100_test_accuracy": null
167
+ },
168
+ {
169
+ "dataset_name": "fancyzhx/ag_news_results",
170
+ "llm_test_accuracy": 0.9512500613927841,
171
+ "llm_top_1_test_accuracy": 0.6367499999999999,
172
+ "llm_top_2_test_accuracy": 0.76075,
173
+ "llm_top_5_test_accuracy": 0.8255,
174
+ "llm_top_10_test_accuracy": null,
175
+ "llm_top_20_test_accuracy": null,
176
+ "llm_top_50_test_accuracy": null,
177
+ "llm_top_100_test_accuracy": null,
178
+ "sae_test_accuracy": 0.953500047326088,
179
+ "sae_top_1_test_accuracy": 0.6629999999999999,
180
+ "sae_top_2_test_accuracy": 0.7795,
181
+ "sae_top_5_test_accuracy": 0.85,
182
+ "sae_top_10_test_accuracy": null,
183
+ "sae_top_20_test_accuracy": null,
184
+ "sae_top_50_test_accuracy": null,
185
+ "sae_top_100_test_accuracy": null
186
+ },
187
+ {
188
+ "dataset_name": "Helsinki-NLP/europarl_results",
189
+ "llm_test_accuracy": 0.9994000434875489,
190
+ "llm_top_1_test_accuracy": 0.6411999999999999,
191
+ "llm_top_2_test_accuracy": 0.7868,
192
+ "llm_top_5_test_accuracy": 0.9,
193
+ "llm_top_10_test_accuracy": null,
194
+ "llm_top_20_test_accuracy": null,
195
+ "llm_top_50_test_accuracy": null,
196
+ "llm_top_100_test_accuracy": null,
197
+ "sae_test_accuracy": 0.9998000144958497,
198
+ "sae_top_1_test_accuracy": 0.9012,
199
+ "sae_top_2_test_accuracy": 0.982,
200
+ "sae_top_5_test_accuracy": 0.9968,
201
+ "sae_top_10_test_accuracy": null,
202
+ "sae_top_20_test_accuracy": null,
203
+ "sae_top_50_test_accuracy": null,
204
+ "sae_top_100_test_accuracy": null
205
+ }
206
+ ],
207
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
208
+ "sae_lens_id": "custom_sae",
209
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_4",
210
+ "sae_lens_version": "5.4.2",
211
+ "sae_cfg_dict": {
212
+ "model_name": "gemma-2-2b",
213
+ "d_in": 2304,
214
+ "d_sae": 16384,
215
+ "hook_layer": 12,
216
+ "hook_name": "blocks.12.hook_resid_post",
217
+ "context_size": null,
218
+ "hook_head_index": null,
219
+ "architecture": "standard_april_update",
220
+ "apply_b_dec_to_input": null,
221
+ "finetuning_scaling_factor": null,
222
+ "activation_fn_str": "",
223
+ "prepend_bos": true,
224
+ "normalize_activations": "none",
225
+ "dtype": "bfloat16",
226
+ "device": "",
227
+ "dataset_path": "",
228
+ "dataset_trust_remote_code": true,
229
+ "seqpos_slice": [
230
+ null
231
+ ],
232
+ "training_tokens": -100000,
233
+ "sae_lens_training_version": null,
234
+ "neuronpedia_id": null
235
+ },
236
+ "eval_result_unstructured": {
237
+ "LabHC/bias_in_bios_class_set1_results": {
238
+ "sae_test_accuracy": {
239
+ "0": 0.9450000524520874,
240
+ "1": 0.9690000414848328,
241
+ "2": 0.9510000348091125,
242
+ "6": 0.9830000400543213,
243
+ "9": 0.9720000624656677
244
+ },
245
+ "llm_test_accuracy": {
246
+ "0": 0.9510000348091125,
247
+ "1": 0.9670000672340393,
248
+ "2": 0.9530000686645508,
249
+ "6": 0.987000048160553,
250
+ "9": 0.9760000705718994
251
+ },
252
+ "llm_top_1_test_accuracy": {
253
+ "0": 0.577,
254
+ "1": 0.613,
255
+ "2": 0.662,
256
+ "6": 0.787,
257
+ "9": 0.56
258
+ },
259
+ "llm_top_2_test_accuracy": {
260
+ "0": 0.574,
261
+ "1": 0.66,
262
+ "2": 0.718,
263
+ "6": 0.811,
264
+ "9": 0.714
265
+ },
266
+ "llm_top_5_test_accuracy": {
267
+ "0": 0.713,
268
+ "1": 0.711,
269
+ "2": 0.755,
270
+ "6": 0.895,
271
+ "9": 0.861
272
+ },
273
+ "sae_top_1_test_accuracy": {
274
+ "0": 0.6,
275
+ "1": 0.668,
276
+ "2": 0.879,
277
+ "6": 0.986,
278
+ "9": 0.802
279
+ },
280
+ "sae_top_2_test_accuracy": {
281
+ "0": 0.852,
282
+ "1": 0.835,
283
+ "2": 0.891,
284
+ "6": 0.981,
285
+ "9": 0.859
286
+ },
287
+ "sae_top_5_test_accuracy": {
288
+ "0": 0.864,
289
+ "1": 0.851,
290
+ "2": 0.903,
291
+ "6": 0.986,
292
+ "9": 0.933
293
+ }
294
+ },
295
+ "LabHC/bias_in_bios_class_set2_results": {
296
+ "sae_test_accuracy": {
297
+ "11": 0.9630000591278076,
298
+ "13": 0.9450000524520874,
299
+ "14": 0.9470000267028809,
300
+ "18": 0.9150000214576721,
301
+ "19": 0.9570000171661377
302
+ },
303
+ "llm_test_accuracy": {
304
+ "11": 0.968000054359436,
305
+ "13": 0.9500000476837158,
306
+ "14": 0.956000030040741,
307
+ "18": 0.9350000619888306,
308
+ "19": 0.9620000720024109
309
+ },
310
+ "llm_top_1_test_accuracy": {
311
+ "11": 0.545,
312
+ "13": 0.666,
313
+ "14": 0.649,
314
+ "18": 0.693,
315
+ "19": 0.79
316
+ },
317
+ "llm_top_2_test_accuracy": {
318
+ "11": 0.692,
319
+ "13": 0.724,
320
+ "14": 0.68,
321
+ "18": 0.732,
322
+ "19": 0.769
323
+ },
324
+ "llm_top_5_test_accuracy": {
325
+ "11": 0.793,
326
+ "13": 0.751,
327
+ "14": 0.718,
328
+ "18": 0.723,
329
+ "19": 0.83
330
+ },
331
+ "sae_top_1_test_accuracy": {
332
+ "11": 0.67,
333
+ "13": 0.689,
334
+ "14": 0.846,
335
+ "18": 0.671,
336
+ "19": 0.83
337
+ },
338
+ "sae_top_2_test_accuracy": {
339
+ "11": 0.725,
340
+ "13": 0.704,
341
+ "14": 0.863,
342
+ "18": 0.73,
343
+ "19": 0.84
344
+ },
345
+ "sae_top_5_test_accuracy": {
346
+ "11": 0.878,
347
+ "13": 0.881,
348
+ "14": 0.891,
349
+ "18": 0.914,
350
+ "19": 0.862
351
+ }
352
+ },
353
+ "LabHC/bias_in_bios_class_set3_results": {
354
+ "sae_test_accuracy": {
355
+ "20": 0.9540000557899475,
356
+ "21": 0.9120000600814819,
357
+ "22": 0.9000000357627869,
358
+ "25": 0.9550000429153442,
359
+ "26": 0.8950000405311584
360
+ },
361
+ "llm_test_accuracy": {
362
+ "20": 0.9580000638961792,
363
+ "21": 0.9240000247955322,
364
+ "22": 0.9200000166893005,
365
+ "25": 0.9630000591278076,
366
+ "26": 0.9010000228881836
367
+ },
368
+ "llm_top_1_test_accuracy": {
369
+ "20": 0.696,
370
+ "21": 0.757,
371
+ "22": 0.637,
372
+ "25": 0.692,
373
+ "26": 0.631
374
+ },
375
+ "llm_top_2_test_accuracy": {
376
+ "20": 0.818,
377
+ "21": 0.774,
378
+ "22": 0.688,
379
+ "25": 0.762,
380
+ "26": 0.686
381
+ },
382
+ "llm_top_5_test_accuracy": {
383
+ "20": 0.862,
384
+ "21": 0.792,
385
+ "22": 0.748,
386
+ "25": 0.791,
387
+ "26": 0.673
388
+ },
389
+ "sae_top_1_test_accuracy": {
390
+ "20": 0.879,
391
+ "21": 0.738,
392
+ "22": 0.889,
393
+ "25": 0.892,
394
+ "26": 0.709
395
+ },
396
+ "sae_top_2_test_accuracy": {
397
+ "20": 0.903,
398
+ "21": 0.79,
399
+ "22": 0.885,
400
+ "25": 0.898,
401
+ "26": 0.717
402
+ },
403
+ "sae_top_5_test_accuracy": {
404
+ "20": 0.932,
405
+ "21": 0.861,
406
+ "22": 0.891,
407
+ "25": 0.896,
408
+ "26": 0.758
409
+ }
410
+ },
411
+ "canrager/amazon_reviews_mcauley_1and5_results": {
412
+ "sae_test_accuracy": {
413
+ "1": 0.9380000233650208,
414
+ "2": 0.9300000667572021,
415
+ "3": 0.921000063419342,
416
+ "5": 0.9140000343322754,
417
+ "6": 0.8640000224113464
418
+ },
419
+ "llm_test_accuracy": {
420
+ "1": 0.940000057220459,
421
+ "2": 0.9340000152587891,
422
+ "3": 0.9200000166893005,
423
+ "5": 0.9150000214576721,
424
+ "6": 0.8610000610351562
425
+ },
426
+ "llm_top_1_test_accuracy": {
427
+ "1": 0.662,
428
+ "2": 0.599,
429
+ "3": 0.592,
430
+ "5": 0.57,
431
+ "6": 0.58
432
+ },
433
+ "llm_top_2_test_accuracy": {
434
+ "1": 0.748,
435
+ "2": 0.642,
436
+ "3": 0.6,
437
+ "5": 0.625,
438
+ "6": 0.601
439
+ },
440
+ "llm_top_5_test_accuracy": {
441
+ "1": 0.764,
442
+ "2": 0.646,
443
+ "3": 0.639,
444
+ "5": 0.638,
445
+ "6": 0.677
446
+ },
447
+ "sae_top_1_test_accuracy": {
448
+ "1": 0.873,
449
+ "2": 0.597,
450
+ "3": 0.669,
451
+ "5": 0.611,
452
+ "6": 0.664
453
+ },
454
+ "sae_top_2_test_accuracy": {
455
+ "1": 0.899,
456
+ "2": 0.738,
457
+ "3": 0.692,
458
+ "5": 0.817,
459
+ "6": 0.72
460
+ },
461
+ "sae_top_5_test_accuracy": {
462
+ "1": 0.914,
463
+ "2": 0.807,
464
+ "3": 0.739,
465
+ "5": 0.882,
466
+ "6": 0.739
467
+ }
468
+ },
469
+ "canrager/amazon_reviews_mcauley_1and5_sentiment_results": {
470
+ "sae_test_accuracy": {
471
+ "1.0": 0.9700000286102295,
472
+ "5.0": 0.9690000414848328
473
+ },
474
+ "llm_test_accuracy": {
475
+ "1.0": 0.9800000190734863,
476
+ "5.0": 0.9820000529289246
477
+ },
478
+ "llm_top_1_test_accuracy": {
479
+ "1.0": 0.673,
480
+ "5.0": 0.673
481
+ },
482
+ "llm_top_2_test_accuracy": {
483
+ "1.0": 0.724,
484
+ "5.0": 0.724
485
+ },
486
+ "llm_top_5_test_accuracy": {
487
+ "1.0": 0.766,
488
+ "5.0": 0.766
489
+ },
490
+ "sae_top_1_test_accuracy": {
491
+ "1.0": 0.81,
492
+ "5.0": 0.81
493
+ },
494
+ "sae_top_2_test_accuracy": {
495
+ "1.0": 0.821,
496
+ "5.0": 0.821
497
+ },
498
+ "sae_top_5_test_accuracy": {
499
+ "1.0": 0.922,
500
+ "5.0": 0.922
501
+ }
502
+ },
503
+ "codeparrot/github-code_results": {
504
+ "sae_test_accuracy": {
505
+ "C": 0.9600000381469727,
506
+ "Python": 0.9820000529289246,
507
+ "HTML": 0.9780000448226929,
508
+ "Java": 0.9650000333786011,
509
+ "PHP": 0.9580000638961792
510
+ },
511
+ "llm_test_accuracy": {
512
+ "C": 0.9580000638961792,
513
+ "Python": 0.9850000739097595,
514
+ "HTML": 0.9890000224113464,
515
+ "Java": 0.9660000205039978,
516
+ "PHP": 0.956000030040741
517
+ },
518
+ "llm_top_1_test_accuracy": {
519
+ "C": 0.672,
520
+ "Python": 0.643,
521
+ "HTML": 0.788,
522
+ "Java": 0.616,
523
+ "PHP": 0.587
524
+ },
525
+ "llm_top_2_test_accuracy": {
526
+ "C": 0.653,
527
+ "Python": 0.675,
528
+ "HTML": 0.826,
529
+ "Java": 0.685,
530
+ "PHP": 0.642
531
+ },
532
+ "llm_top_5_test_accuracy": {
533
+ "C": 0.758,
534
+ "Python": 0.731,
535
+ "HTML": 0.898,
536
+ "Java": 0.722,
537
+ "PHP": 0.704
538
+ },
539
+ "sae_top_1_test_accuracy": {
540
+ "C": 0.614,
541
+ "Python": 0.911,
542
+ "HTML": 0.855,
543
+ "Java": 0.715,
544
+ "PHP": 0.921
545
+ },
546
+ "sae_top_2_test_accuracy": {
547
+ "C": 0.737,
548
+ "Python": 0.913,
549
+ "HTML": 0.865,
550
+ "Java": 0.717,
551
+ "PHP": 0.919
552
+ },
553
+ "sae_top_5_test_accuracy": {
554
+ "C": 0.804,
555
+ "Python": 0.928,
556
+ "HTML": 0.905,
557
+ "Java": 0.8,
558
+ "PHP": 0.918
559
+ }
560
+ },
561
+ "fancyzhx/ag_news_results": {
562
+ "sae_test_accuracy": {
563
+ "0": 0.9320000410079956,
564
+ "1": 0.9850000739097595,
565
+ "2": 0.9420000314712524,
566
+ "3": 0.9550000429153442
567
+ },
568
+ "llm_test_accuracy": {
569
+ "0": 0.940000057220459,
570
+ "1": 0.9850000739097595,
571
+ "2": 0.9300000667572021,
572
+ "3": 0.9500000476837158
573
+ },
574
+ "llm_top_1_test_accuracy": {
575
+ "0": 0.568,
576
+ "1": 0.671,
577
+ "2": 0.667,
578
+ "3": 0.641
579
+ },
580
+ "llm_top_2_test_accuracy": {
581
+ "0": 0.802,
582
+ "1": 0.802,
583
+ "2": 0.701,
584
+ "3": 0.738
585
+ },
586
+ "llm_top_5_test_accuracy": {
587
+ "0": 0.813,
588
+ "1": 0.884,
589
+ "2": 0.762,
590
+ "3": 0.843
591
+ },
592
+ "sae_top_1_test_accuracy": {
593
+ "0": 0.693,
594
+ "1": 0.587,
595
+ "2": 0.742,
596
+ "3": 0.63
597
+ },
598
+ "sae_top_2_test_accuracy": {
599
+ "0": 0.814,
600
+ "1": 0.75,
601
+ "2": 0.783,
602
+ "3": 0.771
603
+ },
604
+ "sae_top_5_test_accuracy": {
605
+ "0": 0.855,
606
+ "1": 0.884,
607
+ "2": 0.825,
608
+ "3": 0.836
609
+ }
610
+ },
611
+ "Helsinki-NLP/europarl_results": {
612
+ "sae_test_accuracy": {
613
+ "en": 1.0,
614
+ "fr": 0.999000072479248,
615
+ "de": 1.0,
616
+ "es": 1.0,
617
+ "nl": 1.0
618
+ },
619
+ "llm_test_accuracy": {
620
+ "en": 0.999000072479248,
621
+ "fr": 0.999000072479248,
622
+ "de": 1.0,
623
+ "es": 1.0,
624
+ "nl": 0.999000072479248
625
+ },
626
+ "llm_top_1_test_accuracy": {
627
+ "en": 0.732,
628
+ "fr": 0.587,
629
+ "de": 0.759,
630
+ "es": 0.489,
631
+ "nl": 0.639
632
+ },
633
+ "llm_top_2_test_accuracy": {
634
+ "en": 0.834,
635
+ "fr": 0.604,
636
+ "de": 0.84,
637
+ "es": 0.907,
638
+ "nl": 0.749
639
+ },
640
+ "llm_top_5_test_accuracy": {
641
+ "en": 0.889,
642
+ "fr": 0.927,
643
+ "de": 0.834,
644
+ "es": 0.977,
645
+ "nl": 0.873
646
+ },
647
+ "sae_top_1_test_accuracy": {
648
+ "en": 0.604,
649
+ "fr": 0.991,
650
+ "de": 0.923,
651
+ "es": 0.991,
652
+ "nl": 0.997
653
+ },
654
+ "sae_top_2_test_accuracy": {
655
+ "en": 0.991,
656
+ "fr": 0.996,
657
+ "de": 0.934,
658
+ "es": 0.991,
659
+ "nl": 0.998
660
+ },
661
+ "sae_top_5_test_accuracy": {
662
+ "en": 0.997,
663
+ "fr": 0.995,
664
+ "de": 0.996,
665
+ "es": 0.997,
666
+ "nl": 0.999
667
+ }
668
+ }
669
+ }
670
+ }
eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_5_custom_sae_eval_results.json ADDED
@@ -0,0 +1,670 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "sparse_probing",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "LabHC/bias_in_bios_class_set2",
8
+ "LabHC/bias_in_bios_class_set3",
9
+ "canrager/amazon_reviews_mcauley_1and5",
10
+ "canrager/amazon_reviews_mcauley_1and5_sentiment",
11
+ "codeparrot/github-code",
12
+ "fancyzhx/ag_news",
13
+ "Helsinki-NLP/europarl"
14
+ ],
15
+ "probe_train_set_size": 4000,
16
+ "probe_test_set_size": 1000,
17
+ "context_length": 128,
18
+ "sae_batch_size": 125,
19
+ "llm_batch_size": 32,
20
+ "llm_dtype": "bfloat16",
21
+ "model_name": "gemma-2-2b",
22
+ "k_values": [
23
+ 1,
24
+ 2,
25
+ 5
26
+ ],
27
+ "lower_vram_usage": false
28
+ },
29
+ "eval_id": "a2bcbe91-6e21-4cab-87df-ed4e14d5f7fc",
30
+ "datetime_epoch_millis": 1740085832733,
31
+ "eval_result_metrics": {
32
+ "llm": {
33
+ "llm_test_accuracy": 0.9588312957435847,
34
+ "llm_top_1_test_accuracy": 0.6504687499999999,
35
+ "llm_top_2_test_accuracy": 0.7214187500000001,
36
+ "llm_top_5_test_accuracy": 0.7812625,
37
+ "llm_top_10_test_accuracy": null,
38
+ "llm_top_20_test_accuracy": null,
39
+ "llm_top_50_test_accuracy": null,
40
+ "llm_top_100_test_accuracy": null
41
+ },
42
+ "sae": {
43
+ "sae_test_accuracy": 0.9541125372052193,
44
+ "sae_top_1_test_accuracy": 0.78889375,
45
+ "sae_top_2_test_accuracy": 0.8315375,
46
+ "sae_top_5_test_accuracy": 0.8818187500000001,
47
+ "sae_top_10_test_accuracy": null,
48
+ "sae_top_20_test_accuracy": null,
49
+ "sae_top_50_test_accuracy": null,
50
+ "sae_top_100_test_accuracy": null
51
+ }
52
+ },
53
+ "eval_result_details": [
54
+ {
55
+ "dataset_name": "LabHC/bias_in_bios_class_set1_results",
56
+ "llm_test_accuracy": 0.966800057888031,
57
+ "llm_top_1_test_accuracy": 0.6397999999999999,
58
+ "llm_top_2_test_accuracy": 0.6954,
59
+ "llm_top_5_test_accuracy": 0.7869999999999999,
60
+ "llm_top_10_test_accuracy": null,
61
+ "llm_top_20_test_accuracy": null,
62
+ "llm_top_50_test_accuracy": null,
63
+ "llm_top_100_test_accuracy": null,
64
+ "sae_test_accuracy": 0.9612000465393067,
65
+ "sae_top_1_test_accuracy": 0.7706,
66
+ "sae_top_2_test_accuracy": 0.8032,
67
+ "sae_top_5_test_accuracy": 0.8876,
68
+ "sae_top_10_test_accuracy": null,
69
+ "sae_top_20_test_accuracy": null,
70
+ "sae_top_50_test_accuracy": null,
71
+ "sae_top_100_test_accuracy": null
72
+ },
73
+ {
74
+ "dataset_name": "LabHC/bias_in_bios_class_set2_results",
75
+ "llm_test_accuracy": 0.9542000532150269,
76
+ "llm_top_1_test_accuracy": 0.6686,
77
+ "llm_top_2_test_accuracy": 0.7194,
78
+ "llm_top_5_test_accuracy": 0.763,
79
+ "llm_top_10_test_accuracy": null,
80
+ "llm_top_20_test_accuracy": null,
81
+ "llm_top_50_test_accuracy": null,
82
+ "llm_top_100_test_accuracy": null,
83
+ "sae_test_accuracy": 0.9470000386238098,
84
+ "sae_top_1_test_accuracy": 0.7704000000000001,
85
+ "sae_top_2_test_accuracy": 0.7737999999999999,
86
+ "sae_top_5_test_accuracy": 0.8942,
87
+ "sae_top_10_test_accuracy": null,
88
+ "sae_top_20_test_accuracy": null,
89
+ "sae_top_50_test_accuracy": null,
90
+ "sae_top_100_test_accuracy": null
91
+ },
92
+ {
93
+ "dataset_name": "LabHC/bias_in_bios_class_set3_results",
94
+ "llm_test_accuracy": 0.9332000374794006,
95
+ "llm_top_1_test_accuracy": 0.6826000000000001,
96
+ "llm_top_2_test_accuracy": 0.7456,
97
+ "llm_top_5_test_accuracy": 0.7732,
98
+ "llm_top_10_test_accuracy": null,
99
+ "llm_top_20_test_accuracy": null,
100
+ "llm_top_50_test_accuracy": null,
101
+ "llm_top_100_test_accuracy": null,
102
+ "sae_test_accuracy": 0.9278000354766845,
103
+ "sae_top_1_test_accuracy": 0.825,
104
+ "sae_top_2_test_accuracy": 0.8388,
105
+ "sae_top_5_test_accuracy": 0.8602000000000001,
106
+ "sae_top_10_test_accuracy": null,
107
+ "sae_top_20_test_accuracy": null,
108
+ "sae_top_50_test_accuracy": null,
109
+ "sae_top_100_test_accuracy": null
110
+ },
111
+ {
112
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results",
113
+ "llm_test_accuracy": 0.9140000343322754,
114
+ "llm_top_1_test_accuracy": 0.6006,
115
+ "llm_top_2_test_accuracy": 0.6432,
116
+ "llm_top_5_test_accuracy": 0.6728000000000001,
117
+ "llm_top_10_test_accuracy": null,
118
+ "llm_top_20_test_accuracy": null,
119
+ "llm_top_50_test_accuracy": null,
120
+ "llm_top_100_test_accuracy": null,
121
+ "sae_test_accuracy": 0.9106000304222107,
122
+ "sae_top_1_test_accuracy": 0.7372,
123
+ "sae_top_2_test_accuracy": 0.7939999999999999,
124
+ "sae_top_5_test_accuracy": 0.828,
125
+ "sae_top_10_test_accuracy": null,
126
+ "sae_top_20_test_accuracy": null,
127
+ "sae_top_50_test_accuracy": null,
128
+ "sae_top_100_test_accuracy": null
129
+ },
130
+ {
131
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results",
132
+ "llm_test_accuracy": 0.9810000360012054,
133
+ "llm_top_1_test_accuracy": 0.673,
134
+ "llm_top_2_test_accuracy": 0.724,
135
+ "llm_top_5_test_accuracy": 0.766,
136
+ "llm_top_10_test_accuracy": null,
137
+ "llm_top_20_test_accuracy": null,
138
+ "llm_top_50_test_accuracy": null,
139
+ "llm_top_100_test_accuracy": null,
140
+ "sae_test_accuracy": 0.9700000286102295,
141
+ "sae_top_1_test_accuracy": 0.881,
142
+ "sae_top_2_test_accuracy": 0.895,
143
+ "sae_top_5_test_accuracy": 0.906,
144
+ "sae_top_10_test_accuracy": null,
145
+ "sae_top_20_test_accuracy": null,
146
+ "sae_top_50_test_accuracy": null,
147
+ "sae_top_100_test_accuracy": null
148
+ },
149
+ {
150
+ "dataset_name": "codeparrot/github-code_results",
151
+ "llm_test_accuracy": 0.9708000421524048,
152
+ "llm_top_1_test_accuracy": 0.6612,
153
+ "llm_top_2_test_accuracy": 0.6961999999999999,
154
+ "llm_top_5_test_accuracy": 0.7626,
155
+ "llm_top_10_test_accuracy": null,
156
+ "llm_top_20_test_accuracy": null,
157
+ "llm_top_50_test_accuracy": null,
158
+ "llm_top_100_test_accuracy": null,
159
+ "sae_test_accuracy": 0.9688000559806824,
160
+ "sae_top_1_test_accuracy": 0.7914,
161
+ "sae_top_2_test_accuracy": 0.8168,
162
+ "sae_top_5_test_accuracy": 0.8657999999999999,
163
+ "sae_top_10_test_accuracy": null,
164
+ "sae_top_20_test_accuracy": null,
165
+ "sae_top_50_test_accuracy": null,
166
+ "sae_top_100_test_accuracy": null
167
+ },
168
+ {
169
+ "dataset_name": "fancyzhx/ag_news_results",
170
+ "llm_test_accuracy": 0.9512500613927841,
171
+ "llm_top_1_test_accuracy": 0.6367499999999999,
172
+ "llm_top_2_test_accuracy": 0.76075,
173
+ "llm_top_5_test_accuracy": 0.8255,
174
+ "llm_top_10_test_accuracy": null,
175
+ "llm_top_20_test_accuracy": null,
176
+ "llm_top_50_test_accuracy": null,
177
+ "llm_top_100_test_accuracy": null,
178
+ "sae_test_accuracy": 0.9485000371932983,
179
+ "sae_top_1_test_accuracy": 0.67075,
180
+ "sae_top_2_test_accuracy": 0.7625,
181
+ "sae_top_5_test_accuracy": 0.81975,
182
+ "sae_top_10_test_accuracy": null,
183
+ "sae_top_20_test_accuracy": null,
184
+ "sae_top_50_test_accuracy": null,
185
+ "sae_top_100_test_accuracy": null
186
+ },
187
+ {
188
+ "dataset_name": "Helsinki-NLP/europarl_results",
189
+ "llm_test_accuracy": 0.9994000434875489,
190
+ "llm_top_1_test_accuracy": 0.6411999999999999,
191
+ "llm_top_2_test_accuracy": 0.7868,
192
+ "llm_top_5_test_accuracy": 0.9,
193
+ "llm_top_10_test_accuracy": null,
194
+ "llm_top_20_test_accuracy": null,
195
+ "llm_top_50_test_accuracy": null,
196
+ "llm_top_100_test_accuracy": null,
197
+ "sae_test_accuracy": 0.9990000247955322,
198
+ "sae_top_1_test_accuracy": 0.8648,
199
+ "sae_top_2_test_accuracy": 0.9682000000000001,
200
+ "sae_top_5_test_accuracy": 0.993,
201
+ "sae_top_10_test_accuracy": null,
202
+ "sae_top_20_test_accuracy": null,
203
+ "sae_top_50_test_accuracy": null,
204
+ "sae_top_100_test_accuracy": null
205
+ }
206
+ ],
207
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
208
+ "sae_lens_id": "custom_sae",
209
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_5",
210
+ "sae_lens_version": "5.4.2",
211
+ "sae_cfg_dict": {
212
+ "model_name": "gemma-2-2b",
213
+ "d_in": 2304,
214
+ "d_sae": 16384,
215
+ "hook_layer": 12,
216
+ "hook_name": "blocks.12.hook_resid_post",
217
+ "context_size": null,
218
+ "hook_head_index": null,
219
+ "architecture": "standard_april_update",
220
+ "apply_b_dec_to_input": null,
221
+ "finetuning_scaling_factor": null,
222
+ "activation_fn_str": "",
223
+ "prepend_bos": true,
224
+ "normalize_activations": "none",
225
+ "dtype": "bfloat16",
226
+ "device": "",
227
+ "dataset_path": "",
228
+ "dataset_trust_remote_code": true,
229
+ "seqpos_slice": [
230
+ null
231
+ ],
232
+ "training_tokens": -100000,
233
+ "sae_lens_training_version": null,
234
+ "neuronpedia_id": null
235
+ },
236
+ "eval_result_unstructured": {
237
+ "LabHC/bias_in_bios_class_set1_results": {
238
+ "sae_test_accuracy": {
239
+ "0": 0.9450000524520874,
240
+ "1": 0.9580000638961792,
241
+ "2": 0.9460000395774841,
242
+ "6": 0.9880000352859497,
243
+ "9": 0.9690000414848328
244
+ },
245
+ "llm_test_accuracy": {
246
+ "0": 0.9510000348091125,
247
+ "1": 0.9670000672340393,
248
+ "2": 0.9530000686645508,
249
+ "6": 0.987000048160553,
250
+ "9": 0.9760000705718994
251
+ },
252
+ "llm_top_1_test_accuracy": {
253
+ "0": 0.577,
254
+ "1": 0.613,
255
+ "2": 0.662,
256
+ "6": 0.787,
257
+ "9": 0.56
258
+ },
259
+ "llm_top_2_test_accuracy": {
260
+ "0": 0.574,
261
+ "1": 0.66,
262
+ "2": 0.718,
263
+ "6": 0.811,
264
+ "9": 0.714
265
+ },
266
+ "llm_top_5_test_accuracy": {
267
+ "0": 0.713,
268
+ "1": 0.711,
269
+ "2": 0.755,
270
+ "6": 0.895,
271
+ "9": 0.861
272
+ },
273
+ "sae_top_1_test_accuracy": {
274
+ "0": 0.61,
275
+ "1": 0.673,
276
+ "2": 0.859,
277
+ "6": 0.977,
278
+ "9": 0.734
279
+ },
280
+ "sae_top_2_test_accuracy": {
281
+ "0": 0.603,
282
+ "1": 0.687,
283
+ "2": 0.848,
284
+ "6": 0.977,
285
+ "9": 0.901
286
+ },
287
+ "sae_top_5_test_accuracy": {
288
+ "0": 0.855,
289
+ "1": 0.784,
290
+ "2": 0.869,
291
+ "6": 0.989,
292
+ "9": 0.941
293
+ }
294
+ },
295
+ "LabHC/bias_in_bios_class_set2_results": {
296
+ "sae_test_accuracy": {
297
+ "11": 0.9640000462532043,
298
+ "13": 0.9500000476837158,
299
+ "14": 0.9500000476837158,
300
+ "18": 0.9190000295639038,
301
+ "19": 0.9520000219345093
302
+ },
303
+ "llm_test_accuracy": {
304
+ "11": 0.968000054359436,
305
+ "13": 0.9500000476837158,
306
+ "14": 0.956000030040741,
307
+ "18": 0.9350000619888306,
308
+ "19": 0.9620000720024109
309
+ },
310
+ "llm_top_1_test_accuracy": {
311
+ "11": 0.545,
312
+ "13": 0.666,
313
+ "14": 0.649,
314
+ "18": 0.693,
315
+ "19": 0.79
316
+ },
317
+ "llm_top_2_test_accuracy": {
318
+ "11": 0.692,
319
+ "13": 0.724,
320
+ "14": 0.68,
321
+ "18": 0.732,
322
+ "19": 0.769
323
+ },
324
+ "llm_top_5_test_accuracy": {
325
+ "11": 0.793,
326
+ "13": 0.751,
327
+ "14": 0.718,
328
+ "18": 0.723,
329
+ "19": 0.83
330
+ },
331
+ "sae_top_1_test_accuracy": {
332
+ "11": 0.732,
333
+ "13": 0.685,
334
+ "14": 0.873,
335
+ "18": 0.724,
336
+ "19": 0.838
337
+ },
338
+ "sae_top_2_test_accuracy": {
339
+ "11": 0.732,
340
+ "13": 0.701,
341
+ "14": 0.868,
342
+ "18": 0.722,
343
+ "19": 0.846
344
+ },
345
+ "sae_top_5_test_accuracy": {
346
+ "11": 0.939,
347
+ "13": 0.827,
348
+ "14": 0.932,
349
+ "18": 0.915,
350
+ "19": 0.858
351
+ }
352
+ },
353
+ "LabHC/bias_in_bios_class_set3_results": {
354
+ "sae_test_accuracy": {
355
+ "20": 0.9550000429153442,
356
+ "21": 0.9340000152587891,
357
+ "22": 0.9140000343322754,
358
+ "25": 0.9580000638961792,
359
+ "26": 0.878000020980835
360
+ },
361
+ "llm_test_accuracy": {
362
+ "20": 0.9580000638961792,
363
+ "21": 0.9240000247955322,
364
+ "22": 0.9200000166893005,
365
+ "25": 0.9630000591278076,
366
+ "26": 0.9010000228881836
367
+ },
368
+ "llm_top_1_test_accuracy": {
369
+ "20": 0.696,
370
+ "21": 0.757,
371
+ "22": 0.637,
372
+ "25": 0.692,
373
+ "26": 0.631
374
+ },
375
+ "llm_top_2_test_accuracy": {
376
+ "20": 0.818,
377
+ "21": 0.774,
378
+ "22": 0.688,
379
+ "25": 0.762,
380
+ "26": 0.686
381
+ },
382
+ "llm_top_5_test_accuracy": {
383
+ "20": 0.862,
384
+ "21": 0.792,
385
+ "22": 0.748,
386
+ "25": 0.791,
387
+ "26": 0.673
388
+ },
389
+ "sae_top_1_test_accuracy": {
390
+ "20": 0.907,
391
+ "21": 0.749,
392
+ "22": 0.879,
393
+ "25": 0.885,
394
+ "26": 0.705
395
+ },
396
+ "sae_top_2_test_accuracy": {
397
+ "20": 0.898,
398
+ "21": 0.765,
399
+ "22": 0.885,
400
+ "25": 0.896,
401
+ "26": 0.75
402
+ },
403
+ "sae_top_5_test_accuracy": {
404
+ "20": 0.929,
405
+ "21": 0.84,
406
+ "22": 0.869,
407
+ "25": 0.888,
408
+ "26": 0.775
409
+ }
410
+ },
411
+ "canrager/amazon_reviews_mcauley_1and5_results": {
412
+ "sae_test_accuracy": {
413
+ "1": 0.937000036239624,
414
+ "2": 0.9190000295639038,
415
+ "3": 0.9150000214576721,
416
+ "5": 0.9140000343322754,
417
+ "6": 0.8680000305175781
418
+ },
419
+ "llm_test_accuracy": {
420
+ "1": 0.940000057220459,
421
+ "2": 0.9340000152587891,
422
+ "3": 0.9200000166893005,
423
+ "5": 0.9150000214576721,
424
+ "6": 0.8610000610351562
425
+ },
426
+ "llm_top_1_test_accuracy": {
427
+ "1": 0.662,
428
+ "2": 0.599,
429
+ "3": 0.592,
430
+ "5": 0.57,
431
+ "6": 0.58
432
+ },
433
+ "llm_top_2_test_accuracy": {
434
+ "1": 0.748,
435
+ "2": 0.642,
436
+ "3": 0.6,
437
+ "5": 0.625,
438
+ "6": 0.601
439
+ },
440
+ "llm_top_5_test_accuracy": {
441
+ "1": 0.764,
442
+ "2": 0.646,
443
+ "3": 0.639,
444
+ "5": 0.638,
445
+ "6": 0.677
446
+ },
447
+ "sae_top_1_test_accuracy": {
448
+ "1": 0.843,
449
+ "2": 0.814,
450
+ "3": 0.668,
451
+ "5": 0.706,
452
+ "6": 0.655
453
+ },
454
+ "sae_top_2_test_accuracy": {
455
+ "1": 0.918,
456
+ "2": 0.836,
457
+ "3": 0.671,
458
+ "5": 0.849,
459
+ "6": 0.696
460
+ },
461
+ "sae_top_5_test_accuracy": {
462
+ "1": 0.92,
463
+ "2": 0.863,
464
+ "3": 0.775,
465
+ "5": 0.86,
466
+ "6": 0.722
467
+ }
468
+ },
469
+ "canrager/amazon_reviews_mcauley_1and5_sentiment_results": {
470
+ "sae_test_accuracy": {
471
+ "1.0": 0.9700000286102295,
472
+ "5.0": 0.9700000286102295
473
+ },
474
+ "llm_test_accuracy": {
475
+ "1.0": 0.9800000190734863,
476
+ "5.0": 0.9820000529289246
477
+ },
478
+ "llm_top_1_test_accuracy": {
479
+ "1.0": 0.673,
480
+ "5.0": 0.673
481
+ },
482
+ "llm_top_2_test_accuracy": {
483
+ "1.0": 0.724,
484
+ "5.0": 0.724
485
+ },
486
+ "llm_top_5_test_accuracy": {
487
+ "1.0": 0.766,
488
+ "5.0": 0.766
489
+ },
490
+ "sae_top_1_test_accuracy": {
491
+ "1.0": 0.881,
492
+ "5.0": 0.881
493
+ },
494
+ "sae_top_2_test_accuracy": {
495
+ "1.0": 0.895,
496
+ "5.0": 0.895
497
+ },
498
+ "sae_top_5_test_accuracy": {
499
+ "1.0": 0.906,
500
+ "5.0": 0.906
501
+ }
502
+ },
503
+ "codeparrot/github-code_results": {
504
+ "sae_test_accuracy": {
505
+ "C": 0.9640000462532043,
506
+ "Python": 0.9850000739097595,
507
+ "HTML": 0.9810000658035278,
508
+ "Java": 0.9620000720024109,
509
+ "PHP": 0.9520000219345093
510
+ },
511
+ "llm_test_accuracy": {
512
+ "C": 0.9580000638961792,
513
+ "Python": 0.9850000739097595,
514
+ "HTML": 0.9890000224113464,
515
+ "Java": 0.9660000205039978,
516
+ "PHP": 0.956000030040741
517
+ },
518
+ "llm_top_1_test_accuracy": {
519
+ "C": 0.672,
520
+ "Python": 0.643,
521
+ "HTML": 0.788,
522
+ "Java": 0.616,
523
+ "PHP": 0.587
524
+ },
525
+ "llm_top_2_test_accuracy": {
526
+ "C": 0.653,
527
+ "Python": 0.675,
528
+ "HTML": 0.826,
529
+ "Java": 0.685,
530
+ "PHP": 0.642
531
+ },
532
+ "llm_top_5_test_accuracy": {
533
+ "C": 0.758,
534
+ "Python": 0.731,
535
+ "HTML": 0.898,
536
+ "Java": 0.722,
537
+ "PHP": 0.704
538
+ },
539
+ "sae_top_1_test_accuracy": {
540
+ "C": 0.665,
541
+ "Python": 0.76,
542
+ "HTML": 0.885,
543
+ "Java": 0.723,
544
+ "PHP": 0.924
545
+ },
546
+ "sae_top_2_test_accuracy": {
547
+ "C": 0.693,
548
+ "Python": 0.771,
549
+ "HTML": 0.884,
550
+ "Java": 0.815,
551
+ "PHP": 0.921
552
+ },
553
+ "sae_top_5_test_accuracy": {
554
+ "C": 0.758,
555
+ "Python": 0.922,
556
+ "HTML": 0.92,
557
+ "Java": 0.812,
558
+ "PHP": 0.917
559
+ }
560
+ },
561
+ "fancyzhx/ag_news_results": {
562
+ "sae_test_accuracy": {
563
+ "0": 0.9340000152587891,
564
+ "1": 0.9780000448226929,
565
+ "2": 0.9300000667572021,
566
+ "3": 0.9520000219345093
567
+ },
568
+ "llm_test_accuracy": {
569
+ "0": 0.940000057220459,
570
+ "1": 0.9850000739097595,
571
+ "2": 0.9300000667572021,
572
+ "3": 0.9500000476837158
573
+ },
574
+ "llm_top_1_test_accuracy": {
575
+ "0": 0.568,
576
+ "1": 0.671,
577
+ "2": 0.667,
578
+ "3": 0.641
579
+ },
580
+ "llm_top_2_test_accuracy": {
581
+ "0": 0.802,
582
+ "1": 0.802,
583
+ "2": 0.701,
584
+ "3": 0.738
585
+ },
586
+ "llm_top_5_test_accuracy": {
587
+ "0": 0.813,
588
+ "1": 0.884,
589
+ "2": 0.762,
590
+ "3": 0.843
591
+ },
592
+ "sae_top_1_test_accuracy": {
593
+ "0": 0.701,
594
+ "1": 0.707,
595
+ "2": 0.621,
596
+ "3": 0.654
597
+ },
598
+ "sae_top_2_test_accuracy": {
599
+ "0": 0.816,
600
+ "1": 0.761,
601
+ "2": 0.769,
602
+ "3": 0.704
603
+ },
604
+ "sae_top_5_test_accuracy": {
605
+ "0": 0.827,
606
+ "1": 0.819,
607
+ "2": 0.802,
608
+ "3": 0.831
609
+ }
610
+ },
611
+ "Helsinki-NLP/europarl_results": {
612
+ "sae_test_accuracy": {
613
+ "en": 1.0,
614
+ "fr": 0.999000072479248,
615
+ "de": 1.0,
616
+ "es": 0.9980000257492065,
617
+ "nl": 0.9980000257492065
618
+ },
619
+ "llm_test_accuracy": {
620
+ "en": 0.999000072479248,
621
+ "fr": 0.999000072479248,
622
+ "de": 1.0,
623
+ "es": 1.0,
624
+ "nl": 0.999000072479248
625
+ },
626
+ "llm_top_1_test_accuracy": {
627
+ "en": 0.732,
628
+ "fr": 0.587,
629
+ "de": 0.759,
630
+ "es": 0.489,
631
+ "nl": 0.639
632
+ },
633
+ "llm_top_2_test_accuracy": {
634
+ "en": 0.834,
635
+ "fr": 0.604,
636
+ "de": 0.84,
637
+ "es": 0.907,
638
+ "nl": 0.749
639
+ },
640
+ "llm_top_5_test_accuracy": {
641
+ "en": 0.889,
642
+ "fr": 0.927,
643
+ "de": 0.834,
644
+ "es": 0.977,
645
+ "nl": 0.873
646
+ },
647
+ "sae_top_1_test_accuracy": {
648
+ "en": 0.992,
649
+ "fr": 0.992,
650
+ "de": 0.909,
651
+ "es": 0.865,
652
+ "nl": 0.566
653
+ },
654
+ "sae_top_2_test_accuracy": {
655
+ "en": 0.99,
656
+ "fr": 0.996,
657
+ "de": 0.983,
658
+ "es": 0.994,
659
+ "nl": 0.878
660
+ },
661
+ "sae_top_5_test_accuracy": {
662
+ "en": 0.995,
663
+ "fr": 0.995,
664
+ "de": 0.991,
665
+ "es": 0.993,
666
+ "nl": 0.991
667
+ }
668
+ }
669
+ }
670
+ }
eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_0_custom_sae_eval_results.json ADDED
@@ -0,0 +1,670 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "sparse_probing",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "LabHC/bias_in_bios_class_set2",
8
+ "LabHC/bias_in_bios_class_set3",
9
+ "canrager/amazon_reviews_mcauley_1and5",
10
+ "canrager/amazon_reviews_mcauley_1and5_sentiment",
11
+ "codeparrot/github-code",
12
+ "fancyzhx/ag_news",
13
+ "Helsinki-NLP/europarl"
14
+ ],
15
+ "probe_train_set_size": 4000,
16
+ "probe_test_set_size": 1000,
17
+ "context_length": 128,
18
+ "sae_batch_size": 125,
19
+ "llm_batch_size": 32,
20
+ "llm_dtype": "bfloat16",
21
+ "model_name": "gemma-2-2b",
22
+ "k_values": [
23
+ 1,
24
+ 2,
25
+ 5
26
+ ],
27
+ "lower_vram_usage": false
28
+ },
29
+ "eval_id": "bbc1c144-af2f-4f73-8fcd-f4fe429b65cf",
30
+ "datetime_epoch_millis": 1740125267006,
31
+ "eval_result_metrics": {
32
+ "llm": {
33
+ "llm_test_accuracy": 0.9571125406771899,
34
+ "llm_top_1_test_accuracy": 0.6527562499999999,
35
+ "llm_top_2_test_accuracy": 0.7210875,
36
+ "llm_top_5_test_accuracy": 0.7801125,
37
+ "llm_top_10_test_accuracy": null,
38
+ "llm_top_20_test_accuracy": null,
39
+ "llm_top_50_test_accuracy": null,
40
+ "llm_top_100_test_accuracy": null
41
+ },
42
+ "sae": {
43
+ "sae_test_accuracy": 0.9612687967717647,
44
+ "sae_top_1_test_accuracy": 0.8083937500000001,
45
+ "sae_top_2_test_accuracy": 0.8614124999999999,
46
+ "sae_top_5_test_accuracy": 0.8983,
47
+ "sae_top_10_test_accuracy": null,
48
+ "sae_top_20_test_accuracy": null,
49
+ "sae_top_50_test_accuracy": null,
50
+ "sae_top_100_test_accuracy": null
51
+ }
52
+ },
53
+ "eval_result_details": [
54
+ {
55
+ "dataset_name": "LabHC/bias_in_bios_class_set1_results",
56
+ "llm_test_accuracy": 0.966800057888031,
57
+ "llm_top_1_test_accuracy": 0.6397999999999999,
58
+ "llm_top_2_test_accuracy": 0.6954,
59
+ "llm_top_5_test_accuracy": 0.7869999999999999,
60
+ "llm_top_10_test_accuracy": null,
61
+ "llm_top_20_test_accuracy": null,
62
+ "llm_top_50_test_accuracy": null,
63
+ "llm_top_100_test_accuracy": null,
64
+ "sae_test_accuracy": 0.9670000433921814,
65
+ "sae_top_1_test_accuracy": 0.8502000000000001,
66
+ "sae_top_2_test_accuracy": 0.9038,
67
+ "sae_top_5_test_accuracy": 0.9186,
68
+ "sae_top_10_test_accuracy": null,
69
+ "sae_top_20_test_accuracy": null,
70
+ "sae_top_50_test_accuracy": null,
71
+ "sae_top_100_test_accuracy": null
72
+ },
73
+ {
74
+ "dataset_name": "LabHC/bias_in_bios_class_set2_results",
75
+ "llm_test_accuracy": 0.9502000451087952,
76
+ "llm_top_1_test_accuracy": 0.6718,
77
+ "llm_top_2_test_accuracy": 0.7230000000000001,
78
+ "llm_top_5_test_accuracy": 0.7615999999999999,
79
+ "llm_top_10_test_accuracy": null,
80
+ "llm_top_20_test_accuracy": null,
81
+ "llm_top_50_test_accuracy": null,
82
+ "llm_top_100_test_accuracy": null,
83
+ "sae_test_accuracy": 0.9550000548362731,
84
+ "sae_top_1_test_accuracy": 0.752,
85
+ "sae_top_2_test_accuracy": 0.7998,
86
+ "sae_top_5_test_accuracy": 0.8614,
87
+ "sae_top_10_test_accuracy": null,
88
+ "sae_top_20_test_accuracy": null,
89
+ "sae_top_50_test_accuracy": null,
90
+ "sae_top_100_test_accuracy": null
91
+ },
92
+ {
93
+ "dataset_name": "LabHC/bias_in_bios_class_set3_results",
94
+ "llm_test_accuracy": 0.9292000293731689,
95
+ "llm_top_1_test_accuracy": 0.687,
96
+ "llm_top_2_test_accuracy": 0.7306000000000001,
97
+ "llm_top_5_test_accuracy": 0.7644,
98
+ "llm_top_10_test_accuracy": null,
99
+ "llm_top_20_test_accuracy": null,
100
+ "llm_top_50_test_accuracy": null,
101
+ "llm_top_100_test_accuracy": null,
102
+ "sae_test_accuracy": 0.9390000581741333,
103
+ "sae_top_1_test_accuracy": 0.7942,
104
+ "sae_top_2_test_accuracy": 0.8298,
105
+ "sae_top_5_test_accuracy": 0.8688,
106
+ "sae_top_10_test_accuracy": null,
107
+ "sae_top_20_test_accuracy": null,
108
+ "sae_top_50_test_accuracy": null,
109
+ "sae_top_100_test_accuracy": null
110
+ },
111
+ {
112
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results",
113
+ "llm_test_accuracy": 0.9116000413894654,
114
+ "llm_top_1_test_accuracy": 0.6076,
115
+ "llm_top_2_test_accuracy": 0.6492,
116
+ "llm_top_5_test_accuracy": 0.6728000000000001,
117
+ "llm_top_10_test_accuracy": null,
118
+ "llm_top_20_test_accuracy": null,
119
+ "llm_top_50_test_accuracy": null,
120
+ "llm_top_100_test_accuracy": null,
121
+ "sae_test_accuracy": 0.9280000448226928,
122
+ "sae_top_1_test_accuracy": 0.7971999999999999,
123
+ "sae_top_2_test_accuracy": 0.8219999999999998,
124
+ "sae_top_5_test_accuracy": 0.8523999999999999,
125
+ "sae_top_10_test_accuracy": null,
126
+ "sae_top_20_test_accuracy": null,
127
+ "sae_top_50_test_accuracy": null,
128
+ "sae_top_100_test_accuracy": null
129
+ },
130
+ {
131
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results",
132
+ "llm_test_accuracy": 0.9810000360012054,
133
+ "llm_top_1_test_accuracy": 0.673,
134
+ "llm_top_2_test_accuracy": 0.724,
135
+ "llm_top_5_test_accuracy": 0.766,
136
+ "llm_top_10_test_accuracy": null,
137
+ "llm_top_20_test_accuracy": null,
138
+ "llm_top_50_test_accuracy": null,
139
+ "llm_top_100_test_accuracy": null,
140
+ "sae_test_accuracy": 0.9785000383853912,
141
+ "sae_top_1_test_accuracy": 0.908,
142
+ "sae_top_2_test_accuracy": 0.922,
143
+ "sae_top_5_test_accuracy": 0.962,
144
+ "sae_top_10_test_accuracy": null,
145
+ "sae_top_20_test_accuracy": null,
146
+ "sae_top_50_test_accuracy": null,
147
+ "sae_top_100_test_accuracy": null
148
+ },
149
+ {
150
+ "dataset_name": "codeparrot/github-code_results",
151
+ "llm_test_accuracy": 0.9672000527381897,
152
+ "llm_top_1_test_accuracy": 0.6634,
153
+ "llm_top_2_test_accuracy": 0.6894,
154
+ "llm_top_5_test_accuracy": 0.7562,
155
+ "llm_top_10_test_accuracy": null,
156
+ "llm_top_20_test_accuracy": null,
157
+ "llm_top_50_test_accuracy": null,
158
+ "llm_top_100_test_accuracy": null,
159
+ "sae_test_accuracy": 0.9712000370025635,
160
+ "sae_top_1_test_accuracy": 0.8055999999999999,
161
+ "sae_top_2_test_accuracy": 0.8324,
162
+ "sae_top_5_test_accuracy": 0.8836,
163
+ "sae_top_10_test_accuracy": null,
164
+ "sae_top_20_test_accuracy": null,
165
+ "sae_top_50_test_accuracy": null,
166
+ "sae_top_100_test_accuracy": null
167
+ },
168
+ {
169
+ "dataset_name": "fancyzhx/ag_news_results",
170
+ "llm_test_accuracy": 0.9515000432729721,
171
+ "llm_top_1_test_accuracy": 0.63925,
172
+ "llm_top_2_test_accuracy": 0.7785,
173
+ "llm_top_5_test_accuracy": 0.8225,
174
+ "llm_top_10_test_accuracy": null,
175
+ "llm_top_20_test_accuracy": null,
176
+ "llm_top_50_test_accuracy": null,
177
+ "llm_top_100_test_accuracy": null,
178
+ "sae_test_accuracy": 0.952250063419342,
179
+ "sae_top_1_test_accuracy": 0.7567499999999999,
180
+ "sae_top_2_test_accuracy": 0.7995,
181
+ "sae_top_5_test_accuracy": 0.847,
182
+ "sae_top_10_test_accuracy": null,
183
+ "sae_top_20_test_accuracy": null,
184
+ "sae_top_50_test_accuracy": null,
185
+ "sae_top_100_test_accuracy": null
186
+ },
187
+ {
188
+ "dataset_name": "Helsinki-NLP/europarl_results",
189
+ "llm_test_accuracy": 0.9994000196456909,
190
+ "llm_top_1_test_accuracy": 0.6401999999999999,
191
+ "llm_top_2_test_accuracy": 0.7786000000000001,
192
+ "llm_top_5_test_accuracy": 0.9103999999999999,
193
+ "llm_top_10_test_accuracy": null,
194
+ "llm_top_20_test_accuracy": null,
195
+ "llm_top_50_test_accuracy": null,
196
+ "llm_top_100_test_accuracy": null,
197
+ "sae_test_accuracy": 0.9992000341415406,
198
+ "sae_top_1_test_accuracy": 0.8032,
199
+ "sae_top_2_test_accuracy": 0.982,
200
+ "sae_top_5_test_accuracy": 0.9926,
201
+ "sae_top_10_test_accuracy": null,
202
+ "sae_top_20_test_accuracy": null,
203
+ "sae_top_50_test_accuracy": null,
204
+ "sae_top_100_test_accuracy": null
205
+ }
206
+ ],
207
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
208
+ "sae_lens_id": "custom_sae",
209
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_0",
210
+ "sae_lens_version": "5.4.2",
211
+ "sae_cfg_dict": {
212
+ "model_name": "gemma-2-2b",
213
+ "d_in": 2304,
214
+ "d_sae": 65536,
215
+ "hook_layer": 12,
216
+ "hook_name": "blocks.12.hook_resid_post",
217
+ "context_size": null,
218
+ "hook_head_index": null,
219
+ "architecture": "standard_april_update",
220
+ "apply_b_dec_to_input": null,
221
+ "finetuning_scaling_factor": null,
222
+ "activation_fn_str": "",
223
+ "prepend_bos": true,
224
+ "normalize_activations": "none",
225
+ "dtype": "bfloat16",
226
+ "device": "",
227
+ "dataset_path": "",
228
+ "dataset_trust_remote_code": true,
229
+ "seqpos_slice": [
230
+ null
231
+ ],
232
+ "training_tokens": -100000,
233
+ "sae_lens_training_version": null,
234
+ "neuronpedia_id": null
235
+ },
236
+ "eval_result_unstructured": {
237
+ "LabHC/bias_in_bios_class_set1_results": {
238
+ "sae_test_accuracy": {
239
+ "0": 0.9490000605583191,
240
+ "1": 0.9640000462532043,
241
+ "2": 0.9540000557899475,
242
+ "6": 0.9890000224113464,
243
+ "9": 0.9790000319480896
244
+ },
245
+ "llm_test_accuracy": {
246
+ "0": 0.9510000348091125,
247
+ "1": 0.9670000672340393,
248
+ "2": 0.9530000686645508,
249
+ "6": 0.987000048160553,
250
+ "9": 0.9760000705718994
251
+ },
252
+ "llm_top_1_test_accuracy": {
253
+ "0": 0.577,
254
+ "1": 0.613,
255
+ "2": 0.662,
256
+ "6": 0.787,
257
+ "9": 0.56
258
+ },
259
+ "llm_top_2_test_accuracy": {
260
+ "0": 0.574,
261
+ "1": 0.66,
262
+ "2": 0.718,
263
+ "6": 0.811,
264
+ "9": 0.714
265
+ },
266
+ "llm_top_5_test_accuracy": {
267
+ "0": 0.713,
268
+ "1": 0.711,
269
+ "2": 0.755,
270
+ "6": 0.895,
271
+ "9": 0.861
272
+ },
273
+ "sae_top_1_test_accuracy": {
274
+ "0": 0.862,
275
+ "1": 0.815,
276
+ "2": 0.903,
277
+ "6": 0.763,
278
+ "9": 0.908
279
+ },
280
+ "sae_top_2_test_accuracy": {
281
+ "0": 0.863,
282
+ "1": 0.86,
283
+ "2": 0.896,
284
+ "6": 0.948,
285
+ "9": 0.952
286
+ },
287
+ "sae_top_5_test_accuracy": {
288
+ "0": 0.877,
289
+ "1": 0.876,
290
+ "2": 0.902,
291
+ "6": 0.986,
292
+ "9": 0.952
293
+ }
294
+ },
295
+ "LabHC/bias_in_bios_class_set2_results": {
296
+ "sae_test_accuracy": {
297
+ "11": 0.9620000720024109,
298
+ "13": 0.9600000381469727,
299
+ "14": 0.9540000557899475,
300
+ "18": 0.9350000619888306,
301
+ "19": 0.9640000462532043
302
+ },
303
+ "llm_test_accuracy": {
304
+ "11": 0.9550000429153442,
305
+ "13": 0.9550000429153442,
306
+ "14": 0.9550000429153442,
307
+ "18": 0.9330000281333923,
308
+ "19": 0.9530000686645508
309
+ },
310
+ "llm_top_1_test_accuracy": {
311
+ "11": 0.557,
312
+ "13": 0.673,
313
+ "14": 0.645,
314
+ "18": 0.697,
315
+ "19": 0.787
316
+ },
317
+ "llm_top_2_test_accuracy": {
318
+ "11": 0.705,
319
+ "13": 0.718,
320
+ "14": 0.679,
321
+ "18": 0.73,
322
+ "19": 0.783
323
+ },
324
+ "llm_top_5_test_accuracy": {
325
+ "11": 0.794,
326
+ "13": 0.744,
327
+ "14": 0.724,
328
+ "18": 0.713,
329
+ "19": 0.833
330
+ },
331
+ "sae_top_1_test_accuracy": {
332
+ "11": 0.669,
333
+ "13": 0.691,
334
+ "14": 0.864,
335
+ "18": 0.671,
336
+ "19": 0.865
337
+ },
338
+ "sae_top_2_test_accuracy": {
339
+ "11": 0.822,
340
+ "13": 0.697,
341
+ "14": 0.87,
342
+ "18": 0.725,
343
+ "19": 0.885
344
+ },
345
+ "sae_top_5_test_accuracy": {
346
+ "11": 0.857,
347
+ "13": 0.755,
348
+ "14": 0.893,
349
+ "18": 0.902,
350
+ "19": 0.9
351
+ }
352
+ },
353
+ "LabHC/bias_in_bios_class_set3_results": {
354
+ "sae_test_accuracy": {
355
+ "20": 0.971000075340271,
356
+ "21": 0.9310000538825989,
357
+ "22": 0.9260000586509705,
358
+ "25": 0.9620000720024109,
359
+ "26": 0.9050000309944153
360
+ },
361
+ "llm_test_accuracy": {
362
+ "20": 0.9570000171661377,
363
+ "21": 0.9150000214576721,
364
+ "22": 0.9230000376701355,
365
+ "25": 0.9610000252723694,
366
+ "26": 0.89000004529953
367
+ },
368
+ "llm_top_1_test_accuracy": {
369
+ "20": 0.716,
370
+ "21": 0.761,
371
+ "22": 0.648,
372
+ "25": 0.692,
373
+ "26": 0.618
374
+ },
375
+ "llm_top_2_test_accuracy": {
376
+ "20": 0.805,
377
+ "21": 0.762,
378
+ "22": 0.649,
379
+ "25": 0.766,
380
+ "26": 0.671
381
+ },
382
+ "llm_top_5_test_accuracy": {
383
+ "20": 0.875,
384
+ "21": 0.783,
385
+ "22": 0.711,
386
+ "25": 0.782,
387
+ "26": 0.671
388
+ },
389
+ "sae_top_1_test_accuracy": {
390
+ "20": 0.887,
391
+ "21": 0.736,
392
+ "22": 0.796,
393
+ "25": 0.853,
394
+ "26": 0.699
395
+ },
396
+ "sae_top_2_test_accuracy": {
397
+ "20": 0.906,
398
+ "21": 0.801,
399
+ "22": 0.885,
400
+ "25": 0.852,
401
+ "26": 0.705
402
+ },
403
+ "sae_top_5_test_accuracy": {
404
+ "20": 0.924,
405
+ "21": 0.873,
406
+ "22": 0.887,
407
+ "25": 0.898,
408
+ "26": 0.762
409
+ }
410
+ },
411
+ "canrager/amazon_reviews_mcauley_1and5_results": {
412
+ "sae_test_accuracy": {
413
+ "1": 0.9480000734329224,
414
+ "2": 0.9410000443458557,
415
+ "3": 0.9290000200271606,
416
+ "5": 0.9310000538825989,
417
+ "6": 0.8910000324249268
418
+ },
419
+ "llm_test_accuracy": {
420
+ "1": 0.9460000395774841,
421
+ "2": 0.9330000281333923,
422
+ "3": 0.9130000472068787,
423
+ "5": 0.9160000681877136,
424
+ "6": 0.8500000238418579
425
+ },
426
+ "llm_top_1_test_accuracy": {
427
+ "1": 0.674,
428
+ "2": 0.587,
429
+ "3": 0.601,
430
+ "5": 0.583,
431
+ "6": 0.593
432
+ },
433
+ "llm_top_2_test_accuracy": {
434
+ "1": 0.737,
435
+ "2": 0.632,
436
+ "3": 0.605,
437
+ "5": 0.634,
438
+ "6": 0.638
439
+ },
440
+ "llm_top_5_test_accuracy": {
441
+ "1": 0.763,
442
+ "2": 0.626,
443
+ "3": 0.63,
444
+ "5": 0.656,
445
+ "6": 0.689
446
+ },
447
+ "sae_top_1_test_accuracy": {
448
+ "1": 0.859,
449
+ "2": 0.857,
450
+ "3": 0.669,
451
+ "5": 0.88,
452
+ "6": 0.721
453
+ },
454
+ "sae_top_2_test_accuracy": {
455
+ "1": 0.915,
456
+ "2": 0.868,
457
+ "3": 0.714,
458
+ "5": 0.879,
459
+ "6": 0.734
460
+ },
461
+ "sae_top_5_test_accuracy": {
462
+ "1": 0.925,
463
+ "2": 0.882,
464
+ "3": 0.807,
465
+ "5": 0.883,
466
+ "6": 0.765
467
+ }
468
+ },
469
+ "canrager/amazon_reviews_mcauley_1and5_sentiment_results": {
470
+ "sae_test_accuracy": {
471
+ "1.0": 0.9790000319480896,
472
+ "5.0": 0.9780000448226929
473
+ },
474
+ "llm_test_accuracy": {
475
+ "1.0": 0.9820000529289246,
476
+ "5.0": 0.9800000190734863
477
+ },
478
+ "llm_top_1_test_accuracy": {
479
+ "1.0": 0.673,
480
+ "5.0": 0.673
481
+ },
482
+ "llm_top_2_test_accuracy": {
483
+ "1.0": 0.724,
484
+ "5.0": 0.724
485
+ },
486
+ "llm_top_5_test_accuracy": {
487
+ "1.0": 0.766,
488
+ "5.0": 0.766
489
+ },
490
+ "sae_top_1_test_accuracy": {
491
+ "1.0": 0.908,
492
+ "5.0": 0.908
493
+ },
494
+ "sae_top_2_test_accuracy": {
495
+ "1.0": 0.922,
496
+ "5.0": 0.922
497
+ },
498
+ "sae_top_5_test_accuracy": {
499
+ "1.0": 0.962,
500
+ "5.0": 0.962
501
+ }
502
+ },
503
+ "codeparrot/github-code_results": {
504
+ "sae_test_accuracy": {
505
+ "C": 0.956000030040741,
506
+ "Python": 0.9890000224113464,
507
+ "HTML": 0.984000027179718,
508
+ "Java": 0.9640000462532043,
509
+ "PHP": 0.9630000591278076
510
+ },
511
+ "llm_test_accuracy": {
512
+ "C": 0.9450000524520874,
513
+ "Python": 0.9890000224113464,
514
+ "HTML": 0.987000048160553,
515
+ "Java": 0.9620000720024109,
516
+ "PHP": 0.9530000686645508
517
+ },
518
+ "llm_top_1_test_accuracy": {
519
+ "C": 0.669,
520
+ "Python": 0.638,
521
+ "HTML": 0.788,
522
+ "Java": 0.621,
523
+ "PHP": 0.601
524
+ },
525
+ "llm_top_2_test_accuracy": {
526
+ "C": 0.656,
527
+ "Python": 0.671,
528
+ "HTML": 0.811,
529
+ "Java": 0.678,
530
+ "PHP": 0.631
531
+ },
532
+ "llm_top_5_test_accuracy": {
533
+ "C": 0.744,
534
+ "Python": 0.735,
535
+ "HTML": 0.904,
536
+ "Java": 0.726,
537
+ "PHP": 0.672
538
+ },
539
+ "sae_top_1_test_accuracy": {
540
+ "C": 0.601,
541
+ "Python": 0.917,
542
+ "HTML": 0.89,
543
+ "Java": 0.73,
544
+ "PHP": 0.89
545
+ },
546
+ "sae_top_2_test_accuracy": {
547
+ "C": 0.615,
548
+ "Python": 0.926,
549
+ "HTML": 0.896,
550
+ "Java": 0.812,
551
+ "PHP": 0.913
552
+ },
553
+ "sae_top_5_test_accuracy": {
554
+ "C": 0.764,
555
+ "Python": 0.971,
556
+ "HTML": 0.919,
557
+ "Java": 0.847,
558
+ "PHP": 0.917
559
+ }
560
+ },
561
+ "fancyzhx/ag_news_results": {
562
+ "sae_test_accuracy": {
563
+ "0": 0.940000057220459,
564
+ "1": 0.9860000610351562,
565
+ "2": 0.9300000667572021,
566
+ "3": 0.9530000686645508
567
+ },
568
+ "llm_test_accuracy": {
569
+ "0": 0.937000036239624,
570
+ "1": 0.987000048160553,
571
+ "2": 0.9300000667572021,
572
+ "3": 0.9520000219345093
573
+ },
574
+ "llm_top_1_test_accuracy": {
575
+ "0": 0.586,
576
+ "1": 0.658,
577
+ "2": 0.669,
578
+ "3": 0.644
579
+ },
580
+ "llm_top_2_test_accuracy": {
581
+ "0": 0.809,
582
+ "1": 0.8,
583
+ "2": 0.691,
584
+ "3": 0.814
585
+ },
586
+ "llm_top_5_test_accuracy": {
587
+ "0": 0.827,
588
+ "1": 0.877,
589
+ "2": 0.748,
590
+ "3": 0.838
591
+ },
592
+ "sae_top_1_test_accuracy": {
593
+ "0": 0.703,
594
+ "1": 0.934,
595
+ "2": 0.732,
596
+ "3": 0.658
597
+ },
598
+ "sae_top_2_test_accuracy": {
599
+ "0": 0.754,
600
+ "1": 0.939,
601
+ "2": 0.816,
602
+ "3": 0.689
603
+ },
604
+ "sae_top_5_test_accuracy": {
605
+ "0": 0.805,
606
+ "1": 0.937,
607
+ "2": 0.824,
608
+ "3": 0.822
609
+ }
610
+ },
611
+ "Helsinki-NLP/europarl_results": {
612
+ "sae_test_accuracy": {
613
+ "en": 0.9980000257492065,
614
+ "fr": 0.999000072479248,
615
+ "de": 1.0,
616
+ "es": 1.0,
617
+ "nl": 0.999000072479248
618
+ },
619
+ "llm_test_accuracy": {
620
+ "en": 1.0,
621
+ "fr": 1.0,
622
+ "de": 1.0,
623
+ "es": 0.9980000257492065,
624
+ "nl": 0.999000072479248
625
+ },
626
+ "llm_top_1_test_accuracy": {
627
+ "en": 0.742,
628
+ "fr": 0.575,
629
+ "de": 0.737,
630
+ "es": 0.504,
631
+ "nl": 0.643
632
+ },
633
+ "llm_top_2_test_accuracy": {
634
+ "en": 0.822,
635
+ "fr": 0.593,
636
+ "de": 0.83,
637
+ "es": 0.905,
638
+ "nl": 0.743
639
+ },
640
+ "llm_top_5_test_accuracy": {
641
+ "en": 0.898,
642
+ "fr": 0.908,
643
+ "de": 0.908,
644
+ "es": 0.982,
645
+ "nl": 0.856
646
+ },
647
+ "sae_top_1_test_accuracy": {
648
+ "en": 0.578,
649
+ "fr": 0.99,
650
+ "de": 0.925,
651
+ "es": 0.986,
652
+ "nl": 0.537
653
+ },
654
+ "sae_top_2_test_accuracy": {
655
+ "en": 0.997,
656
+ "fr": 0.993,
657
+ "de": 0.929,
658
+ "es": 0.991,
659
+ "nl": 1.0
660
+ },
661
+ "sae_top_5_test_accuracy": {
662
+ "en": 0.997,
663
+ "fr": 0.992,
664
+ "de": 0.99,
665
+ "es": 0.989,
666
+ "nl": 0.995
667
+ }
668
+ }
669
+ }
670
+ }
eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_1_custom_sae_eval_results.json ADDED
@@ -0,0 +1,670 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "sparse_probing",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "LabHC/bias_in_bios_class_set2",
8
+ "LabHC/bias_in_bios_class_set3",
9
+ "canrager/amazon_reviews_mcauley_1and5",
10
+ "canrager/amazon_reviews_mcauley_1and5_sentiment",
11
+ "codeparrot/github-code",
12
+ "fancyzhx/ag_news",
13
+ "Helsinki-NLP/europarl"
14
+ ],
15
+ "probe_train_set_size": 4000,
16
+ "probe_test_set_size": 1000,
17
+ "context_length": 128,
18
+ "sae_batch_size": 125,
19
+ "llm_batch_size": 32,
20
+ "llm_dtype": "bfloat16",
21
+ "model_name": "gemma-2-2b",
22
+ "k_values": [
23
+ 1,
24
+ 2,
25
+ 5
26
+ ],
27
+ "lower_vram_usage": false
28
+ },
29
+ "eval_id": "ea3ada0d-e3d6-4183-9c00-8fd89c2750cb",
30
+ "datetime_epoch_millis": 1740124909209,
31
+ "eval_result_metrics": {
32
+ "llm": {
33
+ "llm_test_accuracy": 0.9571125406771899,
34
+ "llm_top_1_test_accuracy": 0.6527562499999999,
35
+ "llm_top_2_test_accuracy": 0.7210875,
36
+ "llm_top_5_test_accuracy": 0.7801125,
37
+ "llm_top_10_test_accuracy": null,
38
+ "llm_top_20_test_accuracy": null,
39
+ "llm_top_50_test_accuracy": null,
40
+ "llm_top_100_test_accuracy": null
41
+ },
42
+ "sae": {
43
+ "sae_test_accuracy": 0.9598937928676605,
44
+ "sae_top_1_test_accuracy": 0.79299375,
45
+ "sae_top_2_test_accuracy": 0.867525,
46
+ "sae_top_5_test_accuracy": 0.8909125,
47
+ "sae_top_10_test_accuracy": null,
48
+ "sae_top_20_test_accuracy": null,
49
+ "sae_top_50_test_accuracy": null,
50
+ "sae_top_100_test_accuracy": null
51
+ }
52
+ },
53
+ "eval_result_details": [
54
+ {
55
+ "dataset_name": "LabHC/bias_in_bios_class_set1_results",
56
+ "llm_test_accuracy": 0.966800057888031,
57
+ "llm_top_1_test_accuracy": 0.6397999999999999,
58
+ "llm_top_2_test_accuracy": 0.6954,
59
+ "llm_top_5_test_accuracy": 0.7869999999999999,
60
+ "llm_top_10_test_accuracy": null,
61
+ "llm_top_20_test_accuracy": null,
62
+ "llm_top_50_test_accuracy": null,
63
+ "llm_top_100_test_accuracy": null,
64
+ "sae_test_accuracy": 0.9646000385284423,
65
+ "sae_top_1_test_accuracy": 0.8478,
66
+ "sae_top_2_test_accuracy": 0.8906000000000001,
67
+ "sae_top_5_test_accuracy": 0.9102,
68
+ "sae_top_10_test_accuracy": null,
69
+ "sae_top_20_test_accuracy": null,
70
+ "sae_top_50_test_accuracy": null,
71
+ "sae_top_100_test_accuracy": null
72
+ },
73
+ {
74
+ "dataset_name": "LabHC/bias_in_bios_class_set2_results",
75
+ "llm_test_accuracy": 0.9502000451087952,
76
+ "llm_top_1_test_accuracy": 0.6718,
77
+ "llm_top_2_test_accuracy": 0.7230000000000001,
78
+ "llm_top_5_test_accuracy": 0.7615999999999999,
79
+ "llm_top_10_test_accuracy": null,
80
+ "llm_top_20_test_accuracy": null,
81
+ "llm_top_50_test_accuracy": null,
82
+ "llm_top_100_test_accuracy": null,
83
+ "sae_test_accuracy": 0.9556000590324402,
84
+ "sae_top_1_test_accuracy": 0.7789999999999999,
85
+ "sae_top_2_test_accuracy": 0.792,
86
+ "sae_top_5_test_accuracy": 0.8584000000000002,
87
+ "sae_top_10_test_accuracy": null,
88
+ "sae_top_20_test_accuracy": null,
89
+ "sae_top_50_test_accuracy": null,
90
+ "sae_top_100_test_accuracy": null
91
+ },
92
+ {
93
+ "dataset_name": "LabHC/bias_in_bios_class_set3_results",
94
+ "llm_test_accuracy": 0.9292000293731689,
95
+ "llm_top_1_test_accuracy": 0.687,
96
+ "llm_top_2_test_accuracy": 0.7306000000000001,
97
+ "llm_top_5_test_accuracy": 0.7644,
98
+ "llm_top_10_test_accuracy": null,
99
+ "llm_top_20_test_accuracy": null,
100
+ "llm_top_50_test_accuracy": null,
101
+ "llm_top_100_test_accuracy": null,
102
+ "sae_test_accuracy": 0.9356000542640686,
103
+ "sae_top_1_test_accuracy": 0.8004000000000001,
104
+ "sae_top_2_test_accuracy": 0.8089999999999999,
105
+ "sae_top_5_test_accuracy": 0.8522000000000001,
106
+ "sae_top_10_test_accuracy": null,
107
+ "sae_top_20_test_accuracy": null,
108
+ "sae_top_50_test_accuracy": null,
109
+ "sae_top_100_test_accuracy": null
110
+ },
111
+ {
112
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results",
113
+ "llm_test_accuracy": 0.9116000413894654,
114
+ "llm_top_1_test_accuracy": 0.6076,
115
+ "llm_top_2_test_accuracy": 0.6492,
116
+ "llm_top_5_test_accuracy": 0.6728000000000001,
117
+ "llm_top_10_test_accuracy": null,
118
+ "llm_top_20_test_accuracy": null,
119
+ "llm_top_50_test_accuracy": null,
120
+ "llm_top_100_test_accuracy": null,
121
+ "sae_test_accuracy": 0.9236000418663025,
122
+ "sae_top_1_test_accuracy": 0.769,
123
+ "sae_top_2_test_accuracy": 0.8236000000000001,
124
+ "sae_top_5_test_accuracy": 0.835,
125
+ "sae_top_10_test_accuracy": null,
126
+ "sae_top_20_test_accuracy": null,
127
+ "sae_top_50_test_accuracy": null,
128
+ "sae_top_100_test_accuracy": null
129
+ },
130
+ {
131
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results",
132
+ "llm_test_accuracy": 0.9810000360012054,
133
+ "llm_top_1_test_accuracy": 0.673,
134
+ "llm_top_2_test_accuracy": 0.724,
135
+ "llm_top_5_test_accuracy": 0.766,
136
+ "llm_top_10_test_accuracy": null,
137
+ "llm_top_20_test_accuracy": null,
138
+ "llm_top_50_test_accuracy": null,
139
+ "llm_top_100_test_accuracy": null,
140
+ "sae_test_accuracy": 0.9785000383853912,
141
+ "sae_top_1_test_accuracy": 0.802,
142
+ "sae_top_2_test_accuracy": 0.944,
143
+ "sae_top_5_test_accuracy": 0.939,
144
+ "sae_top_10_test_accuracy": null,
145
+ "sae_top_20_test_accuracy": null,
146
+ "sae_top_50_test_accuracy": null,
147
+ "sae_top_100_test_accuracy": null
148
+ },
149
+ {
150
+ "dataset_name": "codeparrot/github-code_results",
151
+ "llm_test_accuracy": 0.9672000527381897,
152
+ "llm_top_1_test_accuracy": 0.6634,
153
+ "llm_top_2_test_accuracy": 0.6894,
154
+ "llm_top_5_test_accuracy": 0.7562,
155
+ "llm_top_10_test_accuracy": null,
156
+ "llm_top_20_test_accuracy": null,
157
+ "llm_top_50_test_accuracy": null,
158
+ "llm_top_100_test_accuracy": null,
159
+ "sae_test_accuracy": 0.9702000498771668,
160
+ "sae_top_1_test_accuracy": 0.82,
161
+ "sae_top_2_test_accuracy": 0.8577999999999999,
162
+ "sae_top_5_test_accuracy": 0.8868,
163
+ "sae_top_10_test_accuracy": null,
164
+ "sae_top_20_test_accuracy": null,
165
+ "sae_top_50_test_accuracy": null,
166
+ "sae_top_100_test_accuracy": null
167
+ },
168
+ {
169
+ "dataset_name": "fancyzhx/ag_news_results",
170
+ "llm_test_accuracy": 0.9515000432729721,
171
+ "llm_top_1_test_accuracy": 0.63925,
172
+ "llm_top_2_test_accuracy": 0.7785,
173
+ "llm_top_5_test_accuracy": 0.8225,
174
+ "llm_top_10_test_accuracy": null,
175
+ "llm_top_20_test_accuracy": null,
176
+ "llm_top_50_test_accuracy": null,
177
+ "llm_top_100_test_accuracy": null,
178
+ "sae_test_accuracy": 0.9512500464916229,
179
+ "sae_top_1_test_accuracy": 0.7147500000000001,
180
+ "sae_top_2_test_accuracy": 0.8300000000000001,
181
+ "sae_top_5_test_accuracy": 0.8495,
182
+ "sae_top_10_test_accuracy": null,
183
+ "sae_top_20_test_accuracy": null,
184
+ "sae_top_50_test_accuracy": null,
185
+ "sae_top_100_test_accuracy": null
186
+ },
187
+ {
188
+ "dataset_name": "Helsinki-NLP/europarl_results",
189
+ "llm_test_accuracy": 0.9994000196456909,
190
+ "llm_top_1_test_accuracy": 0.6401999999999999,
191
+ "llm_top_2_test_accuracy": 0.7786000000000001,
192
+ "llm_top_5_test_accuracy": 0.9103999999999999,
193
+ "llm_top_10_test_accuracy": null,
194
+ "llm_top_20_test_accuracy": null,
195
+ "llm_top_50_test_accuracy": null,
196
+ "llm_top_100_test_accuracy": null,
197
+ "sae_test_accuracy": 0.9998000144958497,
198
+ "sae_top_1_test_accuracy": 0.8109999999999999,
199
+ "sae_top_2_test_accuracy": 0.9932000000000001,
200
+ "sae_top_5_test_accuracy": 0.9962,
201
+ "sae_top_10_test_accuracy": null,
202
+ "sae_top_20_test_accuracy": null,
203
+ "sae_top_50_test_accuracy": null,
204
+ "sae_top_100_test_accuracy": null
205
+ }
206
+ ],
207
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
208
+ "sae_lens_id": "custom_sae",
209
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_1",
210
+ "sae_lens_version": "5.4.2",
211
+ "sae_cfg_dict": {
212
+ "model_name": "gemma-2-2b",
213
+ "d_in": 2304,
214
+ "d_sae": 65536,
215
+ "hook_layer": 12,
216
+ "hook_name": "blocks.12.hook_resid_post",
217
+ "context_size": null,
218
+ "hook_head_index": null,
219
+ "architecture": "standard_april_update",
220
+ "apply_b_dec_to_input": null,
221
+ "finetuning_scaling_factor": null,
222
+ "activation_fn_str": "",
223
+ "prepend_bos": true,
224
+ "normalize_activations": "none",
225
+ "dtype": "bfloat16",
226
+ "device": "",
227
+ "dataset_path": "",
228
+ "dataset_trust_remote_code": true,
229
+ "seqpos_slice": [
230
+ null
231
+ ],
232
+ "training_tokens": -100000,
233
+ "sae_lens_training_version": null,
234
+ "neuronpedia_id": null
235
+ },
236
+ "eval_result_unstructured": {
237
+ "LabHC/bias_in_bios_class_set1_results": {
238
+ "sae_test_accuracy": {
239
+ "0": 0.9430000185966492,
240
+ "1": 0.9730000495910645,
241
+ "2": 0.9430000185966492,
242
+ "6": 0.9920000433921814,
243
+ "9": 0.9720000624656677
244
+ },
245
+ "llm_test_accuracy": {
246
+ "0": 0.9510000348091125,
247
+ "1": 0.9670000672340393,
248
+ "2": 0.9530000686645508,
249
+ "6": 0.987000048160553,
250
+ "9": 0.9760000705718994
251
+ },
252
+ "llm_top_1_test_accuracy": {
253
+ "0": 0.577,
254
+ "1": 0.613,
255
+ "2": 0.662,
256
+ "6": 0.787,
257
+ "9": 0.56
258
+ },
259
+ "llm_top_2_test_accuracy": {
260
+ "0": 0.574,
261
+ "1": 0.66,
262
+ "2": 0.718,
263
+ "6": 0.811,
264
+ "9": 0.714
265
+ },
266
+ "llm_top_5_test_accuracy": {
267
+ "0": 0.713,
268
+ "1": 0.711,
269
+ "2": 0.755,
270
+ "6": 0.895,
271
+ "9": 0.861
272
+ },
273
+ "sae_top_1_test_accuracy": {
274
+ "0": 0.865,
275
+ "1": 0.694,
276
+ "2": 0.904,
277
+ "6": 0.959,
278
+ "9": 0.817
279
+ },
280
+ "sae_top_2_test_accuracy": {
281
+ "0": 0.877,
282
+ "1": 0.818,
283
+ "2": 0.909,
284
+ "6": 0.96,
285
+ "9": 0.889
286
+ },
287
+ "sae_top_5_test_accuracy": {
288
+ "0": 0.88,
289
+ "1": 0.83,
290
+ "2": 0.916,
291
+ "6": 0.977,
292
+ "9": 0.948
293
+ }
294
+ },
295
+ "LabHC/bias_in_bios_class_set2_results": {
296
+ "sae_test_accuracy": {
297
+ "11": 0.9590000510215759,
298
+ "13": 0.9580000638961792,
299
+ "14": 0.9580000638961792,
300
+ "18": 0.940000057220459,
301
+ "19": 0.9630000591278076
302
+ },
303
+ "llm_test_accuracy": {
304
+ "11": 0.9550000429153442,
305
+ "13": 0.9550000429153442,
306
+ "14": 0.9550000429153442,
307
+ "18": 0.9330000281333923,
308
+ "19": 0.9530000686645508
309
+ },
310
+ "llm_top_1_test_accuracy": {
311
+ "11": 0.557,
312
+ "13": 0.673,
313
+ "14": 0.645,
314
+ "18": 0.697,
315
+ "19": 0.787
316
+ },
317
+ "llm_top_2_test_accuracy": {
318
+ "11": 0.705,
319
+ "13": 0.718,
320
+ "14": 0.679,
321
+ "18": 0.73,
322
+ "19": 0.783
323
+ },
324
+ "llm_top_5_test_accuracy": {
325
+ "11": 0.794,
326
+ "13": 0.744,
327
+ "14": 0.724,
328
+ "18": 0.713,
329
+ "19": 0.833
330
+ },
331
+ "sae_top_1_test_accuracy": {
332
+ "11": 0.824,
333
+ "13": 0.7,
334
+ "14": 0.845,
335
+ "18": 0.678,
336
+ "19": 0.848
337
+ },
338
+ "sae_top_2_test_accuracy": {
339
+ "11": 0.838,
340
+ "13": 0.694,
341
+ "14": 0.876,
342
+ "18": 0.689,
343
+ "19": 0.863
344
+ },
345
+ "sae_top_5_test_accuracy": {
346
+ "11": 0.844,
347
+ "13": 0.804,
348
+ "14": 0.893,
349
+ "18": 0.847,
350
+ "19": 0.904
351
+ }
352
+ },
353
+ "LabHC/bias_in_bios_class_set3_results": {
354
+ "sae_test_accuracy": {
355
+ "20": 0.9640000462532043,
356
+ "21": 0.9260000586509705,
357
+ "22": 0.921000063419342,
358
+ "25": 0.9690000414848328,
359
+ "26": 0.8980000615119934
360
+ },
361
+ "llm_test_accuracy": {
362
+ "20": 0.9570000171661377,
363
+ "21": 0.9150000214576721,
364
+ "22": 0.9230000376701355,
365
+ "25": 0.9610000252723694,
366
+ "26": 0.89000004529953
367
+ },
368
+ "llm_top_1_test_accuracy": {
369
+ "20": 0.716,
370
+ "21": 0.761,
371
+ "22": 0.648,
372
+ "25": 0.692,
373
+ "26": 0.618
374
+ },
375
+ "llm_top_2_test_accuracy": {
376
+ "20": 0.805,
377
+ "21": 0.762,
378
+ "22": 0.649,
379
+ "25": 0.766,
380
+ "26": 0.671
381
+ },
382
+ "llm_top_5_test_accuracy": {
383
+ "20": 0.875,
384
+ "21": 0.783,
385
+ "22": 0.711,
386
+ "25": 0.782,
387
+ "26": 0.671
388
+ },
389
+ "sae_top_1_test_accuracy": {
390
+ "20": 0.893,
391
+ "21": 0.794,
392
+ "22": 0.799,
393
+ "25": 0.861,
394
+ "26": 0.655
395
+ },
396
+ "sae_top_2_test_accuracy": {
397
+ "20": 0.909,
398
+ "21": 0.795,
399
+ "22": 0.832,
400
+ "25": 0.869,
401
+ "26": 0.64
402
+ },
403
+ "sae_top_5_test_accuracy": {
404
+ "20": 0.929,
405
+ "21": 0.862,
406
+ "22": 0.854,
407
+ "25": 0.881,
408
+ "26": 0.735
409
+ }
410
+ },
411
+ "canrager/amazon_reviews_mcauley_1and5_results": {
412
+ "sae_test_accuracy": {
413
+ "1": 0.9510000348091125,
414
+ "2": 0.937000036239624,
415
+ "3": 0.9190000295639038,
416
+ "5": 0.921000063419342,
417
+ "6": 0.89000004529953
418
+ },
419
+ "llm_test_accuracy": {
420
+ "1": 0.9460000395774841,
421
+ "2": 0.9330000281333923,
422
+ "3": 0.9130000472068787,
423
+ "5": 0.9160000681877136,
424
+ "6": 0.8500000238418579
425
+ },
426
+ "llm_top_1_test_accuracy": {
427
+ "1": 0.674,
428
+ "2": 0.587,
429
+ "3": 0.601,
430
+ "5": 0.583,
431
+ "6": 0.593
432
+ },
433
+ "llm_top_2_test_accuracy": {
434
+ "1": 0.737,
435
+ "2": 0.632,
436
+ "3": 0.605,
437
+ "5": 0.634,
438
+ "6": 0.638
439
+ },
440
+ "llm_top_5_test_accuracy": {
441
+ "1": 0.763,
442
+ "2": 0.626,
443
+ "3": 0.63,
444
+ "5": 0.656,
445
+ "6": 0.689
446
+ },
447
+ "sae_top_1_test_accuracy": {
448
+ "1": 0.876,
449
+ "2": 0.758,
450
+ "3": 0.627,
451
+ "5": 0.863,
452
+ "6": 0.721
453
+ },
454
+ "sae_top_2_test_accuracy": {
455
+ "1": 0.908,
456
+ "2": 0.86,
457
+ "3": 0.737,
458
+ "5": 0.889,
459
+ "6": 0.724
460
+ },
461
+ "sae_top_5_test_accuracy": {
462
+ "1": 0.905,
463
+ "2": 0.861,
464
+ "3": 0.778,
465
+ "5": 0.886,
466
+ "6": 0.745
467
+ }
468
+ },
469
+ "canrager/amazon_reviews_mcauley_1and5_sentiment_results": {
470
+ "sae_test_accuracy": {
471
+ "1.0": 0.9790000319480896,
472
+ "5.0": 0.9780000448226929
473
+ },
474
+ "llm_test_accuracy": {
475
+ "1.0": 0.9820000529289246,
476
+ "5.0": 0.9800000190734863
477
+ },
478
+ "llm_top_1_test_accuracy": {
479
+ "1.0": 0.673,
480
+ "5.0": 0.673
481
+ },
482
+ "llm_top_2_test_accuracy": {
483
+ "1.0": 0.724,
484
+ "5.0": 0.724
485
+ },
486
+ "llm_top_5_test_accuracy": {
487
+ "1.0": 0.766,
488
+ "5.0": 0.766
489
+ },
490
+ "sae_top_1_test_accuracy": {
491
+ "1.0": 0.802,
492
+ "5.0": 0.802
493
+ },
494
+ "sae_top_2_test_accuracy": {
495
+ "1.0": 0.944,
496
+ "5.0": 0.944
497
+ },
498
+ "sae_top_5_test_accuracy": {
499
+ "1.0": 0.939,
500
+ "5.0": 0.939
501
+ }
502
+ },
503
+ "codeparrot/github-code_results": {
504
+ "sae_test_accuracy": {
505
+ "C": 0.9540000557899475,
506
+ "Python": 0.9880000352859497,
507
+ "HTML": 0.9860000610351562,
508
+ "Java": 0.9640000462532043,
509
+ "PHP": 0.9590000510215759
510
+ },
511
+ "llm_test_accuracy": {
512
+ "C": 0.9450000524520874,
513
+ "Python": 0.9890000224113464,
514
+ "HTML": 0.987000048160553,
515
+ "Java": 0.9620000720024109,
516
+ "PHP": 0.9530000686645508
517
+ },
518
+ "llm_top_1_test_accuracy": {
519
+ "C": 0.669,
520
+ "Python": 0.638,
521
+ "HTML": 0.788,
522
+ "Java": 0.621,
523
+ "PHP": 0.601
524
+ },
525
+ "llm_top_2_test_accuracy": {
526
+ "C": 0.656,
527
+ "Python": 0.671,
528
+ "HTML": 0.811,
529
+ "Java": 0.678,
530
+ "PHP": 0.631
531
+ },
532
+ "llm_top_5_test_accuracy": {
533
+ "C": 0.744,
534
+ "Python": 0.735,
535
+ "HTML": 0.904,
536
+ "Java": 0.726,
537
+ "PHP": 0.672
538
+ },
539
+ "sae_top_1_test_accuracy": {
540
+ "C": 0.617,
541
+ "Python": 0.928,
542
+ "HTML": 0.928,
543
+ "Java": 0.735,
544
+ "PHP": 0.892
545
+ },
546
+ "sae_top_2_test_accuracy": {
547
+ "C": 0.645,
548
+ "Python": 0.941,
549
+ "HTML": 0.936,
550
+ "Java": 0.846,
551
+ "PHP": 0.921
552
+ },
553
+ "sae_top_5_test_accuracy": {
554
+ "C": 0.804,
555
+ "Python": 0.934,
556
+ "HTML": 0.936,
557
+ "Java": 0.835,
558
+ "PHP": 0.925
559
+ }
560
+ },
561
+ "fancyzhx/ag_news_results": {
562
+ "sae_test_accuracy": {
563
+ "0": 0.9430000185966492,
564
+ "1": 0.9860000610351562,
565
+ "2": 0.9350000619888306,
566
+ "3": 0.9410000443458557
567
+ },
568
+ "llm_test_accuracy": {
569
+ "0": 0.937000036239624,
570
+ "1": 0.987000048160553,
571
+ "2": 0.9300000667572021,
572
+ "3": 0.9520000219345093
573
+ },
574
+ "llm_top_1_test_accuracy": {
575
+ "0": 0.586,
576
+ "1": 0.658,
577
+ "2": 0.669,
578
+ "3": 0.644
579
+ },
580
+ "llm_top_2_test_accuracy": {
581
+ "0": 0.809,
582
+ "1": 0.8,
583
+ "2": 0.691,
584
+ "3": 0.814
585
+ },
586
+ "llm_top_5_test_accuracy": {
587
+ "0": 0.827,
588
+ "1": 0.877,
589
+ "2": 0.748,
590
+ "3": 0.838
591
+ },
592
+ "sae_top_1_test_accuracy": {
593
+ "0": 0.732,
594
+ "1": 0.783,
595
+ "2": 0.664,
596
+ "3": 0.68
597
+ },
598
+ "sae_top_2_test_accuracy": {
599
+ "0": 0.763,
600
+ "1": 0.942,
601
+ "2": 0.801,
602
+ "3": 0.814
603
+ },
604
+ "sae_top_5_test_accuracy": {
605
+ "0": 0.777,
606
+ "1": 0.949,
607
+ "2": 0.833,
608
+ "3": 0.839
609
+ }
610
+ },
611
+ "Helsinki-NLP/europarl_results": {
612
+ "sae_test_accuracy": {
613
+ "en": 0.999000072479248,
614
+ "fr": 1.0,
615
+ "de": 1.0,
616
+ "es": 1.0,
617
+ "nl": 1.0
618
+ },
619
+ "llm_test_accuracy": {
620
+ "en": 1.0,
621
+ "fr": 1.0,
622
+ "de": 1.0,
623
+ "es": 0.9980000257492065,
624
+ "nl": 0.999000072479248
625
+ },
626
+ "llm_top_1_test_accuracy": {
627
+ "en": 0.742,
628
+ "fr": 0.575,
629
+ "de": 0.737,
630
+ "es": 0.504,
631
+ "nl": 0.643
632
+ },
633
+ "llm_top_2_test_accuracy": {
634
+ "en": 0.822,
635
+ "fr": 0.593,
636
+ "de": 0.83,
637
+ "es": 0.905,
638
+ "nl": 0.743
639
+ },
640
+ "llm_top_5_test_accuracy": {
641
+ "en": 0.898,
642
+ "fr": 0.908,
643
+ "de": 0.908,
644
+ "es": 0.982,
645
+ "nl": 0.856
646
+ },
647
+ "sae_top_1_test_accuracy": {
648
+ "en": 0.592,
649
+ "fr": 0.996,
650
+ "de": 0.941,
651
+ "es": 0.984,
652
+ "nl": 0.542
653
+ },
654
+ "sae_top_2_test_accuracy": {
655
+ "en": 0.999,
656
+ "fr": 0.995,
657
+ "de": 0.989,
658
+ "es": 0.984,
659
+ "nl": 0.999
660
+ },
661
+ "sae_top_5_test_accuracy": {
662
+ "en": 0.998,
663
+ "fr": 0.995,
664
+ "de": 0.992,
665
+ "es": 0.998,
666
+ "nl": 0.998
667
+ }
668
+ }
669
+ }
670
+ }
eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_2_custom_sae_eval_results.json ADDED
@@ -0,0 +1,670 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "sparse_probing",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "LabHC/bias_in_bios_class_set2",
8
+ "LabHC/bias_in_bios_class_set3",
9
+ "canrager/amazon_reviews_mcauley_1and5",
10
+ "canrager/amazon_reviews_mcauley_1and5_sentiment",
11
+ "codeparrot/github-code",
12
+ "fancyzhx/ag_news",
13
+ "Helsinki-NLP/europarl"
14
+ ],
15
+ "probe_train_set_size": 4000,
16
+ "probe_test_set_size": 1000,
17
+ "context_length": 128,
18
+ "sae_batch_size": 125,
19
+ "llm_batch_size": 32,
20
+ "llm_dtype": "bfloat16",
21
+ "model_name": "gemma-2-2b",
22
+ "k_values": [
23
+ 1,
24
+ 2,
25
+ 5
26
+ ],
27
+ "lower_vram_usage": false
28
+ },
29
+ "eval_id": "0c3396ff-678d-434f-a55d-e45d1dbeb325",
30
+ "datetime_epoch_millis": 1740125381433,
31
+ "eval_result_metrics": {
32
+ "llm": {
33
+ "llm_test_accuracy": 0.9571125406771899,
34
+ "llm_top_1_test_accuracy": 0.6527562499999999,
35
+ "llm_top_2_test_accuracy": 0.7210875,
36
+ "llm_top_5_test_accuracy": 0.7801125,
37
+ "llm_top_10_test_accuracy": null,
38
+ "llm_top_20_test_accuracy": null,
39
+ "llm_top_50_test_accuracy": null,
40
+ "llm_top_100_test_accuracy": null
41
+ },
42
+ "sae": {
43
+ "sae_test_accuracy": 0.9591125443577766,
44
+ "sae_top_1_test_accuracy": 0.7999624999999999,
45
+ "sae_top_2_test_accuracy": 0.8544125,
46
+ "sae_top_5_test_accuracy": 0.89533125,
47
+ "sae_top_10_test_accuracy": null,
48
+ "sae_top_20_test_accuracy": null,
49
+ "sae_top_50_test_accuracy": null,
50
+ "sae_top_100_test_accuracy": null
51
+ }
52
+ },
53
+ "eval_result_details": [
54
+ {
55
+ "dataset_name": "LabHC/bias_in_bios_class_set1_results",
56
+ "llm_test_accuracy": 0.966800057888031,
57
+ "llm_top_1_test_accuracy": 0.6397999999999999,
58
+ "llm_top_2_test_accuracy": 0.6954,
59
+ "llm_top_5_test_accuracy": 0.7869999999999999,
60
+ "llm_top_10_test_accuracy": null,
61
+ "llm_top_20_test_accuracy": null,
62
+ "llm_top_50_test_accuracy": null,
63
+ "llm_top_100_test_accuracy": null,
64
+ "sae_test_accuracy": 0.9630000591278076,
65
+ "sae_top_1_test_accuracy": 0.8278000000000001,
66
+ "sae_top_2_test_accuracy": 0.9036,
67
+ "sae_top_5_test_accuracy": 0.9108,
68
+ "sae_top_10_test_accuracy": null,
69
+ "sae_top_20_test_accuracy": null,
70
+ "sae_top_50_test_accuracy": null,
71
+ "sae_top_100_test_accuracy": null
72
+ },
73
+ {
74
+ "dataset_name": "LabHC/bias_in_bios_class_set2_results",
75
+ "llm_test_accuracy": 0.9502000451087952,
76
+ "llm_top_1_test_accuracy": 0.6718,
77
+ "llm_top_2_test_accuracy": 0.7230000000000001,
78
+ "llm_top_5_test_accuracy": 0.7615999999999999,
79
+ "llm_top_10_test_accuracy": null,
80
+ "llm_top_20_test_accuracy": null,
81
+ "llm_top_50_test_accuracy": null,
82
+ "llm_top_100_test_accuracy": null,
83
+ "sae_test_accuracy": 0.9506000399589538,
84
+ "sae_top_1_test_accuracy": 0.7116,
85
+ "sae_top_2_test_accuracy": 0.7498,
86
+ "sae_top_5_test_accuracy": 0.8626000000000001,
87
+ "sae_top_10_test_accuracy": null,
88
+ "sae_top_20_test_accuracy": null,
89
+ "sae_top_50_test_accuracy": null,
90
+ "sae_top_100_test_accuracy": null
91
+ },
92
+ {
93
+ "dataset_name": "LabHC/bias_in_bios_class_set3_results",
94
+ "llm_test_accuracy": 0.9292000293731689,
95
+ "llm_top_1_test_accuracy": 0.687,
96
+ "llm_top_2_test_accuracy": 0.7306000000000001,
97
+ "llm_top_5_test_accuracy": 0.7644,
98
+ "llm_top_10_test_accuracy": null,
99
+ "llm_top_20_test_accuracy": null,
100
+ "llm_top_50_test_accuracy": null,
101
+ "llm_top_100_test_accuracy": null,
102
+ "sae_test_accuracy": 0.934000039100647,
103
+ "sae_top_1_test_accuracy": 0.7852,
104
+ "sae_top_2_test_accuracy": 0.8220000000000001,
105
+ "sae_top_5_test_accuracy": 0.8664,
106
+ "sae_top_10_test_accuracy": null,
107
+ "sae_top_20_test_accuracy": null,
108
+ "sae_top_50_test_accuracy": null,
109
+ "sae_top_100_test_accuracy": null
110
+ },
111
+ {
112
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results",
113
+ "llm_test_accuracy": 0.9116000413894654,
114
+ "llm_top_1_test_accuracy": 0.6076,
115
+ "llm_top_2_test_accuracy": 0.6492,
116
+ "llm_top_5_test_accuracy": 0.6728000000000001,
117
+ "llm_top_10_test_accuracy": null,
118
+ "llm_top_20_test_accuracy": null,
119
+ "llm_top_50_test_accuracy": null,
120
+ "llm_top_100_test_accuracy": null,
121
+ "sae_test_accuracy": 0.9258000493049622,
122
+ "sae_top_1_test_accuracy": 0.7925999999999999,
123
+ "sae_top_2_test_accuracy": 0.8088000000000001,
124
+ "sae_top_5_test_accuracy": 0.8413999999999999,
125
+ "sae_top_10_test_accuracy": null,
126
+ "sae_top_20_test_accuracy": null,
127
+ "sae_top_50_test_accuracy": null,
128
+ "sae_top_100_test_accuracy": null
129
+ },
130
+ {
131
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results",
132
+ "llm_test_accuracy": 0.9810000360012054,
133
+ "llm_top_1_test_accuracy": 0.673,
134
+ "llm_top_2_test_accuracy": 0.724,
135
+ "llm_top_5_test_accuracy": 0.766,
136
+ "llm_top_10_test_accuracy": null,
137
+ "llm_top_20_test_accuracy": null,
138
+ "llm_top_50_test_accuracy": null,
139
+ "llm_top_100_test_accuracy": null,
140
+ "sae_test_accuracy": 0.9785000383853912,
141
+ "sae_top_1_test_accuracy": 0.885,
142
+ "sae_top_2_test_accuracy": 0.942,
143
+ "sae_top_5_test_accuracy": 0.95,
144
+ "sae_top_10_test_accuracy": null,
145
+ "sae_top_20_test_accuracy": null,
146
+ "sae_top_50_test_accuracy": null,
147
+ "sae_top_100_test_accuracy": null
148
+ },
149
+ {
150
+ "dataset_name": "codeparrot/github-code_results",
151
+ "llm_test_accuracy": 0.9672000527381897,
152
+ "llm_top_1_test_accuracy": 0.6634,
153
+ "llm_top_2_test_accuracy": 0.6894,
154
+ "llm_top_5_test_accuracy": 0.7562,
155
+ "llm_top_10_test_accuracy": null,
156
+ "llm_top_20_test_accuracy": null,
157
+ "llm_top_50_test_accuracy": null,
158
+ "llm_top_100_test_accuracy": null,
159
+ "sae_test_accuracy": 0.9696000576019287,
160
+ "sae_top_1_test_accuracy": 0.7905999999999999,
161
+ "sae_top_2_test_accuracy": 0.8058,
162
+ "sae_top_5_test_accuracy": 0.8725999999999999,
163
+ "sae_top_10_test_accuracy": null,
164
+ "sae_top_20_test_accuracy": null,
165
+ "sae_top_50_test_accuracy": null,
166
+ "sae_top_100_test_accuracy": null
167
+ },
168
+ {
169
+ "dataset_name": "fancyzhx/ag_news_results",
170
+ "llm_test_accuracy": 0.9515000432729721,
171
+ "llm_top_1_test_accuracy": 0.63925,
172
+ "llm_top_2_test_accuracy": 0.7785,
173
+ "llm_top_5_test_accuracy": 0.8225,
174
+ "llm_top_10_test_accuracy": null,
175
+ "llm_top_20_test_accuracy": null,
176
+ "llm_top_50_test_accuracy": null,
177
+ "llm_top_100_test_accuracy": null,
178
+ "sae_test_accuracy": 0.9520000517368317,
179
+ "sae_top_1_test_accuracy": 0.7035,
180
+ "sae_top_2_test_accuracy": 0.8295,
181
+ "sae_top_5_test_accuracy": 0.86225,
182
+ "sae_top_10_test_accuracy": null,
183
+ "sae_top_20_test_accuracy": null,
184
+ "sae_top_50_test_accuracy": null,
185
+ "sae_top_100_test_accuracy": null
186
+ },
187
+ {
188
+ "dataset_name": "Helsinki-NLP/europarl_results",
189
+ "llm_test_accuracy": 0.9994000196456909,
190
+ "llm_top_1_test_accuracy": 0.6401999999999999,
191
+ "llm_top_2_test_accuracy": 0.7786000000000001,
192
+ "llm_top_5_test_accuracy": 0.9103999999999999,
193
+ "llm_top_10_test_accuracy": null,
194
+ "llm_top_20_test_accuracy": null,
195
+ "llm_top_50_test_accuracy": null,
196
+ "llm_top_100_test_accuracy": null,
197
+ "sae_test_accuracy": 0.9994000196456909,
198
+ "sae_top_1_test_accuracy": 0.9033999999999999,
199
+ "sae_top_2_test_accuracy": 0.9738,
200
+ "sae_top_5_test_accuracy": 0.9965999999999999,
201
+ "sae_top_10_test_accuracy": null,
202
+ "sae_top_20_test_accuracy": null,
203
+ "sae_top_50_test_accuracy": null,
204
+ "sae_top_100_test_accuracy": null
205
+ }
206
+ ],
207
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
208
+ "sae_lens_id": "custom_sae",
209
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_2",
210
+ "sae_lens_version": "5.4.2",
211
+ "sae_cfg_dict": {
212
+ "model_name": "gemma-2-2b",
213
+ "d_in": 2304,
214
+ "d_sae": 65536,
215
+ "hook_layer": 12,
216
+ "hook_name": "blocks.12.hook_resid_post",
217
+ "context_size": null,
218
+ "hook_head_index": null,
219
+ "architecture": "standard_april_update",
220
+ "apply_b_dec_to_input": null,
221
+ "finetuning_scaling_factor": null,
222
+ "activation_fn_str": "",
223
+ "prepend_bos": true,
224
+ "normalize_activations": "none",
225
+ "dtype": "bfloat16",
226
+ "device": "",
227
+ "dataset_path": "",
228
+ "dataset_trust_remote_code": true,
229
+ "seqpos_slice": [
230
+ null
231
+ ],
232
+ "training_tokens": -100000,
233
+ "sae_lens_training_version": null,
234
+ "neuronpedia_id": null
235
+ },
236
+ "eval_result_unstructured": {
237
+ "LabHC/bias_in_bios_class_set1_results": {
238
+ "sae_test_accuracy": {
239
+ "0": 0.9450000524520874,
240
+ "1": 0.9620000720024109,
241
+ "2": 0.9540000557899475,
242
+ "6": 0.9850000739097595,
243
+ "9": 0.9690000414848328
244
+ },
245
+ "llm_test_accuracy": {
246
+ "0": 0.9510000348091125,
247
+ "1": 0.9670000672340393,
248
+ "2": 0.9530000686645508,
249
+ "6": 0.987000048160553,
250
+ "9": 0.9760000705718994
251
+ },
252
+ "llm_top_1_test_accuracy": {
253
+ "0": 0.577,
254
+ "1": 0.613,
255
+ "2": 0.662,
256
+ "6": 0.787,
257
+ "9": 0.56
258
+ },
259
+ "llm_top_2_test_accuracy": {
260
+ "0": 0.574,
261
+ "1": 0.66,
262
+ "2": 0.718,
263
+ "6": 0.811,
264
+ "9": 0.714
265
+ },
266
+ "llm_top_5_test_accuracy": {
267
+ "0": 0.713,
268
+ "1": 0.711,
269
+ "2": 0.755,
270
+ "6": 0.895,
271
+ "9": 0.861
272
+ },
273
+ "sae_top_1_test_accuracy": {
274
+ "0": 0.873,
275
+ "1": 0.699,
276
+ "2": 0.796,
277
+ "6": 0.969,
278
+ "9": 0.802
279
+ },
280
+ "sae_top_2_test_accuracy": {
281
+ "0": 0.867,
282
+ "1": 0.821,
283
+ "2": 0.919,
284
+ "6": 0.973,
285
+ "9": 0.938
286
+ },
287
+ "sae_top_5_test_accuracy": {
288
+ "0": 0.88,
289
+ "1": 0.83,
290
+ "2": 0.929,
291
+ "6": 0.971,
292
+ "9": 0.944
293
+ }
294
+ },
295
+ "LabHC/bias_in_bios_class_set2_results": {
296
+ "sae_test_accuracy": {
297
+ "11": 0.9520000219345093,
298
+ "13": 0.956000030040741,
299
+ "14": 0.9580000638961792,
300
+ "18": 0.9220000505447388,
301
+ "19": 0.9650000333786011
302
+ },
303
+ "llm_test_accuracy": {
304
+ "11": 0.9550000429153442,
305
+ "13": 0.9550000429153442,
306
+ "14": 0.9550000429153442,
307
+ "18": 0.9330000281333923,
308
+ "19": 0.9530000686645508
309
+ },
310
+ "llm_top_1_test_accuracy": {
311
+ "11": 0.557,
312
+ "13": 0.673,
313
+ "14": 0.645,
314
+ "18": 0.697,
315
+ "19": 0.787
316
+ },
317
+ "llm_top_2_test_accuracy": {
318
+ "11": 0.705,
319
+ "13": 0.718,
320
+ "14": 0.679,
321
+ "18": 0.73,
322
+ "19": 0.783
323
+ },
324
+ "llm_top_5_test_accuracy": {
325
+ "11": 0.794,
326
+ "13": 0.744,
327
+ "14": 0.724,
328
+ "18": 0.713,
329
+ "19": 0.833
330
+ },
331
+ "sae_top_1_test_accuracy": {
332
+ "11": 0.672,
333
+ "13": 0.695,
334
+ "14": 0.677,
335
+ "18": 0.669,
336
+ "19": 0.845
337
+ },
338
+ "sae_top_2_test_accuracy": {
339
+ "11": 0.741,
340
+ "13": 0.681,
341
+ "14": 0.732,
342
+ "18": 0.701,
343
+ "19": 0.894
344
+ },
345
+ "sae_top_5_test_accuracy": {
346
+ "11": 0.862,
347
+ "13": 0.798,
348
+ "14": 0.897,
349
+ "18": 0.85,
350
+ "19": 0.906
351
+ }
352
+ },
353
+ "LabHC/bias_in_bios_class_set3_results": {
354
+ "sae_test_accuracy": {
355
+ "20": 0.9600000381469727,
356
+ "21": 0.9310000538825989,
357
+ "22": 0.909000039100647,
358
+ "25": 0.9650000333786011,
359
+ "26": 0.9050000309944153
360
+ },
361
+ "llm_test_accuracy": {
362
+ "20": 0.9570000171661377,
363
+ "21": 0.9150000214576721,
364
+ "22": 0.9230000376701355,
365
+ "25": 0.9610000252723694,
366
+ "26": 0.89000004529953
367
+ },
368
+ "llm_top_1_test_accuracy": {
369
+ "20": 0.716,
370
+ "21": 0.761,
371
+ "22": 0.648,
372
+ "25": 0.692,
373
+ "26": 0.618
374
+ },
375
+ "llm_top_2_test_accuracy": {
376
+ "20": 0.805,
377
+ "21": 0.762,
378
+ "22": 0.649,
379
+ "25": 0.766,
380
+ "26": 0.671
381
+ },
382
+ "llm_top_5_test_accuracy": {
383
+ "20": 0.875,
384
+ "21": 0.783,
385
+ "22": 0.711,
386
+ "25": 0.782,
387
+ "26": 0.671
388
+ },
389
+ "sae_top_1_test_accuracy": {
390
+ "20": 0.899,
391
+ "21": 0.627,
392
+ "22": 0.863,
393
+ "25": 0.87,
394
+ "26": 0.667
395
+ },
396
+ "sae_top_2_test_accuracy": {
397
+ "20": 0.909,
398
+ "21": 0.806,
399
+ "22": 0.853,
400
+ "25": 0.867,
401
+ "26": 0.675
402
+ },
403
+ "sae_top_5_test_accuracy": {
404
+ "20": 0.912,
405
+ "21": 0.826,
406
+ "22": 0.873,
407
+ "25": 0.876,
408
+ "26": 0.845
409
+ }
410
+ },
411
+ "canrager/amazon_reviews_mcauley_1and5_results": {
412
+ "sae_test_accuracy": {
413
+ "1": 0.9530000686645508,
414
+ "2": 0.9450000524520874,
415
+ "3": 0.9240000247955322,
416
+ "5": 0.9310000538825989,
417
+ "6": 0.8760000467300415
418
+ },
419
+ "llm_test_accuracy": {
420
+ "1": 0.9460000395774841,
421
+ "2": 0.9330000281333923,
422
+ "3": 0.9130000472068787,
423
+ "5": 0.9160000681877136,
424
+ "6": 0.8500000238418579
425
+ },
426
+ "llm_top_1_test_accuracy": {
427
+ "1": 0.674,
428
+ "2": 0.587,
429
+ "3": 0.601,
430
+ "5": 0.583,
431
+ "6": 0.593
432
+ },
433
+ "llm_top_2_test_accuracy": {
434
+ "1": 0.737,
435
+ "2": 0.632,
436
+ "3": 0.605,
437
+ "5": 0.634,
438
+ "6": 0.638
439
+ },
440
+ "llm_top_5_test_accuracy": {
441
+ "1": 0.763,
442
+ "2": 0.626,
443
+ "3": 0.63,
444
+ "5": 0.656,
445
+ "6": 0.689
446
+ },
447
+ "sae_top_1_test_accuracy": {
448
+ "1": 0.835,
449
+ "2": 0.852,
450
+ "3": 0.692,
451
+ "5": 0.86,
452
+ "6": 0.724
453
+ },
454
+ "sae_top_2_test_accuracy": {
455
+ "1": 0.906,
456
+ "2": 0.855,
457
+ "3": 0.679,
458
+ "5": 0.871,
459
+ "6": 0.733
460
+ },
461
+ "sae_top_5_test_accuracy": {
462
+ "1": 0.907,
463
+ "2": 0.885,
464
+ "3": 0.76,
465
+ "5": 0.891,
466
+ "6": 0.764
467
+ }
468
+ },
469
+ "canrager/amazon_reviews_mcauley_1and5_sentiment_results": {
470
+ "sae_test_accuracy": {
471
+ "1.0": 0.9780000448226929,
472
+ "5.0": 0.9790000319480896
473
+ },
474
+ "llm_test_accuracy": {
475
+ "1.0": 0.9820000529289246,
476
+ "5.0": 0.9800000190734863
477
+ },
478
+ "llm_top_1_test_accuracy": {
479
+ "1.0": 0.673,
480
+ "5.0": 0.673
481
+ },
482
+ "llm_top_2_test_accuracy": {
483
+ "1.0": 0.724,
484
+ "5.0": 0.724
485
+ },
486
+ "llm_top_5_test_accuracy": {
487
+ "1.0": 0.766,
488
+ "5.0": 0.766
489
+ },
490
+ "sae_top_1_test_accuracy": {
491
+ "1.0": 0.885,
492
+ "5.0": 0.885
493
+ },
494
+ "sae_top_2_test_accuracy": {
495
+ "1.0": 0.942,
496
+ "5.0": 0.942
497
+ },
498
+ "sae_top_5_test_accuracy": {
499
+ "1.0": 0.95,
500
+ "5.0": 0.95
501
+ }
502
+ },
503
+ "codeparrot/github-code_results": {
504
+ "sae_test_accuracy": {
505
+ "C": 0.9520000219345093,
506
+ "Python": 0.9900000691413879,
507
+ "HTML": 0.9850000739097595,
508
+ "Java": 0.9620000720024109,
509
+ "PHP": 0.9590000510215759
510
+ },
511
+ "llm_test_accuracy": {
512
+ "C": 0.9450000524520874,
513
+ "Python": 0.9890000224113464,
514
+ "HTML": 0.987000048160553,
515
+ "Java": 0.9620000720024109,
516
+ "PHP": 0.9530000686645508
517
+ },
518
+ "llm_top_1_test_accuracy": {
519
+ "C": 0.669,
520
+ "Python": 0.638,
521
+ "HTML": 0.788,
522
+ "Java": 0.621,
523
+ "PHP": 0.601
524
+ },
525
+ "llm_top_2_test_accuracy": {
526
+ "C": 0.656,
527
+ "Python": 0.671,
528
+ "HTML": 0.811,
529
+ "Java": 0.678,
530
+ "PHP": 0.631
531
+ },
532
+ "llm_top_5_test_accuracy": {
533
+ "C": 0.744,
534
+ "Python": 0.735,
535
+ "HTML": 0.904,
536
+ "Java": 0.726,
537
+ "PHP": 0.672
538
+ },
539
+ "sae_top_1_test_accuracy": {
540
+ "C": 0.613,
541
+ "Python": 0.919,
542
+ "HTML": 0.909,
543
+ "Java": 0.667,
544
+ "PHP": 0.845
545
+ },
546
+ "sae_top_2_test_accuracy": {
547
+ "C": 0.62,
548
+ "Python": 0.917,
549
+ "HTML": 0.918,
550
+ "Java": 0.678,
551
+ "PHP": 0.896
552
+ },
553
+ "sae_top_5_test_accuracy": {
554
+ "C": 0.771,
555
+ "Python": 0.92,
556
+ "HTML": 0.92,
557
+ "Java": 0.849,
558
+ "PHP": 0.903
559
+ }
560
+ },
561
+ "fancyzhx/ag_news_results": {
562
+ "sae_test_accuracy": {
563
+ "0": 0.9410000443458557,
564
+ "1": 0.9860000610351562,
565
+ "2": 0.9320000410079956,
566
+ "3": 0.9490000605583191
567
+ },
568
+ "llm_test_accuracy": {
569
+ "0": 0.937000036239624,
570
+ "1": 0.987000048160553,
571
+ "2": 0.9300000667572021,
572
+ "3": 0.9520000219345093
573
+ },
574
+ "llm_top_1_test_accuracy": {
575
+ "0": 0.586,
576
+ "1": 0.658,
577
+ "2": 0.669,
578
+ "3": 0.644
579
+ },
580
+ "llm_top_2_test_accuracy": {
581
+ "0": 0.809,
582
+ "1": 0.8,
583
+ "2": 0.691,
584
+ "3": 0.814
585
+ },
586
+ "llm_top_5_test_accuracy": {
587
+ "0": 0.827,
588
+ "1": 0.877,
589
+ "2": 0.748,
590
+ "3": 0.838
591
+ },
592
+ "sae_top_1_test_accuracy": {
593
+ "0": 0.699,
594
+ "1": 0.791,
595
+ "2": 0.664,
596
+ "3": 0.66
597
+ },
598
+ "sae_top_2_test_accuracy": {
599
+ "0": 0.771,
600
+ "1": 0.93,
601
+ "2": 0.801,
602
+ "3": 0.816
603
+ },
604
+ "sae_top_5_test_accuracy": {
605
+ "0": 0.824,
606
+ "1": 0.937,
607
+ "2": 0.815,
608
+ "3": 0.873
609
+ }
610
+ },
611
+ "Helsinki-NLP/europarl_results": {
612
+ "sae_test_accuracy": {
613
+ "en": 1.0,
614
+ "fr": 1.0,
615
+ "de": 1.0,
616
+ "es": 0.999000072479248,
617
+ "nl": 0.9980000257492065
618
+ },
619
+ "llm_test_accuracy": {
620
+ "en": 1.0,
621
+ "fr": 1.0,
622
+ "de": 1.0,
623
+ "es": 0.9980000257492065,
624
+ "nl": 0.999000072479248
625
+ },
626
+ "llm_top_1_test_accuracy": {
627
+ "en": 0.742,
628
+ "fr": 0.575,
629
+ "de": 0.737,
630
+ "es": 0.504,
631
+ "nl": 0.643
632
+ },
633
+ "llm_top_2_test_accuracy": {
634
+ "en": 0.822,
635
+ "fr": 0.593,
636
+ "de": 0.83,
637
+ "es": 0.905,
638
+ "nl": 0.743
639
+ },
640
+ "llm_top_5_test_accuracy": {
641
+ "en": 0.898,
642
+ "fr": 0.908,
643
+ "de": 0.908,
644
+ "es": 0.982,
645
+ "nl": 0.856
646
+ },
647
+ "sae_top_1_test_accuracy": {
648
+ "en": 0.625,
649
+ "fr": 0.992,
650
+ "de": 0.924,
651
+ "es": 0.976,
652
+ "nl": 1.0
653
+ },
654
+ "sae_top_2_test_accuracy": {
655
+ "en": 0.978,
656
+ "fr": 0.987,
657
+ "de": 0.925,
658
+ "es": 0.981,
659
+ "nl": 0.998
660
+ },
661
+ "sae_top_5_test_accuracy": {
662
+ "en": 0.998,
663
+ "fr": 0.995,
664
+ "de": 0.996,
665
+ "es": 0.995,
666
+ "nl": 0.999
667
+ }
668
+ }
669
+ }
670
+ }
eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_3_custom_sae_eval_results.json ADDED
@@ -0,0 +1,670 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "sparse_probing",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "LabHC/bias_in_bios_class_set2",
8
+ "LabHC/bias_in_bios_class_set3",
9
+ "canrager/amazon_reviews_mcauley_1and5",
10
+ "canrager/amazon_reviews_mcauley_1and5_sentiment",
11
+ "codeparrot/github-code",
12
+ "fancyzhx/ag_news",
13
+ "Helsinki-NLP/europarl"
14
+ ],
15
+ "probe_train_set_size": 4000,
16
+ "probe_test_set_size": 1000,
17
+ "context_length": 128,
18
+ "sae_batch_size": 125,
19
+ "llm_batch_size": 32,
20
+ "llm_dtype": "bfloat16",
21
+ "model_name": "gemma-2-2b",
22
+ "k_values": [
23
+ 1,
24
+ 2,
25
+ 5
26
+ ],
27
+ "lower_vram_usage": false
28
+ },
29
+ "eval_id": "66fb8796-995b-4926-9e12-56e842e84aea",
30
+ "datetime_epoch_millis": 1740125497759,
31
+ "eval_result_metrics": {
32
+ "llm": {
33
+ "llm_test_accuracy": 0.9571125406771899,
34
+ "llm_top_1_test_accuracy": 0.6527562499999999,
35
+ "llm_top_2_test_accuracy": 0.7210875,
36
+ "llm_top_5_test_accuracy": 0.7801125,
37
+ "llm_top_10_test_accuracy": null,
38
+ "llm_top_20_test_accuracy": null,
39
+ "llm_top_50_test_accuracy": null,
40
+ "llm_top_100_test_accuracy": null
41
+ },
42
+ "sae": {
43
+ "sae_test_accuracy": 0.9568437989801168,
44
+ "sae_top_1_test_accuracy": 0.80621875,
45
+ "sae_top_2_test_accuracy": 0.856725,
46
+ "sae_top_5_test_accuracy": 0.8963875000000001,
47
+ "sae_top_10_test_accuracy": null,
48
+ "sae_top_20_test_accuracy": null,
49
+ "sae_top_50_test_accuracy": null,
50
+ "sae_top_100_test_accuracy": null
51
+ }
52
+ },
53
+ "eval_result_details": [
54
+ {
55
+ "dataset_name": "LabHC/bias_in_bios_class_set1_results",
56
+ "llm_test_accuracy": 0.966800057888031,
57
+ "llm_top_1_test_accuracy": 0.6397999999999999,
58
+ "llm_top_2_test_accuracy": 0.6954,
59
+ "llm_top_5_test_accuracy": 0.7869999999999999,
60
+ "llm_top_10_test_accuracy": null,
61
+ "llm_top_20_test_accuracy": null,
62
+ "llm_top_50_test_accuracy": null,
63
+ "llm_top_100_test_accuracy": null,
64
+ "sae_test_accuracy": 0.962000060081482,
65
+ "sae_top_1_test_accuracy": 0.8282,
66
+ "sae_top_2_test_accuracy": 0.8806,
67
+ "sae_top_5_test_accuracy": 0.9039999999999999,
68
+ "sae_top_10_test_accuracy": null,
69
+ "sae_top_20_test_accuracy": null,
70
+ "sae_top_50_test_accuracy": null,
71
+ "sae_top_100_test_accuracy": null
72
+ },
73
+ {
74
+ "dataset_name": "LabHC/bias_in_bios_class_set2_results",
75
+ "llm_test_accuracy": 0.9502000451087952,
76
+ "llm_top_1_test_accuracy": 0.6718,
77
+ "llm_top_2_test_accuracy": 0.7230000000000001,
78
+ "llm_top_5_test_accuracy": 0.7615999999999999,
79
+ "llm_top_10_test_accuracy": null,
80
+ "llm_top_20_test_accuracy": null,
81
+ "llm_top_50_test_accuracy": null,
82
+ "llm_top_100_test_accuracy": null,
83
+ "sae_test_accuracy": 0.9482000470161438,
84
+ "sae_top_1_test_accuracy": 0.7472,
85
+ "sae_top_2_test_accuracy": 0.7638,
86
+ "sae_top_5_test_accuracy": 0.8672000000000001,
87
+ "sae_top_10_test_accuracy": null,
88
+ "sae_top_20_test_accuracy": null,
89
+ "sae_top_50_test_accuracy": null,
90
+ "sae_top_100_test_accuracy": null
91
+ },
92
+ {
93
+ "dataset_name": "LabHC/bias_in_bios_class_set3_results",
94
+ "llm_test_accuracy": 0.9292000293731689,
95
+ "llm_top_1_test_accuracy": 0.687,
96
+ "llm_top_2_test_accuracy": 0.7306000000000001,
97
+ "llm_top_5_test_accuracy": 0.7644,
98
+ "llm_top_10_test_accuracy": null,
99
+ "llm_top_20_test_accuracy": null,
100
+ "llm_top_50_test_accuracy": null,
101
+ "llm_top_100_test_accuracy": null,
102
+ "sae_test_accuracy": 0.9358000516891479,
103
+ "sae_top_1_test_accuracy": 0.8134,
104
+ "sae_top_2_test_accuracy": 0.8245999999999999,
105
+ "sae_top_5_test_accuracy": 0.875,
106
+ "sae_top_10_test_accuracy": null,
107
+ "sae_top_20_test_accuracy": null,
108
+ "sae_top_50_test_accuracy": null,
109
+ "sae_top_100_test_accuracy": null
110
+ },
111
+ {
112
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results",
113
+ "llm_test_accuracy": 0.9116000413894654,
114
+ "llm_top_1_test_accuracy": 0.6076,
115
+ "llm_top_2_test_accuracy": 0.6492,
116
+ "llm_top_5_test_accuracy": 0.6728000000000001,
117
+ "llm_top_10_test_accuracy": null,
118
+ "llm_top_20_test_accuracy": null,
119
+ "llm_top_50_test_accuracy": null,
120
+ "llm_top_100_test_accuracy": null,
121
+ "sae_test_accuracy": 0.9162000298500061,
122
+ "sae_top_1_test_accuracy": 0.7706000000000001,
123
+ "sae_top_2_test_accuracy": 0.8220000000000001,
124
+ "sae_top_5_test_accuracy": 0.853,
125
+ "sae_top_10_test_accuracy": null,
126
+ "sae_top_20_test_accuracy": null,
127
+ "sae_top_50_test_accuracy": null,
128
+ "sae_top_100_test_accuracy": null
129
+ },
130
+ {
131
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results",
132
+ "llm_test_accuracy": 0.9810000360012054,
133
+ "llm_top_1_test_accuracy": 0.673,
134
+ "llm_top_2_test_accuracy": 0.724,
135
+ "llm_top_5_test_accuracy": 0.766,
136
+ "llm_top_10_test_accuracy": null,
137
+ "llm_top_20_test_accuracy": null,
138
+ "llm_top_50_test_accuracy": null,
139
+ "llm_top_100_test_accuracy": null,
140
+ "sae_test_accuracy": 0.9745000302791595,
141
+ "sae_top_1_test_accuracy": 0.906,
142
+ "sae_top_2_test_accuracy": 0.95,
143
+ "sae_top_5_test_accuracy": 0.95,
144
+ "sae_top_10_test_accuracy": null,
145
+ "sae_top_20_test_accuracy": null,
146
+ "sae_top_50_test_accuracy": null,
147
+ "sae_top_100_test_accuracy": null
148
+ },
149
+ {
150
+ "dataset_name": "codeparrot/github-code_results",
151
+ "llm_test_accuracy": 0.9672000527381897,
152
+ "llm_top_1_test_accuracy": 0.6634,
153
+ "llm_top_2_test_accuracy": 0.6894,
154
+ "llm_top_5_test_accuracy": 0.7562,
155
+ "llm_top_10_test_accuracy": null,
156
+ "llm_top_20_test_accuracy": null,
157
+ "llm_top_50_test_accuracy": null,
158
+ "llm_top_100_test_accuracy": null,
159
+ "sae_test_accuracy": 0.9696000576019287,
160
+ "sae_top_1_test_accuracy": 0.8046,
161
+ "sae_top_2_test_accuracy": 0.849,
162
+ "sae_top_5_test_accuracy": 0.8756,
163
+ "sae_top_10_test_accuracy": null,
164
+ "sae_top_20_test_accuracy": null,
165
+ "sae_top_50_test_accuracy": null,
166
+ "sae_top_100_test_accuracy": null
167
+ },
168
+ {
169
+ "dataset_name": "fancyzhx/ag_news_results",
170
+ "llm_test_accuracy": 0.9515000432729721,
171
+ "llm_top_1_test_accuracy": 0.63925,
172
+ "llm_top_2_test_accuracy": 0.7785,
173
+ "llm_top_5_test_accuracy": 0.8225,
174
+ "llm_top_10_test_accuracy": null,
175
+ "llm_top_20_test_accuracy": null,
176
+ "llm_top_50_test_accuracy": null,
177
+ "llm_top_100_test_accuracy": null,
178
+ "sae_test_accuracy": 0.9492500573396683,
179
+ "sae_top_1_test_accuracy": 0.70175,
180
+ "sae_top_2_test_accuracy": 0.8,
181
+ "sae_top_5_test_accuracy": 0.8544999999999999,
182
+ "sae_top_10_test_accuracy": null,
183
+ "sae_top_20_test_accuracy": null,
184
+ "sae_top_50_test_accuracy": null,
185
+ "sae_top_100_test_accuracy": null
186
+ },
187
+ {
188
+ "dataset_name": "Helsinki-NLP/europarl_results",
189
+ "llm_test_accuracy": 0.9994000196456909,
190
+ "llm_top_1_test_accuracy": 0.6401999999999999,
191
+ "llm_top_2_test_accuracy": 0.7786000000000001,
192
+ "llm_top_5_test_accuracy": 0.9103999999999999,
193
+ "llm_top_10_test_accuracy": null,
194
+ "llm_top_20_test_accuracy": null,
195
+ "llm_top_50_test_accuracy": null,
196
+ "llm_top_100_test_accuracy": null,
197
+ "sae_test_accuracy": 0.9992000579833984,
198
+ "sae_top_1_test_accuracy": 0.8779999999999999,
199
+ "sae_top_2_test_accuracy": 0.9638,
200
+ "sae_top_5_test_accuracy": 0.9918000000000001,
201
+ "sae_top_10_test_accuracy": null,
202
+ "sae_top_20_test_accuracy": null,
203
+ "sae_top_50_test_accuracy": null,
204
+ "sae_top_100_test_accuracy": null
205
+ }
206
+ ],
207
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
208
+ "sae_lens_id": "custom_sae",
209
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_3",
210
+ "sae_lens_version": "5.4.2",
211
+ "sae_cfg_dict": {
212
+ "model_name": "gemma-2-2b",
213
+ "d_in": 2304,
214
+ "d_sae": 65536,
215
+ "hook_layer": 12,
216
+ "hook_name": "blocks.12.hook_resid_post",
217
+ "context_size": null,
218
+ "hook_head_index": null,
219
+ "architecture": "standard_april_update",
220
+ "apply_b_dec_to_input": null,
221
+ "finetuning_scaling_factor": null,
222
+ "activation_fn_str": "",
223
+ "prepend_bos": true,
224
+ "normalize_activations": "none",
225
+ "dtype": "bfloat16",
226
+ "device": "",
227
+ "dataset_path": "",
228
+ "dataset_trust_remote_code": true,
229
+ "seqpos_slice": [
230
+ null
231
+ ],
232
+ "training_tokens": -100000,
233
+ "sae_lens_training_version": null,
234
+ "neuronpedia_id": null
235
+ },
236
+ "eval_result_unstructured": {
237
+ "LabHC/bias_in_bios_class_set1_results": {
238
+ "sae_test_accuracy": {
239
+ "0": 0.9440000653266907,
240
+ "1": 0.9590000510215759,
241
+ "2": 0.9500000476837158,
242
+ "6": 0.9860000610351562,
243
+ "9": 0.971000075340271
244
+ },
245
+ "llm_test_accuracy": {
246
+ "0": 0.9510000348091125,
247
+ "1": 0.9670000672340393,
248
+ "2": 0.9530000686645508,
249
+ "6": 0.987000048160553,
250
+ "9": 0.9760000705718994
251
+ },
252
+ "llm_top_1_test_accuracy": {
253
+ "0": 0.577,
254
+ "1": 0.613,
255
+ "2": 0.662,
256
+ "6": 0.787,
257
+ "9": 0.56
258
+ },
259
+ "llm_top_2_test_accuracy": {
260
+ "0": 0.574,
261
+ "1": 0.66,
262
+ "2": 0.718,
263
+ "6": 0.811,
264
+ "9": 0.714
265
+ },
266
+ "llm_top_5_test_accuracy": {
267
+ "0": 0.713,
268
+ "1": 0.711,
269
+ "2": 0.755,
270
+ "6": 0.895,
271
+ "9": 0.861
272
+ },
273
+ "sae_top_1_test_accuracy": {
274
+ "0": 0.88,
275
+ "1": 0.672,
276
+ "2": 0.809,
277
+ "6": 0.979,
278
+ "9": 0.801
279
+ },
280
+ "sae_top_2_test_accuracy": {
281
+ "0": 0.887,
282
+ "1": 0.814,
283
+ "2": 0.855,
284
+ "6": 0.976,
285
+ "9": 0.871
286
+ },
287
+ "sae_top_5_test_accuracy": {
288
+ "0": 0.885,
289
+ "1": 0.809,
290
+ "2": 0.92,
291
+ "6": 0.986,
292
+ "9": 0.92
293
+ }
294
+ },
295
+ "LabHC/bias_in_bios_class_set2_results": {
296
+ "sae_test_accuracy": {
297
+ "11": 0.9590000510215759,
298
+ "13": 0.9500000476837158,
299
+ "14": 0.9460000395774841,
300
+ "18": 0.9220000505447388,
301
+ "19": 0.9640000462532043
302
+ },
303
+ "llm_test_accuracy": {
304
+ "11": 0.9550000429153442,
305
+ "13": 0.9550000429153442,
306
+ "14": 0.9550000429153442,
307
+ "18": 0.9330000281333923,
308
+ "19": 0.9530000686645508
309
+ },
310
+ "llm_top_1_test_accuracy": {
311
+ "11": 0.557,
312
+ "13": 0.673,
313
+ "14": 0.645,
314
+ "18": 0.697,
315
+ "19": 0.787
316
+ },
317
+ "llm_top_2_test_accuracy": {
318
+ "11": 0.705,
319
+ "13": 0.718,
320
+ "14": 0.679,
321
+ "18": 0.73,
322
+ "19": 0.783
323
+ },
324
+ "llm_top_5_test_accuracy": {
325
+ "11": 0.794,
326
+ "13": 0.744,
327
+ "14": 0.724,
328
+ "18": 0.713,
329
+ "19": 0.833
330
+ },
331
+ "sae_top_1_test_accuracy": {
332
+ "11": 0.748,
333
+ "13": 0.692,
334
+ "14": 0.748,
335
+ "18": 0.705,
336
+ "19": 0.843
337
+ },
338
+ "sae_top_2_test_accuracy": {
339
+ "11": 0.735,
340
+ "13": 0.692,
341
+ "14": 0.789,
342
+ "18": 0.704,
343
+ "19": 0.899
344
+ },
345
+ "sae_top_5_test_accuracy": {
346
+ "11": 0.9,
347
+ "13": 0.756,
348
+ "14": 0.866,
349
+ "18": 0.914,
350
+ "19": 0.9
351
+ }
352
+ },
353
+ "LabHC/bias_in_bios_class_set3_results": {
354
+ "sae_test_accuracy": {
355
+ "20": 0.9550000429153442,
356
+ "21": 0.9220000505447388,
357
+ "22": 0.9270000457763672,
358
+ "25": 0.9630000591278076,
359
+ "26": 0.9120000600814819
360
+ },
361
+ "llm_test_accuracy": {
362
+ "20": 0.9570000171661377,
363
+ "21": 0.9150000214576721,
364
+ "22": 0.9230000376701355,
365
+ "25": 0.9610000252723694,
366
+ "26": 0.89000004529953
367
+ },
368
+ "llm_top_1_test_accuracy": {
369
+ "20": 0.716,
370
+ "21": 0.761,
371
+ "22": 0.648,
372
+ "25": 0.692,
373
+ "26": 0.618
374
+ },
375
+ "llm_top_2_test_accuracy": {
376
+ "20": 0.805,
377
+ "21": 0.762,
378
+ "22": 0.649,
379
+ "25": 0.766,
380
+ "26": 0.671
381
+ },
382
+ "llm_top_5_test_accuracy": {
383
+ "20": 0.875,
384
+ "21": 0.783,
385
+ "22": 0.711,
386
+ "25": 0.782,
387
+ "26": 0.671
388
+ },
389
+ "sae_top_1_test_accuracy": {
390
+ "20": 0.895,
391
+ "21": 0.759,
392
+ "22": 0.873,
393
+ "25": 0.869,
394
+ "26": 0.671
395
+ },
396
+ "sae_top_2_test_accuracy": {
397
+ "20": 0.904,
398
+ "21": 0.798,
399
+ "22": 0.876,
400
+ "25": 0.873,
401
+ "26": 0.672
402
+ },
403
+ "sae_top_5_test_accuracy": {
404
+ "20": 0.936,
405
+ "21": 0.846,
406
+ "22": 0.886,
407
+ "25": 0.877,
408
+ "26": 0.83
409
+ }
410
+ },
411
+ "canrager/amazon_reviews_mcauley_1and5_results": {
412
+ "sae_test_accuracy": {
413
+ "1": 0.9520000219345093,
414
+ "2": 0.9350000619888306,
415
+ "3": 0.9150000214576721,
416
+ "5": 0.9050000309944153,
417
+ "6": 0.8740000128746033
418
+ },
419
+ "llm_test_accuracy": {
420
+ "1": 0.9460000395774841,
421
+ "2": 0.9330000281333923,
422
+ "3": 0.9130000472068787,
423
+ "5": 0.9160000681877136,
424
+ "6": 0.8500000238418579
425
+ },
426
+ "llm_top_1_test_accuracy": {
427
+ "1": 0.674,
428
+ "2": 0.587,
429
+ "3": 0.601,
430
+ "5": 0.583,
431
+ "6": 0.593
432
+ },
433
+ "llm_top_2_test_accuracy": {
434
+ "1": 0.737,
435
+ "2": 0.632,
436
+ "3": 0.605,
437
+ "5": 0.634,
438
+ "6": 0.638
439
+ },
440
+ "llm_top_5_test_accuracy": {
441
+ "1": 0.763,
442
+ "2": 0.626,
443
+ "3": 0.63,
444
+ "5": 0.656,
445
+ "6": 0.689
446
+ },
447
+ "sae_top_1_test_accuracy": {
448
+ "1": 0.856,
449
+ "2": 0.791,
450
+ "3": 0.689,
451
+ "5": 0.877,
452
+ "6": 0.64
453
+ },
454
+ "sae_top_2_test_accuracy": {
455
+ "1": 0.867,
456
+ "2": 0.867,
457
+ "3": 0.765,
458
+ "5": 0.876,
459
+ "6": 0.735
460
+ },
461
+ "sae_top_5_test_accuracy": {
462
+ "1": 0.916,
463
+ "2": 0.882,
464
+ "3": 0.813,
465
+ "5": 0.888,
466
+ "6": 0.766
467
+ }
468
+ },
469
+ "canrager/amazon_reviews_mcauley_1and5_sentiment_results": {
470
+ "sae_test_accuracy": {
471
+ "1.0": 0.9750000238418579,
472
+ "5.0": 0.9740000367164612
473
+ },
474
+ "llm_test_accuracy": {
475
+ "1.0": 0.9820000529289246,
476
+ "5.0": 0.9800000190734863
477
+ },
478
+ "llm_top_1_test_accuracy": {
479
+ "1.0": 0.673,
480
+ "5.0": 0.673
481
+ },
482
+ "llm_top_2_test_accuracy": {
483
+ "1.0": 0.724,
484
+ "5.0": 0.724
485
+ },
486
+ "llm_top_5_test_accuracy": {
487
+ "1.0": 0.766,
488
+ "5.0": 0.766
489
+ },
490
+ "sae_top_1_test_accuracy": {
491
+ "1.0": 0.906,
492
+ "5.0": 0.906
493
+ },
494
+ "sae_top_2_test_accuracy": {
495
+ "1.0": 0.95,
496
+ "5.0": 0.95
497
+ },
498
+ "sae_top_5_test_accuracy": {
499
+ "1.0": 0.95,
500
+ "5.0": 0.95
501
+ }
502
+ },
503
+ "codeparrot/github-code_results": {
504
+ "sae_test_accuracy": {
505
+ "C": 0.9580000638961792,
506
+ "Python": 0.9820000529289246,
507
+ "HTML": 0.9860000610351562,
508
+ "Java": 0.9640000462532043,
509
+ "PHP": 0.9580000638961792
510
+ },
511
+ "llm_test_accuracy": {
512
+ "C": 0.9450000524520874,
513
+ "Python": 0.9890000224113464,
514
+ "HTML": 0.987000048160553,
515
+ "Java": 0.9620000720024109,
516
+ "PHP": 0.9530000686645508
517
+ },
518
+ "llm_top_1_test_accuracy": {
519
+ "C": 0.669,
520
+ "Python": 0.638,
521
+ "HTML": 0.788,
522
+ "Java": 0.621,
523
+ "PHP": 0.601
524
+ },
525
+ "llm_top_2_test_accuracy": {
526
+ "C": 0.656,
527
+ "Python": 0.671,
528
+ "HTML": 0.811,
529
+ "Java": 0.678,
530
+ "PHP": 0.631
531
+ },
532
+ "llm_top_5_test_accuracy": {
533
+ "C": 0.744,
534
+ "Python": 0.735,
535
+ "HTML": 0.904,
536
+ "Java": 0.726,
537
+ "PHP": 0.672
538
+ },
539
+ "sae_top_1_test_accuracy": {
540
+ "C": 0.61,
541
+ "Python": 0.945,
542
+ "HTML": 0.894,
543
+ "Java": 0.676,
544
+ "PHP": 0.898
545
+ },
546
+ "sae_top_2_test_accuracy": {
547
+ "C": 0.689,
548
+ "Python": 0.944,
549
+ "HTML": 0.892,
550
+ "Java": 0.804,
551
+ "PHP": 0.916
552
+ },
553
+ "sae_top_5_test_accuracy": {
554
+ "C": 0.751,
555
+ "Python": 0.95,
556
+ "HTML": 0.914,
557
+ "Java": 0.848,
558
+ "PHP": 0.915
559
+ }
560
+ },
561
+ "fancyzhx/ag_news_results": {
562
+ "sae_test_accuracy": {
563
+ "0": 0.9360000491142273,
564
+ "1": 0.9810000658035278,
565
+ "2": 0.9320000410079956,
566
+ "3": 0.9480000734329224
567
+ },
568
+ "llm_test_accuracy": {
569
+ "0": 0.937000036239624,
570
+ "1": 0.987000048160553,
571
+ "2": 0.9300000667572021,
572
+ "3": 0.9520000219345093
573
+ },
574
+ "llm_top_1_test_accuracy": {
575
+ "0": 0.586,
576
+ "1": 0.658,
577
+ "2": 0.669,
578
+ "3": 0.644
579
+ },
580
+ "llm_top_2_test_accuracy": {
581
+ "0": 0.809,
582
+ "1": 0.8,
583
+ "2": 0.691,
584
+ "3": 0.814
585
+ },
586
+ "llm_top_5_test_accuracy": {
587
+ "0": 0.827,
588
+ "1": 0.877,
589
+ "2": 0.748,
590
+ "3": 0.838
591
+ },
592
+ "sae_top_1_test_accuracy": {
593
+ "0": 0.783,
594
+ "1": 0.681,
595
+ "2": 0.658,
596
+ "3": 0.685
597
+ },
598
+ "sae_top_2_test_accuracy": {
599
+ "0": 0.803,
600
+ "1": 0.939,
601
+ "2": 0.655,
602
+ "3": 0.803
603
+ },
604
+ "sae_top_5_test_accuracy": {
605
+ "0": 0.828,
606
+ "1": 0.936,
607
+ "2": 0.778,
608
+ "3": 0.876
609
+ }
610
+ },
611
+ "Helsinki-NLP/europarl_results": {
612
+ "sae_test_accuracy": {
613
+ "en": 0.999000072479248,
614
+ "fr": 0.999000072479248,
615
+ "de": 0.999000072479248,
616
+ "es": 0.999000072479248,
617
+ "nl": 1.0
618
+ },
619
+ "llm_test_accuracy": {
620
+ "en": 1.0,
621
+ "fr": 1.0,
622
+ "de": 1.0,
623
+ "es": 0.9980000257492065,
624
+ "nl": 0.999000072479248
625
+ },
626
+ "llm_top_1_test_accuracy": {
627
+ "en": 0.742,
628
+ "fr": 0.575,
629
+ "de": 0.737,
630
+ "es": 0.504,
631
+ "nl": 0.643
632
+ },
633
+ "llm_top_2_test_accuracy": {
634
+ "en": 0.822,
635
+ "fr": 0.593,
636
+ "de": 0.83,
637
+ "es": 0.905,
638
+ "nl": 0.743
639
+ },
640
+ "llm_top_5_test_accuracy": {
641
+ "en": 0.898,
642
+ "fr": 0.908,
643
+ "de": 0.908,
644
+ "es": 0.982,
645
+ "nl": 0.856
646
+ },
647
+ "sae_top_1_test_accuracy": {
648
+ "en": 0.6,
649
+ "fr": 0.991,
650
+ "de": 0.962,
651
+ "es": 0.84,
652
+ "nl": 0.997
653
+ },
654
+ "sae_top_2_test_accuracy": {
655
+ "en": 0.904,
656
+ "fr": 0.988,
657
+ "de": 0.96,
658
+ "es": 0.97,
659
+ "nl": 0.997
660
+ },
661
+ "sae_top_5_test_accuracy": {
662
+ "en": 1.0,
663
+ "fr": 0.996,
664
+ "de": 0.97,
665
+ "es": 0.995,
666
+ "nl": 0.998
667
+ }
668
+ }
669
+ }
670
+ }
eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_4_custom_sae_eval_results.json ADDED
@@ -0,0 +1,670 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "sparse_probing",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "LabHC/bias_in_bios_class_set2",
8
+ "LabHC/bias_in_bios_class_set3",
9
+ "canrager/amazon_reviews_mcauley_1and5",
10
+ "canrager/amazon_reviews_mcauley_1and5_sentiment",
11
+ "codeparrot/github-code",
12
+ "fancyzhx/ag_news",
13
+ "Helsinki-NLP/europarl"
14
+ ],
15
+ "probe_train_set_size": 4000,
16
+ "probe_test_set_size": 1000,
17
+ "context_length": 128,
18
+ "sae_batch_size": 125,
19
+ "llm_batch_size": 32,
20
+ "llm_dtype": "bfloat16",
21
+ "model_name": "gemma-2-2b",
22
+ "k_values": [
23
+ 1,
24
+ 2,
25
+ 5
26
+ ],
27
+ "lower_vram_usage": false
28
+ },
29
+ "eval_id": "1ac64c53-0cba-4715-a78d-04f957ab7e92",
30
+ "datetime_epoch_millis": 1740125143517,
31
+ "eval_result_metrics": {
32
+ "llm": {
33
+ "llm_test_accuracy": 0.9571125406771899,
34
+ "llm_top_1_test_accuracy": 0.6527562499999999,
35
+ "llm_top_2_test_accuracy": 0.7210875,
36
+ "llm_top_5_test_accuracy": 0.7801125,
37
+ "llm_top_10_test_accuracy": null,
38
+ "llm_top_20_test_accuracy": null,
39
+ "llm_top_50_test_accuracy": null,
40
+ "llm_top_100_test_accuracy": null
41
+ },
42
+ "sae": {
43
+ "sae_test_accuracy": 0.9553937904536725,
44
+ "sae_top_1_test_accuracy": 0.8052124999999999,
45
+ "sae_top_2_test_accuracy": 0.8443125,
46
+ "sae_top_5_test_accuracy": 0.89501875,
47
+ "sae_top_10_test_accuracy": null,
48
+ "sae_top_20_test_accuracy": null,
49
+ "sae_top_50_test_accuracy": null,
50
+ "sae_top_100_test_accuracy": null
51
+ }
52
+ },
53
+ "eval_result_details": [
54
+ {
55
+ "dataset_name": "LabHC/bias_in_bios_class_set1_results",
56
+ "llm_test_accuracy": 0.966800057888031,
57
+ "llm_top_1_test_accuracy": 0.6397999999999999,
58
+ "llm_top_2_test_accuracy": 0.6954,
59
+ "llm_top_5_test_accuracy": 0.7869999999999999,
60
+ "llm_top_10_test_accuracy": null,
61
+ "llm_top_20_test_accuracy": null,
62
+ "llm_top_50_test_accuracy": null,
63
+ "llm_top_100_test_accuracy": null,
64
+ "sae_test_accuracy": 0.962600028514862,
65
+ "sae_top_1_test_accuracy": 0.8286,
66
+ "sae_top_2_test_accuracy": 0.8746,
67
+ "sae_top_5_test_accuracy": 0.9122,
68
+ "sae_top_10_test_accuracy": null,
69
+ "sae_top_20_test_accuracy": null,
70
+ "sae_top_50_test_accuracy": null,
71
+ "sae_top_100_test_accuracy": null
72
+ },
73
+ {
74
+ "dataset_name": "LabHC/bias_in_bios_class_set2_results",
75
+ "llm_test_accuracy": 0.9502000451087952,
76
+ "llm_top_1_test_accuracy": 0.6718,
77
+ "llm_top_2_test_accuracy": 0.7230000000000001,
78
+ "llm_top_5_test_accuracy": 0.7615999999999999,
79
+ "llm_top_10_test_accuracy": null,
80
+ "llm_top_20_test_accuracy": null,
81
+ "llm_top_50_test_accuracy": null,
82
+ "llm_top_100_test_accuracy": null,
83
+ "sae_test_accuracy": 0.9468000411987305,
84
+ "sae_top_1_test_accuracy": 0.7358,
85
+ "sae_top_2_test_accuracy": 0.7529999999999999,
86
+ "sae_top_5_test_accuracy": 0.8615999999999999,
87
+ "sae_top_10_test_accuracy": null,
88
+ "sae_top_20_test_accuracy": null,
89
+ "sae_top_50_test_accuracy": null,
90
+ "sae_top_100_test_accuracy": null
91
+ },
92
+ {
93
+ "dataset_name": "LabHC/bias_in_bios_class_set3_results",
94
+ "llm_test_accuracy": 0.9292000293731689,
95
+ "llm_top_1_test_accuracy": 0.687,
96
+ "llm_top_2_test_accuracy": 0.7306000000000001,
97
+ "llm_top_5_test_accuracy": 0.7644,
98
+ "llm_top_10_test_accuracy": null,
99
+ "llm_top_20_test_accuracy": null,
100
+ "llm_top_50_test_accuracy": null,
101
+ "llm_top_100_test_accuracy": null,
102
+ "sae_test_accuracy": 0.9276000499725342,
103
+ "sae_top_1_test_accuracy": 0.8093999999999999,
104
+ "sae_top_2_test_accuracy": 0.8384,
105
+ "sae_top_5_test_accuracy": 0.8712,
106
+ "sae_top_10_test_accuracy": null,
107
+ "sae_top_20_test_accuracy": null,
108
+ "sae_top_50_test_accuracy": null,
109
+ "sae_top_100_test_accuracy": null
110
+ },
111
+ {
112
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results",
113
+ "llm_test_accuracy": 0.9116000413894654,
114
+ "llm_top_1_test_accuracy": 0.6076,
115
+ "llm_top_2_test_accuracy": 0.6492,
116
+ "llm_top_5_test_accuracy": 0.6728000000000001,
117
+ "llm_top_10_test_accuracy": null,
118
+ "llm_top_20_test_accuracy": null,
119
+ "llm_top_50_test_accuracy": null,
120
+ "llm_top_100_test_accuracy": null,
121
+ "sae_test_accuracy": 0.9154000401496887,
122
+ "sae_top_1_test_accuracy": 0.7402,
123
+ "sae_top_2_test_accuracy": 0.8008000000000001,
124
+ "sae_top_5_test_accuracy": 0.8423999999999999,
125
+ "sae_top_10_test_accuracy": null,
126
+ "sae_top_20_test_accuracy": null,
127
+ "sae_top_50_test_accuracy": null,
128
+ "sae_top_100_test_accuracy": null
129
+ },
130
+ {
131
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results",
132
+ "llm_test_accuracy": 0.9810000360012054,
133
+ "llm_top_1_test_accuracy": 0.673,
134
+ "llm_top_2_test_accuracy": 0.724,
135
+ "llm_top_5_test_accuracy": 0.766,
136
+ "llm_top_10_test_accuracy": null,
137
+ "llm_top_20_test_accuracy": null,
138
+ "llm_top_50_test_accuracy": null,
139
+ "llm_top_100_test_accuracy": null,
140
+ "sae_test_accuracy": 0.9725000560283661,
141
+ "sae_top_1_test_accuracy": 0.894,
142
+ "sae_top_2_test_accuracy": 0.91,
143
+ "sae_top_5_test_accuracy": 0.959,
144
+ "sae_top_10_test_accuracy": null,
145
+ "sae_top_20_test_accuracy": null,
146
+ "sae_top_50_test_accuracy": null,
147
+ "sae_top_100_test_accuracy": null
148
+ },
149
+ {
150
+ "dataset_name": "codeparrot/github-code_results",
151
+ "llm_test_accuracy": 0.9672000527381897,
152
+ "llm_top_1_test_accuracy": 0.6634,
153
+ "llm_top_2_test_accuracy": 0.6894,
154
+ "llm_top_5_test_accuracy": 0.7562,
155
+ "llm_top_10_test_accuracy": null,
156
+ "llm_top_20_test_accuracy": null,
157
+ "llm_top_50_test_accuracy": null,
158
+ "llm_top_100_test_accuracy": null,
159
+ "sae_test_accuracy": 0.9706000328063965,
160
+ "sae_top_1_test_accuracy": 0.7874000000000001,
161
+ "sae_top_2_test_accuracy": 0.8124,
162
+ "sae_top_5_test_accuracy": 0.8798,
163
+ "sae_top_10_test_accuracy": null,
164
+ "sae_top_20_test_accuracy": null,
165
+ "sae_top_50_test_accuracy": null,
166
+ "sae_top_100_test_accuracy": null
167
+ },
168
+ {
169
+ "dataset_name": "fancyzhx/ag_news_results",
170
+ "llm_test_accuracy": 0.9515000432729721,
171
+ "llm_top_1_test_accuracy": 0.63925,
172
+ "llm_top_2_test_accuracy": 0.7785,
173
+ "llm_top_5_test_accuracy": 0.8225,
174
+ "llm_top_10_test_accuracy": null,
175
+ "llm_top_20_test_accuracy": null,
176
+ "llm_top_50_test_accuracy": null,
177
+ "llm_top_100_test_accuracy": null,
178
+ "sae_test_accuracy": 0.9492500424385071,
179
+ "sae_top_1_test_accuracy": 0.7505000000000001,
180
+ "sae_top_2_test_accuracy": 0.7865,
181
+ "sae_top_5_test_accuracy": 0.8427499999999999,
182
+ "sae_top_10_test_accuracy": null,
183
+ "sae_top_20_test_accuracy": null,
184
+ "sae_top_50_test_accuracy": null,
185
+ "sae_top_100_test_accuracy": null
186
+ },
187
+ {
188
+ "dataset_name": "Helsinki-NLP/europarl_results",
189
+ "llm_test_accuracy": 0.9994000196456909,
190
+ "llm_top_1_test_accuracy": 0.6401999999999999,
191
+ "llm_top_2_test_accuracy": 0.7786000000000001,
192
+ "llm_top_5_test_accuracy": 0.9103999999999999,
193
+ "llm_top_10_test_accuracy": null,
194
+ "llm_top_20_test_accuracy": null,
195
+ "llm_top_50_test_accuracy": null,
196
+ "llm_top_100_test_accuracy": null,
197
+ "sae_test_accuracy": 0.9984000325202942,
198
+ "sae_top_1_test_accuracy": 0.8957999999999998,
199
+ "sae_top_2_test_accuracy": 0.9788,
200
+ "sae_top_5_test_accuracy": 0.9911999999999999,
201
+ "sae_top_10_test_accuracy": null,
202
+ "sae_top_20_test_accuracy": null,
203
+ "sae_top_50_test_accuracy": null,
204
+ "sae_top_100_test_accuracy": null
205
+ }
206
+ ],
207
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
208
+ "sae_lens_id": "custom_sae",
209
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_4",
210
+ "sae_lens_version": "5.4.2",
211
+ "sae_cfg_dict": {
212
+ "model_name": "gemma-2-2b",
213
+ "d_in": 2304,
214
+ "d_sae": 65536,
215
+ "hook_layer": 12,
216
+ "hook_name": "blocks.12.hook_resid_post",
217
+ "context_size": null,
218
+ "hook_head_index": null,
219
+ "architecture": "standard_april_update",
220
+ "apply_b_dec_to_input": null,
221
+ "finetuning_scaling_factor": null,
222
+ "activation_fn_str": "",
223
+ "prepend_bos": true,
224
+ "normalize_activations": "none",
225
+ "dtype": "bfloat16",
226
+ "device": "",
227
+ "dataset_path": "",
228
+ "dataset_trust_remote_code": true,
229
+ "seqpos_slice": [
230
+ null
231
+ ],
232
+ "training_tokens": -100000,
233
+ "sae_lens_training_version": null,
234
+ "neuronpedia_id": null
235
+ },
236
+ "eval_result_unstructured": {
237
+ "LabHC/bias_in_bios_class_set1_results": {
238
+ "sae_test_accuracy": {
239
+ "0": 0.9380000233650208,
240
+ "1": 0.9610000252723694,
241
+ "2": 0.9520000219345093,
242
+ "6": 0.984000027179718,
243
+ "9": 0.9780000448226929
244
+ },
245
+ "llm_test_accuracy": {
246
+ "0": 0.9510000348091125,
247
+ "1": 0.9670000672340393,
248
+ "2": 0.9530000686645508,
249
+ "6": 0.987000048160553,
250
+ "9": 0.9760000705718994
251
+ },
252
+ "llm_top_1_test_accuracy": {
253
+ "0": 0.577,
254
+ "1": 0.613,
255
+ "2": 0.662,
256
+ "6": 0.787,
257
+ "9": 0.56
258
+ },
259
+ "llm_top_2_test_accuracy": {
260
+ "0": 0.574,
261
+ "1": 0.66,
262
+ "2": 0.718,
263
+ "6": 0.811,
264
+ "9": 0.714
265
+ },
266
+ "llm_top_5_test_accuracy": {
267
+ "0": 0.713,
268
+ "1": 0.711,
269
+ "2": 0.755,
270
+ "6": 0.895,
271
+ "9": 0.861
272
+ },
273
+ "sae_top_1_test_accuracy": {
274
+ "0": 0.872,
275
+ "1": 0.68,
276
+ "2": 0.84,
277
+ "6": 0.975,
278
+ "9": 0.776
279
+ },
280
+ "sae_top_2_test_accuracy": {
281
+ "0": 0.883,
282
+ "1": 0.81,
283
+ "2": 0.907,
284
+ "6": 0.974,
285
+ "9": 0.799
286
+ },
287
+ "sae_top_5_test_accuracy": {
288
+ "0": 0.889,
289
+ "1": 0.817,
290
+ "2": 0.921,
291
+ "6": 0.985,
292
+ "9": 0.949
293
+ }
294
+ },
295
+ "LabHC/bias_in_bios_class_set2_results": {
296
+ "sae_test_accuracy": {
297
+ "11": 0.9540000557899475,
298
+ "13": 0.9520000219345093,
299
+ "14": 0.9640000462532043,
300
+ "18": 0.9080000519752502,
301
+ "19": 0.956000030040741
302
+ },
303
+ "llm_test_accuracy": {
304
+ "11": 0.9550000429153442,
305
+ "13": 0.9550000429153442,
306
+ "14": 0.9550000429153442,
307
+ "18": 0.9330000281333923,
308
+ "19": 0.9530000686645508
309
+ },
310
+ "llm_top_1_test_accuracy": {
311
+ "11": 0.557,
312
+ "13": 0.673,
313
+ "14": 0.645,
314
+ "18": 0.697,
315
+ "19": 0.787
316
+ },
317
+ "llm_top_2_test_accuracy": {
318
+ "11": 0.705,
319
+ "13": 0.718,
320
+ "14": 0.679,
321
+ "18": 0.73,
322
+ "19": 0.783
323
+ },
324
+ "llm_top_5_test_accuracy": {
325
+ "11": 0.794,
326
+ "13": 0.744,
327
+ "14": 0.724,
328
+ "18": 0.713,
329
+ "19": 0.833
330
+ },
331
+ "sae_top_1_test_accuracy": {
332
+ "11": 0.719,
333
+ "13": 0.689,
334
+ "14": 0.726,
335
+ "18": 0.707,
336
+ "19": 0.838
337
+ },
338
+ "sae_top_2_test_accuracy": {
339
+ "11": 0.726,
340
+ "13": 0.691,
341
+ "14": 0.763,
342
+ "18": 0.704,
343
+ "19": 0.881
344
+ },
345
+ "sae_top_5_test_accuracy": {
346
+ "11": 0.899,
347
+ "13": 0.721,
348
+ "14": 0.892,
349
+ "18": 0.887,
350
+ "19": 0.909
351
+ }
352
+ },
353
+ "LabHC/bias_in_bios_class_set3_results": {
354
+ "sae_test_accuracy": {
355
+ "20": 0.9550000429153442,
356
+ "21": 0.921000063419342,
357
+ "22": 0.9120000600814819,
358
+ "25": 0.9590000510215759,
359
+ "26": 0.8910000324249268
360
+ },
361
+ "llm_test_accuracy": {
362
+ "20": 0.9570000171661377,
363
+ "21": 0.9150000214576721,
364
+ "22": 0.9230000376701355,
365
+ "25": 0.9610000252723694,
366
+ "26": 0.89000004529953
367
+ },
368
+ "llm_top_1_test_accuracy": {
369
+ "20": 0.716,
370
+ "21": 0.761,
371
+ "22": 0.648,
372
+ "25": 0.692,
373
+ "26": 0.618
374
+ },
375
+ "llm_top_2_test_accuracy": {
376
+ "20": 0.805,
377
+ "21": 0.762,
378
+ "22": 0.649,
379
+ "25": 0.766,
380
+ "26": 0.671
381
+ },
382
+ "llm_top_5_test_accuracy": {
383
+ "20": 0.875,
384
+ "21": 0.783,
385
+ "22": 0.711,
386
+ "25": 0.782,
387
+ "26": 0.671
388
+ },
389
+ "sae_top_1_test_accuracy": {
390
+ "20": 0.885,
391
+ "21": 0.764,
392
+ "22": 0.872,
393
+ "25": 0.858,
394
+ "26": 0.668
395
+ },
396
+ "sae_top_2_test_accuracy": {
397
+ "20": 0.899,
398
+ "21": 0.805,
399
+ "22": 0.879,
400
+ "25": 0.86,
401
+ "26": 0.749
402
+ },
403
+ "sae_top_5_test_accuracy": {
404
+ "20": 0.926,
405
+ "21": 0.834,
406
+ "22": 0.885,
407
+ "25": 0.873,
408
+ "26": 0.838
409
+ }
410
+ },
411
+ "canrager/amazon_reviews_mcauley_1and5_results": {
412
+ "sae_test_accuracy": {
413
+ "1": 0.9510000348091125,
414
+ "2": 0.9390000700950623,
415
+ "3": 0.9040000438690186,
416
+ "5": 0.9200000166893005,
417
+ "6": 0.8630000352859497
418
+ },
419
+ "llm_test_accuracy": {
420
+ "1": 0.9460000395774841,
421
+ "2": 0.9330000281333923,
422
+ "3": 0.9130000472068787,
423
+ "5": 0.9160000681877136,
424
+ "6": 0.8500000238418579
425
+ },
426
+ "llm_top_1_test_accuracy": {
427
+ "1": 0.674,
428
+ "2": 0.587,
429
+ "3": 0.601,
430
+ "5": 0.583,
431
+ "6": 0.593
432
+ },
433
+ "llm_top_2_test_accuracy": {
434
+ "1": 0.737,
435
+ "2": 0.632,
436
+ "3": 0.605,
437
+ "5": 0.634,
438
+ "6": 0.638
439
+ },
440
+ "llm_top_5_test_accuracy": {
441
+ "1": 0.763,
442
+ "2": 0.626,
443
+ "3": 0.63,
444
+ "5": 0.656,
445
+ "6": 0.689
446
+ },
447
+ "sae_top_1_test_accuracy": {
448
+ "1": 0.845,
449
+ "2": 0.829,
450
+ "3": 0.633,
451
+ "5": 0.76,
452
+ "6": 0.634
453
+ },
454
+ "sae_top_2_test_accuracy": {
455
+ "1": 0.878,
456
+ "2": 0.861,
457
+ "3": 0.708,
458
+ "5": 0.881,
459
+ "6": 0.676
460
+ },
461
+ "sae_top_5_test_accuracy": {
462
+ "1": 0.907,
463
+ "2": 0.895,
464
+ "3": 0.763,
465
+ "5": 0.887,
466
+ "6": 0.76
467
+ }
468
+ },
469
+ "canrager/amazon_reviews_mcauley_1and5_sentiment_results": {
470
+ "sae_test_accuracy": {
471
+ "1.0": 0.9730000495910645,
472
+ "5.0": 0.9720000624656677
473
+ },
474
+ "llm_test_accuracy": {
475
+ "1.0": 0.9820000529289246,
476
+ "5.0": 0.9800000190734863
477
+ },
478
+ "llm_top_1_test_accuracy": {
479
+ "1.0": 0.673,
480
+ "5.0": 0.673
481
+ },
482
+ "llm_top_2_test_accuracy": {
483
+ "1.0": 0.724,
484
+ "5.0": 0.724
485
+ },
486
+ "llm_top_5_test_accuracy": {
487
+ "1.0": 0.766,
488
+ "5.0": 0.766
489
+ },
490
+ "sae_top_1_test_accuracy": {
491
+ "1.0": 0.894,
492
+ "5.0": 0.894
493
+ },
494
+ "sae_top_2_test_accuracy": {
495
+ "1.0": 0.91,
496
+ "5.0": 0.91
497
+ },
498
+ "sae_top_5_test_accuracy": {
499
+ "1.0": 0.959,
500
+ "5.0": 0.959
501
+ }
502
+ },
503
+ "codeparrot/github-code_results": {
504
+ "sae_test_accuracy": {
505
+ "C": 0.9570000171661377,
506
+ "Python": 0.9830000400543213,
507
+ "HTML": 0.987000048160553,
508
+ "Java": 0.9650000333786011,
509
+ "PHP": 0.9610000252723694
510
+ },
511
+ "llm_test_accuracy": {
512
+ "C": 0.9450000524520874,
513
+ "Python": 0.9890000224113464,
514
+ "HTML": 0.987000048160553,
515
+ "Java": 0.9620000720024109,
516
+ "PHP": 0.9530000686645508
517
+ },
518
+ "llm_top_1_test_accuracy": {
519
+ "C": 0.669,
520
+ "Python": 0.638,
521
+ "HTML": 0.788,
522
+ "Java": 0.621,
523
+ "PHP": 0.601
524
+ },
525
+ "llm_top_2_test_accuracy": {
526
+ "C": 0.656,
527
+ "Python": 0.671,
528
+ "HTML": 0.811,
529
+ "Java": 0.678,
530
+ "PHP": 0.631
531
+ },
532
+ "llm_top_5_test_accuracy": {
533
+ "C": 0.744,
534
+ "Python": 0.735,
535
+ "HTML": 0.904,
536
+ "Java": 0.726,
537
+ "PHP": 0.672
538
+ },
539
+ "sae_top_1_test_accuracy": {
540
+ "C": 0.618,
541
+ "Python": 0.949,
542
+ "HTML": 0.795,
543
+ "Java": 0.678,
544
+ "PHP": 0.897
545
+ },
546
+ "sae_top_2_test_accuracy": {
547
+ "C": 0.622,
548
+ "Python": 0.943,
549
+ "HTML": 0.923,
550
+ "Java": 0.669,
551
+ "PHP": 0.905
552
+ },
553
+ "sae_top_5_test_accuracy": {
554
+ "C": 0.761,
555
+ "Python": 0.969,
556
+ "HTML": 0.918,
557
+ "Java": 0.826,
558
+ "PHP": 0.925
559
+ }
560
+ },
561
+ "fancyzhx/ag_news_results": {
562
+ "sae_test_accuracy": {
563
+ "0": 0.9420000314712524,
564
+ "1": 0.9830000400543213,
565
+ "2": 0.9230000376701355,
566
+ "3": 0.9490000605583191
567
+ },
568
+ "llm_test_accuracy": {
569
+ "0": 0.937000036239624,
570
+ "1": 0.987000048160553,
571
+ "2": 0.9300000667572021,
572
+ "3": 0.9520000219345093
573
+ },
574
+ "llm_top_1_test_accuracy": {
575
+ "0": 0.586,
576
+ "1": 0.658,
577
+ "2": 0.669,
578
+ "3": 0.644
579
+ },
580
+ "llm_top_2_test_accuracy": {
581
+ "0": 0.809,
582
+ "1": 0.8,
583
+ "2": 0.691,
584
+ "3": 0.814
585
+ },
586
+ "llm_top_5_test_accuracy": {
587
+ "0": 0.827,
588
+ "1": 0.877,
589
+ "2": 0.748,
590
+ "3": 0.838
591
+ },
592
+ "sae_top_1_test_accuracy": {
593
+ "0": 0.716,
594
+ "1": 0.932,
595
+ "2": 0.603,
596
+ "3": 0.751
597
+ },
598
+ "sae_top_2_test_accuracy": {
599
+ "0": 0.813,
600
+ "1": 0.94,
601
+ "2": 0.635,
602
+ "3": 0.758
603
+ },
604
+ "sae_top_5_test_accuracy": {
605
+ "0": 0.813,
606
+ "1": 0.938,
607
+ "2": 0.732,
608
+ "3": 0.888
609
+ }
610
+ },
611
+ "Helsinki-NLP/europarl_results": {
612
+ "sae_test_accuracy": {
613
+ "en": 0.9980000257492065,
614
+ "fr": 0.9980000257492065,
615
+ "de": 1.0,
616
+ "es": 0.9970000386238098,
617
+ "nl": 0.999000072479248
618
+ },
619
+ "llm_test_accuracy": {
620
+ "en": 1.0,
621
+ "fr": 1.0,
622
+ "de": 1.0,
623
+ "es": 0.9980000257492065,
624
+ "nl": 0.999000072479248
625
+ },
626
+ "llm_top_1_test_accuracy": {
627
+ "en": 0.742,
628
+ "fr": 0.575,
629
+ "de": 0.737,
630
+ "es": 0.504,
631
+ "nl": 0.643
632
+ },
633
+ "llm_top_2_test_accuracy": {
634
+ "en": 0.822,
635
+ "fr": 0.593,
636
+ "de": 0.83,
637
+ "es": 0.905,
638
+ "nl": 0.743
639
+ },
640
+ "llm_top_5_test_accuracy": {
641
+ "en": 0.898,
642
+ "fr": 0.908,
643
+ "de": 0.908,
644
+ "es": 0.982,
645
+ "nl": 0.856
646
+ },
647
+ "sae_top_1_test_accuracy": {
648
+ "en": 0.686,
649
+ "fr": 0.988,
650
+ "de": 0.936,
651
+ "es": 0.873,
652
+ "nl": 0.996
653
+ },
654
+ "sae_top_2_test_accuracy": {
655
+ "en": 0.979,
656
+ "fr": 0.992,
657
+ "de": 0.936,
658
+ "es": 0.99,
659
+ "nl": 0.997
660
+ },
661
+ "sae_top_5_test_accuracy": {
662
+ "en": 0.997,
663
+ "fr": 0.996,
664
+ "de": 0.967,
665
+ "es": 0.997,
666
+ "nl": 0.999
667
+ }
668
+ }
669
+ }
670
+ }
eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_5_custom_sae_eval_results.json ADDED
@@ -0,0 +1,670 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "sparse_probing",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "LabHC/bias_in_bios_class_set2",
8
+ "LabHC/bias_in_bios_class_set3",
9
+ "canrager/amazon_reviews_mcauley_1and5",
10
+ "canrager/amazon_reviews_mcauley_1and5_sentiment",
11
+ "codeparrot/github-code",
12
+ "fancyzhx/ag_news",
13
+ "Helsinki-NLP/europarl"
14
+ ],
15
+ "probe_train_set_size": 4000,
16
+ "probe_test_set_size": 1000,
17
+ "context_length": 128,
18
+ "sae_batch_size": 125,
19
+ "llm_batch_size": 32,
20
+ "llm_dtype": "bfloat16",
21
+ "model_name": "gemma-2-2b",
22
+ "k_values": [
23
+ 1,
24
+ 2,
25
+ 5
26
+ ],
27
+ "lower_vram_usage": false
28
+ },
29
+ "eval_id": "e6bf7aa2-251f-4bd9-9e64-4e882a4feab6",
30
+ "datetime_epoch_millis": 1740125021721,
31
+ "eval_result_metrics": {
32
+ "llm": {
33
+ "llm_test_accuracy": 0.9571125406771899,
34
+ "llm_top_1_test_accuracy": 0.6527562499999999,
35
+ "llm_top_2_test_accuracy": 0.7210875,
36
+ "llm_top_5_test_accuracy": 0.7801125,
37
+ "llm_top_10_test_accuracy": null,
38
+ "llm_top_20_test_accuracy": null,
39
+ "llm_top_50_test_accuracy": null,
40
+ "llm_top_100_test_accuracy": null
41
+ },
42
+ "sae": {
43
+ "sae_test_accuracy": 0.9544812977313996,
44
+ "sae_top_1_test_accuracy": 0.7810687499999999,
45
+ "sae_top_2_test_accuracy": 0.83790625,
46
+ "sae_top_5_test_accuracy": 0.8870687500000001,
47
+ "sae_top_10_test_accuracy": null,
48
+ "sae_top_20_test_accuracy": null,
49
+ "sae_top_50_test_accuracy": null,
50
+ "sae_top_100_test_accuracy": null
51
+ }
52
+ },
53
+ "eval_result_details": [
54
+ {
55
+ "dataset_name": "LabHC/bias_in_bios_class_set1_results",
56
+ "llm_test_accuracy": 0.966800057888031,
57
+ "llm_top_1_test_accuracy": 0.6397999999999999,
58
+ "llm_top_2_test_accuracy": 0.6954,
59
+ "llm_top_5_test_accuracy": 0.7869999999999999,
60
+ "llm_top_10_test_accuracy": null,
61
+ "llm_top_20_test_accuracy": null,
62
+ "llm_top_50_test_accuracy": null,
63
+ "llm_top_100_test_accuracy": null,
64
+ "sae_test_accuracy": 0.9590000510215759,
65
+ "sae_top_1_test_accuracy": 0.8022,
66
+ "sae_top_2_test_accuracy": 0.8854000000000001,
67
+ "sae_top_5_test_accuracy": 0.9,
68
+ "sae_top_10_test_accuracy": null,
69
+ "sae_top_20_test_accuracy": null,
70
+ "sae_top_50_test_accuracy": null,
71
+ "sae_top_100_test_accuracy": null
72
+ },
73
+ {
74
+ "dataset_name": "LabHC/bias_in_bios_class_set2_results",
75
+ "llm_test_accuracy": 0.9502000451087952,
76
+ "llm_top_1_test_accuracy": 0.6718,
77
+ "llm_top_2_test_accuracy": 0.7230000000000001,
78
+ "llm_top_5_test_accuracy": 0.7615999999999999,
79
+ "llm_top_10_test_accuracy": null,
80
+ "llm_top_20_test_accuracy": null,
81
+ "llm_top_50_test_accuracy": null,
82
+ "llm_top_100_test_accuracy": null,
83
+ "sae_test_accuracy": 0.9440000534057618,
84
+ "sae_top_1_test_accuracy": 0.7405999999999999,
85
+ "sae_top_2_test_accuracy": 0.7498,
86
+ "sae_top_5_test_accuracy": 0.8848,
87
+ "sae_top_10_test_accuracy": null,
88
+ "sae_top_20_test_accuracy": null,
89
+ "sae_top_50_test_accuracy": null,
90
+ "sae_top_100_test_accuracy": null
91
+ },
92
+ {
93
+ "dataset_name": "LabHC/bias_in_bios_class_set3_results",
94
+ "llm_test_accuracy": 0.9292000293731689,
95
+ "llm_top_1_test_accuracy": 0.687,
96
+ "llm_top_2_test_accuracy": 0.7306000000000001,
97
+ "llm_top_5_test_accuracy": 0.7644,
98
+ "llm_top_10_test_accuracy": null,
99
+ "llm_top_20_test_accuracy": null,
100
+ "llm_top_50_test_accuracy": null,
101
+ "llm_top_100_test_accuracy": null,
102
+ "sae_test_accuracy": 0.9284000515937805,
103
+ "sae_top_1_test_accuracy": 0.8099999999999999,
104
+ "sae_top_2_test_accuracy": 0.8310000000000001,
105
+ "sae_top_5_test_accuracy": 0.849,
106
+ "sae_top_10_test_accuracy": null,
107
+ "sae_top_20_test_accuracy": null,
108
+ "sae_top_50_test_accuracy": null,
109
+ "sae_top_100_test_accuracy": null
110
+ },
111
+ {
112
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results",
113
+ "llm_test_accuracy": 0.9116000413894654,
114
+ "llm_top_1_test_accuracy": 0.6076,
115
+ "llm_top_2_test_accuracy": 0.6492,
116
+ "llm_top_5_test_accuracy": 0.6728000000000001,
117
+ "llm_top_10_test_accuracy": null,
118
+ "llm_top_20_test_accuracy": null,
119
+ "llm_top_50_test_accuracy": null,
120
+ "llm_top_100_test_accuracy": null,
121
+ "sae_test_accuracy": 0.916800034046173,
122
+ "sae_top_1_test_accuracy": 0.6922,
123
+ "sae_top_2_test_accuracy": 0.7604,
124
+ "sae_top_5_test_accuracy": 0.8432000000000001,
125
+ "sae_top_10_test_accuracy": null,
126
+ "sae_top_20_test_accuracy": null,
127
+ "sae_top_50_test_accuracy": null,
128
+ "sae_top_100_test_accuracy": null
129
+ },
130
+ {
131
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results",
132
+ "llm_test_accuracy": 0.9810000360012054,
133
+ "llm_top_1_test_accuracy": 0.673,
134
+ "llm_top_2_test_accuracy": 0.724,
135
+ "llm_top_5_test_accuracy": 0.766,
136
+ "llm_top_10_test_accuracy": null,
137
+ "llm_top_20_test_accuracy": null,
138
+ "llm_top_50_test_accuracy": null,
139
+ "llm_top_100_test_accuracy": null,
140
+ "sae_test_accuracy": 0.9665000438690186,
141
+ "sae_top_1_test_accuracy": 0.883,
142
+ "sae_top_2_test_accuracy": 0.949,
143
+ "sae_top_5_test_accuracy": 0.954,
144
+ "sae_top_10_test_accuracy": null,
145
+ "sae_top_20_test_accuracy": null,
146
+ "sae_top_50_test_accuracy": null,
147
+ "sae_top_100_test_accuracy": null
148
+ },
149
+ {
150
+ "dataset_name": "codeparrot/github-code_results",
151
+ "llm_test_accuracy": 0.9672000527381897,
152
+ "llm_top_1_test_accuracy": 0.6634,
153
+ "llm_top_2_test_accuracy": 0.6894,
154
+ "llm_top_5_test_accuracy": 0.7562,
155
+ "llm_top_10_test_accuracy": null,
156
+ "llm_top_20_test_accuracy": null,
157
+ "llm_top_50_test_accuracy": null,
158
+ "llm_top_100_test_accuracy": null,
159
+ "sae_test_accuracy": 0.9706000447273254,
160
+ "sae_top_1_test_accuracy": 0.7254,
161
+ "sae_top_2_test_accuracy": 0.8144,
162
+ "sae_top_5_test_accuracy": 0.8554,
163
+ "sae_top_10_test_accuracy": null,
164
+ "sae_top_20_test_accuracy": null,
165
+ "sae_top_50_test_accuracy": null,
166
+ "sae_top_100_test_accuracy": null
167
+ },
168
+ {
169
+ "dataset_name": "fancyzhx/ag_news_results",
170
+ "llm_test_accuracy": 0.9515000432729721,
171
+ "llm_top_1_test_accuracy": 0.63925,
172
+ "llm_top_2_test_accuracy": 0.7785,
173
+ "llm_top_5_test_accuracy": 0.8225,
174
+ "llm_top_10_test_accuracy": null,
175
+ "llm_top_20_test_accuracy": null,
176
+ "llm_top_50_test_accuracy": null,
177
+ "llm_top_100_test_accuracy": null,
178
+ "sae_test_accuracy": 0.9517500400543213,
179
+ "sae_top_1_test_accuracy": 0.70675,
180
+ "sae_top_2_test_accuracy": 0.76925,
181
+ "sae_top_5_test_accuracy": 0.81975,
182
+ "sae_top_10_test_accuracy": null,
183
+ "sae_top_20_test_accuracy": null,
184
+ "sae_top_50_test_accuracy": null,
185
+ "sae_top_100_test_accuracy": null
186
+ },
187
+ {
188
+ "dataset_name": "Helsinki-NLP/europarl_results",
189
+ "llm_test_accuracy": 0.9994000196456909,
190
+ "llm_top_1_test_accuracy": 0.6401999999999999,
191
+ "llm_top_2_test_accuracy": 0.7786000000000001,
192
+ "llm_top_5_test_accuracy": 0.9103999999999999,
193
+ "llm_top_10_test_accuracy": null,
194
+ "llm_top_20_test_accuracy": null,
195
+ "llm_top_50_test_accuracy": null,
196
+ "llm_top_100_test_accuracy": null,
197
+ "sae_test_accuracy": 0.9988000631332398,
198
+ "sae_top_1_test_accuracy": 0.8884000000000001,
199
+ "sae_top_2_test_accuracy": 0.9440000000000002,
200
+ "sae_top_5_test_accuracy": 0.9904,
201
+ "sae_top_10_test_accuracy": null,
202
+ "sae_top_20_test_accuracy": null,
203
+ "sae_top_50_test_accuracy": null,
204
+ "sae_top_100_test_accuracy": null
205
+ }
206
+ ],
207
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
208
+ "sae_lens_id": "custom_sae",
209
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_5",
210
+ "sae_lens_version": "5.4.2",
211
+ "sae_cfg_dict": {
212
+ "model_name": "gemma-2-2b",
213
+ "d_in": 2304,
214
+ "d_sae": 65536,
215
+ "hook_layer": 12,
216
+ "hook_name": "blocks.12.hook_resid_post",
217
+ "context_size": null,
218
+ "hook_head_index": null,
219
+ "architecture": "standard_april_update",
220
+ "apply_b_dec_to_input": null,
221
+ "finetuning_scaling_factor": null,
222
+ "activation_fn_str": "",
223
+ "prepend_bos": true,
224
+ "normalize_activations": "none",
225
+ "dtype": "bfloat16",
226
+ "device": "",
227
+ "dataset_path": "",
228
+ "dataset_trust_remote_code": true,
229
+ "seqpos_slice": [
230
+ null
231
+ ],
232
+ "training_tokens": -100000,
233
+ "sae_lens_training_version": null,
234
+ "neuronpedia_id": null
235
+ },
236
+ "eval_result_unstructured": {
237
+ "LabHC/bias_in_bios_class_set1_results": {
238
+ "sae_test_accuracy": {
239
+ "0": 0.9470000267028809,
240
+ "1": 0.956000030040741,
241
+ "2": 0.9440000653266907,
242
+ "6": 0.9850000739097595,
243
+ "9": 0.9630000591278076
244
+ },
245
+ "llm_test_accuracy": {
246
+ "0": 0.9510000348091125,
247
+ "1": 0.9670000672340393,
248
+ "2": 0.9530000686645508,
249
+ "6": 0.987000048160553,
250
+ "9": 0.9760000705718994
251
+ },
252
+ "llm_top_1_test_accuracy": {
253
+ "0": 0.577,
254
+ "1": 0.613,
255
+ "2": 0.662,
256
+ "6": 0.787,
257
+ "9": 0.56
258
+ },
259
+ "llm_top_2_test_accuracy": {
260
+ "0": 0.574,
261
+ "1": 0.66,
262
+ "2": 0.718,
263
+ "6": 0.811,
264
+ "9": 0.714
265
+ },
266
+ "llm_top_5_test_accuracy": {
267
+ "0": 0.713,
268
+ "1": 0.711,
269
+ "2": 0.755,
270
+ "6": 0.895,
271
+ "9": 0.861
272
+ },
273
+ "sae_top_1_test_accuracy": {
274
+ "0": 0.594,
275
+ "1": 0.676,
276
+ "2": 0.843,
277
+ "6": 0.975,
278
+ "9": 0.923
279
+ },
280
+ "sae_top_2_test_accuracy": {
281
+ "0": 0.875,
282
+ "1": 0.802,
283
+ "2": 0.846,
284
+ "6": 0.975,
285
+ "9": 0.929
286
+ },
287
+ "sae_top_5_test_accuracy": {
288
+ "0": 0.875,
289
+ "1": 0.803,
290
+ "2": 0.907,
291
+ "6": 0.977,
292
+ "9": 0.938
293
+ }
294
+ },
295
+ "LabHC/bias_in_bios_class_set2_results": {
296
+ "sae_test_accuracy": {
297
+ "11": 0.9550000429153442,
298
+ "13": 0.9450000524520874,
299
+ "14": 0.9450000524520874,
300
+ "18": 0.9170000553131104,
301
+ "19": 0.9580000638961792
302
+ },
303
+ "llm_test_accuracy": {
304
+ "11": 0.9550000429153442,
305
+ "13": 0.9550000429153442,
306
+ "14": 0.9550000429153442,
307
+ "18": 0.9330000281333923,
308
+ "19": 0.9530000686645508
309
+ },
310
+ "llm_top_1_test_accuracy": {
311
+ "11": 0.557,
312
+ "13": 0.673,
313
+ "14": 0.645,
314
+ "18": 0.697,
315
+ "19": 0.787
316
+ },
317
+ "llm_top_2_test_accuracy": {
318
+ "11": 0.705,
319
+ "13": 0.718,
320
+ "14": 0.679,
321
+ "18": 0.73,
322
+ "19": 0.783
323
+ },
324
+ "llm_top_5_test_accuracy": {
325
+ "11": 0.794,
326
+ "13": 0.744,
327
+ "14": 0.724,
328
+ "18": 0.713,
329
+ "19": 0.833
330
+ },
331
+ "sae_top_1_test_accuracy": {
332
+ "11": 0.732,
333
+ "13": 0.7,
334
+ "14": 0.726,
335
+ "18": 0.699,
336
+ "19": 0.846
337
+ },
338
+ "sae_top_2_test_accuracy": {
339
+ "11": 0.731,
340
+ "13": 0.735,
341
+ "14": 0.723,
342
+ "18": 0.701,
343
+ "19": 0.859
344
+ },
345
+ "sae_top_5_test_accuracy": {
346
+ "11": 0.901,
347
+ "13": 0.825,
348
+ "14": 0.878,
349
+ "18": 0.913,
350
+ "19": 0.907
351
+ }
352
+ },
353
+ "LabHC/bias_in_bios_class_set3_results": {
354
+ "sae_test_accuracy": {
355
+ "20": 0.9530000686645508,
356
+ "21": 0.9300000667572021,
357
+ "22": 0.9150000214576721,
358
+ "25": 0.9540000557899475,
359
+ "26": 0.89000004529953
360
+ },
361
+ "llm_test_accuracy": {
362
+ "20": 0.9570000171661377,
363
+ "21": 0.9150000214576721,
364
+ "22": 0.9230000376701355,
365
+ "25": 0.9610000252723694,
366
+ "26": 0.89000004529953
367
+ },
368
+ "llm_top_1_test_accuracy": {
369
+ "20": 0.716,
370
+ "21": 0.761,
371
+ "22": 0.648,
372
+ "25": 0.692,
373
+ "26": 0.618
374
+ },
375
+ "llm_top_2_test_accuracy": {
376
+ "20": 0.805,
377
+ "21": 0.762,
378
+ "22": 0.649,
379
+ "25": 0.766,
380
+ "26": 0.671
381
+ },
382
+ "llm_top_5_test_accuracy": {
383
+ "20": 0.875,
384
+ "21": 0.783,
385
+ "22": 0.711,
386
+ "25": 0.782,
387
+ "26": 0.671
388
+ },
389
+ "sae_top_1_test_accuracy": {
390
+ "20": 0.889,
391
+ "21": 0.749,
392
+ "22": 0.868,
393
+ "25": 0.864,
394
+ "26": 0.68
395
+ },
396
+ "sae_top_2_test_accuracy": {
397
+ "20": 0.887,
398
+ "21": 0.8,
399
+ "22": 0.87,
400
+ "25": 0.858,
401
+ "26": 0.74
402
+ },
403
+ "sae_top_5_test_accuracy": {
404
+ "20": 0.919,
405
+ "21": 0.802,
406
+ "22": 0.858,
407
+ "25": 0.879,
408
+ "26": 0.787
409
+ }
410
+ },
411
+ "canrager/amazon_reviews_mcauley_1and5_results": {
412
+ "sae_test_accuracy": {
413
+ "1": 0.9430000185966492,
414
+ "2": 0.9350000619888306,
415
+ "3": 0.9200000166893005,
416
+ "5": 0.9150000214576721,
417
+ "6": 0.8710000514984131
418
+ },
419
+ "llm_test_accuracy": {
420
+ "1": 0.9460000395774841,
421
+ "2": 0.9330000281333923,
422
+ "3": 0.9130000472068787,
423
+ "5": 0.9160000681877136,
424
+ "6": 0.8500000238418579
425
+ },
426
+ "llm_top_1_test_accuracy": {
427
+ "1": 0.674,
428
+ "2": 0.587,
429
+ "3": 0.601,
430
+ "5": 0.583,
431
+ "6": 0.593
432
+ },
433
+ "llm_top_2_test_accuracy": {
434
+ "1": 0.737,
435
+ "2": 0.632,
436
+ "3": 0.605,
437
+ "5": 0.634,
438
+ "6": 0.638
439
+ },
440
+ "llm_top_5_test_accuracy": {
441
+ "1": 0.763,
442
+ "2": 0.626,
443
+ "3": 0.63,
444
+ "5": 0.656,
445
+ "6": 0.689
446
+ },
447
+ "sae_top_1_test_accuracy": {
448
+ "1": 0.67,
449
+ "2": 0.834,
450
+ "3": 0.638,
451
+ "5": 0.777,
452
+ "6": 0.542
453
+ },
454
+ "sae_top_2_test_accuracy": {
455
+ "1": 0.785,
456
+ "2": 0.836,
457
+ "3": 0.706,
458
+ "5": 0.785,
459
+ "6": 0.69
460
+ },
461
+ "sae_top_5_test_accuracy": {
462
+ "1": 0.902,
463
+ "2": 0.893,
464
+ "3": 0.787,
465
+ "5": 0.894,
466
+ "6": 0.74
467
+ }
468
+ },
469
+ "canrager/amazon_reviews_mcauley_1and5_sentiment_results": {
470
+ "sae_test_accuracy": {
471
+ "1.0": 0.9670000672340393,
472
+ "5.0": 0.9660000205039978
473
+ },
474
+ "llm_test_accuracy": {
475
+ "1.0": 0.9820000529289246,
476
+ "5.0": 0.9800000190734863
477
+ },
478
+ "llm_top_1_test_accuracy": {
479
+ "1.0": 0.673,
480
+ "5.0": 0.673
481
+ },
482
+ "llm_top_2_test_accuracy": {
483
+ "1.0": 0.724,
484
+ "5.0": 0.724
485
+ },
486
+ "llm_top_5_test_accuracy": {
487
+ "1.0": 0.766,
488
+ "5.0": 0.766
489
+ },
490
+ "sae_top_1_test_accuracy": {
491
+ "1.0": 0.883,
492
+ "5.0": 0.883
493
+ },
494
+ "sae_top_2_test_accuracy": {
495
+ "1.0": 0.949,
496
+ "5.0": 0.949
497
+ },
498
+ "sae_top_5_test_accuracy": {
499
+ "1.0": 0.954,
500
+ "5.0": 0.954
501
+ }
502
+ },
503
+ "codeparrot/github-code_results": {
504
+ "sae_test_accuracy": {
505
+ "C": 0.9520000219345093,
506
+ "Python": 0.9880000352859497,
507
+ "HTML": 0.9820000529289246,
508
+ "Java": 0.9730000495910645,
509
+ "PHP": 0.9580000638961792
510
+ },
511
+ "llm_test_accuracy": {
512
+ "C": 0.9450000524520874,
513
+ "Python": 0.9890000224113464,
514
+ "HTML": 0.987000048160553,
515
+ "Java": 0.9620000720024109,
516
+ "PHP": 0.9530000686645508
517
+ },
518
+ "llm_top_1_test_accuracy": {
519
+ "C": 0.669,
520
+ "Python": 0.638,
521
+ "HTML": 0.788,
522
+ "Java": 0.621,
523
+ "PHP": 0.601
524
+ },
525
+ "llm_top_2_test_accuracy": {
526
+ "C": 0.656,
527
+ "Python": 0.671,
528
+ "HTML": 0.811,
529
+ "Java": 0.678,
530
+ "PHP": 0.631
531
+ },
532
+ "llm_top_5_test_accuracy": {
533
+ "C": 0.744,
534
+ "Python": 0.735,
535
+ "HTML": 0.904,
536
+ "Java": 0.726,
537
+ "PHP": 0.672
538
+ },
539
+ "sae_top_1_test_accuracy": {
540
+ "C": 0.608,
541
+ "Python": 0.683,
542
+ "HTML": 0.797,
543
+ "Java": 0.629,
544
+ "PHP": 0.91
545
+ },
546
+ "sae_top_2_test_accuracy": {
547
+ "C": 0.675,
548
+ "Python": 0.889,
549
+ "HTML": 0.851,
550
+ "Java": 0.75,
551
+ "PHP": 0.907
552
+ },
553
+ "sae_top_5_test_accuracy": {
554
+ "C": 0.716,
555
+ "Python": 0.957,
556
+ "HTML": 0.857,
557
+ "Java": 0.829,
558
+ "PHP": 0.918
559
+ }
560
+ },
561
+ "fancyzhx/ag_news_results": {
562
+ "sae_test_accuracy": {
563
+ "0": 0.9350000619888306,
564
+ "1": 0.9830000400543213,
565
+ "2": 0.9280000329017639,
566
+ "3": 0.9610000252723694
567
+ },
568
+ "llm_test_accuracy": {
569
+ "0": 0.937000036239624,
570
+ "1": 0.987000048160553,
571
+ "2": 0.9300000667572021,
572
+ "3": 0.9520000219345093
573
+ },
574
+ "llm_top_1_test_accuracy": {
575
+ "0": 0.586,
576
+ "1": 0.658,
577
+ "2": 0.669,
578
+ "3": 0.644
579
+ },
580
+ "llm_top_2_test_accuracy": {
581
+ "0": 0.809,
582
+ "1": 0.8,
583
+ "2": 0.691,
584
+ "3": 0.814
585
+ },
586
+ "llm_top_5_test_accuracy": {
587
+ "0": 0.827,
588
+ "1": 0.877,
589
+ "2": 0.748,
590
+ "3": 0.838
591
+ },
592
+ "sae_top_1_test_accuracy": {
593
+ "0": 0.74,
594
+ "1": 0.79,
595
+ "2": 0.603,
596
+ "3": 0.694
597
+ },
598
+ "sae_top_2_test_accuracy": {
599
+ "0": 0.773,
600
+ "1": 0.81,
601
+ "2": 0.779,
602
+ "3": 0.715
603
+ },
604
+ "sae_top_5_test_accuracy": {
605
+ "0": 0.804,
606
+ "1": 0.871,
607
+ "2": 0.809,
608
+ "3": 0.795
609
+ }
610
+ },
611
+ "Helsinki-NLP/europarl_results": {
612
+ "sae_test_accuracy": {
613
+ "en": 0.9980000257492065,
614
+ "fr": 0.999000072479248,
615
+ "de": 0.999000072479248,
616
+ "es": 0.999000072479248,
617
+ "nl": 0.999000072479248
618
+ },
619
+ "llm_test_accuracy": {
620
+ "en": 1.0,
621
+ "fr": 1.0,
622
+ "de": 1.0,
623
+ "es": 0.9980000257492065,
624
+ "nl": 0.999000072479248
625
+ },
626
+ "llm_top_1_test_accuracy": {
627
+ "en": 0.742,
628
+ "fr": 0.575,
629
+ "de": 0.737,
630
+ "es": 0.504,
631
+ "nl": 0.643
632
+ },
633
+ "llm_top_2_test_accuracy": {
634
+ "en": 0.822,
635
+ "fr": 0.593,
636
+ "de": 0.83,
637
+ "es": 0.905,
638
+ "nl": 0.743
639
+ },
640
+ "llm_top_5_test_accuracy": {
641
+ "en": 0.898,
642
+ "fr": 0.908,
643
+ "de": 0.908,
644
+ "es": 0.982,
645
+ "nl": 0.856
646
+ },
647
+ "sae_top_1_test_accuracy": {
648
+ "en": 0.696,
649
+ "fr": 0.982,
650
+ "de": 0.956,
651
+ "es": 0.812,
652
+ "nl": 0.996
653
+ },
654
+ "sae_top_2_test_accuracy": {
655
+ "en": 0.831,
656
+ "fr": 0.978,
657
+ "de": 0.96,
658
+ "es": 0.955,
659
+ "nl": 0.996
660
+ },
661
+ "sae_top_5_test_accuracy": {
662
+ "en": 0.997,
663
+ "fr": 0.991,
664
+ "de": 0.971,
665
+ "es": 0.995,
666
+ "nl": 0.998
667
+ }
668
+ }
669
+ }
670
+ }
eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_0_custom_sae_eval_results.json ADDED
@@ -0,0 +1,670 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "sparse_probing",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "LabHC/bias_in_bios_class_set2",
8
+ "LabHC/bias_in_bios_class_set3",
9
+ "canrager/amazon_reviews_mcauley_1and5",
10
+ "canrager/amazon_reviews_mcauley_1and5_sentiment",
11
+ "codeparrot/github-code",
12
+ "fancyzhx/ag_news",
13
+ "Helsinki-NLP/europarl"
14
+ ],
15
+ "probe_train_set_size": 4000,
16
+ "probe_test_set_size": 1000,
17
+ "context_length": 128,
18
+ "sae_batch_size": 125,
19
+ "llm_batch_size": 32,
20
+ "llm_dtype": "bfloat16",
21
+ "model_name": "gemma-2-2b",
22
+ "k_values": [
23
+ 1,
24
+ 2,
25
+ 5
26
+ ],
27
+ "lower_vram_usage": false
28
+ },
29
+ "eval_id": "9866eebd-cd06-48fb-8a64-b109659cc220",
30
+ "datetime_epoch_millis": 1740086609615,
31
+ "eval_result_metrics": {
32
+ "llm": {
33
+ "llm_test_accuracy": 0.9588312957435847,
34
+ "llm_top_1_test_accuracy": 0.6504687499999999,
35
+ "llm_top_2_test_accuracy": 0.7214187500000001,
36
+ "llm_top_5_test_accuracy": 0.7812625,
37
+ "llm_top_10_test_accuracy": null,
38
+ "llm_top_20_test_accuracy": null,
39
+ "llm_top_50_test_accuracy": null,
40
+ "llm_top_100_test_accuracy": null
41
+ },
42
+ "sae": {
43
+ "sae_test_accuracy": 0.9515750460326672,
44
+ "sae_top_1_test_accuracy": 0.71018125,
45
+ "sae_top_2_test_accuracy": 0.7746000000000001,
46
+ "sae_top_5_test_accuracy": 0.8620687499999999,
47
+ "sae_top_10_test_accuracy": null,
48
+ "sae_top_20_test_accuracy": null,
49
+ "sae_top_50_test_accuracy": null,
50
+ "sae_top_100_test_accuracy": null
51
+ }
52
+ },
53
+ "eval_result_details": [
54
+ {
55
+ "dataset_name": "LabHC/bias_in_bios_class_set1_results",
56
+ "llm_test_accuracy": 0.966800057888031,
57
+ "llm_top_1_test_accuracy": 0.6397999999999999,
58
+ "llm_top_2_test_accuracy": 0.6954,
59
+ "llm_top_5_test_accuracy": 0.7869999999999999,
60
+ "llm_top_10_test_accuracy": null,
61
+ "llm_top_20_test_accuracy": null,
62
+ "llm_top_50_test_accuracy": null,
63
+ "llm_top_100_test_accuracy": null,
64
+ "sae_test_accuracy": 0.9614000439643859,
65
+ "sae_top_1_test_accuracy": 0.7733999999999999,
66
+ "sae_top_2_test_accuracy": 0.8433999999999999,
67
+ "sae_top_5_test_accuracy": 0.8969999999999999,
68
+ "sae_top_10_test_accuracy": null,
69
+ "sae_top_20_test_accuracy": null,
70
+ "sae_top_50_test_accuracy": null,
71
+ "sae_top_100_test_accuracy": null
72
+ },
73
+ {
74
+ "dataset_name": "LabHC/bias_in_bios_class_set2_results",
75
+ "llm_test_accuracy": 0.9542000532150269,
76
+ "llm_top_1_test_accuracy": 0.6686,
77
+ "llm_top_2_test_accuracy": 0.7194,
78
+ "llm_top_5_test_accuracy": 0.763,
79
+ "llm_top_10_test_accuracy": null,
80
+ "llm_top_20_test_accuracy": null,
81
+ "llm_top_50_test_accuracy": null,
82
+ "llm_top_100_test_accuracy": null,
83
+ "sae_test_accuracy": 0.9378000497817993,
84
+ "sae_top_1_test_accuracy": 0.6926,
85
+ "sae_top_2_test_accuracy": 0.743,
86
+ "sae_top_5_test_accuracy": 0.8358000000000001,
87
+ "sae_top_10_test_accuracy": null,
88
+ "sae_top_20_test_accuracy": null,
89
+ "sae_top_50_test_accuracy": null,
90
+ "sae_top_100_test_accuracy": null
91
+ },
92
+ {
93
+ "dataset_name": "LabHC/bias_in_bios_class_set3_results",
94
+ "llm_test_accuracy": 0.9332000374794006,
95
+ "llm_top_1_test_accuracy": 0.6826000000000001,
96
+ "llm_top_2_test_accuracy": 0.7456,
97
+ "llm_top_5_test_accuracy": 0.7732,
98
+ "llm_top_10_test_accuracy": null,
99
+ "llm_top_20_test_accuracy": null,
100
+ "llm_top_50_test_accuracy": null,
101
+ "llm_top_100_test_accuracy": null,
102
+ "sae_test_accuracy": 0.9264000415802002,
103
+ "sae_top_1_test_accuracy": 0.6918,
104
+ "sae_top_2_test_accuracy": 0.7550000000000001,
105
+ "sae_top_5_test_accuracy": 0.8442000000000001,
106
+ "sae_top_10_test_accuracy": null,
107
+ "sae_top_20_test_accuracy": null,
108
+ "sae_top_50_test_accuracy": null,
109
+ "sae_top_100_test_accuracy": null
110
+ },
111
+ {
112
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results",
113
+ "llm_test_accuracy": 0.9140000343322754,
114
+ "llm_top_1_test_accuracy": 0.6006,
115
+ "llm_top_2_test_accuracy": 0.6432,
116
+ "llm_top_5_test_accuracy": 0.6728000000000001,
117
+ "llm_top_10_test_accuracy": null,
118
+ "llm_top_20_test_accuracy": null,
119
+ "llm_top_50_test_accuracy": null,
120
+ "llm_top_100_test_accuracy": null,
121
+ "sae_test_accuracy": 0.9108000516891479,
122
+ "sae_top_1_test_accuracy": 0.6826,
123
+ "sae_top_2_test_accuracy": 0.7562,
124
+ "sae_top_5_test_accuracy": 0.8084,
125
+ "sae_top_10_test_accuracy": null,
126
+ "sae_top_20_test_accuracy": null,
127
+ "sae_top_50_test_accuracy": null,
128
+ "sae_top_100_test_accuracy": null
129
+ },
130
+ {
131
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results",
132
+ "llm_test_accuracy": 0.9810000360012054,
133
+ "llm_top_1_test_accuracy": 0.673,
134
+ "llm_top_2_test_accuracy": 0.724,
135
+ "llm_top_5_test_accuracy": 0.766,
136
+ "llm_top_10_test_accuracy": null,
137
+ "llm_top_20_test_accuracy": null,
138
+ "llm_top_50_test_accuracy": null,
139
+ "llm_top_100_test_accuracy": null,
140
+ "sae_test_accuracy": 0.9640000462532043,
141
+ "sae_top_1_test_accuracy": 0.699,
142
+ "sae_top_2_test_accuracy": 0.786,
143
+ "sae_top_5_test_accuracy": 0.832,
144
+ "sae_top_10_test_accuracy": null,
145
+ "sae_top_20_test_accuracy": null,
146
+ "sae_top_50_test_accuracy": null,
147
+ "sae_top_100_test_accuracy": null
148
+ },
149
+ {
150
+ "dataset_name": "codeparrot/github-code_results",
151
+ "llm_test_accuracy": 0.9708000421524048,
152
+ "llm_top_1_test_accuracy": 0.6612,
153
+ "llm_top_2_test_accuracy": 0.6961999999999999,
154
+ "llm_top_5_test_accuracy": 0.7626,
155
+ "llm_top_10_test_accuracy": null,
156
+ "llm_top_20_test_accuracy": null,
157
+ "llm_top_50_test_accuracy": null,
158
+ "llm_top_100_test_accuracy": null,
159
+ "sae_test_accuracy": 0.9676000475883484,
160
+ "sae_top_1_test_accuracy": 0.6199999999999999,
161
+ "sae_top_2_test_accuracy": 0.7534000000000001,
162
+ "sae_top_5_test_accuracy": 0.8734000000000002,
163
+ "sae_top_10_test_accuracy": null,
164
+ "sae_top_20_test_accuracy": null,
165
+ "sae_top_50_test_accuracy": null,
166
+ "sae_top_100_test_accuracy": null
167
+ },
168
+ {
169
+ "dataset_name": "fancyzhx/ag_news_results",
170
+ "llm_test_accuracy": 0.9512500613927841,
171
+ "llm_top_1_test_accuracy": 0.6367499999999999,
172
+ "llm_top_2_test_accuracy": 0.76075,
173
+ "llm_top_5_test_accuracy": 0.8255,
174
+ "llm_top_10_test_accuracy": null,
175
+ "llm_top_20_test_accuracy": null,
176
+ "llm_top_50_test_accuracy": null,
177
+ "llm_top_100_test_accuracy": null,
178
+ "sae_test_accuracy": 0.9480000436306,
179
+ "sae_top_1_test_accuracy": 0.67825,
180
+ "sae_top_2_test_accuracy": 0.687,
181
+ "sae_top_5_test_accuracy": 0.82475,
182
+ "sae_top_10_test_accuracy": null,
183
+ "sae_top_20_test_accuracy": null,
184
+ "sae_top_50_test_accuracy": null,
185
+ "sae_top_100_test_accuracy": null
186
+ },
187
+ {
188
+ "dataset_name": "Helsinki-NLP/europarl_results",
189
+ "llm_test_accuracy": 0.9994000434875489,
190
+ "llm_top_1_test_accuracy": 0.6411999999999999,
191
+ "llm_top_2_test_accuracy": 0.7868,
192
+ "llm_top_5_test_accuracy": 0.9,
193
+ "llm_top_10_test_accuracy": null,
194
+ "llm_top_20_test_accuracy": null,
195
+ "llm_top_50_test_accuracy": null,
196
+ "llm_top_100_test_accuracy": null,
197
+ "sae_test_accuracy": 0.9966000437736511,
198
+ "sae_top_1_test_accuracy": 0.8438000000000001,
199
+ "sae_top_2_test_accuracy": 0.8728,
200
+ "sae_top_5_test_accuracy": 0.9810000000000001,
201
+ "sae_top_10_test_accuracy": null,
202
+ "sae_top_20_test_accuracy": null,
203
+ "sae_top_50_test_accuracy": null,
204
+ "sae_top_100_test_accuracy": null
205
+ }
206
+ ],
207
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
208
+ "sae_lens_id": "custom_sae",
209
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_0",
210
+ "sae_lens_version": "5.4.2",
211
+ "sae_cfg_dict": {
212
+ "model_name": "gemma-2-2b",
213
+ "d_in": 2304,
214
+ "d_sae": 16384,
215
+ "hook_layer": 12,
216
+ "hook_name": "blocks.12.hook_resid_post",
217
+ "context_size": null,
218
+ "hook_head_index": null,
219
+ "architecture": "topk",
220
+ "apply_b_dec_to_input": null,
221
+ "finetuning_scaling_factor": null,
222
+ "activation_fn_str": "",
223
+ "prepend_bos": true,
224
+ "normalize_activations": "none",
225
+ "dtype": "bfloat16",
226
+ "device": "",
227
+ "dataset_path": "",
228
+ "dataset_trust_remote_code": true,
229
+ "seqpos_slice": [
230
+ null
231
+ ],
232
+ "training_tokens": -100000,
233
+ "sae_lens_training_version": null,
234
+ "neuronpedia_id": null
235
+ },
236
+ "eval_result_unstructured": {
237
+ "LabHC/bias_in_bios_class_set1_results": {
238
+ "sae_test_accuracy": {
239
+ "0": 0.940000057220459,
240
+ "1": 0.9650000333786011,
241
+ "2": 0.9470000267028809,
242
+ "6": 0.9850000739097595,
243
+ "9": 0.9700000286102295
244
+ },
245
+ "llm_test_accuracy": {
246
+ "0": 0.9510000348091125,
247
+ "1": 0.9670000672340393,
248
+ "2": 0.9530000686645508,
249
+ "6": 0.987000048160553,
250
+ "9": 0.9760000705718994
251
+ },
252
+ "llm_top_1_test_accuracy": {
253
+ "0": 0.577,
254
+ "1": 0.613,
255
+ "2": 0.662,
256
+ "6": 0.787,
257
+ "9": 0.56
258
+ },
259
+ "llm_top_2_test_accuracy": {
260
+ "0": 0.574,
261
+ "1": 0.66,
262
+ "2": 0.718,
263
+ "6": 0.811,
264
+ "9": 0.714
265
+ },
266
+ "llm_top_5_test_accuracy": {
267
+ "0": 0.713,
268
+ "1": 0.711,
269
+ "2": 0.755,
270
+ "6": 0.895,
271
+ "9": 0.861
272
+ },
273
+ "sae_top_1_test_accuracy": {
274
+ "0": 0.615,
275
+ "1": 0.631,
276
+ "2": 0.865,
277
+ "6": 0.836,
278
+ "9": 0.92
279
+ },
280
+ "sae_top_2_test_accuracy": {
281
+ "0": 0.648,
282
+ "1": 0.8,
283
+ "2": 0.873,
284
+ "6": 0.978,
285
+ "9": 0.918
286
+ },
287
+ "sae_top_5_test_accuracy": {
288
+ "0": 0.863,
289
+ "1": 0.835,
290
+ "2": 0.875,
291
+ "6": 0.974,
292
+ "9": 0.938
293
+ }
294
+ },
295
+ "LabHC/bias_in_bios_class_set2_results": {
296
+ "sae_test_accuracy": {
297
+ "11": 0.9490000605583191,
298
+ "13": 0.9380000233650208,
299
+ "14": 0.9390000700950623,
300
+ "18": 0.9150000214576721,
301
+ "19": 0.9480000734329224
302
+ },
303
+ "llm_test_accuracy": {
304
+ "11": 0.968000054359436,
305
+ "13": 0.9500000476837158,
306
+ "14": 0.956000030040741,
307
+ "18": 0.9350000619888306,
308
+ "19": 0.9620000720024109
309
+ },
310
+ "llm_top_1_test_accuracy": {
311
+ "11": 0.545,
312
+ "13": 0.666,
313
+ "14": 0.649,
314
+ "18": 0.693,
315
+ "19": 0.79
316
+ },
317
+ "llm_top_2_test_accuracy": {
318
+ "11": 0.692,
319
+ "13": 0.724,
320
+ "14": 0.68,
321
+ "18": 0.732,
322
+ "19": 0.769
323
+ },
324
+ "llm_top_5_test_accuracy": {
325
+ "11": 0.793,
326
+ "13": 0.751,
327
+ "14": 0.718,
328
+ "18": 0.723,
329
+ "19": 0.83
330
+ },
331
+ "sae_top_1_test_accuracy": {
332
+ "11": 0.602,
333
+ "13": 0.68,
334
+ "14": 0.634,
335
+ "18": 0.713,
336
+ "19": 0.834
337
+ },
338
+ "sae_top_2_test_accuracy": {
339
+ "11": 0.757,
340
+ "13": 0.684,
341
+ "14": 0.73,
342
+ "18": 0.723,
343
+ "19": 0.821
344
+ },
345
+ "sae_top_5_test_accuracy": {
346
+ "11": 0.883,
347
+ "13": 0.691,
348
+ "14": 0.866,
349
+ "18": 0.908,
350
+ "19": 0.831
351
+ }
352
+ },
353
+ "LabHC/bias_in_bios_class_set3_results": {
354
+ "sae_test_accuracy": {
355
+ "20": 0.9580000638961792,
356
+ "21": 0.9300000667572021,
357
+ "22": 0.9050000309944153,
358
+ "25": 0.9470000267028809,
359
+ "26": 0.8920000195503235
360
+ },
361
+ "llm_test_accuracy": {
362
+ "20": 0.9580000638961792,
363
+ "21": 0.9240000247955322,
364
+ "22": 0.9200000166893005,
365
+ "25": 0.9630000591278076,
366
+ "26": 0.9010000228881836
367
+ },
368
+ "llm_top_1_test_accuracy": {
369
+ "20": 0.696,
370
+ "21": 0.757,
371
+ "22": 0.637,
372
+ "25": 0.692,
373
+ "26": 0.631
374
+ },
375
+ "llm_top_2_test_accuracy": {
376
+ "20": 0.818,
377
+ "21": 0.774,
378
+ "22": 0.688,
379
+ "25": 0.762,
380
+ "26": 0.686
381
+ },
382
+ "llm_top_5_test_accuracy": {
383
+ "20": 0.862,
384
+ "21": 0.792,
385
+ "22": 0.748,
386
+ "25": 0.791,
387
+ "26": 0.673
388
+ },
389
+ "sae_top_1_test_accuracy": {
390
+ "20": 0.781,
391
+ "21": 0.714,
392
+ "22": 0.479,
393
+ "25": 0.864,
394
+ "26": 0.621
395
+ },
396
+ "sae_top_2_test_accuracy": {
397
+ "20": 0.917,
398
+ "21": 0.723,
399
+ "22": 0.55,
400
+ "25": 0.875,
401
+ "26": 0.71
402
+ },
403
+ "sae_top_5_test_accuracy": {
404
+ "20": 0.927,
405
+ "21": 0.852,
406
+ "22": 0.837,
407
+ "25": 0.875,
408
+ "26": 0.73
409
+ }
410
+ },
411
+ "canrager/amazon_reviews_mcauley_1and5_results": {
412
+ "sae_test_accuracy": {
413
+ "1": 0.9320000410079956,
414
+ "2": 0.9350000619888306,
415
+ "3": 0.9310000538825989,
416
+ "5": 0.9030000567436218,
417
+ "6": 0.8530000448226929
418
+ },
419
+ "llm_test_accuracy": {
420
+ "1": 0.940000057220459,
421
+ "2": 0.9340000152587891,
422
+ "3": 0.9200000166893005,
423
+ "5": 0.9150000214576721,
424
+ "6": 0.8610000610351562
425
+ },
426
+ "llm_top_1_test_accuracy": {
427
+ "1": 0.662,
428
+ "2": 0.599,
429
+ "3": 0.592,
430
+ "5": 0.57,
431
+ "6": 0.58
432
+ },
433
+ "llm_top_2_test_accuracy": {
434
+ "1": 0.748,
435
+ "2": 0.642,
436
+ "3": 0.6,
437
+ "5": 0.625,
438
+ "6": 0.601
439
+ },
440
+ "llm_top_5_test_accuracy": {
441
+ "1": 0.764,
442
+ "2": 0.646,
443
+ "3": 0.639,
444
+ "5": 0.638,
445
+ "6": 0.677
446
+ },
447
+ "sae_top_1_test_accuracy": {
448
+ "1": 0.838,
449
+ "2": 0.606,
450
+ "3": 0.553,
451
+ "5": 0.775,
452
+ "6": 0.641
453
+ },
454
+ "sae_top_2_test_accuracy": {
455
+ "1": 0.867,
456
+ "2": 0.743,
457
+ "3": 0.673,
458
+ "5": 0.781,
459
+ "6": 0.717
460
+ },
461
+ "sae_top_5_test_accuracy": {
462
+ "1": 0.91,
463
+ "2": 0.856,
464
+ "3": 0.763,
465
+ "5": 0.782,
466
+ "6": 0.731
467
+ }
468
+ },
469
+ "canrager/amazon_reviews_mcauley_1and5_sentiment_results": {
470
+ "sae_test_accuracy": {
471
+ "1.0": 0.9630000591278076,
472
+ "5.0": 0.9650000333786011
473
+ },
474
+ "llm_test_accuracy": {
475
+ "1.0": 0.9800000190734863,
476
+ "5.0": 0.9820000529289246
477
+ },
478
+ "llm_top_1_test_accuracy": {
479
+ "1.0": 0.673,
480
+ "5.0": 0.673
481
+ },
482
+ "llm_top_2_test_accuracy": {
483
+ "1.0": 0.724,
484
+ "5.0": 0.724
485
+ },
486
+ "llm_top_5_test_accuracy": {
487
+ "1.0": 0.766,
488
+ "5.0": 0.766
489
+ },
490
+ "sae_top_1_test_accuracy": {
491
+ "1.0": 0.699,
492
+ "5.0": 0.699
493
+ },
494
+ "sae_top_2_test_accuracy": {
495
+ "1.0": 0.786,
496
+ "5.0": 0.786
497
+ },
498
+ "sae_top_5_test_accuracy": {
499
+ "1.0": 0.832,
500
+ "5.0": 0.832
501
+ }
502
+ },
503
+ "codeparrot/github-code_results": {
504
+ "sae_test_accuracy": {
505
+ "C": 0.9570000171661377,
506
+ "Python": 0.9850000739097595,
507
+ "HTML": 0.9790000319480896,
508
+ "Java": 0.9580000638961792,
509
+ "PHP": 0.9590000510215759
510
+ },
511
+ "llm_test_accuracy": {
512
+ "C": 0.9580000638961792,
513
+ "Python": 0.9850000739097595,
514
+ "HTML": 0.9890000224113464,
515
+ "Java": 0.9660000205039978,
516
+ "PHP": 0.956000030040741
517
+ },
518
+ "llm_top_1_test_accuracy": {
519
+ "C": 0.672,
520
+ "Python": 0.643,
521
+ "HTML": 0.788,
522
+ "Java": 0.616,
523
+ "PHP": 0.587
524
+ },
525
+ "llm_top_2_test_accuracy": {
526
+ "C": 0.653,
527
+ "Python": 0.675,
528
+ "HTML": 0.826,
529
+ "Java": 0.685,
530
+ "PHP": 0.642
531
+ },
532
+ "llm_top_5_test_accuracy": {
533
+ "C": 0.758,
534
+ "Python": 0.731,
535
+ "HTML": 0.898,
536
+ "Java": 0.722,
537
+ "PHP": 0.704
538
+ },
539
+ "sae_top_1_test_accuracy": {
540
+ "C": 0.539,
541
+ "Python": 0.62,
542
+ "HTML": 0.687,
543
+ "Java": 0.643,
544
+ "PHP": 0.611
545
+ },
546
+ "sae_top_2_test_accuracy": {
547
+ "C": 0.618,
548
+ "Python": 0.669,
549
+ "HTML": 0.89,
550
+ "Java": 0.672,
551
+ "PHP": 0.918
552
+ },
553
+ "sae_top_5_test_accuracy": {
554
+ "C": 0.761,
555
+ "Python": 0.935,
556
+ "HTML": 0.927,
557
+ "Java": 0.825,
558
+ "PHP": 0.919
559
+ }
560
+ },
561
+ "fancyzhx/ag_news_results": {
562
+ "sae_test_accuracy": {
563
+ "0": 0.9360000491142273,
564
+ "1": 0.9790000319480896,
565
+ "2": 0.9300000667572021,
566
+ "3": 0.9470000267028809
567
+ },
568
+ "llm_test_accuracy": {
569
+ "0": 0.940000057220459,
570
+ "1": 0.9850000739097595,
571
+ "2": 0.9300000667572021,
572
+ "3": 0.9500000476837158
573
+ },
574
+ "llm_top_1_test_accuracy": {
575
+ "0": 0.568,
576
+ "1": 0.671,
577
+ "2": 0.667,
578
+ "3": 0.641
579
+ },
580
+ "llm_top_2_test_accuracy": {
581
+ "0": 0.802,
582
+ "1": 0.802,
583
+ "2": 0.701,
584
+ "3": 0.738
585
+ },
586
+ "llm_top_5_test_accuracy": {
587
+ "0": 0.813,
588
+ "1": 0.884,
589
+ "2": 0.762,
590
+ "3": 0.843
591
+ },
592
+ "sae_top_1_test_accuracy": {
593
+ "0": 0.667,
594
+ "1": 0.677,
595
+ "2": 0.744,
596
+ "3": 0.625
597
+ },
598
+ "sae_top_2_test_accuracy": {
599
+ "0": 0.671,
600
+ "1": 0.674,
601
+ "2": 0.772,
602
+ "3": 0.631
603
+ },
604
+ "sae_top_5_test_accuracy": {
605
+ "0": 0.812,
606
+ "1": 0.927,
607
+ "2": 0.819,
608
+ "3": 0.741
609
+ }
610
+ },
611
+ "Helsinki-NLP/europarl_results": {
612
+ "sae_test_accuracy": {
613
+ "en": 0.9980000257492065,
614
+ "fr": 0.999000072479248,
615
+ "de": 0.9940000176429749,
616
+ "es": 0.9950000643730164,
617
+ "nl": 0.9970000386238098
618
+ },
619
+ "llm_test_accuracy": {
620
+ "en": 0.999000072479248,
621
+ "fr": 0.999000072479248,
622
+ "de": 1.0,
623
+ "es": 1.0,
624
+ "nl": 0.999000072479248
625
+ },
626
+ "llm_top_1_test_accuracy": {
627
+ "en": 0.732,
628
+ "fr": 0.587,
629
+ "de": 0.759,
630
+ "es": 0.489,
631
+ "nl": 0.639
632
+ },
633
+ "llm_top_2_test_accuracy": {
634
+ "en": 0.834,
635
+ "fr": 0.604,
636
+ "de": 0.84,
637
+ "es": 0.907,
638
+ "nl": 0.749
639
+ },
640
+ "llm_top_5_test_accuracy": {
641
+ "en": 0.889,
642
+ "fr": 0.927,
643
+ "de": 0.834,
644
+ "es": 0.977,
645
+ "nl": 0.873
646
+ },
647
+ "sae_top_1_test_accuracy": {
648
+ "en": 0.827,
649
+ "fr": 0.985,
650
+ "de": 0.916,
651
+ "es": 0.732,
652
+ "nl": 0.759
653
+ },
654
+ "sae_top_2_test_accuracy": {
655
+ "en": 0.842,
656
+ "fr": 0.986,
657
+ "de": 0.931,
658
+ "es": 0.868,
659
+ "nl": 0.737
660
+ },
661
+ "sae_top_5_test_accuracy": {
662
+ "en": 0.997,
663
+ "fr": 0.993,
664
+ "de": 0.937,
665
+ "es": 0.989,
666
+ "nl": 0.989
667
+ }
668
+ }
669
+ }
670
+ }
eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_1_custom_sae_eval_results.json ADDED
@@ -0,0 +1,670 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "sparse_probing",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "LabHC/bias_in_bios_class_set2",
8
+ "LabHC/bias_in_bios_class_set3",
9
+ "canrager/amazon_reviews_mcauley_1and5",
10
+ "canrager/amazon_reviews_mcauley_1and5_sentiment",
11
+ "codeparrot/github-code",
12
+ "fancyzhx/ag_news",
13
+ "Helsinki-NLP/europarl"
14
+ ],
15
+ "probe_train_set_size": 4000,
16
+ "probe_test_set_size": 1000,
17
+ "context_length": 128,
18
+ "sae_batch_size": 125,
19
+ "llm_batch_size": 32,
20
+ "llm_dtype": "bfloat16",
21
+ "model_name": "gemma-2-2b",
22
+ "k_values": [
23
+ 1,
24
+ 2,
25
+ 5
26
+ ],
27
+ "lower_vram_usage": false
28
+ },
29
+ "eval_id": "3282e44d-514a-4270-a8eb-bb5f7a778513",
30
+ "datetime_epoch_millis": 1740086292458,
31
+ "eval_result_metrics": {
32
+ "llm": {
33
+ "llm_test_accuracy": 0.9588312957435847,
34
+ "llm_top_1_test_accuracy": 0.6504687499999999,
35
+ "llm_top_2_test_accuracy": 0.7214187500000001,
36
+ "llm_top_5_test_accuracy": 0.7812625,
37
+ "llm_top_10_test_accuracy": null,
38
+ "llm_top_20_test_accuracy": null,
39
+ "llm_top_50_test_accuracy": null,
40
+ "llm_top_100_test_accuracy": null
41
+ },
42
+ "sae": {
43
+ "sae_test_accuracy": 0.9543812893331051,
44
+ "sae_top_1_test_accuracy": 0.7360812499999999,
45
+ "sae_top_2_test_accuracy": 0.7880250000000001,
46
+ "sae_top_5_test_accuracy": 0.8683437499999999,
47
+ "sae_top_10_test_accuracy": null,
48
+ "sae_top_20_test_accuracy": null,
49
+ "sae_top_50_test_accuracy": null,
50
+ "sae_top_100_test_accuracy": null
51
+ }
52
+ },
53
+ "eval_result_details": [
54
+ {
55
+ "dataset_name": "LabHC/bias_in_bios_class_set1_results",
56
+ "llm_test_accuracy": 0.966800057888031,
57
+ "llm_top_1_test_accuracy": 0.6397999999999999,
58
+ "llm_top_2_test_accuracy": 0.6954,
59
+ "llm_top_5_test_accuracy": 0.7869999999999999,
60
+ "llm_top_10_test_accuracy": null,
61
+ "llm_top_20_test_accuracy": null,
62
+ "llm_top_50_test_accuracy": null,
63
+ "llm_top_100_test_accuracy": null,
64
+ "sae_test_accuracy": 0.9612000465393067,
65
+ "sae_top_1_test_accuracy": 0.761,
66
+ "sae_top_2_test_accuracy": 0.8454,
67
+ "sae_top_5_test_accuracy": 0.8981999999999999,
68
+ "sae_top_10_test_accuracy": null,
69
+ "sae_top_20_test_accuracy": null,
70
+ "sae_top_50_test_accuracy": null,
71
+ "sae_top_100_test_accuracy": null
72
+ },
73
+ {
74
+ "dataset_name": "LabHC/bias_in_bios_class_set2_results",
75
+ "llm_test_accuracy": 0.9542000532150269,
76
+ "llm_top_1_test_accuracy": 0.6686,
77
+ "llm_top_2_test_accuracy": 0.7194,
78
+ "llm_top_5_test_accuracy": 0.763,
79
+ "llm_top_10_test_accuracy": null,
80
+ "llm_top_20_test_accuracy": null,
81
+ "llm_top_50_test_accuracy": null,
82
+ "llm_top_100_test_accuracy": null,
83
+ "sae_test_accuracy": 0.9456000328063965,
84
+ "sae_top_1_test_accuracy": 0.6824000000000001,
85
+ "sae_top_2_test_accuracy": 0.7657999999999999,
86
+ "sae_top_5_test_accuracy": 0.8288,
87
+ "sae_top_10_test_accuracy": null,
88
+ "sae_top_20_test_accuracy": null,
89
+ "sae_top_50_test_accuracy": null,
90
+ "sae_top_100_test_accuracy": null
91
+ },
92
+ {
93
+ "dataset_name": "LabHC/bias_in_bios_class_set3_results",
94
+ "llm_test_accuracy": 0.9332000374794006,
95
+ "llm_top_1_test_accuracy": 0.6826000000000001,
96
+ "llm_top_2_test_accuracy": 0.7456,
97
+ "llm_top_5_test_accuracy": 0.7732,
98
+ "llm_top_10_test_accuracy": null,
99
+ "llm_top_20_test_accuracy": null,
100
+ "llm_top_50_test_accuracy": null,
101
+ "llm_top_100_test_accuracy": null,
102
+ "sae_test_accuracy": 0.9264000296592713,
103
+ "sae_top_1_test_accuracy": 0.7498,
104
+ "sae_top_2_test_accuracy": 0.8088000000000001,
105
+ "sae_top_5_test_accuracy": 0.8632,
106
+ "sae_top_10_test_accuracy": null,
107
+ "sae_top_20_test_accuracy": null,
108
+ "sae_top_50_test_accuracy": null,
109
+ "sae_top_100_test_accuracy": null
110
+ },
111
+ {
112
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results",
113
+ "llm_test_accuracy": 0.9140000343322754,
114
+ "llm_top_1_test_accuracy": 0.6006,
115
+ "llm_top_2_test_accuracy": 0.6432,
116
+ "llm_top_5_test_accuracy": 0.6728000000000001,
117
+ "llm_top_10_test_accuracy": null,
118
+ "llm_top_20_test_accuracy": null,
119
+ "llm_top_50_test_accuracy": null,
120
+ "llm_top_100_test_accuracy": null,
121
+ "sae_test_accuracy": 0.9126000285148621,
122
+ "sae_top_1_test_accuracy": 0.72,
123
+ "sae_top_2_test_accuracy": 0.7510000000000001,
124
+ "sae_top_5_test_accuracy": 0.8051999999999999,
125
+ "sae_top_10_test_accuracy": null,
126
+ "sae_top_20_test_accuracy": null,
127
+ "sae_top_50_test_accuracy": null,
128
+ "sae_top_100_test_accuracy": null
129
+ },
130
+ {
131
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results",
132
+ "llm_test_accuracy": 0.9810000360012054,
133
+ "llm_top_1_test_accuracy": 0.673,
134
+ "llm_top_2_test_accuracy": 0.724,
135
+ "llm_top_5_test_accuracy": 0.766,
136
+ "llm_top_10_test_accuracy": null,
137
+ "llm_top_20_test_accuracy": null,
138
+ "llm_top_50_test_accuracy": null,
139
+ "llm_top_100_test_accuracy": null,
140
+ "sae_test_accuracy": 0.9705000519752502,
141
+ "sae_top_1_test_accuracy": 0.773,
142
+ "sae_top_2_test_accuracy": 0.818,
143
+ "sae_top_5_test_accuracy": 0.93,
144
+ "sae_top_10_test_accuracy": null,
145
+ "sae_top_20_test_accuracy": null,
146
+ "sae_top_50_test_accuracy": null,
147
+ "sae_top_100_test_accuracy": null
148
+ },
149
+ {
150
+ "dataset_name": "codeparrot/github-code_results",
151
+ "llm_test_accuracy": 0.9708000421524048,
152
+ "llm_top_1_test_accuracy": 0.6612,
153
+ "llm_top_2_test_accuracy": 0.6961999999999999,
154
+ "llm_top_5_test_accuracy": 0.7626,
155
+ "llm_top_10_test_accuracy": null,
156
+ "llm_top_20_test_accuracy": null,
157
+ "llm_top_50_test_accuracy": null,
158
+ "llm_top_100_test_accuracy": null,
159
+ "sae_test_accuracy": 0.9720000505447388,
160
+ "sae_top_1_test_accuracy": 0.6352,
161
+ "sae_top_2_test_accuracy": 0.6584,
162
+ "sae_top_5_test_accuracy": 0.8277999999999999,
163
+ "sae_top_10_test_accuracy": null,
164
+ "sae_top_20_test_accuracy": null,
165
+ "sae_top_50_test_accuracy": null,
166
+ "sae_top_100_test_accuracy": null
167
+ },
168
+ {
169
+ "dataset_name": "fancyzhx/ag_news_results",
170
+ "llm_test_accuracy": 0.9512500613927841,
171
+ "llm_top_1_test_accuracy": 0.6367499999999999,
172
+ "llm_top_2_test_accuracy": 0.76075,
173
+ "llm_top_5_test_accuracy": 0.8255,
174
+ "llm_top_10_test_accuracy": null,
175
+ "llm_top_20_test_accuracy": null,
176
+ "llm_top_50_test_accuracy": null,
177
+ "llm_top_100_test_accuracy": null,
178
+ "sae_test_accuracy": 0.9487500488758087,
179
+ "sae_top_1_test_accuracy": 0.68725,
180
+ "sae_top_2_test_accuracy": 0.6950000000000001,
181
+ "sae_top_5_test_accuracy": 0.7987500000000001,
182
+ "sae_top_10_test_accuracy": null,
183
+ "sae_top_20_test_accuracy": null,
184
+ "sae_top_50_test_accuracy": null,
185
+ "sae_top_100_test_accuracy": null
186
+ },
187
+ {
188
+ "dataset_name": "Helsinki-NLP/europarl_results",
189
+ "llm_test_accuracy": 0.9994000434875489,
190
+ "llm_top_1_test_accuracy": 0.6411999999999999,
191
+ "llm_top_2_test_accuracy": 0.7868,
192
+ "llm_top_5_test_accuracy": 0.9,
193
+ "llm_top_10_test_accuracy": null,
194
+ "llm_top_20_test_accuracy": null,
195
+ "llm_top_50_test_accuracy": null,
196
+ "llm_top_100_test_accuracy": null,
197
+ "sae_test_accuracy": 0.9980000257492065,
198
+ "sae_top_1_test_accuracy": 0.8800000000000001,
199
+ "sae_top_2_test_accuracy": 0.9617999999999999,
200
+ "sae_top_5_test_accuracy": 0.9948,
201
+ "sae_top_10_test_accuracy": null,
202
+ "sae_top_20_test_accuracy": null,
203
+ "sae_top_50_test_accuracy": null,
204
+ "sae_top_100_test_accuracy": null
205
+ }
206
+ ],
207
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
208
+ "sae_lens_id": "custom_sae",
209
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_1",
210
+ "sae_lens_version": "5.4.2",
211
+ "sae_cfg_dict": {
212
+ "model_name": "gemma-2-2b",
213
+ "d_in": 2304,
214
+ "d_sae": 16384,
215
+ "hook_layer": 12,
216
+ "hook_name": "blocks.12.hook_resid_post",
217
+ "context_size": null,
218
+ "hook_head_index": null,
219
+ "architecture": "topk",
220
+ "apply_b_dec_to_input": null,
221
+ "finetuning_scaling_factor": null,
222
+ "activation_fn_str": "",
223
+ "prepend_bos": true,
224
+ "normalize_activations": "none",
225
+ "dtype": "bfloat16",
226
+ "device": "",
227
+ "dataset_path": "",
228
+ "dataset_trust_remote_code": true,
229
+ "seqpos_slice": [
230
+ null
231
+ ],
232
+ "training_tokens": -100000,
233
+ "sae_lens_training_version": null,
234
+ "neuronpedia_id": null
235
+ },
236
+ "eval_result_unstructured": {
237
+ "LabHC/bias_in_bios_class_set1_results": {
238
+ "sae_test_accuracy": {
239
+ "0": 0.9380000233650208,
240
+ "1": 0.9620000720024109,
241
+ "2": 0.9490000605583191,
242
+ "6": 0.9820000529289246,
243
+ "9": 0.9750000238418579
244
+ },
245
+ "llm_test_accuracy": {
246
+ "0": 0.9510000348091125,
247
+ "1": 0.9670000672340393,
248
+ "2": 0.9530000686645508,
249
+ "6": 0.987000048160553,
250
+ "9": 0.9760000705718994
251
+ },
252
+ "llm_top_1_test_accuracy": {
253
+ "0": 0.577,
254
+ "1": 0.613,
255
+ "2": 0.662,
256
+ "6": 0.787,
257
+ "9": 0.56
258
+ },
259
+ "llm_top_2_test_accuracy": {
260
+ "0": 0.574,
261
+ "1": 0.66,
262
+ "2": 0.718,
263
+ "6": 0.811,
264
+ "9": 0.714
265
+ },
266
+ "llm_top_5_test_accuracy": {
267
+ "0": 0.713,
268
+ "1": 0.711,
269
+ "2": 0.755,
270
+ "6": 0.895,
271
+ "9": 0.861
272
+ },
273
+ "sae_top_1_test_accuracy": {
274
+ "0": 0.562,
275
+ "1": 0.629,
276
+ "2": 0.86,
277
+ "6": 0.83,
278
+ "9": 0.924
279
+ },
280
+ "sae_top_2_test_accuracy": {
281
+ "0": 0.64,
282
+ "1": 0.818,
283
+ "2": 0.869,
284
+ "6": 0.98,
285
+ "9": 0.92
286
+ },
287
+ "sae_top_5_test_accuracy": {
288
+ "0": 0.844,
289
+ "1": 0.836,
290
+ "2": 0.883,
291
+ "6": 0.979,
292
+ "9": 0.949
293
+ }
294
+ },
295
+ "LabHC/bias_in_bios_class_set2_results": {
296
+ "sae_test_accuracy": {
297
+ "11": 0.9470000267028809,
298
+ "13": 0.9510000348091125,
299
+ "14": 0.9510000348091125,
300
+ "18": 0.9220000505447388,
301
+ "19": 0.9570000171661377
302
+ },
303
+ "llm_test_accuracy": {
304
+ "11": 0.968000054359436,
305
+ "13": 0.9500000476837158,
306
+ "14": 0.956000030040741,
307
+ "18": 0.9350000619888306,
308
+ "19": 0.9620000720024109
309
+ },
310
+ "llm_top_1_test_accuracy": {
311
+ "11": 0.545,
312
+ "13": 0.666,
313
+ "14": 0.649,
314
+ "18": 0.693,
315
+ "19": 0.79
316
+ },
317
+ "llm_top_2_test_accuracy": {
318
+ "11": 0.692,
319
+ "13": 0.724,
320
+ "14": 0.68,
321
+ "18": 0.732,
322
+ "19": 0.769
323
+ },
324
+ "llm_top_5_test_accuracy": {
325
+ "11": 0.793,
326
+ "13": 0.751,
327
+ "14": 0.718,
328
+ "18": 0.723,
329
+ "19": 0.83
330
+ },
331
+ "sae_top_1_test_accuracy": {
332
+ "11": 0.583,
333
+ "13": 0.675,
334
+ "14": 0.652,
335
+ "18": 0.697,
336
+ "19": 0.805
337
+ },
338
+ "sae_top_2_test_accuracy": {
339
+ "11": 0.749,
340
+ "13": 0.668,
341
+ "14": 0.862,
342
+ "18": 0.709,
343
+ "19": 0.841
344
+ },
345
+ "sae_top_5_test_accuracy": {
346
+ "11": 0.951,
347
+ "13": 0.709,
348
+ "14": 0.872,
349
+ "18": 0.734,
350
+ "19": 0.878
351
+ }
352
+ },
353
+ "LabHC/bias_in_bios_class_set3_results": {
354
+ "sae_test_accuracy": {
355
+ "20": 0.956000030040741,
356
+ "21": 0.9190000295639038,
357
+ "22": 0.9080000519752502,
358
+ "25": 0.9520000219345093,
359
+ "26": 0.8970000147819519
360
+ },
361
+ "llm_test_accuracy": {
362
+ "20": 0.9580000638961792,
363
+ "21": 0.9240000247955322,
364
+ "22": 0.9200000166893005,
365
+ "25": 0.9630000591278076,
366
+ "26": 0.9010000228881836
367
+ },
368
+ "llm_top_1_test_accuracy": {
369
+ "20": 0.696,
370
+ "21": 0.757,
371
+ "22": 0.637,
372
+ "25": 0.692,
373
+ "26": 0.631
374
+ },
375
+ "llm_top_2_test_accuracy": {
376
+ "20": 0.818,
377
+ "21": 0.774,
378
+ "22": 0.688,
379
+ "25": 0.762,
380
+ "26": 0.686
381
+ },
382
+ "llm_top_5_test_accuracy": {
383
+ "20": 0.862,
384
+ "21": 0.792,
385
+ "22": 0.748,
386
+ "25": 0.791,
387
+ "26": 0.673
388
+ },
389
+ "sae_top_1_test_accuracy": {
390
+ "20": 0.897,
391
+ "21": 0.513,
392
+ "22": 0.833,
393
+ "25": 0.896,
394
+ "26": 0.61
395
+ },
396
+ "sae_top_2_test_accuracy": {
397
+ "20": 0.904,
398
+ "21": 0.795,
399
+ "22": 0.857,
400
+ "25": 0.875,
401
+ "26": 0.613
402
+ },
403
+ "sae_top_5_test_accuracy": {
404
+ "20": 0.923,
405
+ "21": 0.849,
406
+ "22": 0.874,
407
+ "25": 0.895,
408
+ "26": 0.775
409
+ }
410
+ },
411
+ "canrager/amazon_reviews_mcauley_1and5_results": {
412
+ "sae_test_accuracy": {
413
+ "1": 0.9430000185966492,
414
+ "2": 0.9340000152587891,
415
+ "3": 0.9100000262260437,
416
+ "5": 0.9000000357627869,
417
+ "6": 0.8760000467300415
418
+ },
419
+ "llm_test_accuracy": {
420
+ "1": 0.940000057220459,
421
+ "2": 0.9340000152587891,
422
+ "3": 0.9200000166893005,
423
+ "5": 0.9150000214576721,
424
+ "6": 0.8610000610351562
425
+ },
426
+ "llm_top_1_test_accuracy": {
427
+ "1": 0.662,
428
+ "2": 0.599,
429
+ "3": 0.592,
430
+ "5": 0.57,
431
+ "6": 0.58
432
+ },
433
+ "llm_top_2_test_accuracy": {
434
+ "1": 0.748,
435
+ "2": 0.642,
436
+ "3": 0.6,
437
+ "5": 0.625,
438
+ "6": 0.601
439
+ },
440
+ "llm_top_5_test_accuracy": {
441
+ "1": 0.764,
442
+ "2": 0.646,
443
+ "3": 0.639,
444
+ "5": 0.638,
445
+ "6": 0.677
446
+ },
447
+ "sae_top_1_test_accuracy": {
448
+ "1": 0.841,
449
+ "2": 0.659,
450
+ "3": 0.604,
451
+ "5": 0.827,
452
+ "6": 0.669
453
+ },
454
+ "sae_top_2_test_accuracy": {
455
+ "1": 0.879,
456
+ "2": 0.679,
457
+ "3": 0.687,
458
+ "5": 0.834,
459
+ "6": 0.676
460
+ },
461
+ "sae_top_5_test_accuracy": {
462
+ "1": 0.915,
463
+ "2": 0.809,
464
+ "3": 0.719,
465
+ "5": 0.828,
466
+ "6": 0.755
467
+ }
468
+ },
469
+ "canrager/amazon_reviews_mcauley_1and5_sentiment_results": {
470
+ "sae_test_accuracy": {
471
+ "1.0": 0.9700000286102295,
472
+ "5.0": 0.971000075340271
473
+ },
474
+ "llm_test_accuracy": {
475
+ "1.0": 0.9800000190734863,
476
+ "5.0": 0.9820000529289246
477
+ },
478
+ "llm_top_1_test_accuracy": {
479
+ "1.0": 0.673,
480
+ "5.0": 0.673
481
+ },
482
+ "llm_top_2_test_accuracy": {
483
+ "1.0": 0.724,
484
+ "5.0": 0.724
485
+ },
486
+ "llm_top_5_test_accuracy": {
487
+ "1.0": 0.766,
488
+ "5.0": 0.766
489
+ },
490
+ "sae_top_1_test_accuracy": {
491
+ "1.0": 0.773,
492
+ "5.0": 0.773
493
+ },
494
+ "sae_top_2_test_accuracy": {
495
+ "1.0": 0.818,
496
+ "5.0": 0.818
497
+ },
498
+ "sae_top_5_test_accuracy": {
499
+ "1.0": 0.93,
500
+ "5.0": 0.93
501
+ }
502
+ },
503
+ "codeparrot/github-code_results": {
504
+ "sae_test_accuracy": {
505
+ "C": 0.9580000638961792,
506
+ "Python": 0.9860000610351562,
507
+ "HTML": 0.9860000610351562,
508
+ "Java": 0.9660000205039978,
509
+ "PHP": 0.9640000462532043
510
+ },
511
+ "llm_test_accuracy": {
512
+ "C": 0.9580000638961792,
513
+ "Python": 0.9850000739097595,
514
+ "HTML": 0.9890000224113464,
515
+ "Java": 0.9660000205039978,
516
+ "PHP": 0.956000030040741
517
+ },
518
+ "llm_top_1_test_accuracy": {
519
+ "C": 0.672,
520
+ "Python": 0.643,
521
+ "HTML": 0.788,
522
+ "Java": 0.616,
523
+ "PHP": 0.587
524
+ },
525
+ "llm_top_2_test_accuracy": {
526
+ "C": 0.653,
527
+ "Python": 0.675,
528
+ "HTML": 0.826,
529
+ "Java": 0.685,
530
+ "PHP": 0.642
531
+ },
532
+ "llm_top_5_test_accuracy": {
533
+ "C": 0.758,
534
+ "Python": 0.731,
535
+ "HTML": 0.898,
536
+ "Java": 0.722,
537
+ "PHP": 0.704
538
+ },
539
+ "sae_top_1_test_accuracy": {
540
+ "C": 0.636,
541
+ "Python": 0.629,
542
+ "HTML": 0.696,
543
+ "Java": 0.615,
544
+ "PHP": 0.6
545
+ },
546
+ "sae_top_2_test_accuracy": {
547
+ "C": 0.629,
548
+ "Python": 0.66,
549
+ "HTML": 0.735,
550
+ "Java": 0.674,
551
+ "PHP": 0.594
552
+ },
553
+ "sae_top_5_test_accuracy": {
554
+ "C": 0.654,
555
+ "Python": 0.935,
556
+ "HTML": 0.92,
557
+ "Java": 0.703,
558
+ "PHP": 0.927
559
+ }
560
+ },
561
+ "fancyzhx/ag_news_results": {
562
+ "sae_test_accuracy": {
563
+ "0": 0.9350000619888306,
564
+ "1": 0.9810000658035278,
565
+ "2": 0.9220000505447388,
566
+ "3": 0.9570000171661377
567
+ },
568
+ "llm_test_accuracy": {
569
+ "0": 0.940000057220459,
570
+ "1": 0.9850000739097595,
571
+ "2": 0.9300000667572021,
572
+ "3": 0.9500000476837158
573
+ },
574
+ "llm_top_1_test_accuracy": {
575
+ "0": 0.568,
576
+ "1": 0.671,
577
+ "2": 0.667,
578
+ "3": 0.641
579
+ },
580
+ "llm_top_2_test_accuracy": {
581
+ "0": 0.802,
582
+ "1": 0.802,
583
+ "2": 0.701,
584
+ "3": 0.738
585
+ },
586
+ "llm_top_5_test_accuracy": {
587
+ "0": 0.813,
588
+ "1": 0.884,
589
+ "2": 0.762,
590
+ "3": 0.843
591
+ },
592
+ "sae_top_1_test_accuracy": {
593
+ "0": 0.804,
594
+ "1": 0.692,
595
+ "2": 0.629,
596
+ "3": 0.624
597
+ },
598
+ "sae_top_2_test_accuracy": {
599
+ "0": 0.809,
600
+ "1": 0.685,
601
+ "2": 0.643,
602
+ "3": 0.643
603
+ },
604
+ "sae_top_5_test_accuracy": {
605
+ "0": 0.829,
606
+ "1": 0.843,
607
+ "2": 0.767,
608
+ "3": 0.756
609
+ }
610
+ },
611
+ "Helsinki-NLP/europarl_results": {
612
+ "sae_test_accuracy": {
613
+ "en": 1.0,
614
+ "fr": 0.9980000257492065,
615
+ "de": 0.9980000257492065,
616
+ "es": 0.9980000257492065,
617
+ "nl": 0.9960000514984131
618
+ },
619
+ "llm_test_accuracy": {
620
+ "en": 0.999000072479248,
621
+ "fr": 0.999000072479248,
622
+ "de": 1.0,
623
+ "es": 1.0,
624
+ "nl": 0.999000072479248
625
+ },
626
+ "llm_top_1_test_accuracy": {
627
+ "en": 0.732,
628
+ "fr": 0.587,
629
+ "de": 0.759,
630
+ "es": 0.489,
631
+ "nl": 0.639
632
+ },
633
+ "llm_top_2_test_accuracy": {
634
+ "en": 0.834,
635
+ "fr": 0.604,
636
+ "de": 0.84,
637
+ "es": 0.907,
638
+ "nl": 0.749
639
+ },
640
+ "llm_top_5_test_accuracy": {
641
+ "en": 0.889,
642
+ "fr": 0.927,
643
+ "de": 0.834,
644
+ "es": 0.977,
645
+ "nl": 0.873
646
+ },
647
+ "sae_top_1_test_accuracy": {
648
+ "en": 0.836,
649
+ "fr": 0.993,
650
+ "de": 0.924,
651
+ "es": 0.896,
652
+ "nl": 0.751
653
+ },
654
+ "sae_top_2_test_accuracy": {
655
+ "en": 0.85,
656
+ "fr": 0.994,
657
+ "de": 0.974,
658
+ "es": 0.992,
659
+ "nl": 0.999
660
+ },
661
+ "sae_top_5_test_accuracy": {
662
+ "en": 1.0,
663
+ "fr": 0.998,
664
+ "de": 0.983,
665
+ "es": 0.995,
666
+ "nl": 0.998
667
+ }
668
+ }
669
+ }
670
+ }
eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_2_custom_sae_eval_results.json ADDED
@@ -0,0 +1,670 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "sparse_probing",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "LabHC/bias_in_bios_class_set2",
8
+ "LabHC/bias_in_bios_class_set3",
9
+ "canrager/amazon_reviews_mcauley_1and5",
10
+ "canrager/amazon_reviews_mcauley_1and5_sentiment",
11
+ "codeparrot/github-code",
12
+ "fancyzhx/ag_news",
13
+ "Helsinki-NLP/europarl"
14
+ ],
15
+ "probe_train_set_size": 4000,
16
+ "probe_test_set_size": 1000,
17
+ "context_length": 128,
18
+ "sae_batch_size": 125,
19
+ "llm_batch_size": 32,
20
+ "llm_dtype": "bfloat16",
21
+ "model_name": "gemma-2-2b",
22
+ "k_values": [
23
+ 1,
24
+ 2,
25
+ 5
26
+ ],
27
+ "lower_vram_usage": false
28
+ },
29
+ "eval_id": "d1386cf0-fcf2-41f3-ad6f-d7d1a35e9d96",
30
+ "datetime_epoch_millis": 1740086708114,
31
+ "eval_result_metrics": {
32
+ "llm": {
33
+ "llm_test_accuracy": 0.9588312957435847,
34
+ "llm_top_1_test_accuracy": 0.6504687499999999,
35
+ "llm_top_2_test_accuracy": 0.7214187500000001,
36
+ "llm_top_5_test_accuracy": 0.7812625,
37
+ "llm_top_10_test_accuracy": null,
38
+ "llm_top_20_test_accuracy": null,
39
+ "llm_top_50_test_accuracy": null,
40
+ "llm_top_100_test_accuracy": null
41
+ },
42
+ "sae": {
43
+ "sae_test_accuracy": 0.9571375425904989,
44
+ "sae_top_1_test_accuracy": 0.7459749999999999,
45
+ "sae_top_2_test_accuracy": 0.81159375,
46
+ "sae_top_5_test_accuracy": 0.87978125,
47
+ "sae_top_10_test_accuracy": null,
48
+ "sae_top_20_test_accuracy": null,
49
+ "sae_top_50_test_accuracy": null,
50
+ "sae_top_100_test_accuracy": null
51
+ }
52
+ },
53
+ "eval_result_details": [
54
+ {
55
+ "dataset_name": "LabHC/bias_in_bios_class_set1_results",
56
+ "llm_test_accuracy": 0.966800057888031,
57
+ "llm_top_1_test_accuracy": 0.6397999999999999,
58
+ "llm_top_2_test_accuracy": 0.6954,
59
+ "llm_top_5_test_accuracy": 0.7869999999999999,
60
+ "llm_top_10_test_accuracy": null,
61
+ "llm_top_20_test_accuracy": null,
62
+ "llm_top_50_test_accuracy": null,
63
+ "llm_top_100_test_accuracy": null,
64
+ "sae_test_accuracy": 0.9630000352859497,
65
+ "sae_top_1_test_accuracy": 0.772,
66
+ "sae_top_2_test_accuracy": 0.8458,
67
+ "sae_top_5_test_accuracy": 0.8926000000000001,
68
+ "sae_top_10_test_accuracy": null,
69
+ "sae_top_20_test_accuracy": null,
70
+ "sae_top_50_test_accuracy": null,
71
+ "sae_top_100_test_accuracy": null
72
+ },
73
+ {
74
+ "dataset_name": "LabHC/bias_in_bios_class_set2_results",
75
+ "llm_test_accuracy": 0.9542000532150269,
76
+ "llm_top_1_test_accuracy": 0.6686,
77
+ "llm_top_2_test_accuracy": 0.7194,
78
+ "llm_top_5_test_accuracy": 0.763,
79
+ "llm_top_10_test_accuracy": null,
80
+ "llm_top_20_test_accuracy": null,
81
+ "llm_top_50_test_accuracy": null,
82
+ "llm_top_100_test_accuracy": null,
83
+ "sae_test_accuracy": 0.9518000483512878,
84
+ "sae_top_1_test_accuracy": 0.6912,
85
+ "sae_top_2_test_accuracy": 0.7636000000000001,
86
+ "sae_top_5_test_accuracy": 0.8488,
87
+ "sae_top_10_test_accuracy": null,
88
+ "sae_top_20_test_accuracy": null,
89
+ "sae_top_50_test_accuracy": null,
90
+ "sae_top_100_test_accuracy": null
91
+ },
92
+ {
93
+ "dataset_name": "LabHC/bias_in_bios_class_set3_results",
94
+ "llm_test_accuracy": 0.9332000374794006,
95
+ "llm_top_1_test_accuracy": 0.6826000000000001,
96
+ "llm_top_2_test_accuracy": 0.7456,
97
+ "llm_top_5_test_accuracy": 0.7732,
98
+ "llm_top_10_test_accuracy": null,
99
+ "llm_top_20_test_accuracy": null,
100
+ "llm_top_50_test_accuracy": null,
101
+ "llm_top_100_test_accuracy": null,
102
+ "sae_test_accuracy": 0.9284000515937805,
103
+ "sae_top_1_test_accuracy": 0.8140000000000001,
104
+ "sae_top_2_test_accuracy": 0.8192,
105
+ "sae_top_5_test_accuracy": 0.873,
106
+ "sae_top_10_test_accuracy": null,
107
+ "sae_top_20_test_accuracy": null,
108
+ "sae_top_50_test_accuracy": null,
109
+ "sae_top_100_test_accuracy": null
110
+ },
111
+ {
112
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results",
113
+ "llm_test_accuracy": 0.9140000343322754,
114
+ "llm_top_1_test_accuracy": 0.6006,
115
+ "llm_top_2_test_accuracy": 0.6432,
116
+ "llm_top_5_test_accuracy": 0.6728000000000001,
117
+ "llm_top_10_test_accuracy": null,
118
+ "llm_top_20_test_accuracy": null,
119
+ "llm_top_50_test_accuracy": null,
120
+ "llm_top_100_test_accuracy": null,
121
+ "sae_test_accuracy": 0.9184000492095947,
122
+ "sae_top_1_test_accuracy": 0.6582,
123
+ "sae_top_2_test_accuracy": 0.7948000000000001,
124
+ "sae_top_5_test_accuracy": 0.8282,
125
+ "sae_top_10_test_accuracy": null,
126
+ "sae_top_20_test_accuracy": null,
127
+ "sae_top_50_test_accuracy": null,
128
+ "sae_top_100_test_accuracy": null
129
+ },
130
+ {
131
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results",
132
+ "llm_test_accuracy": 0.9810000360012054,
133
+ "llm_top_1_test_accuracy": 0.673,
134
+ "llm_top_2_test_accuracy": 0.724,
135
+ "llm_top_5_test_accuracy": 0.766,
136
+ "llm_top_10_test_accuracy": null,
137
+ "llm_top_20_test_accuracy": null,
138
+ "llm_top_50_test_accuracy": null,
139
+ "llm_top_100_test_accuracy": null,
140
+ "sae_test_accuracy": 0.9730000495910645,
141
+ "sae_top_1_test_accuracy": 0.847,
142
+ "sae_top_2_test_accuracy": 0.908,
143
+ "sae_top_5_test_accuracy": 0.945,
144
+ "sae_top_10_test_accuracy": null,
145
+ "sae_top_20_test_accuracy": null,
146
+ "sae_top_50_test_accuracy": null,
147
+ "sae_top_100_test_accuracy": null
148
+ },
149
+ {
150
+ "dataset_name": "codeparrot/github-code_results",
151
+ "llm_test_accuracy": 0.9708000421524048,
152
+ "llm_top_1_test_accuracy": 0.6612,
153
+ "llm_top_2_test_accuracy": 0.6961999999999999,
154
+ "llm_top_5_test_accuracy": 0.7626,
155
+ "llm_top_10_test_accuracy": null,
156
+ "llm_top_20_test_accuracy": null,
157
+ "llm_top_50_test_accuracy": null,
158
+ "llm_top_100_test_accuracy": null,
159
+ "sae_test_accuracy": 0.9724000453948974,
160
+ "sae_top_1_test_accuracy": 0.635,
161
+ "sae_top_2_test_accuracy": 0.7135999999999999,
162
+ "sae_top_5_test_accuracy": 0.8318,
163
+ "sae_top_10_test_accuracy": null,
164
+ "sae_top_20_test_accuracy": null,
165
+ "sae_top_50_test_accuracy": null,
166
+ "sae_top_100_test_accuracy": null
167
+ },
168
+ {
169
+ "dataset_name": "fancyzhx/ag_news_results",
170
+ "llm_test_accuracy": 0.9512500613927841,
171
+ "llm_top_1_test_accuracy": 0.6367499999999999,
172
+ "llm_top_2_test_accuracy": 0.76075,
173
+ "llm_top_5_test_accuracy": 0.8255,
174
+ "llm_top_10_test_accuracy": null,
175
+ "llm_top_20_test_accuracy": null,
176
+ "llm_top_50_test_accuracy": null,
177
+ "llm_top_100_test_accuracy": null,
178
+ "sae_test_accuracy": 0.9515000432729721,
179
+ "sae_top_1_test_accuracy": 0.6910000000000001,
180
+ "sae_top_2_test_accuracy": 0.7007500000000001,
181
+ "sae_top_5_test_accuracy": 0.8232499999999999,
182
+ "sae_top_10_test_accuracy": null,
183
+ "sae_top_20_test_accuracy": null,
184
+ "sae_top_50_test_accuracy": null,
185
+ "sae_top_100_test_accuracy": null
186
+ },
187
+ {
188
+ "dataset_name": "Helsinki-NLP/europarl_results",
189
+ "llm_test_accuracy": 0.9994000434875489,
190
+ "llm_top_1_test_accuracy": 0.6411999999999999,
191
+ "llm_top_2_test_accuracy": 0.7868,
192
+ "llm_top_5_test_accuracy": 0.9,
193
+ "llm_top_10_test_accuracy": null,
194
+ "llm_top_20_test_accuracy": null,
195
+ "llm_top_50_test_accuracy": null,
196
+ "llm_top_100_test_accuracy": null,
197
+ "sae_test_accuracy": 0.9986000180244445,
198
+ "sae_top_1_test_accuracy": 0.8594000000000002,
199
+ "sae_top_2_test_accuracy": 0.9470000000000001,
200
+ "sae_top_5_test_accuracy": 0.9955999999999999,
201
+ "sae_top_10_test_accuracy": null,
202
+ "sae_top_20_test_accuracy": null,
203
+ "sae_top_50_test_accuracy": null,
204
+ "sae_top_100_test_accuracy": null
205
+ }
206
+ ],
207
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
208
+ "sae_lens_id": "custom_sae",
209
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_2",
210
+ "sae_lens_version": "5.4.2",
211
+ "sae_cfg_dict": {
212
+ "model_name": "gemma-2-2b",
213
+ "d_in": 2304,
214
+ "d_sae": 16384,
215
+ "hook_layer": 12,
216
+ "hook_name": "blocks.12.hook_resid_post",
217
+ "context_size": null,
218
+ "hook_head_index": null,
219
+ "architecture": "topk",
220
+ "apply_b_dec_to_input": null,
221
+ "finetuning_scaling_factor": null,
222
+ "activation_fn_str": "",
223
+ "prepend_bos": true,
224
+ "normalize_activations": "none",
225
+ "dtype": "bfloat16",
226
+ "device": "",
227
+ "dataset_path": "",
228
+ "dataset_trust_remote_code": true,
229
+ "seqpos_slice": [
230
+ null
231
+ ],
232
+ "training_tokens": -100000,
233
+ "sae_lens_training_version": null,
234
+ "neuronpedia_id": null
235
+ },
236
+ "eval_result_unstructured": {
237
+ "LabHC/bias_in_bios_class_set1_results": {
238
+ "sae_test_accuracy": {
239
+ "0": 0.9460000395774841,
240
+ "1": 0.9570000171661377,
241
+ "2": 0.9480000734329224,
242
+ "6": 0.984000027179718,
243
+ "9": 0.9800000190734863
244
+ },
245
+ "llm_test_accuracy": {
246
+ "0": 0.9510000348091125,
247
+ "1": 0.9670000672340393,
248
+ "2": 0.9530000686645508,
249
+ "6": 0.987000048160553,
250
+ "9": 0.9760000705718994
251
+ },
252
+ "llm_top_1_test_accuracy": {
253
+ "0": 0.577,
254
+ "1": 0.613,
255
+ "2": 0.662,
256
+ "6": 0.787,
257
+ "9": 0.56
258
+ },
259
+ "llm_top_2_test_accuracy": {
260
+ "0": 0.574,
261
+ "1": 0.66,
262
+ "2": 0.718,
263
+ "6": 0.811,
264
+ "9": 0.714
265
+ },
266
+ "llm_top_5_test_accuracy": {
267
+ "0": 0.713,
268
+ "1": 0.711,
269
+ "2": 0.755,
270
+ "6": 0.895,
271
+ "9": 0.861
272
+ },
273
+ "sae_top_1_test_accuracy": {
274
+ "0": 0.599,
275
+ "1": 0.631,
276
+ "2": 0.888,
277
+ "6": 0.822,
278
+ "9": 0.92
279
+ },
280
+ "sae_top_2_test_accuracy": {
281
+ "0": 0.832,
282
+ "1": 0.623,
283
+ "2": 0.882,
284
+ "6": 0.983,
285
+ "9": 0.909
286
+ },
287
+ "sae_top_5_test_accuracy": {
288
+ "0": 0.873,
289
+ "1": 0.766,
290
+ "2": 0.906,
291
+ "6": 0.981,
292
+ "9": 0.937
293
+ }
294
+ },
295
+ "LabHC/bias_in_bios_class_set2_results": {
296
+ "sae_test_accuracy": {
297
+ "11": 0.9640000462532043,
298
+ "13": 0.9550000429153442,
299
+ "14": 0.9540000557899475,
300
+ "18": 0.9260000586509705,
301
+ "19": 0.9600000381469727
302
+ },
303
+ "llm_test_accuracy": {
304
+ "11": 0.968000054359436,
305
+ "13": 0.9500000476837158,
306
+ "14": 0.956000030040741,
307
+ "18": 0.9350000619888306,
308
+ "19": 0.9620000720024109
309
+ },
310
+ "llm_top_1_test_accuracy": {
311
+ "11": 0.545,
312
+ "13": 0.666,
313
+ "14": 0.649,
314
+ "18": 0.693,
315
+ "19": 0.79
316
+ },
317
+ "llm_top_2_test_accuracy": {
318
+ "11": 0.692,
319
+ "13": 0.724,
320
+ "14": 0.68,
321
+ "18": 0.732,
322
+ "19": 0.769
323
+ },
324
+ "llm_top_5_test_accuracy": {
325
+ "11": 0.793,
326
+ "13": 0.751,
327
+ "14": 0.718,
328
+ "18": 0.723,
329
+ "19": 0.83
330
+ },
331
+ "sae_top_1_test_accuracy": {
332
+ "11": 0.606,
333
+ "13": 0.686,
334
+ "14": 0.642,
335
+ "18": 0.681,
336
+ "19": 0.841
337
+ },
338
+ "sae_top_2_test_accuracy": {
339
+ "11": 0.744,
340
+ "13": 0.667,
341
+ "14": 0.874,
342
+ "18": 0.698,
343
+ "19": 0.835
344
+ },
345
+ "sae_top_5_test_accuracy": {
346
+ "11": 0.943,
347
+ "13": 0.698,
348
+ "14": 0.875,
349
+ "18": 0.89,
350
+ "19": 0.838
351
+ }
352
+ },
353
+ "LabHC/bias_in_bios_class_set3_results": {
354
+ "sae_test_accuracy": {
355
+ "20": 0.9580000638961792,
356
+ "21": 0.9250000715255737,
357
+ "22": 0.909000039100647,
358
+ "25": 0.9550000429153442,
359
+ "26": 0.8950000405311584
360
+ },
361
+ "llm_test_accuracy": {
362
+ "20": 0.9580000638961792,
363
+ "21": 0.9240000247955322,
364
+ "22": 0.9200000166893005,
365
+ "25": 0.9630000591278076,
366
+ "26": 0.9010000228881836
367
+ },
368
+ "llm_top_1_test_accuracy": {
369
+ "20": 0.696,
370
+ "21": 0.757,
371
+ "22": 0.637,
372
+ "25": 0.692,
373
+ "26": 0.631
374
+ },
375
+ "llm_top_2_test_accuracy": {
376
+ "20": 0.818,
377
+ "21": 0.774,
378
+ "22": 0.688,
379
+ "25": 0.762,
380
+ "26": 0.686
381
+ },
382
+ "llm_top_5_test_accuracy": {
383
+ "20": 0.862,
384
+ "21": 0.792,
385
+ "22": 0.748,
386
+ "25": 0.791,
387
+ "26": 0.673
388
+ },
389
+ "sae_top_1_test_accuracy": {
390
+ "20": 0.868,
391
+ "21": 0.828,
392
+ "22": 0.876,
393
+ "25": 0.884,
394
+ "26": 0.614
395
+ },
396
+ "sae_top_2_test_accuracy": {
397
+ "20": 0.885,
398
+ "21": 0.835,
399
+ "22": 0.879,
400
+ "25": 0.88,
401
+ "26": 0.617
402
+ },
403
+ "sae_top_5_test_accuracy": {
404
+ "20": 0.931,
405
+ "21": 0.855,
406
+ "22": 0.876,
407
+ "25": 0.907,
408
+ "26": 0.796
409
+ }
410
+ },
411
+ "canrager/amazon_reviews_mcauley_1and5_results": {
412
+ "sae_test_accuracy": {
413
+ "1": 0.9530000686645508,
414
+ "2": 0.9380000233650208,
415
+ "3": 0.9120000600814819,
416
+ "5": 0.9140000343322754,
417
+ "6": 0.8750000596046448
418
+ },
419
+ "llm_test_accuracy": {
420
+ "1": 0.940000057220459,
421
+ "2": 0.9340000152587891,
422
+ "3": 0.9200000166893005,
423
+ "5": 0.9150000214576721,
424
+ "6": 0.8610000610351562
425
+ },
426
+ "llm_top_1_test_accuracy": {
427
+ "1": 0.662,
428
+ "2": 0.599,
429
+ "3": 0.592,
430
+ "5": 0.57,
431
+ "6": 0.58
432
+ },
433
+ "llm_top_2_test_accuracy": {
434
+ "1": 0.748,
435
+ "2": 0.642,
436
+ "3": 0.6,
437
+ "5": 0.625,
438
+ "6": 0.601
439
+ },
440
+ "llm_top_5_test_accuracy": {
441
+ "1": 0.764,
442
+ "2": 0.646,
443
+ "3": 0.639,
444
+ "5": 0.638,
445
+ "6": 0.677
446
+ },
447
+ "sae_top_1_test_accuracy": {
448
+ "1": 0.863,
449
+ "2": 0.621,
450
+ "3": 0.574,
451
+ "5": 0.523,
452
+ "6": 0.71
453
+ },
454
+ "sae_top_2_test_accuracy": {
455
+ "1": 0.886,
456
+ "2": 0.846,
457
+ "3": 0.651,
458
+ "5": 0.859,
459
+ "6": 0.732
460
+ },
461
+ "sae_top_5_test_accuracy": {
462
+ "1": 0.908,
463
+ "2": 0.872,
464
+ "3": 0.727,
465
+ "5": 0.862,
466
+ "6": 0.772
467
+ }
468
+ },
469
+ "canrager/amazon_reviews_mcauley_1and5_sentiment_results": {
470
+ "sae_test_accuracy": {
471
+ "1.0": 0.9720000624656677,
472
+ "5.0": 0.9740000367164612
473
+ },
474
+ "llm_test_accuracy": {
475
+ "1.0": 0.9800000190734863,
476
+ "5.0": 0.9820000529289246
477
+ },
478
+ "llm_top_1_test_accuracy": {
479
+ "1.0": 0.673,
480
+ "5.0": 0.673
481
+ },
482
+ "llm_top_2_test_accuracy": {
483
+ "1.0": 0.724,
484
+ "5.0": 0.724
485
+ },
486
+ "llm_top_5_test_accuracy": {
487
+ "1.0": 0.766,
488
+ "5.0": 0.766
489
+ },
490
+ "sae_top_1_test_accuracy": {
491
+ "1.0": 0.847,
492
+ "5.0": 0.847
493
+ },
494
+ "sae_top_2_test_accuracy": {
495
+ "1.0": 0.908,
496
+ "5.0": 0.908
497
+ },
498
+ "sae_top_5_test_accuracy": {
499
+ "1.0": 0.945,
500
+ "5.0": 0.945
501
+ }
502
+ },
503
+ "codeparrot/github-code_results": {
504
+ "sae_test_accuracy": {
505
+ "C": 0.9530000686645508,
506
+ "Python": 0.9900000691413879,
507
+ "HTML": 0.9880000352859497,
508
+ "Java": 0.9700000286102295,
509
+ "PHP": 0.9610000252723694
510
+ },
511
+ "llm_test_accuracy": {
512
+ "C": 0.9580000638961792,
513
+ "Python": 0.9850000739097595,
514
+ "HTML": 0.9890000224113464,
515
+ "Java": 0.9660000205039978,
516
+ "PHP": 0.956000030040741
517
+ },
518
+ "llm_top_1_test_accuracy": {
519
+ "C": 0.672,
520
+ "Python": 0.643,
521
+ "HTML": 0.788,
522
+ "Java": 0.616,
523
+ "PHP": 0.587
524
+ },
525
+ "llm_top_2_test_accuracy": {
526
+ "C": 0.653,
527
+ "Python": 0.675,
528
+ "HTML": 0.826,
529
+ "Java": 0.685,
530
+ "PHP": 0.642
531
+ },
532
+ "llm_top_5_test_accuracy": {
533
+ "C": 0.758,
534
+ "Python": 0.731,
535
+ "HTML": 0.898,
536
+ "Java": 0.722,
537
+ "PHP": 0.704
538
+ },
539
+ "sae_top_1_test_accuracy": {
540
+ "C": 0.607,
541
+ "Python": 0.63,
542
+ "HTML": 0.694,
543
+ "Java": 0.644,
544
+ "PHP": 0.6
545
+ },
546
+ "sae_top_2_test_accuracy": {
547
+ "C": 0.608,
548
+ "Python": 0.663,
549
+ "HTML": 0.725,
550
+ "Java": 0.655,
551
+ "PHP": 0.917
552
+ },
553
+ "sae_top_5_test_accuracy": {
554
+ "C": 0.697,
555
+ "Python": 0.951,
556
+ "HTML": 0.905,
557
+ "Java": 0.68,
558
+ "PHP": 0.926
559
+ }
560
+ },
561
+ "fancyzhx/ag_news_results": {
562
+ "sae_test_accuracy": {
563
+ "0": 0.940000057220459,
564
+ "1": 0.9790000319480896,
565
+ "2": 0.9330000281333923,
566
+ "3": 0.9540000557899475
567
+ },
568
+ "llm_test_accuracy": {
569
+ "0": 0.940000057220459,
570
+ "1": 0.9850000739097595,
571
+ "2": 0.9300000667572021,
572
+ "3": 0.9500000476837158
573
+ },
574
+ "llm_top_1_test_accuracy": {
575
+ "0": 0.568,
576
+ "1": 0.671,
577
+ "2": 0.667,
578
+ "3": 0.641
579
+ },
580
+ "llm_top_2_test_accuracy": {
581
+ "0": 0.802,
582
+ "1": 0.802,
583
+ "2": 0.701,
584
+ "3": 0.738
585
+ },
586
+ "llm_top_5_test_accuracy": {
587
+ "0": 0.813,
588
+ "1": 0.884,
589
+ "2": 0.762,
590
+ "3": 0.843
591
+ },
592
+ "sae_top_1_test_accuracy": {
593
+ "0": 0.705,
594
+ "1": 0.676,
595
+ "2": 0.743,
596
+ "3": 0.64
597
+ },
598
+ "sae_top_2_test_accuracy": {
599
+ "0": 0.731,
600
+ "1": 0.663,
601
+ "2": 0.772,
602
+ "3": 0.637
603
+ },
604
+ "sae_top_5_test_accuracy": {
605
+ "0": 0.818,
606
+ "1": 0.845,
607
+ "2": 0.815,
608
+ "3": 0.815
609
+ }
610
+ },
611
+ "Helsinki-NLP/europarl_results": {
612
+ "sae_test_accuracy": {
613
+ "en": 0.9970000386238098,
614
+ "fr": 1.0,
615
+ "de": 1.0,
616
+ "es": 0.9980000257492065,
617
+ "nl": 0.9980000257492065
618
+ },
619
+ "llm_test_accuracy": {
620
+ "en": 0.999000072479248,
621
+ "fr": 0.999000072479248,
622
+ "de": 1.0,
623
+ "es": 1.0,
624
+ "nl": 0.999000072479248
625
+ },
626
+ "llm_top_1_test_accuracy": {
627
+ "en": 0.732,
628
+ "fr": 0.587,
629
+ "de": 0.759,
630
+ "es": 0.489,
631
+ "nl": 0.639
632
+ },
633
+ "llm_top_2_test_accuracy": {
634
+ "en": 0.834,
635
+ "fr": 0.604,
636
+ "de": 0.84,
637
+ "es": 0.907,
638
+ "nl": 0.749
639
+ },
640
+ "llm_top_5_test_accuracy": {
641
+ "en": 0.889,
642
+ "fr": 0.927,
643
+ "de": 0.834,
644
+ "es": 0.977,
645
+ "nl": 0.873
646
+ },
647
+ "sae_top_1_test_accuracy": {
648
+ "en": 0.656,
649
+ "fr": 0.995,
650
+ "de": 0.913,
651
+ "es": 0.99,
652
+ "nl": 0.743
653
+ },
654
+ "sae_top_2_test_accuracy": {
655
+ "en": 0.843,
656
+ "fr": 0.995,
657
+ "de": 0.908,
658
+ "es": 0.991,
659
+ "nl": 0.998
660
+ },
661
+ "sae_top_5_test_accuracy": {
662
+ "en": 1.0,
663
+ "fr": 0.993,
664
+ "de": 0.991,
665
+ "es": 0.996,
666
+ "nl": 0.998
667
+ }
668
+ }
669
+ }
670
+ }
eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_3_custom_sae_eval_results.json ADDED
@@ -0,0 +1,670 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "sparse_probing",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "LabHC/bias_in_bios_class_set2",
8
+ "LabHC/bias_in_bios_class_set3",
9
+ "canrager/amazon_reviews_mcauley_1and5",
10
+ "canrager/amazon_reviews_mcauley_1and5_sentiment",
11
+ "codeparrot/github-code",
12
+ "fancyzhx/ag_news",
13
+ "Helsinki-NLP/europarl"
14
+ ],
15
+ "probe_train_set_size": 4000,
16
+ "probe_test_set_size": 1000,
17
+ "context_length": 128,
18
+ "sae_batch_size": 125,
19
+ "llm_batch_size": 32,
20
+ "llm_dtype": "bfloat16",
21
+ "model_name": "gemma-2-2b",
22
+ "k_values": [
23
+ 1,
24
+ 2,
25
+ 5
26
+ ],
27
+ "lower_vram_usage": false
28
+ },
29
+ "eval_id": "7ec4dec8-43f2-4a12-a33a-0f0e1434682b",
30
+ "datetime_epoch_millis": 1740086801771,
31
+ "eval_result_metrics": {
32
+ "llm": {
33
+ "llm_test_accuracy": 0.9588312957435847,
34
+ "llm_top_1_test_accuracy": 0.6504687499999999,
35
+ "llm_top_2_test_accuracy": 0.7214187500000001,
36
+ "llm_top_5_test_accuracy": 0.7812625,
37
+ "llm_top_10_test_accuracy": null,
38
+ "llm_top_20_test_accuracy": null,
39
+ "llm_top_50_test_accuracy": null,
40
+ "llm_top_100_test_accuracy": null
41
+ },
42
+ "sae": {
43
+ "sae_test_accuracy": 0.9573875416070222,
44
+ "sae_top_1_test_accuracy": 0.76879375,
45
+ "sae_top_2_test_accuracy": 0.81516875,
46
+ "sae_top_5_test_accuracy": 0.8675375000000001,
47
+ "sae_top_10_test_accuracy": null,
48
+ "sae_top_20_test_accuracy": null,
49
+ "sae_top_50_test_accuracy": null,
50
+ "sae_top_100_test_accuracy": null
51
+ }
52
+ },
53
+ "eval_result_details": [
54
+ {
55
+ "dataset_name": "LabHC/bias_in_bios_class_set1_results",
56
+ "llm_test_accuracy": 0.966800057888031,
57
+ "llm_top_1_test_accuracy": 0.6397999999999999,
58
+ "llm_top_2_test_accuracy": 0.6954,
59
+ "llm_top_5_test_accuracy": 0.7869999999999999,
60
+ "llm_top_10_test_accuracy": null,
61
+ "llm_top_20_test_accuracy": null,
62
+ "llm_top_50_test_accuracy": null,
63
+ "llm_top_100_test_accuracy": null,
64
+ "sae_test_accuracy": 0.9664000511169434,
65
+ "sae_top_1_test_accuracy": 0.8071999999999999,
66
+ "sae_top_2_test_accuracy": 0.8168,
67
+ "sae_top_5_test_accuracy": 0.9034000000000001,
68
+ "sae_top_10_test_accuracy": null,
69
+ "sae_top_20_test_accuracy": null,
70
+ "sae_top_50_test_accuracy": null,
71
+ "sae_top_100_test_accuracy": null
72
+ },
73
+ {
74
+ "dataset_name": "LabHC/bias_in_bios_class_set2_results",
75
+ "llm_test_accuracy": 0.9542000532150269,
76
+ "llm_top_1_test_accuracy": 0.6686,
77
+ "llm_top_2_test_accuracy": 0.7194,
78
+ "llm_top_5_test_accuracy": 0.763,
79
+ "llm_top_10_test_accuracy": null,
80
+ "llm_top_20_test_accuracy": null,
81
+ "llm_top_50_test_accuracy": null,
82
+ "llm_top_100_test_accuracy": null,
83
+ "sae_test_accuracy": 0.9504000425338746,
84
+ "sae_top_1_test_accuracy": 0.723,
85
+ "sae_top_2_test_accuracy": 0.7712,
86
+ "sae_top_5_test_accuracy": 0.8336,
87
+ "sae_top_10_test_accuracy": null,
88
+ "sae_top_20_test_accuracy": null,
89
+ "sae_top_50_test_accuracy": null,
90
+ "sae_top_100_test_accuracy": null
91
+ },
92
+ {
93
+ "dataset_name": "LabHC/bias_in_bios_class_set3_results",
94
+ "llm_test_accuracy": 0.9332000374794006,
95
+ "llm_top_1_test_accuracy": 0.6826000000000001,
96
+ "llm_top_2_test_accuracy": 0.7456,
97
+ "llm_top_5_test_accuracy": 0.7732,
98
+ "llm_top_10_test_accuracy": null,
99
+ "llm_top_20_test_accuracy": null,
100
+ "llm_top_50_test_accuracy": null,
101
+ "llm_top_100_test_accuracy": null,
102
+ "sae_test_accuracy": 0.9298000454902648,
103
+ "sae_top_1_test_accuracy": 0.7984,
104
+ "sae_top_2_test_accuracy": 0.8112,
105
+ "sae_top_5_test_accuracy": 0.8593999999999999,
106
+ "sae_top_10_test_accuracy": null,
107
+ "sae_top_20_test_accuracy": null,
108
+ "sae_top_50_test_accuracy": null,
109
+ "sae_top_100_test_accuracy": null
110
+ },
111
+ {
112
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results",
113
+ "llm_test_accuracy": 0.9140000343322754,
114
+ "llm_top_1_test_accuracy": 0.6006,
115
+ "llm_top_2_test_accuracy": 0.6432,
116
+ "llm_top_5_test_accuracy": 0.6728000000000001,
117
+ "llm_top_10_test_accuracy": null,
118
+ "llm_top_20_test_accuracy": null,
119
+ "llm_top_50_test_accuracy": null,
120
+ "llm_top_100_test_accuracy": null,
121
+ "sae_test_accuracy": 0.9200000524520874,
122
+ "sae_top_1_test_accuracy": 0.6971999999999999,
123
+ "sae_top_2_test_accuracy": 0.7605999999999999,
124
+ "sae_top_5_test_accuracy": 0.8104000000000001,
125
+ "sae_top_10_test_accuracy": null,
126
+ "sae_top_20_test_accuracy": null,
127
+ "sae_top_50_test_accuracy": null,
128
+ "sae_top_100_test_accuracy": null
129
+ },
130
+ {
131
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results",
132
+ "llm_test_accuracy": 0.9810000360012054,
133
+ "llm_top_1_test_accuracy": 0.673,
134
+ "llm_top_2_test_accuracy": 0.724,
135
+ "llm_top_5_test_accuracy": 0.766,
136
+ "llm_top_10_test_accuracy": null,
137
+ "llm_top_20_test_accuracy": null,
138
+ "llm_top_50_test_accuracy": null,
139
+ "llm_top_100_test_accuracy": null,
140
+ "sae_test_accuracy": 0.9740000367164612,
141
+ "sae_top_1_test_accuracy": 0.915,
142
+ "sae_top_2_test_accuracy": 0.915,
143
+ "sae_top_5_test_accuracy": 0.912,
144
+ "sae_top_10_test_accuracy": null,
145
+ "sae_top_20_test_accuracy": null,
146
+ "sae_top_50_test_accuracy": null,
147
+ "sae_top_100_test_accuracy": null
148
+ },
149
+ {
150
+ "dataset_name": "codeparrot/github-code_results",
151
+ "llm_test_accuracy": 0.9708000421524048,
152
+ "llm_top_1_test_accuracy": 0.6612,
153
+ "llm_top_2_test_accuracy": 0.6961999999999999,
154
+ "llm_top_5_test_accuracy": 0.7626,
155
+ "llm_top_10_test_accuracy": null,
156
+ "llm_top_20_test_accuracy": null,
157
+ "llm_top_50_test_accuracy": null,
158
+ "llm_top_100_test_accuracy": null,
159
+ "sae_test_accuracy": 0.9698000311851501,
160
+ "sae_top_1_test_accuracy": 0.645,
161
+ "sae_top_2_test_accuracy": 0.7304,
162
+ "sae_top_5_test_accuracy": 0.8123999999999999,
163
+ "sae_top_10_test_accuracy": null,
164
+ "sae_top_20_test_accuracy": null,
165
+ "sae_top_50_test_accuracy": null,
166
+ "sae_top_100_test_accuracy": null
167
+ },
168
+ {
169
+ "dataset_name": "fancyzhx/ag_news_results",
170
+ "llm_test_accuracy": 0.9512500613927841,
171
+ "llm_top_1_test_accuracy": 0.6367499999999999,
172
+ "llm_top_2_test_accuracy": 0.76075,
173
+ "llm_top_5_test_accuracy": 0.8255,
174
+ "llm_top_10_test_accuracy": null,
175
+ "llm_top_20_test_accuracy": null,
176
+ "llm_top_50_test_accuracy": null,
177
+ "llm_top_100_test_accuracy": null,
178
+ "sae_test_accuracy": 0.9495000392198563,
179
+ "sae_top_1_test_accuracy": 0.69075,
180
+ "sae_top_2_test_accuracy": 0.73375,
181
+ "sae_top_5_test_accuracy": 0.8125,
182
+ "sae_top_10_test_accuracy": null,
183
+ "sae_top_20_test_accuracy": null,
184
+ "sae_top_50_test_accuracy": null,
185
+ "sae_top_100_test_accuracy": null
186
+ },
187
+ {
188
+ "dataset_name": "Helsinki-NLP/europarl_results",
189
+ "llm_test_accuracy": 0.9994000434875489,
190
+ "llm_top_1_test_accuracy": 0.6411999999999999,
191
+ "llm_top_2_test_accuracy": 0.7868,
192
+ "llm_top_5_test_accuracy": 0.9,
193
+ "llm_top_10_test_accuracy": null,
194
+ "llm_top_20_test_accuracy": null,
195
+ "llm_top_50_test_accuracy": null,
196
+ "llm_top_100_test_accuracy": null,
197
+ "sae_test_accuracy": 0.9992000341415406,
198
+ "sae_top_1_test_accuracy": 0.8737999999999999,
199
+ "sae_top_2_test_accuracy": 0.9823999999999999,
200
+ "sae_top_5_test_accuracy": 0.9965999999999999,
201
+ "sae_top_10_test_accuracy": null,
202
+ "sae_top_20_test_accuracy": null,
203
+ "sae_top_50_test_accuracy": null,
204
+ "sae_top_100_test_accuracy": null
205
+ }
206
+ ],
207
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
208
+ "sae_lens_id": "custom_sae",
209
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_3",
210
+ "sae_lens_version": "5.4.2",
211
+ "sae_cfg_dict": {
212
+ "model_name": "gemma-2-2b",
213
+ "d_in": 2304,
214
+ "d_sae": 16384,
215
+ "hook_layer": 12,
216
+ "hook_name": "blocks.12.hook_resid_post",
217
+ "context_size": null,
218
+ "hook_head_index": null,
219
+ "architecture": "topk",
220
+ "apply_b_dec_to_input": null,
221
+ "finetuning_scaling_factor": null,
222
+ "activation_fn_str": "",
223
+ "prepend_bos": true,
224
+ "normalize_activations": "none",
225
+ "dtype": "bfloat16",
226
+ "device": "",
227
+ "dataset_path": "",
228
+ "dataset_trust_remote_code": true,
229
+ "seqpos_slice": [
230
+ null
231
+ ],
232
+ "training_tokens": -100000,
233
+ "sae_lens_training_version": null,
234
+ "neuronpedia_id": null
235
+ },
236
+ "eval_result_unstructured": {
237
+ "LabHC/bias_in_bios_class_set1_results": {
238
+ "sae_test_accuracy": {
239
+ "0": 0.9480000734329224,
240
+ "1": 0.9660000205039978,
241
+ "2": 0.9530000686645508,
242
+ "6": 0.9890000224113464,
243
+ "9": 0.9760000705718994
244
+ },
245
+ "llm_test_accuracy": {
246
+ "0": 0.9510000348091125,
247
+ "1": 0.9670000672340393,
248
+ "2": 0.9530000686645508,
249
+ "6": 0.987000048160553,
250
+ "9": 0.9760000705718994
251
+ },
252
+ "llm_top_1_test_accuracy": {
253
+ "0": 0.577,
254
+ "1": 0.613,
255
+ "2": 0.662,
256
+ "6": 0.787,
257
+ "9": 0.56
258
+ },
259
+ "llm_top_2_test_accuracy": {
260
+ "0": 0.574,
261
+ "1": 0.66,
262
+ "2": 0.718,
263
+ "6": 0.811,
264
+ "9": 0.714
265
+ },
266
+ "llm_top_5_test_accuracy": {
267
+ "0": 0.713,
268
+ "1": 0.711,
269
+ "2": 0.755,
270
+ "6": 0.895,
271
+ "9": 0.861
272
+ },
273
+ "sae_top_1_test_accuracy": {
274
+ "0": 0.582,
275
+ "1": 0.648,
276
+ "2": 0.888,
277
+ "6": 0.982,
278
+ "9": 0.936
279
+ },
280
+ "sae_top_2_test_accuracy": {
281
+ "0": 0.612,
282
+ "1": 0.655,
283
+ "2": 0.888,
284
+ "6": 0.979,
285
+ "9": 0.95
286
+ },
287
+ "sae_top_5_test_accuracy": {
288
+ "0": 0.851,
289
+ "1": 0.838,
290
+ "2": 0.892,
291
+ "6": 0.978,
292
+ "9": 0.958
293
+ }
294
+ },
295
+ "LabHC/bias_in_bios_class_set2_results": {
296
+ "sae_test_accuracy": {
297
+ "11": 0.9600000381469727,
298
+ "13": 0.9530000686645508,
299
+ "14": 0.9450000524520874,
300
+ "18": 0.9290000200271606,
301
+ "19": 0.9650000333786011
302
+ },
303
+ "llm_test_accuracy": {
304
+ "11": 0.968000054359436,
305
+ "13": 0.9500000476837158,
306
+ "14": 0.956000030040741,
307
+ "18": 0.9350000619888306,
308
+ "19": 0.9620000720024109
309
+ },
310
+ "llm_top_1_test_accuracy": {
311
+ "11": 0.545,
312
+ "13": 0.666,
313
+ "14": 0.649,
314
+ "18": 0.693,
315
+ "19": 0.79
316
+ },
317
+ "llm_top_2_test_accuracy": {
318
+ "11": 0.692,
319
+ "13": 0.724,
320
+ "14": 0.68,
321
+ "18": 0.732,
322
+ "19": 0.769
323
+ },
324
+ "llm_top_5_test_accuracy": {
325
+ "11": 0.793,
326
+ "13": 0.751,
327
+ "14": 0.718,
328
+ "18": 0.723,
329
+ "19": 0.83
330
+ },
331
+ "sae_top_1_test_accuracy": {
332
+ "11": 0.73,
333
+ "13": 0.678,
334
+ "14": 0.657,
335
+ "18": 0.71,
336
+ "19": 0.84
337
+ },
338
+ "sae_top_2_test_accuracy": {
339
+ "11": 0.739,
340
+ "13": 0.668,
341
+ "14": 0.892,
342
+ "18": 0.713,
343
+ "19": 0.844
344
+ },
345
+ "sae_top_5_test_accuracy": {
346
+ "11": 0.951,
347
+ "13": 0.737,
348
+ "14": 0.887,
349
+ "18": 0.74,
350
+ "19": 0.853
351
+ }
352
+ },
353
+ "LabHC/bias_in_bios_class_set3_results": {
354
+ "sae_test_accuracy": {
355
+ "20": 0.9600000381469727,
356
+ "21": 0.9220000505447388,
357
+ "22": 0.9160000681877136,
358
+ "25": 0.9650000333786011,
359
+ "26": 0.8860000371932983
360
+ },
361
+ "llm_test_accuracy": {
362
+ "20": 0.9580000638961792,
363
+ "21": 0.9240000247955322,
364
+ "22": 0.9200000166893005,
365
+ "25": 0.9630000591278076,
366
+ "26": 0.9010000228881836
367
+ },
368
+ "llm_top_1_test_accuracy": {
369
+ "20": 0.696,
370
+ "21": 0.757,
371
+ "22": 0.637,
372
+ "25": 0.692,
373
+ "26": 0.631
374
+ },
375
+ "llm_top_2_test_accuracy": {
376
+ "20": 0.818,
377
+ "21": 0.774,
378
+ "22": 0.688,
379
+ "25": 0.762,
380
+ "26": 0.686
381
+ },
382
+ "llm_top_5_test_accuracy": {
383
+ "20": 0.862,
384
+ "21": 0.792,
385
+ "22": 0.748,
386
+ "25": 0.791,
387
+ "26": 0.673
388
+ },
389
+ "sae_top_1_test_accuracy": {
390
+ "20": 0.885,
391
+ "21": 0.711,
392
+ "22": 0.878,
393
+ "25": 0.884,
394
+ "26": 0.634
395
+ },
396
+ "sae_top_2_test_accuracy": {
397
+ "20": 0.902,
398
+ "21": 0.744,
399
+ "22": 0.878,
400
+ "25": 0.871,
401
+ "26": 0.661
402
+ },
403
+ "sae_top_5_test_accuracy": {
404
+ "20": 0.931,
405
+ "21": 0.797,
406
+ "22": 0.889,
407
+ "25": 0.895,
408
+ "26": 0.785
409
+ }
410
+ },
411
+ "canrager/amazon_reviews_mcauley_1and5_results": {
412
+ "sae_test_accuracy": {
413
+ "1": 0.9480000734329224,
414
+ "2": 0.9410000443458557,
415
+ "3": 0.909000039100647,
416
+ "5": 0.9220000505447388,
417
+ "6": 0.8800000548362732
418
+ },
419
+ "llm_test_accuracy": {
420
+ "1": 0.940000057220459,
421
+ "2": 0.9340000152587891,
422
+ "3": 0.9200000166893005,
423
+ "5": 0.9150000214576721,
424
+ "6": 0.8610000610351562
425
+ },
426
+ "llm_top_1_test_accuracy": {
427
+ "1": 0.662,
428
+ "2": 0.599,
429
+ "3": 0.592,
430
+ "5": 0.57,
431
+ "6": 0.58
432
+ },
433
+ "llm_top_2_test_accuracy": {
434
+ "1": 0.748,
435
+ "2": 0.642,
436
+ "3": 0.6,
437
+ "5": 0.625,
438
+ "6": 0.601
439
+ },
440
+ "llm_top_5_test_accuracy": {
441
+ "1": 0.764,
442
+ "2": 0.646,
443
+ "3": 0.639,
444
+ "5": 0.638,
445
+ "6": 0.677
446
+ },
447
+ "sae_top_1_test_accuracy": {
448
+ "1": 0.838,
449
+ "2": 0.849,
450
+ "3": 0.565,
451
+ "5": 0.638,
452
+ "6": 0.596
453
+ },
454
+ "sae_top_2_test_accuracy": {
455
+ "1": 0.912,
456
+ "2": 0.866,
457
+ "3": 0.664,
458
+ "5": 0.762,
459
+ "6": 0.599
460
+ },
461
+ "sae_top_5_test_accuracy": {
462
+ "1": 0.915,
463
+ "2": 0.877,
464
+ "3": 0.706,
465
+ "5": 0.809,
466
+ "6": 0.745
467
+ }
468
+ },
469
+ "canrager/amazon_reviews_mcauley_1and5_sentiment_results": {
470
+ "sae_test_accuracy": {
471
+ "1.0": 0.9750000238418579,
472
+ "5.0": 0.9730000495910645
473
+ },
474
+ "llm_test_accuracy": {
475
+ "1.0": 0.9800000190734863,
476
+ "5.0": 0.9820000529289246
477
+ },
478
+ "llm_top_1_test_accuracy": {
479
+ "1.0": 0.673,
480
+ "5.0": 0.673
481
+ },
482
+ "llm_top_2_test_accuracy": {
483
+ "1.0": 0.724,
484
+ "5.0": 0.724
485
+ },
486
+ "llm_top_5_test_accuracy": {
487
+ "1.0": 0.766,
488
+ "5.0": 0.766
489
+ },
490
+ "sae_top_1_test_accuracy": {
491
+ "1.0": 0.915,
492
+ "5.0": 0.915
493
+ },
494
+ "sae_top_2_test_accuracy": {
495
+ "1.0": 0.915,
496
+ "5.0": 0.915
497
+ },
498
+ "sae_top_5_test_accuracy": {
499
+ "1.0": 0.912,
500
+ "5.0": 0.912
501
+ }
502
+ },
503
+ "codeparrot/github-code_results": {
504
+ "sae_test_accuracy": {
505
+ "C": 0.9610000252723694,
506
+ "Python": 0.9750000238418579,
507
+ "HTML": 0.9890000224113464,
508
+ "Java": 0.9640000462532043,
509
+ "PHP": 0.9600000381469727
510
+ },
511
+ "llm_test_accuracy": {
512
+ "C": 0.9580000638961792,
513
+ "Python": 0.9850000739097595,
514
+ "HTML": 0.9890000224113464,
515
+ "Java": 0.9660000205039978,
516
+ "PHP": 0.956000030040741
517
+ },
518
+ "llm_top_1_test_accuracy": {
519
+ "C": 0.672,
520
+ "Python": 0.643,
521
+ "HTML": 0.788,
522
+ "Java": 0.616,
523
+ "PHP": 0.587
524
+ },
525
+ "llm_top_2_test_accuracy": {
526
+ "C": 0.653,
527
+ "Python": 0.675,
528
+ "HTML": 0.826,
529
+ "Java": 0.685,
530
+ "PHP": 0.642
531
+ },
532
+ "llm_top_5_test_accuracy": {
533
+ "C": 0.758,
534
+ "Python": 0.731,
535
+ "HTML": 0.898,
536
+ "Java": 0.722,
537
+ "PHP": 0.704
538
+ },
539
+ "sae_top_1_test_accuracy": {
540
+ "C": 0.634,
541
+ "Python": 0.637,
542
+ "HTML": 0.712,
543
+ "Java": 0.653,
544
+ "PHP": 0.589
545
+ },
546
+ "sae_top_2_test_accuracy": {
547
+ "C": 0.674,
548
+ "Python": 0.635,
549
+ "HTML": 0.779,
550
+ "Java": 0.644,
551
+ "PHP": 0.92
552
+ },
553
+ "sae_top_5_test_accuracy": {
554
+ "C": 0.683,
555
+ "Python": 0.955,
556
+ "HTML": 0.84,
557
+ "Java": 0.657,
558
+ "PHP": 0.927
559
+ }
560
+ },
561
+ "fancyzhx/ag_news_results": {
562
+ "sae_test_accuracy": {
563
+ "0": 0.9340000152587891,
564
+ "1": 0.987000048160553,
565
+ "2": 0.9290000200271606,
566
+ "3": 0.9480000734329224
567
+ },
568
+ "llm_test_accuracy": {
569
+ "0": 0.940000057220459,
570
+ "1": 0.9850000739097595,
571
+ "2": 0.9300000667572021,
572
+ "3": 0.9500000476837158
573
+ },
574
+ "llm_top_1_test_accuracy": {
575
+ "0": 0.568,
576
+ "1": 0.671,
577
+ "2": 0.667,
578
+ "3": 0.641
579
+ },
580
+ "llm_top_2_test_accuracy": {
581
+ "0": 0.802,
582
+ "1": 0.802,
583
+ "2": 0.701,
584
+ "3": 0.738
585
+ },
586
+ "llm_top_5_test_accuracy": {
587
+ "0": 0.813,
588
+ "1": 0.884,
589
+ "2": 0.762,
590
+ "3": 0.843
591
+ },
592
+ "sae_top_1_test_accuracy": {
593
+ "0": 0.708,
594
+ "1": 0.648,
595
+ "2": 0.705,
596
+ "3": 0.702
597
+ },
598
+ "sae_top_2_test_accuracy": {
599
+ "0": 0.761,
600
+ "1": 0.709,
601
+ "2": 0.744,
602
+ "3": 0.721
603
+ },
604
+ "sae_top_5_test_accuracy": {
605
+ "0": 0.855,
606
+ "1": 0.735,
607
+ "2": 0.842,
608
+ "3": 0.818
609
+ }
610
+ },
611
+ "Helsinki-NLP/europarl_results": {
612
+ "sae_test_accuracy": {
613
+ "en": 0.9980000257492065,
614
+ "fr": 1.0,
615
+ "de": 1.0,
616
+ "es": 0.999000072479248,
617
+ "nl": 0.999000072479248
618
+ },
619
+ "llm_test_accuracy": {
620
+ "en": 0.999000072479248,
621
+ "fr": 0.999000072479248,
622
+ "de": 1.0,
623
+ "es": 1.0,
624
+ "nl": 0.999000072479248
625
+ },
626
+ "llm_top_1_test_accuracy": {
627
+ "en": 0.732,
628
+ "fr": 0.587,
629
+ "de": 0.759,
630
+ "es": 0.489,
631
+ "nl": 0.639
632
+ },
633
+ "llm_top_2_test_accuracy": {
634
+ "en": 0.834,
635
+ "fr": 0.604,
636
+ "de": 0.84,
637
+ "es": 0.907,
638
+ "nl": 0.749
639
+ },
640
+ "llm_top_5_test_accuracy": {
641
+ "en": 0.889,
642
+ "fr": 0.927,
643
+ "de": 0.834,
644
+ "es": 0.977,
645
+ "nl": 0.873
646
+ },
647
+ "sae_top_1_test_accuracy": {
648
+ "en": 0.874,
649
+ "fr": 0.995,
650
+ "de": 0.918,
651
+ "es": 0.875,
652
+ "nl": 0.707
653
+ },
654
+ "sae_top_2_test_accuracy": {
655
+ "en": 0.999,
656
+ "fr": 0.996,
657
+ "de": 0.924,
658
+ "es": 0.994,
659
+ "nl": 0.999
660
+ },
661
+ "sae_top_5_test_accuracy": {
662
+ "en": 0.997,
663
+ "fr": 0.996,
664
+ "de": 0.995,
665
+ "es": 0.998,
666
+ "nl": 0.997
667
+ }
668
+ }
669
+ }
670
+ }
eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_4_custom_sae_eval_results.json ADDED
@@ -0,0 +1,670 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "sparse_probing",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "LabHC/bias_in_bios_class_set2",
8
+ "LabHC/bias_in_bios_class_set3",
9
+ "canrager/amazon_reviews_mcauley_1and5",
10
+ "canrager/amazon_reviews_mcauley_1and5_sentiment",
11
+ "codeparrot/github-code",
12
+ "fancyzhx/ag_news",
13
+ "Helsinki-NLP/europarl"
14
+ ],
15
+ "probe_train_set_size": 4000,
16
+ "probe_test_set_size": 1000,
17
+ "context_length": 128,
18
+ "sae_batch_size": 125,
19
+ "llm_batch_size": 32,
20
+ "llm_dtype": "bfloat16",
21
+ "model_name": "gemma-2-2b",
22
+ "k_values": [
23
+ 1,
24
+ 2,
25
+ 5
26
+ ],
27
+ "lower_vram_usage": false
28
+ },
29
+ "eval_id": "c96ea51a-aa4a-4eac-8cd3-baaf101dffaa",
30
+ "datetime_epoch_millis": 1740086511510,
31
+ "eval_result_metrics": {
32
+ "llm": {
33
+ "llm_test_accuracy": 0.9588312957435847,
34
+ "llm_top_1_test_accuracy": 0.6504687499999999,
35
+ "llm_top_2_test_accuracy": 0.7214187500000001,
36
+ "llm_top_5_test_accuracy": 0.7812625,
37
+ "llm_top_10_test_accuracy": null,
38
+ "llm_top_20_test_accuracy": null,
39
+ "llm_top_50_test_accuracy": null,
40
+ "llm_top_100_test_accuracy": null
41
+ },
42
+ "sae": {
43
+ "sae_test_accuracy": 0.9594812966883182,
44
+ "sae_top_1_test_accuracy": 0.7436875000000001,
45
+ "sae_top_2_test_accuracy": 0.78871875,
46
+ "sae_top_5_test_accuracy": 0.8602875000000001,
47
+ "sae_top_10_test_accuracy": null,
48
+ "sae_top_20_test_accuracy": null,
49
+ "sae_top_50_test_accuracy": null,
50
+ "sae_top_100_test_accuracy": null
51
+ }
52
+ },
53
+ "eval_result_details": [
54
+ {
55
+ "dataset_name": "LabHC/bias_in_bios_class_set1_results",
56
+ "llm_test_accuracy": 0.966800057888031,
57
+ "llm_top_1_test_accuracy": 0.6397999999999999,
58
+ "llm_top_2_test_accuracy": 0.6954,
59
+ "llm_top_5_test_accuracy": 0.7869999999999999,
60
+ "llm_top_10_test_accuracy": null,
61
+ "llm_top_20_test_accuracy": null,
62
+ "llm_top_50_test_accuracy": null,
63
+ "llm_top_100_test_accuracy": null,
64
+ "sae_test_accuracy": 0.9676000356674195,
65
+ "sae_top_1_test_accuracy": 0.7188,
66
+ "sae_top_2_test_accuracy": 0.784,
67
+ "sae_top_5_test_accuracy": 0.8559999999999999,
68
+ "sae_top_10_test_accuracy": null,
69
+ "sae_top_20_test_accuracy": null,
70
+ "sae_top_50_test_accuracy": null,
71
+ "sae_top_100_test_accuracy": null
72
+ },
73
+ {
74
+ "dataset_name": "LabHC/bias_in_bios_class_set2_results",
75
+ "llm_test_accuracy": 0.9542000532150269,
76
+ "llm_top_1_test_accuracy": 0.6686,
77
+ "llm_top_2_test_accuracy": 0.7194,
78
+ "llm_top_5_test_accuracy": 0.763,
79
+ "llm_top_10_test_accuracy": null,
80
+ "llm_top_20_test_accuracy": null,
81
+ "llm_top_50_test_accuracy": null,
82
+ "llm_top_100_test_accuracy": null,
83
+ "sae_test_accuracy": 0.9544000506401062,
84
+ "sae_top_1_test_accuracy": 0.6948000000000001,
85
+ "sae_top_2_test_accuracy": 0.74,
86
+ "sae_top_5_test_accuracy": 0.8316000000000001,
87
+ "sae_top_10_test_accuracy": null,
88
+ "sae_top_20_test_accuracy": null,
89
+ "sae_top_50_test_accuracy": null,
90
+ "sae_top_100_test_accuracy": null
91
+ },
92
+ {
93
+ "dataset_name": "LabHC/bias_in_bios_class_set3_results",
94
+ "llm_test_accuracy": 0.9332000374794006,
95
+ "llm_top_1_test_accuracy": 0.6826000000000001,
96
+ "llm_top_2_test_accuracy": 0.7456,
97
+ "llm_top_5_test_accuracy": 0.7732,
98
+ "llm_top_10_test_accuracy": null,
99
+ "llm_top_20_test_accuracy": null,
100
+ "llm_top_50_test_accuracy": null,
101
+ "llm_top_100_test_accuracy": null,
102
+ "sae_test_accuracy": 0.9342000484466553,
103
+ "sae_top_1_test_accuracy": 0.7196,
104
+ "sae_top_2_test_accuracy": 0.7874,
105
+ "sae_top_5_test_accuracy": 0.8324,
106
+ "sae_top_10_test_accuracy": null,
107
+ "sae_top_20_test_accuracy": null,
108
+ "sae_top_50_test_accuracy": null,
109
+ "sae_top_100_test_accuracy": null
110
+ },
111
+ {
112
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results",
113
+ "llm_test_accuracy": 0.9140000343322754,
114
+ "llm_top_1_test_accuracy": 0.6006,
115
+ "llm_top_2_test_accuracy": 0.6432,
116
+ "llm_top_5_test_accuracy": 0.6728000000000001,
117
+ "llm_top_10_test_accuracy": null,
118
+ "llm_top_20_test_accuracy": null,
119
+ "llm_top_50_test_accuracy": null,
120
+ "llm_top_100_test_accuracy": null,
121
+ "sae_test_accuracy": 0.9200000405311585,
122
+ "sae_top_1_test_accuracy": 0.6786,
123
+ "sae_top_2_test_accuracy": 0.6848,
124
+ "sae_top_5_test_accuracy": 0.759,
125
+ "sae_top_10_test_accuracy": null,
126
+ "sae_top_20_test_accuracy": null,
127
+ "sae_top_50_test_accuracy": null,
128
+ "sae_top_100_test_accuracy": null
129
+ },
130
+ {
131
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results",
132
+ "llm_test_accuracy": 0.9810000360012054,
133
+ "llm_top_1_test_accuracy": 0.673,
134
+ "llm_top_2_test_accuracy": 0.724,
135
+ "llm_top_5_test_accuracy": 0.766,
136
+ "llm_top_10_test_accuracy": null,
137
+ "llm_top_20_test_accuracy": null,
138
+ "llm_top_50_test_accuracy": null,
139
+ "llm_top_100_test_accuracy": null,
140
+ "sae_test_accuracy": 0.9765000641345978,
141
+ "sae_top_1_test_accuracy": 0.924,
142
+ "sae_top_2_test_accuracy": 0.924,
143
+ "sae_top_5_test_accuracy": 0.939,
144
+ "sae_top_10_test_accuracy": null,
145
+ "sae_top_20_test_accuracy": null,
146
+ "sae_top_50_test_accuracy": null,
147
+ "sae_top_100_test_accuracy": null
148
+ },
149
+ {
150
+ "dataset_name": "codeparrot/github-code_results",
151
+ "llm_test_accuracy": 0.9708000421524048,
152
+ "llm_top_1_test_accuracy": 0.6612,
153
+ "llm_top_2_test_accuracy": 0.6961999999999999,
154
+ "llm_top_5_test_accuracy": 0.7626,
155
+ "llm_top_10_test_accuracy": null,
156
+ "llm_top_20_test_accuracy": null,
157
+ "llm_top_50_test_accuracy": null,
158
+ "llm_top_100_test_accuracy": null,
159
+ "sae_test_accuracy": 0.9728000521659851,
160
+ "sae_top_1_test_accuracy": 0.6140000000000001,
161
+ "sae_top_2_test_accuracy": 0.6952,
162
+ "sae_top_5_test_accuracy": 0.8089999999999999,
163
+ "sae_top_10_test_accuracy": null,
164
+ "sae_top_20_test_accuracy": null,
165
+ "sae_top_50_test_accuracy": null,
166
+ "sae_top_100_test_accuracy": null
167
+ },
168
+ {
169
+ "dataset_name": "fancyzhx/ag_news_results",
170
+ "llm_test_accuracy": 0.9512500613927841,
171
+ "llm_top_1_test_accuracy": 0.6367499999999999,
172
+ "llm_top_2_test_accuracy": 0.76075,
173
+ "llm_top_5_test_accuracy": 0.8255,
174
+ "llm_top_10_test_accuracy": null,
175
+ "llm_top_20_test_accuracy": null,
176
+ "llm_top_50_test_accuracy": null,
177
+ "llm_top_100_test_accuracy": null,
178
+ "sae_test_accuracy": 0.9507500529289246,
179
+ "sae_top_1_test_accuracy": 0.7815000000000001,
180
+ "sae_top_2_test_accuracy": 0.83975,
181
+ "sae_top_5_test_accuracy": 0.8614999999999999,
182
+ "sae_top_10_test_accuracy": null,
183
+ "sae_top_20_test_accuracy": null,
184
+ "sae_top_50_test_accuracy": null,
185
+ "sae_top_100_test_accuracy": null
186
+ },
187
+ {
188
+ "dataset_name": "Helsinki-NLP/europarl_results",
189
+ "llm_test_accuracy": 0.9994000434875489,
190
+ "llm_top_1_test_accuracy": 0.6411999999999999,
191
+ "llm_top_2_test_accuracy": 0.7868,
192
+ "llm_top_5_test_accuracy": 0.9,
193
+ "llm_top_10_test_accuracy": null,
194
+ "llm_top_20_test_accuracy": null,
195
+ "llm_top_50_test_accuracy": null,
196
+ "llm_top_100_test_accuracy": null,
197
+ "sae_test_accuracy": 0.9996000289916992,
198
+ "sae_top_1_test_accuracy": 0.8182,
199
+ "sae_top_2_test_accuracy": 0.8546000000000001,
200
+ "sae_top_5_test_accuracy": 0.9938,
201
+ "sae_top_10_test_accuracy": null,
202
+ "sae_top_20_test_accuracy": null,
203
+ "sae_top_50_test_accuracy": null,
204
+ "sae_top_100_test_accuracy": null
205
+ }
206
+ ],
207
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
208
+ "sae_lens_id": "custom_sae",
209
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_4",
210
+ "sae_lens_version": "5.4.2",
211
+ "sae_cfg_dict": {
212
+ "model_name": "gemma-2-2b",
213
+ "d_in": 2304,
214
+ "d_sae": 16384,
215
+ "hook_layer": 12,
216
+ "hook_name": "blocks.12.hook_resid_post",
217
+ "context_size": null,
218
+ "hook_head_index": null,
219
+ "architecture": "topk",
220
+ "apply_b_dec_to_input": null,
221
+ "finetuning_scaling_factor": null,
222
+ "activation_fn_str": "",
223
+ "prepend_bos": true,
224
+ "normalize_activations": "none",
225
+ "dtype": "bfloat16",
226
+ "device": "",
227
+ "dataset_path": "",
228
+ "dataset_trust_remote_code": true,
229
+ "seqpos_slice": [
230
+ null
231
+ ],
232
+ "training_tokens": -100000,
233
+ "sae_lens_training_version": null,
234
+ "neuronpedia_id": null
235
+ },
236
+ "eval_result_unstructured": {
237
+ "LabHC/bias_in_bios_class_set1_results": {
238
+ "sae_test_accuracy": {
239
+ "0": 0.9510000348091125,
240
+ "1": 0.9650000333786011,
241
+ "2": 0.9540000557899475,
242
+ "6": 0.9930000305175781,
243
+ "9": 0.9750000238418579
244
+ },
245
+ "llm_test_accuracy": {
246
+ "0": 0.9510000348091125,
247
+ "1": 0.9670000672340393,
248
+ "2": 0.9530000686645508,
249
+ "6": 0.987000048160553,
250
+ "9": 0.9760000705718994
251
+ },
252
+ "llm_top_1_test_accuracy": {
253
+ "0": 0.577,
254
+ "1": 0.613,
255
+ "2": 0.662,
256
+ "6": 0.787,
257
+ "9": 0.56
258
+ },
259
+ "llm_top_2_test_accuracy": {
260
+ "0": 0.574,
261
+ "1": 0.66,
262
+ "2": 0.718,
263
+ "6": 0.811,
264
+ "9": 0.714
265
+ },
266
+ "llm_top_5_test_accuracy": {
267
+ "0": 0.713,
268
+ "1": 0.711,
269
+ "2": 0.755,
270
+ "6": 0.895,
271
+ "9": 0.861
272
+ },
273
+ "sae_top_1_test_accuracy": {
274
+ "0": 0.602,
275
+ "1": 0.645,
276
+ "2": 0.86,
277
+ "6": 0.811,
278
+ "9": 0.676
279
+ },
280
+ "sae_top_2_test_accuracy": {
281
+ "0": 0.634,
282
+ "1": 0.669,
283
+ "2": 0.871,
284
+ "6": 0.985,
285
+ "9": 0.761
286
+ },
287
+ "sae_top_5_test_accuracy": {
288
+ "0": 0.746,
289
+ "1": 0.711,
290
+ "2": 0.894,
291
+ "6": 0.981,
292
+ "9": 0.948
293
+ }
294
+ },
295
+ "LabHC/bias_in_bios_class_set2_results": {
296
+ "sae_test_accuracy": {
297
+ "11": 0.9670000672340393,
298
+ "13": 0.9510000348091125,
299
+ "14": 0.9530000686645508,
300
+ "18": 0.940000057220459,
301
+ "19": 0.9610000252723694
302
+ },
303
+ "llm_test_accuracy": {
304
+ "11": 0.968000054359436,
305
+ "13": 0.9500000476837158,
306
+ "14": 0.956000030040741,
307
+ "18": 0.9350000619888306,
308
+ "19": 0.9620000720024109
309
+ },
310
+ "llm_top_1_test_accuracy": {
311
+ "11": 0.545,
312
+ "13": 0.666,
313
+ "14": 0.649,
314
+ "18": 0.693,
315
+ "19": 0.79
316
+ },
317
+ "llm_top_2_test_accuracy": {
318
+ "11": 0.692,
319
+ "13": 0.724,
320
+ "14": 0.68,
321
+ "18": 0.732,
322
+ "19": 0.769
323
+ },
324
+ "llm_top_5_test_accuracy": {
325
+ "11": 0.793,
326
+ "13": 0.751,
327
+ "14": 0.718,
328
+ "18": 0.723,
329
+ "19": 0.83
330
+ },
331
+ "sae_top_1_test_accuracy": {
332
+ "11": 0.628,
333
+ "13": 0.66,
334
+ "14": 0.651,
335
+ "18": 0.693,
336
+ "19": 0.842
337
+ },
338
+ "sae_top_2_test_accuracy": {
339
+ "11": 0.736,
340
+ "13": 0.731,
341
+ "14": 0.698,
342
+ "18": 0.702,
343
+ "19": 0.833
344
+ },
345
+ "sae_top_5_test_accuracy": {
346
+ "11": 0.873,
347
+ "13": 0.779,
348
+ "14": 0.895,
349
+ "18": 0.734,
350
+ "19": 0.877
351
+ }
352
+ },
353
+ "LabHC/bias_in_bios_class_set3_results": {
354
+ "sae_test_accuracy": {
355
+ "20": 0.9720000624656677,
356
+ "21": 0.9320000410079956,
357
+ "22": 0.9190000295639038,
358
+ "25": 0.9580000638961792,
359
+ "26": 0.89000004529953
360
+ },
361
+ "llm_test_accuracy": {
362
+ "20": 0.9580000638961792,
363
+ "21": 0.9240000247955322,
364
+ "22": 0.9200000166893005,
365
+ "25": 0.9630000591278076,
366
+ "26": 0.9010000228881836
367
+ },
368
+ "llm_top_1_test_accuracy": {
369
+ "20": 0.696,
370
+ "21": 0.757,
371
+ "22": 0.637,
372
+ "25": 0.692,
373
+ "26": 0.631
374
+ },
375
+ "llm_top_2_test_accuracy": {
376
+ "20": 0.818,
377
+ "21": 0.774,
378
+ "22": 0.688,
379
+ "25": 0.762,
380
+ "26": 0.686
381
+ },
382
+ "llm_top_5_test_accuracy": {
383
+ "20": 0.862,
384
+ "21": 0.792,
385
+ "22": 0.748,
386
+ "25": 0.791,
387
+ "26": 0.673
388
+ },
389
+ "sae_top_1_test_accuracy": {
390
+ "20": 0.726,
391
+ "21": 0.75,
392
+ "22": 0.612,
393
+ "25": 0.888,
394
+ "26": 0.622
395
+ },
396
+ "sae_top_2_test_accuracy": {
397
+ "20": 0.74,
398
+ "21": 0.763,
399
+ "22": 0.831,
400
+ "25": 0.904,
401
+ "26": 0.699
402
+ },
403
+ "sae_top_5_test_accuracy": {
404
+ "20": 0.877,
405
+ "21": 0.779,
406
+ "22": 0.843,
407
+ "25": 0.887,
408
+ "26": 0.776
409
+ }
410
+ },
411
+ "canrager/amazon_reviews_mcauley_1and5_results": {
412
+ "sae_test_accuracy": {
413
+ "1": 0.9440000653266907,
414
+ "2": 0.9380000233650208,
415
+ "3": 0.9140000343322754,
416
+ "5": 0.9260000586509705,
417
+ "6": 0.878000020980835
418
+ },
419
+ "llm_test_accuracy": {
420
+ "1": 0.940000057220459,
421
+ "2": 0.9340000152587891,
422
+ "3": 0.9200000166893005,
423
+ "5": 0.9150000214576721,
424
+ "6": 0.8610000610351562
425
+ },
426
+ "llm_top_1_test_accuracy": {
427
+ "1": 0.662,
428
+ "2": 0.599,
429
+ "3": 0.592,
430
+ "5": 0.57,
431
+ "6": 0.58
432
+ },
433
+ "llm_top_2_test_accuracy": {
434
+ "1": 0.748,
435
+ "2": 0.642,
436
+ "3": 0.6,
437
+ "5": 0.625,
438
+ "6": 0.601
439
+ },
440
+ "llm_top_5_test_accuracy": {
441
+ "1": 0.764,
442
+ "2": 0.646,
443
+ "3": 0.639,
444
+ "5": 0.638,
445
+ "6": 0.677
446
+ },
447
+ "sae_top_1_test_accuracy": {
448
+ "1": 0.862,
449
+ "2": 0.854,
450
+ "3": 0.572,
451
+ "5": 0.53,
452
+ "6": 0.575
453
+ },
454
+ "sae_top_2_test_accuracy": {
455
+ "1": 0.851,
456
+ "2": 0.865,
457
+ "3": 0.583,
458
+ "5": 0.556,
459
+ "6": 0.569
460
+ },
461
+ "sae_top_5_test_accuracy": {
462
+ "1": 0.855,
463
+ "2": 0.873,
464
+ "3": 0.614,
465
+ "5": 0.824,
466
+ "6": 0.629
467
+ }
468
+ },
469
+ "canrager/amazon_reviews_mcauley_1and5_sentiment_results": {
470
+ "sae_test_accuracy": {
471
+ "1.0": 0.9770000576972961,
472
+ "5.0": 0.9760000705718994
473
+ },
474
+ "llm_test_accuracy": {
475
+ "1.0": 0.9800000190734863,
476
+ "5.0": 0.9820000529289246
477
+ },
478
+ "llm_top_1_test_accuracy": {
479
+ "1.0": 0.673,
480
+ "5.0": 0.673
481
+ },
482
+ "llm_top_2_test_accuracy": {
483
+ "1.0": 0.724,
484
+ "5.0": 0.724
485
+ },
486
+ "llm_top_5_test_accuracy": {
487
+ "1.0": 0.766,
488
+ "5.0": 0.766
489
+ },
490
+ "sae_top_1_test_accuracy": {
491
+ "1.0": 0.924,
492
+ "5.0": 0.924
493
+ },
494
+ "sae_top_2_test_accuracy": {
495
+ "1.0": 0.924,
496
+ "5.0": 0.924
497
+ },
498
+ "sae_top_5_test_accuracy": {
499
+ "1.0": 0.939,
500
+ "5.0": 0.939
501
+ }
502
+ },
503
+ "codeparrot/github-code_results": {
504
+ "sae_test_accuracy": {
505
+ "C": 0.9600000381469727,
506
+ "Python": 0.9820000529289246,
507
+ "HTML": 0.9910000562667847,
508
+ "Java": 0.9690000414848328,
509
+ "PHP": 0.9620000720024109
510
+ },
511
+ "llm_test_accuracy": {
512
+ "C": 0.9580000638961792,
513
+ "Python": 0.9850000739097595,
514
+ "HTML": 0.9890000224113464,
515
+ "Java": 0.9660000205039978,
516
+ "PHP": 0.956000030040741
517
+ },
518
+ "llm_top_1_test_accuracy": {
519
+ "C": 0.672,
520
+ "Python": 0.643,
521
+ "HTML": 0.788,
522
+ "Java": 0.616,
523
+ "PHP": 0.587
524
+ },
525
+ "llm_top_2_test_accuracy": {
526
+ "C": 0.653,
527
+ "Python": 0.675,
528
+ "HTML": 0.826,
529
+ "Java": 0.685,
530
+ "PHP": 0.642
531
+ },
532
+ "llm_top_5_test_accuracy": {
533
+ "C": 0.758,
534
+ "Python": 0.731,
535
+ "HTML": 0.898,
536
+ "Java": 0.722,
537
+ "PHP": 0.704
538
+ },
539
+ "sae_top_1_test_accuracy": {
540
+ "C": 0.633,
541
+ "Python": 0.636,
542
+ "HTML": 0.597,
543
+ "Java": 0.592,
544
+ "PHP": 0.612
545
+ },
546
+ "sae_top_2_test_accuracy": {
547
+ "C": 0.656,
548
+ "Python": 0.648,
549
+ "HTML": 0.835,
550
+ "Java": 0.623,
551
+ "PHP": 0.714
552
+ },
553
+ "sae_top_5_test_accuracy": {
554
+ "C": 0.664,
555
+ "Python": 0.956,
556
+ "HTML": 0.833,
557
+ "Java": 0.667,
558
+ "PHP": 0.925
559
+ }
560
+ },
561
+ "fancyzhx/ag_news_results": {
562
+ "sae_test_accuracy": {
563
+ "0": 0.9350000619888306,
564
+ "1": 0.9860000610351562,
565
+ "2": 0.9320000410079956,
566
+ "3": 0.9500000476837158
567
+ },
568
+ "llm_test_accuracy": {
569
+ "0": 0.940000057220459,
570
+ "1": 0.9850000739097595,
571
+ "2": 0.9300000667572021,
572
+ "3": 0.9500000476837158
573
+ },
574
+ "llm_top_1_test_accuracy": {
575
+ "0": 0.568,
576
+ "1": 0.671,
577
+ "2": 0.667,
578
+ "3": 0.641
579
+ },
580
+ "llm_top_2_test_accuracy": {
581
+ "0": 0.802,
582
+ "1": 0.802,
583
+ "2": 0.701,
584
+ "3": 0.738
585
+ },
586
+ "llm_top_5_test_accuracy": {
587
+ "0": 0.813,
588
+ "1": 0.884,
589
+ "2": 0.762,
590
+ "3": 0.843
591
+ },
592
+ "sae_top_1_test_accuracy": {
593
+ "0": 0.767,
594
+ "1": 0.962,
595
+ "2": 0.588,
596
+ "3": 0.809
597
+ },
598
+ "sae_top_2_test_accuracy": {
599
+ "0": 0.789,
600
+ "1": 0.967,
601
+ "2": 0.769,
602
+ "3": 0.834
603
+ },
604
+ "sae_top_5_test_accuracy": {
605
+ "0": 0.791,
606
+ "1": 0.968,
607
+ "2": 0.841,
608
+ "3": 0.846
609
+ }
610
+ },
611
+ "Helsinki-NLP/europarl_results": {
612
+ "sae_test_accuracy": {
613
+ "en": 1.0,
614
+ "fr": 1.0,
615
+ "de": 1.0,
616
+ "es": 0.999000072479248,
617
+ "nl": 0.999000072479248
618
+ },
619
+ "llm_test_accuracy": {
620
+ "en": 0.999000072479248,
621
+ "fr": 0.999000072479248,
622
+ "de": 1.0,
623
+ "es": 1.0,
624
+ "nl": 0.999000072479248
625
+ },
626
+ "llm_top_1_test_accuracy": {
627
+ "en": 0.732,
628
+ "fr": 0.587,
629
+ "de": 0.759,
630
+ "es": 0.489,
631
+ "nl": 0.639
632
+ },
633
+ "llm_top_2_test_accuracy": {
634
+ "en": 0.834,
635
+ "fr": 0.604,
636
+ "de": 0.84,
637
+ "es": 0.907,
638
+ "nl": 0.749
639
+ },
640
+ "llm_top_5_test_accuracy": {
641
+ "en": 0.889,
642
+ "fr": 0.927,
643
+ "de": 0.834,
644
+ "es": 0.977,
645
+ "nl": 0.873
646
+ },
647
+ "sae_top_1_test_accuracy": {
648
+ "en": 0.679,
649
+ "fr": 0.991,
650
+ "de": 0.897,
651
+ "es": 0.905,
652
+ "nl": 0.619
653
+ },
654
+ "sae_top_2_test_accuracy": {
655
+ "en": 0.778,
656
+ "fr": 0.995,
657
+ "de": 0.899,
658
+ "es": 0.915,
659
+ "nl": 0.686
660
+ },
661
+ "sae_top_5_test_accuracy": {
662
+ "en": 0.999,
663
+ "fr": 0.998,
664
+ "de": 0.983,
665
+ "es": 0.995,
666
+ "nl": 0.994
667
+ }
668
+ }
669
+ }
670
+ }
eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_5_custom_sae_eval_results.json ADDED
@@ -0,0 +1,670 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "sparse_probing",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "LabHC/bias_in_bios_class_set2",
8
+ "LabHC/bias_in_bios_class_set3",
9
+ "canrager/amazon_reviews_mcauley_1and5",
10
+ "canrager/amazon_reviews_mcauley_1and5_sentiment",
11
+ "codeparrot/github-code",
12
+ "fancyzhx/ag_news",
13
+ "Helsinki-NLP/europarl"
14
+ ],
15
+ "probe_train_set_size": 4000,
16
+ "probe_test_set_size": 1000,
17
+ "context_length": 128,
18
+ "sae_batch_size": 125,
19
+ "llm_batch_size": 32,
20
+ "llm_dtype": "bfloat16",
21
+ "model_name": "gemma-2-2b",
22
+ "k_values": [
23
+ 1,
24
+ 2,
25
+ 5
26
+ ],
27
+ "lower_vram_usage": false
28
+ },
29
+ "eval_id": "4dba3019-b486-4928-ae23-7f7632c2bebe",
30
+ "datetime_epoch_millis": 1740086399366,
31
+ "eval_result_metrics": {
32
+ "llm": {
33
+ "llm_test_accuracy": 0.9588312957435847,
34
+ "llm_top_1_test_accuracy": 0.6504687499999999,
35
+ "llm_top_2_test_accuracy": 0.7214187500000001,
36
+ "llm_top_5_test_accuracy": 0.7812625,
37
+ "llm_top_10_test_accuracy": null,
38
+ "llm_top_20_test_accuracy": null,
39
+ "llm_top_50_test_accuracy": null,
40
+ "llm_top_100_test_accuracy": null
41
+ },
42
+ "sae": {
43
+ "sae_test_accuracy": 0.9582375384867191,
44
+ "sae_top_1_test_accuracy": 0.76724375,
45
+ "sae_top_2_test_accuracy": 0.7940874999999998,
46
+ "sae_top_5_test_accuracy": 0.8478312500000001,
47
+ "sae_top_10_test_accuracy": null,
48
+ "sae_top_20_test_accuracy": null,
49
+ "sae_top_50_test_accuracy": null,
50
+ "sae_top_100_test_accuracy": null
51
+ }
52
+ },
53
+ "eval_result_details": [
54
+ {
55
+ "dataset_name": "LabHC/bias_in_bios_class_set1_results",
56
+ "llm_test_accuracy": 0.966800057888031,
57
+ "llm_top_1_test_accuracy": 0.6397999999999999,
58
+ "llm_top_2_test_accuracy": 0.6954,
59
+ "llm_top_5_test_accuracy": 0.7869999999999999,
60
+ "llm_top_10_test_accuracy": null,
61
+ "llm_top_20_test_accuracy": null,
62
+ "llm_top_50_test_accuracy": null,
63
+ "llm_top_100_test_accuracy": null,
64
+ "sae_test_accuracy": 0.9690000414848328,
65
+ "sae_top_1_test_accuracy": 0.7638,
66
+ "sae_top_2_test_accuracy": 0.8053999999999999,
67
+ "sae_top_5_test_accuracy": 0.859,
68
+ "sae_top_10_test_accuracy": null,
69
+ "sae_top_20_test_accuracy": null,
70
+ "sae_top_50_test_accuracy": null,
71
+ "sae_top_100_test_accuracy": null
72
+ },
73
+ {
74
+ "dataset_name": "LabHC/bias_in_bios_class_set2_results",
75
+ "llm_test_accuracy": 0.9542000532150269,
76
+ "llm_top_1_test_accuracy": 0.6686,
77
+ "llm_top_2_test_accuracy": 0.7194,
78
+ "llm_top_5_test_accuracy": 0.763,
79
+ "llm_top_10_test_accuracy": null,
80
+ "llm_top_20_test_accuracy": null,
81
+ "llm_top_50_test_accuracy": null,
82
+ "llm_top_100_test_accuracy": null,
83
+ "sae_test_accuracy": 0.9508000373840332,
84
+ "sae_top_1_test_accuracy": 0.7435999999999999,
85
+ "sae_top_2_test_accuracy": 0.7455999999999999,
86
+ "sae_top_5_test_accuracy": 0.8374,
87
+ "sae_top_10_test_accuracy": null,
88
+ "sae_top_20_test_accuracy": null,
89
+ "sae_top_50_test_accuracy": null,
90
+ "sae_top_100_test_accuracy": null
91
+ },
92
+ {
93
+ "dataset_name": "LabHC/bias_in_bios_class_set3_results",
94
+ "llm_test_accuracy": 0.9332000374794006,
95
+ "llm_top_1_test_accuracy": 0.6826000000000001,
96
+ "llm_top_2_test_accuracy": 0.7456,
97
+ "llm_top_5_test_accuracy": 0.7732,
98
+ "llm_top_10_test_accuracy": null,
99
+ "llm_top_20_test_accuracy": null,
100
+ "llm_top_50_test_accuracy": null,
101
+ "llm_top_100_test_accuracy": null,
102
+ "sae_test_accuracy": 0.9286000251770019,
103
+ "sae_top_1_test_accuracy": 0.6911999999999999,
104
+ "sae_top_2_test_accuracy": 0.7295999999999999,
105
+ "sae_top_5_test_accuracy": 0.8076000000000001,
106
+ "sae_top_10_test_accuracy": null,
107
+ "sae_top_20_test_accuracy": null,
108
+ "sae_top_50_test_accuracy": null,
109
+ "sae_top_100_test_accuracy": null
110
+ },
111
+ {
112
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results",
113
+ "llm_test_accuracy": 0.9140000343322754,
114
+ "llm_top_1_test_accuracy": 0.6006,
115
+ "llm_top_2_test_accuracy": 0.6432,
116
+ "llm_top_5_test_accuracy": 0.6728000000000001,
117
+ "llm_top_10_test_accuracy": null,
118
+ "llm_top_20_test_accuracy": null,
119
+ "llm_top_50_test_accuracy": null,
120
+ "llm_top_100_test_accuracy": null,
121
+ "sae_test_accuracy": 0.9166000366210938,
122
+ "sae_top_1_test_accuracy": 0.7298,
123
+ "sae_top_2_test_accuracy": 0.7394,
124
+ "sae_top_5_test_accuracy": 0.8263999999999999,
125
+ "sae_top_10_test_accuracy": null,
126
+ "sae_top_20_test_accuracy": null,
127
+ "sae_top_50_test_accuracy": null,
128
+ "sae_top_100_test_accuracy": null
129
+ },
130
+ {
131
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results",
132
+ "llm_test_accuracy": 0.9810000360012054,
133
+ "llm_top_1_test_accuracy": 0.673,
134
+ "llm_top_2_test_accuracy": 0.724,
135
+ "llm_top_5_test_accuracy": 0.766,
136
+ "llm_top_10_test_accuracy": null,
137
+ "llm_top_20_test_accuracy": null,
138
+ "llm_top_50_test_accuracy": null,
139
+ "llm_top_100_test_accuracy": null,
140
+ "sae_test_accuracy": 0.9775000512599945,
141
+ "sae_top_1_test_accuracy": 0.909,
142
+ "sae_top_2_test_accuracy": 0.909,
143
+ "sae_top_5_test_accuracy": 0.916,
144
+ "sae_top_10_test_accuracy": null,
145
+ "sae_top_20_test_accuracy": null,
146
+ "sae_top_50_test_accuracy": null,
147
+ "sae_top_100_test_accuracy": null
148
+ },
149
+ {
150
+ "dataset_name": "codeparrot/github-code_results",
151
+ "llm_test_accuracy": 0.9708000421524048,
152
+ "llm_top_1_test_accuracy": 0.6612,
153
+ "llm_top_2_test_accuracy": 0.6961999999999999,
154
+ "llm_top_5_test_accuracy": 0.7626,
155
+ "llm_top_10_test_accuracy": null,
156
+ "llm_top_20_test_accuracy": null,
157
+ "llm_top_50_test_accuracy": null,
158
+ "llm_top_100_test_accuracy": null,
159
+ "sae_test_accuracy": 0.9698000550270081,
160
+ "sae_top_1_test_accuracy": 0.644,
161
+ "sae_top_2_test_accuracy": 0.681,
162
+ "sae_top_5_test_accuracy": 0.7186,
163
+ "sae_top_10_test_accuracy": null,
164
+ "sae_top_20_test_accuracy": null,
165
+ "sae_top_50_test_accuracy": null,
166
+ "sae_top_100_test_accuracy": null
167
+ },
168
+ {
169
+ "dataset_name": "fancyzhx/ag_news_results",
170
+ "llm_test_accuracy": 0.9512500613927841,
171
+ "llm_top_1_test_accuracy": 0.6367499999999999,
172
+ "llm_top_2_test_accuracy": 0.76075,
173
+ "llm_top_5_test_accuracy": 0.8255,
174
+ "llm_top_10_test_accuracy": null,
175
+ "llm_top_20_test_accuracy": null,
176
+ "llm_top_50_test_accuracy": null,
177
+ "llm_top_100_test_accuracy": null,
178
+ "sae_test_accuracy": 0.9540000557899475,
179
+ "sae_top_1_test_accuracy": 0.78075,
180
+ "sae_top_2_test_accuracy": 0.8355,
181
+ "sae_top_5_test_accuracy": 0.87725,
182
+ "sae_top_10_test_accuracy": null,
183
+ "sae_top_20_test_accuracy": null,
184
+ "sae_top_50_test_accuracy": null,
185
+ "sae_top_100_test_accuracy": null
186
+ },
187
+ {
188
+ "dataset_name": "Helsinki-NLP/europarl_results",
189
+ "llm_test_accuracy": 0.9994000434875489,
190
+ "llm_top_1_test_accuracy": 0.6411999999999999,
191
+ "llm_top_2_test_accuracy": 0.7868,
192
+ "llm_top_5_test_accuracy": 0.9,
193
+ "llm_top_10_test_accuracy": null,
194
+ "llm_top_20_test_accuracy": null,
195
+ "llm_top_50_test_accuracy": null,
196
+ "llm_top_100_test_accuracy": null,
197
+ "sae_test_accuracy": 0.9996000051498413,
198
+ "sae_top_1_test_accuracy": 0.8757999999999999,
199
+ "sae_top_2_test_accuracy": 0.9071999999999999,
200
+ "sae_top_5_test_accuracy": 0.9404,
201
+ "sae_top_10_test_accuracy": null,
202
+ "sae_top_20_test_accuracy": null,
203
+ "sae_top_50_test_accuracy": null,
204
+ "sae_top_100_test_accuracy": null
205
+ }
206
+ ],
207
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
208
+ "sae_lens_id": "custom_sae",
209
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_5",
210
+ "sae_lens_version": "5.4.2",
211
+ "sae_cfg_dict": {
212
+ "model_name": "gemma-2-2b",
213
+ "d_in": 2304,
214
+ "d_sae": 16384,
215
+ "hook_layer": 12,
216
+ "hook_name": "blocks.12.hook_resid_post",
217
+ "context_size": null,
218
+ "hook_head_index": null,
219
+ "architecture": "topk",
220
+ "apply_b_dec_to_input": null,
221
+ "finetuning_scaling_factor": null,
222
+ "activation_fn_str": "",
223
+ "prepend_bos": true,
224
+ "normalize_activations": "none",
225
+ "dtype": "bfloat16",
226
+ "device": "",
227
+ "dataset_path": "",
228
+ "dataset_trust_remote_code": true,
229
+ "seqpos_slice": [
230
+ null
231
+ ],
232
+ "training_tokens": -100000,
233
+ "sae_lens_training_version": null,
234
+ "neuronpedia_id": null
235
+ },
236
+ "eval_result_unstructured": {
237
+ "LabHC/bias_in_bios_class_set1_results": {
238
+ "sae_test_accuracy": {
239
+ "0": 0.9480000734329224,
240
+ "1": 0.9660000205039978,
241
+ "2": 0.9570000171661377,
242
+ "6": 0.9890000224113464,
243
+ "9": 0.9850000739097595
244
+ },
245
+ "llm_test_accuracy": {
246
+ "0": 0.9510000348091125,
247
+ "1": 0.9670000672340393,
248
+ "2": 0.9530000686645508,
249
+ "6": 0.987000048160553,
250
+ "9": 0.9760000705718994
251
+ },
252
+ "llm_top_1_test_accuracy": {
253
+ "0": 0.577,
254
+ "1": 0.613,
255
+ "2": 0.662,
256
+ "6": 0.787,
257
+ "9": 0.56
258
+ },
259
+ "llm_top_2_test_accuracy": {
260
+ "0": 0.574,
261
+ "1": 0.66,
262
+ "2": 0.718,
263
+ "6": 0.811,
264
+ "9": 0.714
265
+ },
266
+ "llm_top_5_test_accuracy": {
267
+ "0": 0.713,
268
+ "1": 0.711,
269
+ "2": 0.755,
270
+ "6": 0.895,
271
+ "9": 0.861
272
+ },
273
+ "sae_top_1_test_accuracy": {
274
+ "0": 0.586,
275
+ "1": 0.709,
276
+ "2": 0.763,
277
+ "6": 0.982,
278
+ "9": 0.779
279
+ },
280
+ "sae_top_2_test_accuracy": {
281
+ "0": 0.645,
282
+ "1": 0.698,
283
+ "2": 0.884,
284
+ "6": 0.981,
285
+ "9": 0.819
286
+ },
287
+ "sae_top_5_test_accuracy": {
288
+ "0": 0.747,
289
+ "1": 0.726,
290
+ "2": 0.888,
291
+ "6": 0.98,
292
+ "9": 0.954
293
+ }
294
+ },
295
+ "LabHC/bias_in_bios_class_set2_results": {
296
+ "sae_test_accuracy": {
297
+ "11": 0.9570000171661377,
298
+ "13": 0.9510000348091125,
299
+ "14": 0.9540000557899475,
300
+ "18": 0.9320000410079956,
301
+ "19": 0.9600000381469727
302
+ },
303
+ "llm_test_accuracy": {
304
+ "11": 0.968000054359436,
305
+ "13": 0.9500000476837158,
306
+ "14": 0.956000030040741,
307
+ "18": 0.9350000619888306,
308
+ "19": 0.9620000720024109
309
+ },
310
+ "llm_top_1_test_accuracy": {
311
+ "11": 0.545,
312
+ "13": 0.666,
313
+ "14": 0.649,
314
+ "18": 0.693,
315
+ "19": 0.79
316
+ },
317
+ "llm_top_2_test_accuracy": {
318
+ "11": 0.692,
319
+ "13": 0.724,
320
+ "14": 0.68,
321
+ "18": 0.732,
322
+ "19": 0.769
323
+ },
324
+ "llm_top_5_test_accuracy": {
325
+ "11": 0.793,
326
+ "13": 0.751,
327
+ "14": 0.718,
328
+ "18": 0.723,
329
+ "19": 0.83
330
+ },
331
+ "sae_top_1_test_accuracy": {
332
+ "11": 0.723,
333
+ "13": 0.761,
334
+ "14": 0.696,
335
+ "18": 0.683,
336
+ "19": 0.855
337
+ },
338
+ "sae_top_2_test_accuracy": {
339
+ "11": 0.711,
340
+ "13": 0.777,
341
+ "14": 0.682,
342
+ "18": 0.705,
343
+ "19": 0.853
344
+ },
345
+ "sae_top_5_test_accuracy": {
346
+ "11": 0.88,
347
+ "13": 0.809,
348
+ "14": 0.812,
349
+ "18": 0.836,
350
+ "19": 0.85
351
+ }
352
+ },
353
+ "LabHC/bias_in_bios_class_set3_results": {
354
+ "sae_test_accuracy": {
355
+ "20": 0.956000030040741,
356
+ "21": 0.9150000214576721,
357
+ "22": 0.9180000424385071,
358
+ "25": 0.9570000171661377,
359
+ "26": 0.8970000147819519
360
+ },
361
+ "llm_test_accuracy": {
362
+ "20": 0.9580000638961792,
363
+ "21": 0.9240000247955322,
364
+ "22": 0.9200000166893005,
365
+ "25": 0.9630000591278076,
366
+ "26": 0.9010000228881836
367
+ },
368
+ "llm_top_1_test_accuracy": {
369
+ "20": 0.696,
370
+ "21": 0.757,
371
+ "22": 0.637,
372
+ "25": 0.692,
373
+ "26": 0.631
374
+ },
375
+ "llm_top_2_test_accuracy": {
376
+ "20": 0.818,
377
+ "21": 0.774,
378
+ "22": 0.688,
379
+ "25": 0.762,
380
+ "26": 0.686
381
+ },
382
+ "llm_top_5_test_accuracy": {
383
+ "20": 0.862,
384
+ "21": 0.792,
385
+ "22": 0.748,
386
+ "25": 0.791,
387
+ "26": 0.673
388
+ },
389
+ "sae_top_1_test_accuracy": {
390
+ "20": 0.699,
391
+ "21": 0.681,
392
+ "22": 0.686,
393
+ "25": 0.779,
394
+ "26": 0.611
395
+ },
396
+ "sae_top_2_test_accuracy": {
397
+ "20": 0.692,
398
+ "21": 0.667,
399
+ "22": 0.683,
400
+ "25": 0.887,
401
+ "26": 0.719
402
+ },
403
+ "sae_top_5_test_accuracy": {
404
+ "20": 0.885,
405
+ "21": 0.825,
406
+ "22": 0.717,
407
+ "25": 0.898,
408
+ "26": 0.713
409
+ }
410
+ },
411
+ "canrager/amazon_reviews_mcauley_1and5_results": {
412
+ "sae_test_accuracy": {
413
+ "1": 0.9520000219345093,
414
+ "2": 0.9320000410079956,
415
+ "3": 0.909000039100647,
416
+ "5": 0.9220000505447388,
417
+ "6": 0.8680000305175781
418
+ },
419
+ "llm_test_accuracy": {
420
+ "1": 0.940000057220459,
421
+ "2": 0.9340000152587891,
422
+ "3": 0.9200000166893005,
423
+ "5": 0.9150000214576721,
424
+ "6": 0.8610000610351562
425
+ },
426
+ "llm_top_1_test_accuracy": {
427
+ "1": 0.662,
428
+ "2": 0.599,
429
+ "3": 0.592,
430
+ "5": 0.57,
431
+ "6": 0.58
432
+ },
433
+ "llm_top_2_test_accuracy": {
434
+ "1": 0.748,
435
+ "2": 0.642,
436
+ "3": 0.6,
437
+ "5": 0.625,
438
+ "6": 0.601
439
+ },
440
+ "llm_top_5_test_accuracy": {
441
+ "1": 0.764,
442
+ "2": 0.646,
443
+ "3": 0.639,
444
+ "5": 0.638,
445
+ "6": 0.677
446
+ },
447
+ "sae_top_1_test_accuracy": {
448
+ "1": 0.875,
449
+ "2": 0.832,
450
+ "3": 0.611,
451
+ "5": 0.567,
452
+ "6": 0.764
453
+ },
454
+ "sae_top_2_test_accuracy": {
455
+ "1": 0.868,
456
+ "2": 0.836,
457
+ "3": 0.612,
458
+ "5": 0.622,
459
+ "6": 0.759
460
+ },
461
+ "sae_top_5_test_accuracy": {
462
+ "1": 0.913,
463
+ "2": 0.858,
464
+ "3": 0.729,
465
+ "5": 0.876,
466
+ "6": 0.756
467
+ }
468
+ },
469
+ "canrager/amazon_reviews_mcauley_1and5_sentiment_results": {
470
+ "sae_test_accuracy": {
471
+ "1.0": 0.9780000448226929,
472
+ "5.0": 0.9770000576972961
473
+ },
474
+ "llm_test_accuracy": {
475
+ "1.0": 0.9800000190734863,
476
+ "5.0": 0.9820000529289246
477
+ },
478
+ "llm_top_1_test_accuracy": {
479
+ "1.0": 0.673,
480
+ "5.0": 0.673
481
+ },
482
+ "llm_top_2_test_accuracy": {
483
+ "1.0": 0.724,
484
+ "5.0": 0.724
485
+ },
486
+ "llm_top_5_test_accuracy": {
487
+ "1.0": 0.766,
488
+ "5.0": 0.766
489
+ },
490
+ "sae_top_1_test_accuracy": {
491
+ "1.0": 0.909,
492
+ "5.0": 0.909
493
+ },
494
+ "sae_top_2_test_accuracy": {
495
+ "1.0": 0.909,
496
+ "5.0": 0.909
497
+ },
498
+ "sae_top_5_test_accuracy": {
499
+ "1.0": 0.916,
500
+ "5.0": 0.916
501
+ }
502
+ },
503
+ "codeparrot/github-code_results": {
504
+ "sae_test_accuracy": {
505
+ "C": 0.9540000557899475,
506
+ "Python": 0.9850000739097595,
507
+ "HTML": 0.987000048160553,
508
+ "Java": 0.9650000333786011,
509
+ "PHP": 0.9580000638961792
510
+ },
511
+ "llm_test_accuracy": {
512
+ "C": 0.9580000638961792,
513
+ "Python": 0.9850000739097595,
514
+ "HTML": 0.9890000224113464,
515
+ "Java": 0.9660000205039978,
516
+ "PHP": 0.956000030040741
517
+ },
518
+ "llm_top_1_test_accuracy": {
519
+ "C": 0.672,
520
+ "Python": 0.643,
521
+ "HTML": 0.788,
522
+ "Java": 0.616,
523
+ "PHP": 0.587
524
+ },
525
+ "llm_top_2_test_accuracy": {
526
+ "C": 0.653,
527
+ "Python": 0.675,
528
+ "HTML": 0.826,
529
+ "Java": 0.685,
530
+ "PHP": 0.642
531
+ },
532
+ "llm_top_5_test_accuracy": {
533
+ "C": 0.758,
534
+ "Python": 0.731,
535
+ "HTML": 0.898,
536
+ "Java": 0.722,
537
+ "PHP": 0.704
538
+ },
539
+ "sae_top_1_test_accuracy": {
540
+ "C": 0.645,
541
+ "Python": 0.637,
542
+ "HTML": 0.627,
543
+ "Java": 0.714,
544
+ "PHP": 0.597
545
+ },
546
+ "sae_top_2_test_accuracy": {
547
+ "C": 0.64,
548
+ "Python": 0.641,
549
+ "HTML": 0.807,
550
+ "Java": 0.718,
551
+ "PHP": 0.599
552
+ },
553
+ "sae_top_5_test_accuracy": {
554
+ "C": 0.711,
555
+ "Python": 0.617,
556
+ "HTML": 0.878,
557
+ "Java": 0.71,
558
+ "PHP": 0.677
559
+ }
560
+ },
561
+ "fancyzhx/ag_news_results": {
562
+ "sae_test_accuracy": {
563
+ "0": 0.9390000700950623,
564
+ "1": 0.9890000224113464,
565
+ "2": 0.9300000667572021,
566
+ "3": 0.9580000638961792
567
+ },
568
+ "llm_test_accuracy": {
569
+ "0": 0.940000057220459,
570
+ "1": 0.9850000739097595,
571
+ "2": 0.9300000667572021,
572
+ "3": 0.9500000476837158
573
+ },
574
+ "llm_top_1_test_accuracy": {
575
+ "0": 0.568,
576
+ "1": 0.671,
577
+ "2": 0.667,
578
+ "3": 0.641
579
+ },
580
+ "llm_top_2_test_accuracy": {
581
+ "0": 0.802,
582
+ "1": 0.802,
583
+ "2": 0.701,
584
+ "3": 0.738
585
+ },
586
+ "llm_top_5_test_accuracy": {
587
+ "0": 0.813,
588
+ "1": 0.884,
589
+ "2": 0.762,
590
+ "3": 0.843
591
+ },
592
+ "sae_top_1_test_accuracy": {
593
+ "0": 0.74,
594
+ "1": 0.98,
595
+ "2": 0.659,
596
+ "3": 0.744
597
+ },
598
+ "sae_top_2_test_accuracy": {
599
+ "0": 0.807,
600
+ "1": 0.979,
601
+ "2": 0.736,
602
+ "3": 0.82
603
+ },
604
+ "sae_top_5_test_accuracy": {
605
+ "0": 0.841,
606
+ "1": 0.977,
607
+ "2": 0.817,
608
+ "3": 0.874
609
+ }
610
+ },
611
+ "Helsinki-NLP/europarl_results": {
612
+ "sae_test_accuracy": {
613
+ "en": 1.0,
614
+ "fr": 1.0,
615
+ "de": 1.0,
616
+ "es": 1.0,
617
+ "nl": 0.9980000257492065
618
+ },
619
+ "llm_test_accuracy": {
620
+ "en": 0.999000072479248,
621
+ "fr": 0.999000072479248,
622
+ "de": 1.0,
623
+ "es": 1.0,
624
+ "nl": 0.999000072479248
625
+ },
626
+ "llm_top_1_test_accuracy": {
627
+ "en": 0.732,
628
+ "fr": 0.587,
629
+ "de": 0.759,
630
+ "es": 0.489,
631
+ "nl": 0.639
632
+ },
633
+ "llm_top_2_test_accuracy": {
634
+ "en": 0.834,
635
+ "fr": 0.604,
636
+ "de": 0.84,
637
+ "es": 0.907,
638
+ "nl": 0.749
639
+ },
640
+ "llm_top_5_test_accuracy": {
641
+ "en": 0.889,
642
+ "fr": 0.927,
643
+ "de": 0.834,
644
+ "es": 0.977,
645
+ "nl": 0.873
646
+ },
647
+ "sae_top_1_test_accuracy": {
648
+ "en": 0.999,
649
+ "fr": 0.995,
650
+ "de": 0.882,
651
+ "es": 0.89,
652
+ "nl": 0.613
653
+ },
654
+ "sae_top_2_test_accuracy": {
655
+ "en": 1.0,
656
+ "fr": 0.995,
657
+ "de": 0.886,
658
+ "es": 0.953,
659
+ "nl": 0.702
660
+ },
661
+ "sae_top_5_test_accuracy": {
662
+ "en": 1.0,
663
+ "fr": 0.995,
664
+ "de": 0.9,
665
+ "es": 0.963,
666
+ "nl": 0.844
667
+ }
668
+ }
669
+ }
670
+ }
eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_0_custom_sae_eval_results.json ADDED
@@ -0,0 +1,670 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "sparse_probing",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "LabHC/bias_in_bios_class_set2",
8
+ "LabHC/bias_in_bios_class_set3",
9
+ "canrager/amazon_reviews_mcauley_1and5",
10
+ "canrager/amazon_reviews_mcauley_1and5_sentiment",
11
+ "codeparrot/github-code",
12
+ "fancyzhx/ag_news",
13
+ "Helsinki-NLP/europarl"
14
+ ],
15
+ "probe_train_set_size": 4000,
16
+ "probe_test_set_size": 1000,
17
+ "context_length": 128,
18
+ "sae_batch_size": 125,
19
+ "llm_batch_size": 32,
20
+ "llm_dtype": "bfloat16",
21
+ "model_name": "gemma-2-2b",
22
+ "k_values": [
23
+ 1,
24
+ 2,
25
+ 5
26
+ ],
27
+ "lower_vram_usage": false
28
+ },
29
+ "eval_id": "e8be90f1-dcda-4232-ae3c-3804b7f0f035",
30
+ "datetime_epoch_millis": 1740126031553,
31
+ "eval_result_metrics": {
32
+ "llm": {
33
+ "llm_test_accuracy": 0.9571125406771899,
34
+ "llm_top_1_test_accuracy": 0.6527562499999999,
35
+ "llm_top_2_test_accuracy": 0.7210875,
36
+ "llm_top_5_test_accuracy": 0.7801125,
37
+ "llm_top_10_test_accuracy": null,
38
+ "llm_top_20_test_accuracy": null,
39
+ "llm_top_50_test_accuracy": null,
40
+ "llm_top_100_test_accuracy": null
41
+ },
42
+ "sae": {
43
+ "sae_test_accuracy": 0.9536375492811203,
44
+ "sae_top_1_test_accuracy": 0.74093125,
45
+ "sae_top_2_test_accuracy": 0.789025,
46
+ "sae_top_5_test_accuracy": 0.850925,
47
+ "sae_top_10_test_accuracy": null,
48
+ "sae_top_20_test_accuracy": null,
49
+ "sae_top_50_test_accuracy": null,
50
+ "sae_top_100_test_accuracy": null
51
+ }
52
+ },
53
+ "eval_result_details": [
54
+ {
55
+ "dataset_name": "LabHC/bias_in_bios_class_set1_results",
56
+ "llm_test_accuracy": 0.966800057888031,
57
+ "llm_top_1_test_accuracy": 0.6397999999999999,
58
+ "llm_top_2_test_accuracy": 0.6954,
59
+ "llm_top_5_test_accuracy": 0.7869999999999999,
60
+ "llm_top_10_test_accuracy": null,
61
+ "llm_top_20_test_accuracy": null,
62
+ "llm_top_50_test_accuracy": null,
63
+ "llm_top_100_test_accuracy": null,
64
+ "sae_test_accuracy": 0.9600000500679016,
65
+ "sae_top_1_test_accuracy": 0.764,
66
+ "sae_top_2_test_accuracy": 0.7848,
67
+ "sae_top_5_test_accuracy": 0.8678000000000001,
68
+ "sae_top_10_test_accuracy": null,
69
+ "sae_top_20_test_accuracy": null,
70
+ "sae_top_50_test_accuracy": null,
71
+ "sae_top_100_test_accuracy": null
72
+ },
73
+ {
74
+ "dataset_name": "LabHC/bias_in_bios_class_set2_results",
75
+ "llm_test_accuracy": 0.9502000451087952,
76
+ "llm_top_1_test_accuracy": 0.6718,
77
+ "llm_top_2_test_accuracy": 0.7230000000000001,
78
+ "llm_top_5_test_accuracy": 0.7615999999999999,
79
+ "llm_top_10_test_accuracy": null,
80
+ "llm_top_20_test_accuracy": null,
81
+ "llm_top_50_test_accuracy": null,
82
+ "llm_top_100_test_accuracy": null,
83
+ "sae_test_accuracy": 0.9488000512123108,
84
+ "sae_top_1_test_accuracy": 0.7184000000000001,
85
+ "sae_top_2_test_accuracy": 0.7484,
86
+ "sae_top_5_test_accuracy": 0.8374,
87
+ "sae_top_10_test_accuracy": null,
88
+ "sae_top_20_test_accuracy": null,
89
+ "sae_top_50_test_accuracy": null,
90
+ "sae_top_100_test_accuracy": null
91
+ },
92
+ {
93
+ "dataset_name": "LabHC/bias_in_bios_class_set3_results",
94
+ "llm_test_accuracy": 0.9292000293731689,
95
+ "llm_top_1_test_accuracy": 0.687,
96
+ "llm_top_2_test_accuracy": 0.7306000000000001,
97
+ "llm_top_5_test_accuracy": 0.7644,
98
+ "llm_top_10_test_accuracy": null,
99
+ "llm_top_20_test_accuracy": null,
100
+ "llm_top_50_test_accuracy": null,
101
+ "llm_top_100_test_accuracy": null,
102
+ "sae_test_accuracy": 0.9272000432014466,
103
+ "sae_top_1_test_accuracy": 0.7466000000000002,
104
+ "sae_top_2_test_accuracy": 0.7849999999999999,
105
+ "sae_top_5_test_accuracy": 0.8164,
106
+ "sae_top_10_test_accuracy": null,
107
+ "sae_top_20_test_accuracy": null,
108
+ "sae_top_50_test_accuracy": null,
109
+ "sae_top_100_test_accuracy": null
110
+ },
111
+ {
112
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results",
113
+ "llm_test_accuracy": 0.9116000413894654,
114
+ "llm_top_1_test_accuracy": 0.6076,
115
+ "llm_top_2_test_accuracy": 0.6492,
116
+ "llm_top_5_test_accuracy": 0.6728000000000001,
117
+ "llm_top_10_test_accuracy": null,
118
+ "llm_top_20_test_accuracy": null,
119
+ "llm_top_50_test_accuracy": null,
120
+ "llm_top_100_test_accuracy": null,
121
+ "sae_test_accuracy": 0.9074000358581543,
122
+ "sae_top_1_test_accuracy": 0.6859999999999999,
123
+ "sae_top_2_test_accuracy": 0.758,
124
+ "sae_top_5_test_accuracy": 0.7934,
125
+ "sae_top_10_test_accuracy": null,
126
+ "sae_top_20_test_accuracy": null,
127
+ "sae_top_50_test_accuracy": null,
128
+ "sae_top_100_test_accuracy": null
129
+ },
130
+ {
131
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results",
132
+ "llm_test_accuracy": 0.9810000360012054,
133
+ "llm_top_1_test_accuracy": 0.673,
134
+ "llm_top_2_test_accuracy": 0.724,
135
+ "llm_top_5_test_accuracy": 0.766,
136
+ "llm_top_10_test_accuracy": null,
137
+ "llm_top_20_test_accuracy": null,
138
+ "llm_top_50_test_accuracy": null,
139
+ "llm_top_100_test_accuracy": null,
140
+ "sae_test_accuracy": 0.9675000607967377,
141
+ "sae_top_1_test_accuracy": 0.886,
142
+ "sae_top_2_test_accuracy": 0.895,
143
+ "sae_top_5_test_accuracy": 0.939,
144
+ "sae_top_10_test_accuracy": null,
145
+ "sae_top_20_test_accuracy": null,
146
+ "sae_top_50_test_accuracy": null,
147
+ "sae_top_100_test_accuracy": null
148
+ },
149
+ {
150
+ "dataset_name": "codeparrot/github-code_results",
151
+ "llm_test_accuracy": 0.9672000527381897,
152
+ "llm_top_1_test_accuracy": 0.6634,
153
+ "llm_top_2_test_accuracy": 0.6894,
154
+ "llm_top_5_test_accuracy": 0.7562,
155
+ "llm_top_10_test_accuracy": null,
156
+ "llm_top_20_test_accuracy": null,
157
+ "llm_top_50_test_accuracy": null,
158
+ "llm_top_100_test_accuracy": null,
159
+ "sae_test_accuracy": 0.9694000482559204,
160
+ "sae_top_1_test_accuracy": 0.6424,
161
+ "sae_top_2_test_accuracy": 0.7304,
162
+ "sae_top_5_test_accuracy": 0.7984,
163
+ "sae_top_10_test_accuracy": null,
164
+ "sae_top_20_test_accuracy": null,
165
+ "sae_top_50_test_accuracy": null,
166
+ "sae_top_100_test_accuracy": null
167
+ },
168
+ {
169
+ "dataset_name": "fancyzhx/ag_news_results",
170
+ "llm_test_accuracy": 0.9515000432729721,
171
+ "llm_top_1_test_accuracy": 0.63925,
172
+ "llm_top_2_test_accuracy": 0.7785,
173
+ "llm_top_5_test_accuracy": 0.8225,
174
+ "llm_top_10_test_accuracy": null,
175
+ "llm_top_20_test_accuracy": null,
176
+ "llm_top_50_test_accuracy": null,
177
+ "llm_top_100_test_accuracy": null,
178
+ "sae_test_accuracy": 0.9520000517368317,
179
+ "sae_top_1_test_accuracy": 0.6192500000000001,
180
+ "sae_top_2_test_accuracy": 0.6609999999999999,
181
+ "sae_top_5_test_accuracy": 0.772,
182
+ "sae_top_10_test_accuracy": null,
183
+ "sae_top_20_test_accuracy": null,
184
+ "sae_top_50_test_accuracy": null,
185
+ "sae_top_100_test_accuracy": null
186
+ },
187
+ {
188
+ "dataset_name": "Helsinki-NLP/europarl_results",
189
+ "llm_test_accuracy": 0.9994000196456909,
190
+ "llm_top_1_test_accuracy": 0.6401999999999999,
191
+ "llm_top_2_test_accuracy": 0.7786000000000001,
192
+ "llm_top_5_test_accuracy": 0.9103999999999999,
193
+ "llm_top_10_test_accuracy": null,
194
+ "llm_top_20_test_accuracy": null,
195
+ "llm_top_50_test_accuracy": null,
196
+ "llm_top_100_test_accuracy": null,
197
+ "sae_test_accuracy": 0.9968000531196595,
198
+ "sae_top_1_test_accuracy": 0.8648000000000001,
199
+ "sae_top_2_test_accuracy": 0.9495999999999999,
200
+ "sae_top_5_test_accuracy": 0.983,
201
+ "sae_top_10_test_accuracy": null,
202
+ "sae_top_20_test_accuracy": null,
203
+ "sae_top_50_test_accuracy": null,
204
+ "sae_top_100_test_accuracy": null
205
+ }
206
+ ],
207
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
208
+ "sae_lens_id": "custom_sae",
209
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_0",
210
+ "sae_lens_version": "5.4.2",
211
+ "sae_cfg_dict": {
212
+ "model_name": "gemma-2-2b",
213
+ "d_in": 2304,
214
+ "d_sae": 65536,
215
+ "hook_layer": 12,
216
+ "hook_name": "blocks.12.hook_resid_post",
217
+ "context_size": null,
218
+ "hook_head_index": null,
219
+ "architecture": "topk",
220
+ "apply_b_dec_to_input": null,
221
+ "finetuning_scaling_factor": null,
222
+ "activation_fn_str": "",
223
+ "prepend_bos": true,
224
+ "normalize_activations": "none",
225
+ "dtype": "bfloat16",
226
+ "device": "",
227
+ "dataset_path": "",
228
+ "dataset_trust_remote_code": true,
229
+ "seqpos_slice": [
230
+ null
231
+ ],
232
+ "training_tokens": -100000,
233
+ "sae_lens_training_version": null,
234
+ "neuronpedia_id": null
235
+ },
236
+ "eval_result_unstructured": {
237
+ "LabHC/bias_in_bios_class_set1_results": {
238
+ "sae_test_accuracy": {
239
+ "0": 0.9390000700950623,
240
+ "1": 0.9570000171661377,
241
+ "2": 0.9420000314712524,
242
+ "6": 0.9860000610351562,
243
+ "9": 0.9760000705718994
244
+ },
245
+ "llm_test_accuracy": {
246
+ "0": 0.9510000348091125,
247
+ "1": 0.9670000672340393,
248
+ "2": 0.9530000686645508,
249
+ "6": 0.987000048160553,
250
+ "9": 0.9760000705718994
251
+ },
252
+ "llm_top_1_test_accuracy": {
253
+ "0": 0.577,
254
+ "1": 0.613,
255
+ "2": 0.662,
256
+ "6": 0.787,
257
+ "9": 0.56
258
+ },
259
+ "llm_top_2_test_accuracy": {
260
+ "0": 0.574,
261
+ "1": 0.66,
262
+ "2": 0.718,
263
+ "6": 0.811,
264
+ "9": 0.714
265
+ },
266
+ "llm_top_5_test_accuracy": {
267
+ "0": 0.713,
268
+ "1": 0.711,
269
+ "2": 0.755,
270
+ "6": 0.895,
271
+ "9": 0.861
272
+ },
273
+ "sae_top_1_test_accuracy": {
274
+ "0": 0.591,
275
+ "1": 0.648,
276
+ "2": 0.836,
277
+ "6": 0.975,
278
+ "9": 0.77
279
+ },
280
+ "sae_top_2_test_accuracy": {
281
+ "0": 0.587,
282
+ "1": 0.7,
283
+ "2": 0.85,
284
+ "6": 0.975,
285
+ "9": 0.812
286
+ },
287
+ "sae_top_5_test_accuracy": {
288
+ "0": 0.869,
289
+ "1": 0.789,
290
+ "2": 0.869,
291
+ "6": 0.973,
292
+ "9": 0.839
293
+ }
294
+ },
295
+ "LabHC/bias_in_bios_class_set2_results": {
296
+ "sae_test_accuracy": {
297
+ "11": 0.9630000591278076,
298
+ "13": 0.9480000734329224,
299
+ "14": 0.9480000734329224,
300
+ "18": 0.9280000329017639,
301
+ "19": 0.9570000171661377
302
+ },
303
+ "llm_test_accuracy": {
304
+ "11": 0.9550000429153442,
305
+ "13": 0.9550000429153442,
306
+ "14": 0.9550000429153442,
307
+ "18": 0.9330000281333923,
308
+ "19": 0.9530000686645508
309
+ },
310
+ "llm_top_1_test_accuracy": {
311
+ "11": 0.557,
312
+ "13": 0.673,
313
+ "14": 0.645,
314
+ "18": 0.697,
315
+ "19": 0.787
316
+ },
317
+ "llm_top_2_test_accuracy": {
318
+ "11": 0.705,
319
+ "13": 0.718,
320
+ "14": 0.679,
321
+ "18": 0.73,
322
+ "19": 0.783
323
+ },
324
+ "llm_top_5_test_accuracy": {
325
+ "11": 0.794,
326
+ "13": 0.744,
327
+ "14": 0.724,
328
+ "18": 0.713,
329
+ "19": 0.833
330
+ },
331
+ "sae_top_1_test_accuracy": {
332
+ "11": 0.724,
333
+ "13": 0.679,
334
+ "14": 0.64,
335
+ "18": 0.7,
336
+ "19": 0.849
337
+ },
338
+ "sae_top_2_test_accuracy": {
339
+ "11": 0.753,
340
+ "13": 0.676,
341
+ "14": 0.736,
342
+ "18": 0.71,
343
+ "19": 0.867
344
+ },
345
+ "sae_top_5_test_accuracy": {
346
+ "11": 0.824,
347
+ "13": 0.677,
348
+ "14": 0.87,
349
+ "18": 0.918,
350
+ "19": 0.898
351
+ }
352
+ },
353
+ "LabHC/bias_in_bios_class_set3_results": {
354
+ "sae_test_accuracy": {
355
+ "20": 0.9520000219345093,
356
+ "21": 0.9250000715255737,
357
+ "22": 0.9100000262260437,
358
+ "25": 0.9530000686645508,
359
+ "26": 0.8960000276565552
360
+ },
361
+ "llm_test_accuracy": {
362
+ "20": 0.9570000171661377,
363
+ "21": 0.9150000214576721,
364
+ "22": 0.9230000376701355,
365
+ "25": 0.9610000252723694,
366
+ "26": 0.89000004529953
367
+ },
368
+ "llm_top_1_test_accuracy": {
369
+ "20": 0.716,
370
+ "21": 0.761,
371
+ "22": 0.648,
372
+ "25": 0.692,
373
+ "26": 0.618
374
+ },
375
+ "llm_top_2_test_accuracy": {
376
+ "20": 0.805,
377
+ "21": 0.762,
378
+ "22": 0.649,
379
+ "25": 0.766,
380
+ "26": 0.671
381
+ },
382
+ "llm_top_5_test_accuracy": {
383
+ "20": 0.875,
384
+ "21": 0.783,
385
+ "22": 0.711,
386
+ "25": 0.782,
387
+ "26": 0.671
388
+ },
389
+ "sae_top_1_test_accuracy": {
390
+ "20": 0.764,
391
+ "21": 0.622,
392
+ "22": 0.862,
393
+ "25": 0.862,
394
+ "26": 0.623
395
+ },
396
+ "sae_top_2_test_accuracy": {
397
+ "20": 0.9,
398
+ "21": 0.616,
399
+ "22": 0.857,
400
+ "25": 0.86,
401
+ "26": 0.692
402
+ },
403
+ "sae_top_5_test_accuracy": {
404
+ "20": 0.915,
405
+ "21": 0.726,
406
+ "22": 0.848,
407
+ "25": 0.886,
408
+ "26": 0.707
409
+ }
410
+ },
411
+ "canrager/amazon_reviews_mcauley_1and5_results": {
412
+ "sae_test_accuracy": {
413
+ "1": 0.9410000443458557,
414
+ "2": 0.9290000200271606,
415
+ "3": 0.9030000567436218,
416
+ "5": 0.9050000309944153,
417
+ "6": 0.859000027179718
418
+ },
419
+ "llm_test_accuracy": {
420
+ "1": 0.9460000395774841,
421
+ "2": 0.9330000281333923,
422
+ "3": 0.9130000472068787,
423
+ "5": 0.9160000681877136,
424
+ "6": 0.8500000238418579
425
+ },
426
+ "llm_top_1_test_accuracy": {
427
+ "1": 0.674,
428
+ "2": 0.587,
429
+ "3": 0.601,
430
+ "5": 0.583,
431
+ "6": 0.593
432
+ },
433
+ "llm_top_2_test_accuracy": {
434
+ "1": 0.737,
435
+ "2": 0.632,
436
+ "3": 0.605,
437
+ "5": 0.634,
438
+ "6": 0.638
439
+ },
440
+ "llm_top_5_test_accuracy": {
441
+ "1": 0.763,
442
+ "2": 0.626,
443
+ "3": 0.63,
444
+ "5": 0.656,
445
+ "6": 0.689
446
+ },
447
+ "sae_top_1_test_accuracy": {
448
+ "1": 0.763,
449
+ "2": 0.818,
450
+ "3": 0.638,
451
+ "5": 0.576,
452
+ "6": 0.635
453
+ },
454
+ "sae_top_2_test_accuracy": {
455
+ "1": 0.794,
456
+ "2": 0.817,
457
+ "3": 0.681,
458
+ "5": 0.793,
459
+ "6": 0.705
460
+ },
461
+ "sae_top_5_test_accuracy": {
462
+ "1": 0.873,
463
+ "2": 0.876,
464
+ "3": 0.709,
465
+ "5": 0.776,
466
+ "6": 0.733
467
+ }
468
+ },
469
+ "canrager/amazon_reviews_mcauley_1and5_sentiment_results": {
470
+ "sae_test_accuracy": {
471
+ "1.0": 0.9670000672340393,
472
+ "5.0": 0.968000054359436
473
+ },
474
+ "llm_test_accuracy": {
475
+ "1.0": 0.9820000529289246,
476
+ "5.0": 0.9800000190734863
477
+ },
478
+ "llm_top_1_test_accuracy": {
479
+ "1.0": 0.673,
480
+ "5.0": 0.673
481
+ },
482
+ "llm_top_2_test_accuracy": {
483
+ "1.0": 0.724,
484
+ "5.0": 0.724
485
+ },
486
+ "llm_top_5_test_accuracy": {
487
+ "1.0": 0.766,
488
+ "5.0": 0.766
489
+ },
490
+ "sae_top_1_test_accuracy": {
491
+ "1.0": 0.886,
492
+ "5.0": 0.886
493
+ },
494
+ "sae_top_2_test_accuracy": {
495
+ "1.0": 0.895,
496
+ "5.0": 0.895
497
+ },
498
+ "sae_top_5_test_accuracy": {
499
+ "1.0": 0.939,
500
+ "5.0": 0.939
501
+ }
502
+ },
503
+ "codeparrot/github-code_results": {
504
+ "sae_test_accuracy": {
505
+ "C": 0.9550000429153442,
506
+ "Python": 0.987000048160553,
507
+ "HTML": 0.9890000224113464,
508
+ "Java": 0.9580000638961792,
509
+ "PHP": 0.9580000638961792
510
+ },
511
+ "llm_test_accuracy": {
512
+ "C": 0.9450000524520874,
513
+ "Python": 0.9890000224113464,
514
+ "HTML": 0.987000048160553,
515
+ "Java": 0.9620000720024109,
516
+ "PHP": 0.9530000686645508
517
+ },
518
+ "llm_top_1_test_accuracy": {
519
+ "C": 0.669,
520
+ "Python": 0.638,
521
+ "HTML": 0.788,
522
+ "Java": 0.621,
523
+ "PHP": 0.601
524
+ },
525
+ "llm_top_2_test_accuracy": {
526
+ "C": 0.656,
527
+ "Python": 0.671,
528
+ "HTML": 0.811,
529
+ "Java": 0.678,
530
+ "PHP": 0.631
531
+ },
532
+ "llm_top_5_test_accuracy": {
533
+ "C": 0.744,
534
+ "Python": 0.735,
535
+ "HTML": 0.904,
536
+ "Java": 0.726,
537
+ "PHP": 0.672
538
+ },
539
+ "sae_top_1_test_accuracy": {
540
+ "C": 0.54,
541
+ "Python": 0.623,
542
+ "HTML": 0.815,
543
+ "Java": 0.64,
544
+ "PHP": 0.594
545
+ },
546
+ "sae_top_2_test_accuracy": {
547
+ "C": 0.629,
548
+ "Python": 0.657,
549
+ "HTML": 0.821,
550
+ "Java": 0.644,
551
+ "PHP": 0.901
552
+ },
553
+ "sae_top_5_test_accuracy": {
554
+ "C": 0.655,
555
+ "Python": 0.786,
556
+ "HTML": 0.939,
557
+ "Java": 0.709,
558
+ "PHP": 0.903
559
+ }
560
+ },
561
+ "fancyzhx/ag_news_results": {
562
+ "sae_test_accuracy": {
563
+ "0": 0.9440000653266907,
564
+ "1": 0.9770000576972961,
565
+ "2": 0.9260000586509705,
566
+ "3": 0.9610000252723694
567
+ },
568
+ "llm_test_accuracy": {
569
+ "0": 0.937000036239624,
570
+ "1": 0.987000048160553,
571
+ "2": 0.9300000667572021,
572
+ "3": 0.9520000219345093
573
+ },
574
+ "llm_top_1_test_accuracy": {
575
+ "0": 0.586,
576
+ "1": 0.658,
577
+ "2": 0.669,
578
+ "3": 0.644
579
+ },
580
+ "llm_top_2_test_accuracy": {
581
+ "0": 0.809,
582
+ "1": 0.8,
583
+ "2": 0.691,
584
+ "3": 0.814
585
+ },
586
+ "llm_top_5_test_accuracy": {
587
+ "0": 0.827,
588
+ "1": 0.877,
589
+ "2": 0.748,
590
+ "3": 0.838
591
+ },
592
+ "sae_top_1_test_accuracy": {
593
+ "0": 0.667,
594
+ "1": 0.678,
595
+ "2": 0.517,
596
+ "3": 0.615
597
+ },
598
+ "sae_top_2_test_accuracy": {
599
+ "0": 0.736,
600
+ "1": 0.691,
601
+ "2": 0.583,
602
+ "3": 0.634
603
+ },
604
+ "sae_top_5_test_accuracy": {
605
+ "0": 0.784,
606
+ "1": 0.8,
607
+ "2": 0.673,
608
+ "3": 0.831
609
+ }
610
+ },
611
+ "Helsinki-NLP/europarl_results": {
612
+ "sae_test_accuracy": {
613
+ "en": 0.9970000386238098,
614
+ "fr": 0.999000072479248,
615
+ "de": 0.9950000643730164,
616
+ "es": 0.9980000257492065,
617
+ "nl": 0.9950000643730164
618
+ },
619
+ "llm_test_accuracy": {
620
+ "en": 1.0,
621
+ "fr": 1.0,
622
+ "de": 1.0,
623
+ "es": 0.9980000257492065,
624
+ "nl": 0.999000072479248
625
+ },
626
+ "llm_top_1_test_accuracy": {
627
+ "en": 0.742,
628
+ "fr": 0.575,
629
+ "de": 0.737,
630
+ "es": 0.504,
631
+ "nl": 0.643
632
+ },
633
+ "llm_top_2_test_accuracy": {
634
+ "en": 0.822,
635
+ "fr": 0.593,
636
+ "de": 0.83,
637
+ "es": 0.905,
638
+ "nl": 0.743
639
+ },
640
+ "llm_top_5_test_accuracy": {
641
+ "en": 0.898,
642
+ "fr": 0.908,
643
+ "de": 0.908,
644
+ "es": 0.982,
645
+ "nl": 0.856
646
+ },
647
+ "sae_top_1_test_accuracy": {
648
+ "en": 0.846,
649
+ "fr": 0.989,
650
+ "de": 0.905,
651
+ "es": 0.833,
652
+ "nl": 0.751
653
+ },
654
+ "sae_top_2_test_accuracy": {
655
+ "en": 0.946,
656
+ "fr": 0.992,
657
+ "de": 0.925,
658
+ "es": 0.889,
659
+ "nl": 0.996
660
+ },
661
+ "sae_top_5_test_accuracy": {
662
+ "en": 1.0,
663
+ "fr": 0.995,
664
+ "de": 0.936,
665
+ "es": 0.989,
666
+ "nl": 0.995
667
+ }
668
+ }
669
+ }
670
+ }
eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_1_custom_sae_eval_results.json ADDED
@@ -0,0 +1,670 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "sparse_probing",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "LabHC/bias_in_bios_class_set2",
8
+ "LabHC/bias_in_bios_class_set3",
9
+ "canrager/amazon_reviews_mcauley_1and5",
10
+ "canrager/amazon_reviews_mcauley_1and5_sentiment",
11
+ "codeparrot/github-code",
12
+ "fancyzhx/ag_news",
13
+ "Helsinki-NLP/europarl"
14
+ ],
15
+ "probe_train_set_size": 4000,
16
+ "probe_test_set_size": 1000,
17
+ "context_length": 128,
18
+ "sae_batch_size": 125,
19
+ "llm_batch_size": 32,
20
+ "llm_dtype": "bfloat16",
21
+ "model_name": "gemma-2-2b",
22
+ "k_values": [
23
+ 1,
24
+ 2,
25
+ 5
26
+ ],
27
+ "lower_vram_usage": false
28
+ },
29
+ "eval_id": "4aa6f584-e8d2-44f2-8192-3f18c7bc0505",
30
+ "datetime_epoch_millis": 1740125622667,
31
+ "eval_result_metrics": {
32
+ "llm": {
33
+ "llm_test_accuracy": 0.9571125406771899,
34
+ "llm_top_1_test_accuracy": 0.6527562499999999,
35
+ "llm_top_2_test_accuracy": 0.7210875,
36
+ "llm_top_5_test_accuracy": 0.7801125,
37
+ "llm_top_10_test_accuracy": null,
38
+ "llm_top_20_test_accuracy": null,
39
+ "llm_top_50_test_accuracy": null,
40
+ "llm_top_100_test_accuracy": null
41
+ },
42
+ "sae": {
43
+ "sae_test_accuracy": 0.9543625459074975,
44
+ "sae_top_1_test_accuracy": 0.737125,
45
+ "sae_top_2_test_accuracy": 0.78683125,
46
+ "sae_top_5_test_accuracy": 0.85568125,
47
+ "sae_top_10_test_accuracy": null,
48
+ "sae_top_20_test_accuracy": null,
49
+ "sae_top_50_test_accuracy": null,
50
+ "sae_top_100_test_accuracy": null
51
+ }
52
+ },
53
+ "eval_result_details": [
54
+ {
55
+ "dataset_name": "LabHC/bias_in_bios_class_set1_results",
56
+ "llm_test_accuracy": 0.966800057888031,
57
+ "llm_top_1_test_accuracy": 0.6397999999999999,
58
+ "llm_top_2_test_accuracy": 0.6954,
59
+ "llm_top_5_test_accuracy": 0.7869999999999999,
60
+ "llm_top_10_test_accuracy": null,
61
+ "llm_top_20_test_accuracy": null,
62
+ "llm_top_50_test_accuracy": null,
63
+ "llm_top_100_test_accuracy": null,
64
+ "sae_test_accuracy": 0.9600000381469727,
65
+ "sae_top_1_test_accuracy": 0.6628000000000001,
66
+ "sae_top_2_test_accuracy": 0.7802,
67
+ "sae_top_5_test_accuracy": 0.9032,
68
+ "sae_top_10_test_accuracy": null,
69
+ "sae_top_20_test_accuracy": null,
70
+ "sae_top_50_test_accuracy": null,
71
+ "sae_top_100_test_accuracy": null
72
+ },
73
+ {
74
+ "dataset_name": "LabHC/bias_in_bios_class_set2_results",
75
+ "llm_test_accuracy": 0.9502000451087952,
76
+ "llm_top_1_test_accuracy": 0.6718,
77
+ "llm_top_2_test_accuracy": 0.7230000000000001,
78
+ "llm_top_5_test_accuracy": 0.7615999999999999,
79
+ "llm_top_10_test_accuracy": null,
80
+ "llm_top_20_test_accuracy": null,
81
+ "llm_top_50_test_accuracy": null,
82
+ "llm_top_100_test_accuracy": null,
83
+ "sae_test_accuracy": 0.9474000453948974,
84
+ "sae_top_1_test_accuracy": 0.6812,
85
+ "sae_top_2_test_accuracy": 0.7704,
86
+ "sae_top_5_test_accuracy": 0.8158,
87
+ "sae_top_10_test_accuracy": null,
88
+ "sae_top_20_test_accuracy": null,
89
+ "sae_top_50_test_accuracy": null,
90
+ "sae_top_100_test_accuracy": null
91
+ },
92
+ {
93
+ "dataset_name": "LabHC/bias_in_bios_class_set3_results",
94
+ "llm_test_accuracy": 0.9292000293731689,
95
+ "llm_top_1_test_accuracy": 0.687,
96
+ "llm_top_2_test_accuracy": 0.7306000000000001,
97
+ "llm_top_5_test_accuracy": 0.7644,
98
+ "llm_top_10_test_accuracy": null,
99
+ "llm_top_20_test_accuracy": null,
100
+ "llm_top_50_test_accuracy": null,
101
+ "llm_top_100_test_accuracy": null,
102
+ "sae_test_accuracy": 0.9260000467300415,
103
+ "sae_top_1_test_accuracy": 0.7666,
104
+ "sae_top_2_test_accuracy": 0.8024000000000001,
105
+ "sae_top_5_test_accuracy": 0.8436,
106
+ "sae_top_10_test_accuracy": null,
107
+ "sae_top_20_test_accuracy": null,
108
+ "sae_top_50_test_accuracy": null,
109
+ "sae_top_100_test_accuracy": null
110
+ },
111
+ {
112
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results",
113
+ "llm_test_accuracy": 0.9116000413894654,
114
+ "llm_top_1_test_accuracy": 0.6076,
115
+ "llm_top_2_test_accuracy": 0.6492,
116
+ "llm_top_5_test_accuracy": 0.6728000000000001,
117
+ "llm_top_10_test_accuracy": null,
118
+ "llm_top_20_test_accuracy": null,
119
+ "llm_top_50_test_accuracy": null,
120
+ "llm_top_100_test_accuracy": null,
121
+ "sae_test_accuracy": 0.9126000523567199,
122
+ "sae_top_1_test_accuracy": 0.6842,
123
+ "sae_top_2_test_accuracy": 0.7218,
124
+ "sae_top_5_test_accuracy": 0.8238,
125
+ "sae_top_10_test_accuracy": null,
126
+ "sae_top_20_test_accuracy": null,
127
+ "sae_top_50_test_accuracy": null,
128
+ "sae_top_100_test_accuracy": null
129
+ },
130
+ {
131
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results",
132
+ "llm_test_accuracy": 0.9810000360012054,
133
+ "llm_top_1_test_accuracy": 0.673,
134
+ "llm_top_2_test_accuracy": 0.724,
135
+ "llm_top_5_test_accuracy": 0.766,
136
+ "llm_top_10_test_accuracy": null,
137
+ "llm_top_20_test_accuracy": null,
138
+ "llm_top_50_test_accuracy": null,
139
+ "llm_top_100_test_accuracy": null,
140
+ "sae_test_accuracy": 0.9705000519752502,
141
+ "sae_top_1_test_accuracy": 0.911,
142
+ "sae_top_2_test_accuracy": 0.938,
143
+ "sae_top_5_test_accuracy": 0.934,
144
+ "sae_top_10_test_accuracy": null,
145
+ "sae_top_20_test_accuracy": null,
146
+ "sae_top_50_test_accuracy": null,
147
+ "sae_top_100_test_accuracy": null
148
+ },
149
+ {
150
+ "dataset_name": "codeparrot/github-code_results",
151
+ "llm_test_accuracy": 0.9672000527381897,
152
+ "llm_top_1_test_accuracy": 0.6634,
153
+ "llm_top_2_test_accuracy": 0.6894,
154
+ "llm_top_5_test_accuracy": 0.7562,
155
+ "llm_top_10_test_accuracy": null,
156
+ "llm_top_20_test_accuracy": null,
157
+ "llm_top_50_test_accuracy": null,
158
+ "llm_top_100_test_accuracy": null,
159
+ "sae_test_accuracy": 0.9682000398635864,
160
+ "sae_top_1_test_accuracy": 0.6384000000000001,
161
+ "sae_top_2_test_accuracy": 0.656,
162
+ "sae_top_5_test_accuracy": 0.7674,
163
+ "sae_top_10_test_accuracy": null,
164
+ "sae_top_20_test_accuracy": null,
165
+ "sae_top_50_test_accuracy": null,
166
+ "sae_top_100_test_accuracy": null
167
+ },
168
+ {
169
+ "dataset_name": "fancyzhx/ag_news_results",
170
+ "llm_test_accuracy": 0.9515000432729721,
171
+ "llm_top_1_test_accuracy": 0.63925,
172
+ "llm_top_2_test_accuracy": 0.7785,
173
+ "llm_top_5_test_accuracy": 0.8225,
174
+ "llm_top_10_test_accuracy": null,
175
+ "llm_top_20_test_accuracy": null,
176
+ "llm_top_50_test_accuracy": null,
177
+ "llm_top_100_test_accuracy": null,
178
+ "sae_test_accuracy": 0.9510000348091125,
179
+ "sae_top_1_test_accuracy": 0.655,
180
+ "sae_top_2_test_accuracy": 0.67025,
181
+ "sae_top_5_test_accuracy": 0.77025,
182
+ "sae_top_10_test_accuracy": null,
183
+ "sae_top_20_test_accuracy": null,
184
+ "sae_top_50_test_accuracy": null,
185
+ "sae_top_100_test_accuracy": null
186
+ },
187
+ {
188
+ "dataset_name": "Helsinki-NLP/europarl_results",
189
+ "llm_test_accuracy": 0.9994000196456909,
190
+ "llm_top_1_test_accuracy": 0.6401999999999999,
191
+ "llm_top_2_test_accuracy": 0.7786000000000001,
192
+ "llm_top_5_test_accuracy": 0.9103999999999999,
193
+ "llm_top_10_test_accuracy": null,
194
+ "llm_top_20_test_accuracy": null,
195
+ "llm_top_50_test_accuracy": null,
196
+ "llm_top_100_test_accuracy": null,
197
+ "sae_test_accuracy": 0.9992000579833984,
198
+ "sae_top_1_test_accuracy": 0.8977999999999999,
199
+ "sae_top_2_test_accuracy": 0.9555999999999999,
200
+ "sae_top_5_test_accuracy": 0.9874,
201
+ "sae_top_10_test_accuracy": null,
202
+ "sae_top_20_test_accuracy": null,
203
+ "sae_top_50_test_accuracy": null,
204
+ "sae_top_100_test_accuracy": null
205
+ }
206
+ ],
207
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
208
+ "sae_lens_id": "custom_sae",
209
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_1",
210
+ "sae_lens_version": "5.4.2",
211
+ "sae_cfg_dict": {
212
+ "model_name": "gemma-2-2b",
213
+ "d_in": 2304,
214
+ "d_sae": 65536,
215
+ "hook_layer": 12,
216
+ "hook_name": "blocks.12.hook_resid_post",
217
+ "context_size": null,
218
+ "hook_head_index": null,
219
+ "architecture": "topk",
220
+ "apply_b_dec_to_input": null,
221
+ "finetuning_scaling_factor": null,
222
+ "activation_fn_str": "",
223
+ "prepend_bos": true,
224
+ "normalize_activations": "none",
225
+ "dtype": "bfloat16",
226
+ "device": "",
227
+ "dataset_path": "",
228
+ "dataset_trust_remote_code": true,
229
+ "seqpos_slice": [
230
+ null
231
+ ],
232
+ "training_tokens": -100000,
233
+ "sae_lens_training_version": null,
234
+ "neuronpedia_id": null
235
+ },
236
+ "eval_result_unstructured": {
237
+ "LabHC/bias_in_bios_class_set1_results": {
238
+ "sae_test_accuracy": {
239
+ "0": 0.9430000185966492,
240
+ "1": 0.9580000638961792,
241
+ "2": 0.9450000524520874,
242
+ "6": 0.9880000352859497,
243
+ "9": 0.9660000205039978
244
+ },
245
+ "llm_test_accuracy": {
246
+ "0": 0.9510000348091125,
247
+ "1": 0.9670000672340393,
248
+ "2": 0.9530000686645508,
249
+ "6": 0.987000048160553,
250
+ "9": 0.9760000705718994
251
+ },
252
+ "llm_top_1_test_accuracy": {
253
+ "0": 0.577,
254
+ "1": 0.613,
255
+ "2": 0.662,
256
+ "6": 0.787,
257
+ "9": 0.56
258
+ },
259
+ "llm_top_2_test_accuracy": {
260
+ "0": 0.574,
261
+ "1": 0.66,
262
+ "2": 0.718,
263
+ "6": 0.811,
264
+ "9": 0.714
265
+ },
266
+ "llm_top_5_test_accuracy": {
267
+ "0": 0.713,
268
+ "1": 0.711,
269
+ "2": 0.755,
270
+ "6": 0.895,
271
+ "9": 0.861
272
+ },
273
+ "sae_top_1_test_accuracy": {
274
+ "0": 0.578,
275
+ "1": 0.622,
276
+ "2": 0.738,
277
+ "6": 0.823,
278
+ "9": 0.553
279
+ },
280
+ "sae_top_2_test_accuracy": {
281
+ "0": 0.609,
282
+ "1": 0.671,
283
+ "2": 0.911,
284
+ "6": 0.974,
285
+ "9": 0.736
286
+ },
287
+ "sae_top_5_test_accuracy": {
288
+ "0": 0.869,
289
+ "1": 0.83,
290
+ "2": 0.92,
291
+ "6": 0.978,
292
+ "9": 0.919
293
+ }
294
+ },
295
+ "LabHC/bias_in_bios_class_set2_results": {
296
+ "sae_test_accuracy": {
297
+ "11": 0.9590000510215759,
298
+ "13": 0.9530000686645508,
299
+ "14": 0.9430000185966492,
300
+ "18": 0.9190000295639038,
301
+ "19": 0.9630000591278076
302
+ },
303
+ "llm_test_accuracy": {
304
+ "11": 0.9550000429153442,
305
+ "13": 0.9550000429153442,
306
+ "14": 0.9550000429153442,
307
+ "18": 0.9330000281333923,
308
+ "19": 0.9530000686645508
309
+ },
310
+ "llm_top_1_test_accuracy": {
311
+ "11": 0.557,
312
+ "13": 0.673,
313
+ "14": 0.645,
314
+ "18": 0.697,
315
+ "19": 0.787
316
+ },
317
+ "llm_top_2_test_accuracy": {
318
+ "11": 0.705,
319
+ "13": 0.718,
320
+ "14": 0.679,
321
+ "18": 0.73,
322
+ "19": 0.783
323
+ },
324
+ "llm_top_5_test_accuracy": {
325
+ "11": 0.794,
326
+ "13": 0.744,
327
+ "14": 0.724,
328
+ "18": 0.713,
329
+ "19": 0.833
330
+ },
331
+ "sae_top_1_test_accuracy": {
332
+ "11": 0.592,
333
+ "13": 0.679,
334
+ "14": 0.636,
335
+ "18": 0.689,
336
+ "19": 0.81
337
+ },
338
+ "sae_top_2_test_accuracy": {
339
+ "11": 0.755,
340
+ "13": 0.671,
341
+ "14": 0.875,
342
+ "18": 0.715,
343
+ "19": 0.836
344
+ },
345
+ "sae_top_5_test_accuracy": {
346
+ "11": 0.865,
347
+ "13": 0.705,
348
+ "14": 0.889,
349
+ "18": 0.725,
350
+ "19": 0.895
351
+ }
352
+ },
353
+ "LabHC/bias_in_bios_class_set3_results": {
354
+ "sae_test_accuracy": {
355
+ "20": 0.9540000557899475,
356
+ "21": 0.9180000424385071,
357
+ "22": 0.9100000262260437,
358
+ "25": 0.9590000510215759,
359
+ "26": 0.8890000581741333
360
+ },
361
+ "llm_test_accuracy": {
362
+ "20": 0.9570000171661377,
363
+ "21": 0.9150000214576721,
364
+ "22": 0.9230000376701355,
365
+ "25": 0.9610000252723694,
366
+ "26": 0.89000004529953
367
+ },
368
+ "llm_top_1_test_accuracy": {
369
+ "20": 0.716,
370
+ "21": 0.761,
371
+ "22": 0.648,
372
+ "25": 0.692,
373
+ "26": 0.618
374
+ },
375
+ "llm_top_2_test_accuracy": {
376
+ "20": 0.805,
377
+ "21": 0.762,
378
+ "22": 0.649,
379
+ "25": 0.766,
380
+ "26": 0.671
381
+ },
382
+ "llm_top_5_test_accuracy": {
383
+ "20": 0.875,
384
+ "21": 0.783,
385
+ "22": 0.711,
386
+ "25": 0.782,
387
+ "26": 0.671
388
+ },
389
+ "sae_top_1_test_accuracy": {
390
+ "20": 0.896,
391
+ "21": 0.739,
392
+ "22": 0.859,
393
+ "25": 0.718,
394
+ "26": 0.621
395
+ },
396
+ "sae_top_2_test_accuracy": {
397
+ "20": 0.905,
398
+ "21": 0.743,
399
+ "22": 0.866,
400
+ "25": 0.869,
401
+ "26": 0.629
402
+ },
403
+ "sae_top_5_test_accuracy": {
404
+ "20": 0.917,
405
+ "21": 0.815,
406
+ "22": 0.847,
407
+ "25": 0.903,
408
+ "26": 0.736
409
+ }
410
+ },
411
+ "canrager/amazon_reviews_mcauley_1and5_results": {
412
+ "sae_test_accuracy": {
413
+ "1": 0.9390000700950623,
414
+ "2": 0.9260000586509705,
415
+ "3": 0.9150000214576721,
416
+ "5": 0.9180000424385071,
417
+ "6": 0.8650000691413879
418
+ },
419
+ "llm_test_accuracy": {
420
+ "1": 0.9460000395774841,
421
+ "2": 0.9330000281333923,
422
+ "3": 0.9130000472068787,
423
+ "5": 0.9160000681877136,
424
+ "6": 0.8500000238418579
425
+ },
426
+ "llm_top_1_test_accuracy": {
427
+ "1": 0.674,
428
+ "2": 0.587,
429
+ "3": 0.601,
430
+ "5": 0.583,
431
+ "6": 0.593
432
+ },
433
+ "llm_top_2_test_accuracy": {
434
+ "1": 0.737,
435
+ "2": 0.632,
436
+ "3": 0.605,
437
+ "5": 0.634,
438
+ "6": 0.638
439
+ },
440
+ "llm_top_5_test_accuracy": {
441
+ "1": 0.763,
442
+ "2": 0.626,
443
+ "3": 0.63,
444
+ "5": 0.656,
445
+ "6": 0.689
446
+ },
447
+ "sae_top_1_test_accuracy": {
448
+ "1": 0.849,
449
+ "2": 0.832,
450
+ "3": 0.608,
451
+ "5": 0.528,
452
+ "6": 0.604
453
+ },
454
+ "sae_top_2_test_accuracy": {
455
+ "1": 0.87,
456
+ "2": 0.826,
457
+ "3": 0.677,
458
+ "5": 0.578,
459
+ "6": 0.658
460
+ },
461
+ "sae_top_5_test_accuracy": {
462
+ "1": 0.883,
463
+ "2": 0.882,
464
+ "3": 0.79,
465
+ "5": 0.838,
466
+ "6": 0.726
467
+ }
468
+ },
469
+ "canrager/amazon_reviews_mcauley_1and5_sentiment_results": {
470
+ "sae_test_accuracy": {
471
+ "1.0": 0.9720000624656677,
472
+ "5.0": 0.9690000414848328
473
+ },
474
+ "llm_test_accuracy": {
475
+ "1.0": 0.9820000529289246,
476
+ "5.0": 0.9800000190734863
477
+ },
478
+ "llm_top_1_test_accuracy": {
479
+ "1.0": 0.673,
480
+ "5.0": 0.673
481
+ },
482
+ "llm_top_2_test_accuracy": {
483
+ "1.0": 0.724,
484
+ "5.0": 0.724
485
+ },
486
+ "llm_top_5_test_accuracy": {
487
+ "1.0": 0.766,
488
+ "5.0": 0.766
489
+ },
490
+ "sae_top_1_test_accuracy": {
491
+ "1.0": 0.911,
492
+ "5.0": 0.911
493
+ },
494
+ "sae_top_2_test_accuracy": {
495
+ "1.0": 0.938,
496
+ "5.0": 0.938
497
+ },
498
+ "sae_top_5_test_accuracy": {
499
+ "1.0": 0.934,
500
+ "5.0": 0.934
501
+ }
502
+ },
503
+ "codeparrot/github-code_results": {
504
+ "sae_test_accuracy": {
505
+ "C": 0.9570000171661377,
506
+ "Python": 0.9810000658035278,
507
+ "HTML": 0.9880000352859497,
508
+ "Java": 0.9650000333786011,
509
+ "PHP": 0.9500000476837158
510
+ },
511
+ "llm_test_accuracy": {
512
+ "C": 0.9450000524520874,
513
+ "Python": 0.9890000224113464,
514
+ "HTML": 0.987000048160553,
515
+ "Java": 0.9620000720024109,
516
+ "PHP": 0.9530000686645508
517
+ },
518
+ "llm_top_1_test_accuracy": {
519
+ "C": 0.669,
520
+ "Python": 0.638,
521
+ "HTML": 0.788,
522
+ "Java": 0.621,
523
+ "PHP": 0.601
524
+ },
525
+ "llm_top_2_test_accuracy": {
526
+ "C": 0.656,
527
+ "Python": 0.671,
528
+ "HTML": 0.811,
529
+ "Java": 0.678,
530
+ "PHP": 0.631
531
+ },
532
+ "llm_top_5_test_accuracy": {
533
+ "C": 0.744,
534
+ "Python": 0.735,
535
+ "HTML": 0.904,
536
+ "Java": 0.726,
537
+ "PHP": 0.672
538
+ },
539
+ "sae_top_1_test_accuracy": {
540
+ "C": 0.628,
541
+ "Python": 0.625,
542
+ "HTML": 0.702,
543
+ "Java": 0.644,
544
+ "PHP": 0.593
545
+ },
546
+ "sae_top_2_test_accuracy": {
547
+ "C": 0.616,
548
+ "Python": 0.661,
549
+ "HTML": 0.75,
550
+ "Java": 0.663,
551
+ "PHP": 0.59
552
+ },
553
+ "sae_top_5_test_accuracy": {
554
+ "C": 0.646,
555
+ "Python": 0.645,
556
+ "HTML": 0.944,
557
+ "Java": 0.693,
558
+ "PHP": 0.909
559
+ }
560
+ },
561
+ "fancyzhx/ag_news_results": {
562
+ "sae_test_accuracy": {
563
+ "0": 0.9500000476837158,
564
+ "1": 0.9800000190734863,
565
+ "2": 0.9280000329017639,
566
+ "3": 0.9460000395774841
567
+ },
568
+ "llm_test_accuracy": {
569
+ "0": 0.937000036239624,
570
+ "1": 0.987000048160553,
571
+ "2": 0.9300000667572021,
572
+ "3": 0.9520000219345093
573
+ },
574
+ "llm_top_1_test_accuracy": {
575
+ "0": 0.586,
576
+ "1": 0.658,
577
+ "2": 0.669,
578
+ "3": 0.644
579
+ },
580
+ "llm_top_2_test_accuracy": {
581
+ "0": 0.809,
582
+ "1": 0.8,
583
+ "2": 0.691,
584
+ "3": 0.814
585
+ },
586
+ "llm_top_5_test_accuracy": {
587
+ "0": 0.827,
588
+ "1": 0.877,
589
+ "2": 0.748,
590
+ "3": 0.838
591
+ },
592
+ "sae_top_1_test_accuracy": {
593
+ "0": 0.733,
594
+ "1": 0.68,
595
+ "2": 0.581,
596
+ "3": 0.626
597
+ },
598
+ "sae_top_2_test_accuracy": {
599
+ "0": 0.747,
600
+ "1": 0.692,
601
+ "2": 0.604,
602
+ "3": 0.638
603
+ },
604
+ "sae_top_5_test_accuracy": {
605
+ "0": 0.781,
606
+ "1": 0.798,
607
+ "2": 0.691,
608
+ "3": 0.811
609
+ }
610
+ },
611
+ "Helsinki-NLP/europarl_results": {
612
+ "sae_test_accuracy": {
613
+ "en": 0.999000072479248,
614
+ "fr": 0.999000072479248,
615
+ "de": 1.0,
616
+ "es": 0.999000072479248,
617
+ "nl": 0.999000072479248
618
+ },
619
+ "llm_test_accuracy": {
620
+ "en": 1.0,
621
+ "fr": 1.0,
622
+ "de": 1.0,
623
+ "es": 0.9980000257492065,
624
+ "nl": 0.999000072479248
625
+ },
626
+ "llm_top_1_test_accuracy": {
627
+ "en": 0.742,
628
+ "fr": 0.575,
629
+ "de": 0.737,
630
+ "es": 0.504,
631
+ "nl": 0.643
632
+ },
633
+ "llm_top_2_test_accuracy": {
634
+ "en": 0.822,
635
+ "fr": 0.593,
636
+ "de": 0.83,
637
+ "es": 0.905,
638
+ "nl": 0.743
639
+ },
640
+ "llm_top_5_test_accuracy": {
641
+ "en": 0.898,
642
+ "fr": 0.908,
643
+ "de": 0.908,
644
+ "es": 0.982,
645
+ "nl": 0.856
646
+ },
647
+ "sae_top_1_test_accuracy": {
648
+ "en": 0.848,
649
+ "fr": 0.991,
650
+ "de": 0.93,
651
+ "es": 0.989,
652
+ "nl": 0.731
653
+ },
654
+ "sae_top_2_test_accuracy": {
655
+ "en": 0.844,
656
+ "fr": 0.996,
657
+ "de": 0.945,
658
+ "es": 0.995,
659
+ "nl": 0.998
660
+ },
661
+ "sae_top_5_test_accuracy": {
662
+ "en": 0.997,
663
+ "fr": 0.998,
664
+ "de": 0.95,
665
+ "es": 0.995,
666
+ "nl": 0.997
667
+ }
668
+ }
669
+ }
670
+ }
eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_2_custom_sae_eval_results.json ADDED
@@ -0,0 +1,670 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "sparse_probing",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "LabHC/bias_in_bios_class_set2",
8
+ "LabHC/bias_in_bios_class_set3",
9
+ "canrager/amazon_reviews_mcauley_1and5",
10
+ "canrager/amazon_reviews_mcauley_1and5_sentiment",
11
+ "codeparrot/github-code",
12
+ "fancyzhx/ag_news",
13
+ "Helsinki-NLP/europarl"
14
+ ],
15
+ "probe_train_set_size": 4000,
16
+ "probe_test_set_size": 1000,
17
+ "context_length": 128,
18
+ "sae_batch_size": 125,
19
+ "llm_batch_size": 32,
20
+ "llm_dtype": "bfloat16",
21
+ "model_name": "gemma-2-2b",
22
+ "k_values": [
23
+ 1,
24
+ 2,
25
+ 5
26
+ ],
27
+ "lower_vram_usage": false
28
+ },
29
+ "eval_id": "e2b45094-2ec8-4d29-8b63-1c7e9be96bdd",
30
+ "datetime_epoch_millis": 1740126182814,
31
+ "eval_result_metrics": {
32
+ "llm": {
33
+ "llm_test_accuracy": 0.9571125406771899,
34
+ "llm_top_1_test_accuracy": 0.6527562499999999,
35
+ "llm_top_2_test_accuracy": 0.7210875,
36
+ "llm_top_5_test_accuracy": 0.7801125,
37
+ "llm_top_10_test_accuracy": null,
38
+ "llm_top_20_test_accuracy": null,
39
+ "llm_top_50_test_accuracy": null,
40
+ "llm_top_100_test_accuracy": null
41
+ },
42
+ "sae": {
43
+ "sae_test_accuracy": 0.9562625385820867,
44
+ "sae_top_1_test_accuracy": 0.7503375,
45
+ "sae_top_2_test_accuracy": 0.79488125,
46
+ "sae_top_5_test_accuracy": 0.84885,
47
+ "sae_top_10_test_accuracy": null,
48
+ "sae_top_20_test_accuracy": null,
49
+ "sae_top_50_test_accuracy": null,
50
+ "sae_top_100_test_accuracy": null
51
+ }
52
+ },
53
+ "eval_result_details": [
54
+ {
55
+ "dataset_name": "LabHC/bias_in_bios_class_set1_results",
56
+ "llm_test_accuracy": 0.966800057888031,
57
+ "llm_top_1_test_accuracy": 0.6397999999999999,
58
+ "llm_top_2_test_accuracy": 0.6954,
59
+ "llm_top_5_test_accuracy": 0.7869999999999999,
60
+ "llm_top_10_test_accuracy": null,
61
+ "llm_top_20_test_accuracy": null,
62
+ "llm_top_50_test_accuracy": null,
63
+ "llm_top_100_test_accuracy": null,
64
+ "sae_test_accuracy": 0.9628000497817993,
65
+ "sae_top_1_test_accuracy": 0.777,
66
+ "sae_top_2_test_accuracy": 0.8455999999999999,
67
+ "sae_top_5_test_accuracy": 0.892,
68
+ "sae_top_10_test_accuracy": null,
69
+ "sae_top_20_test_accuracy": null,
70
+ "sae_top_50_test_accuracy": null,
71
+ "sae_top_100_test_accuracy": null
72
+ },
73
+ {
74
+ "dataset_name": "LabHC/bias_in_bios_class_set2_results",
75
+ "llm_test_accuracy": 0.9502000451087952,
76
+ "llm_top_1_test_accuracy": 0.6718,
77
+ "llm_top_2_test_accuracy": 0.7230000000000001,
78
+ "llm_top_5_test_accuracy": 0.7615999999999999,
79
+ "llm_top_10_test_accuracy": null,
80
+ "llm_top_20_test_accuracy": null,
81
+ "llm_top_50_test_accuracy": null,
82
+ "llm_top_100_test_accuracy": null,
83
+ "sae_test_accuracy": 0.9502000451087952,
84
+ "sae_top_1_test_accuracy": 0.6762,
85
+ "sae_top_2_test_accuracy": 0.7348000000000001,
86
+ "sae_top_5_test_accuracy": 0.8061999999999999,
87
+ "sae_top_10_test_accuracy": null,
88
+ "sae_top_20_test_accuracy": null,
89
+ "sae_top_50_test_accuracy": null,
90
+ "sae_top_100_test_accuracy": null
91
+ },
92
+ {
93
+ "dataset_name": "LabHC/bias_in_bios_class_set3_results",
94
+ "llm_test_accuracy": 0.9292000293731689,
95
+ "llm_top_1_test_accuracy": 0.687,
96
+ "llm_top_2_test_accuracy": 0.7306000000000001,
97
+ "llm_top_5_test_accuracy": 0.7644,
98
+ "llm_top_10_test_accuracy": null,
99
+ "llm_top_20_test_accuracy": null,
100
+ "llm_top_50_test_accuracy": null,
101
+ "llm_top_100_test_accuracy": null,
102
+ "sae_test_accuracy": 0.9298000335693359,
103
+ "sae_top_1_test_accuracy": 0.7222,
104
+ "sae_top_2_test_accuracy": 0.8029999999999999,
105
+ "sae_top_5_test_accuracy": 0.8331999999999999,
106
+ "sae_top_10_test_accuracy": null,
107
+ "sae_top_20_test_accuracy": null,
108
+ "sae_top_50_test_accuracy": null,
109
+ "sae_top_100_test_accuracy": null
110
+ },
111
+ {
112
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results",
113
+ "llm_test_accuracy": 0.9116000413894654,
114
+ "llm_top_1_test_accuracy": 0.6076,
115
+ "llm_top_2_test_accuracy": 0.6492,
116
+ "llm_top_5_test_accuracy": 0.6728000000000001,
117
+ "llm_top_10_test_accuracy": null,
118
+ "llm_top_20_test_accuracy": null,
119
+ "llm_top_50_test_accuracy": null,
120
+ "llm_top_100_test_accuracy": null,
121
+ "sae_test_accuracy": 0.915600037574768,
122
+ "sae_top_1_test_accuracy": 0.7064,
123
+ "sae_top_2_test_accuracy": 0.7431999999999999,
124
+ "sae_top_5_test_accuracy": 0.7992,
125
+ "sae_top_10_test_accuracy": null,
126
+ "sae_top_20_test_accuracy": null,
127
+ "sae_top_50_test_accuracy": null,
128
+ "sae_top_100_test_accuracy": null
129
+ },
130
+ {
131
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results",
132
+ "llm_test_accuracy": 0.9810000360012054,
133
+ "llm_top_1_test_accuracy": 0.673,
134
+ "llm_top_2_test_accuracy": 0.724,
135
+ "llm_top_5_test_accuracy": 0.766,
136
+ "llm_top_10_test_accuracy": null,
137
+ "llm_top_20_test_accuracy": null,
138
+ "llm_top_50_test_accuracy": null,
139
+ "llm_top_100_test_accuracy": null,
140
+ "sae_test_accuracy": 0.9735000431537628,
141
+ "sae_top_1_test_accuracy": 0.916,
142
+ "sae_top_2_test_accuracy": 0.914,
143
+ "sae_top_5_test_accuracy": 0.921,
144
+ "sae_top_10_test_accuracy": null,
145
+ "sae_top_20_test_accuracy": null,
146
+ "sae_top_50_test_accuracy": null,
147
+ "sae_top_100_test_accuracy": null
148
+ },
149
+ {
150
+ "dataset_name": "codeparrot/github-code_results",
151
+ "llm_test_accuracy": 0.9672000527381897,
152
+ "llm_top_1_test_accuracy": 0.6634,
153
+ "llm_top_2_test_accuracy": 0.6894,
154
+ "llm_top_5_test_accuracy": 0.7562,
155
+ "llm_top_10_test_accuracy": null,
156
+ "llm_top_20_test_accuracy": null,
157
+ "llm_top_50_test_accuracy": null,
158
+ "llm_top_100_test_accuracy": null,
159
+ "sae_test_accuracy": 0.9688000321388245,
160
+ "sae_top_1_test_accuracy": 0.6359999999999999,
161
+ "sae_top_2_test_accuracy": 0.6656000000000001,
162
+ "sae_top_5_test_accuracy": 0.7496,
163
+ "sae_top_10_test_accuracy": null,
164
+ "sae_top_20_test_accuracy": null,
165
+ "sae_top_50_test_accuracy": null,
166
+ "sae_top_100_test_accuracy": null
167
+ },
168
+ {
169
+ "dataset_name": "fancyzhx/ag_news_results",
170
+ "llm_test_accuracy": 0.9515000432729721,
171
+ "llm_top_1_test_accuracy": 0.63925,
172
+ "llm_top_2_test_accuracy": 0.7785,
173
+ "llm_top_5_test_accuracy": 0.8225,
174
+ "llm_top_10_test_accuracy": null,
175
+ "llm_top_20_test_accuracy": null,
176
+ "llm_top_50_test_accuracy": null,
177
+ "llm_top_100_test_accuracy": null,
178
+ "sae_test_accuracy": 0.9510000348091125,
179
+ "sae_top_1_test_accuracy": 0.6745000000000001,
180
+ "sae_top_2_test_accuracy": 0.69925,
181
+ "sae_top_5_test_accuracy": 0.808,
182
+ "sae_top_10_test_accuracy": null,
183
+ "sae_top_20_test_accuracy": null,
184
+ "sae_top_50_test_accuracy": null,
185
+ "sae_top_100_test_accuracy": null
186
+ },
187
+ {
188
+ "dataset_name": "Helsinki-NLP/europarl_results",
189
+ "llm_test_accuracy": 0.9994000196456909,
190
+ "llm_top_1_test_accuracy": 0.6401999999999999,
191
+ "llm_top_2_test_accuracy": 0.7786000000000001,
192
+ "llm_top_5_test_accuracy": 0.9103999999999999,
193
+ "llm_top_10_test_accuracy": null,
194
+ "llm_top_20_test_accuracy": null,
195
+ "llm_top_50_test_accuracy": null,
196
+ "llm_top_100_test_accuracy": null,
197
+ "sae_test_accuracy": 0.9984000325202942,
198
+ "sae_top_1_test_accuracy": 0.8943999999999999,
199
+ "sae_top_2_test_accuracy": 0.9536000000000001,
200
+ "sae_top_5_test_accuracy": 0.9816,
201
+ "sae_top_10_test_accuracy": null,
202
+ "sae_top_20_test_accuracy": null,
203
+ "sae_top_50_test_accuracy": null,
204
+ "sae_top_100_test_accuracy": null
205
+ }
206
+ ],
207
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
208
+ "sae_lens_id": "custom_sae",
209
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_2",
210
+ "sae_lens_version": "5.4.2",
211
+ "sae_cfg_dict": {
212
+ "model_name": "gemma-2-2b",
213
+ "d_in": 2304,
214
+ "d_sae": 65536,
215
+ "hook_layer": 12,
216
+ "hook_name": "blocks.12.hook_resid_post",
217
+ "context_size": null,
218
+ "hook_head_index": null,
219
+ "architecture": "topk",
220
+ "apply_b_dec_to_input": null,
221
+ "finetuning_scaling_factor": null,
222
+ "activation_fn_str": "",
223
+ "prepend_bos": true,
224
+ "normalize_activations": "none",
225
+ "dtype": "bfloat16",
226
+ "device": "",
227
+ "dataset_path": "",
228
+ "dataset_trust_remote_code": true,
229
+ "seqpos_slice": [
230
+ null
231
+ ],
232
+ "training_tokens": -100000,
233
+ "sae_lens_training_version": null,
234
+ "neuronpedia_id": null
235
+ },
236
+ "eval_result_unstructured": {
237
+ "LabHC/bias_in_bios_class_set1_results": {
238
+ "sae_test_accuracy": {
239
+ "0": 0.9460000395774841,
240
+ "1": 0.9640000462532043,
241
+ "2": 0.9460000395774841,
242
+ "6": 0.9860000610351562,
243
+ "9": 0.9720000624656677
244
+ },
245
+ "llm_test_accuracy": {
246
+ "0": 0.9510000348091125,
247
+ "1": 0.9670000672340393,
248
+ "2": 0.9530000686645508,
249
+ "6": 0.987000048160553,
250
+ "9": 0.9760000705718994
251
+ },
252
+ "llm_top_1_test_accuracy": {
253
+ "0": 0.577,
254
+ "1": 0.613,
255
+ "2": 0.662,
256
+ "6": 0.787,
257
+ "9": 0.56
258
+ },
259
+ "llm_top_2_test_accuracy": {
260
+ "0": 0.574,
261
+ "1": 0.66,
262
+ "2": 0.718,
263
+ "6": 0.811,
264
+ "9": 0.714
265
+ },
266
+ "llm_top_5_test_accuracy": {
267
+ "0": 0.713,
268
+ "1": 0.711,
269
+ "2": 0.755,
270
+ "6": 0.895,
271
+ "9": 0.861
272
+ },
273
+ "sae_top_1_test_accuracy": {
274
+ "0": 0.586,
275
+ "1": 0.638,
276
+ "2": 0.899,
277
+ "6": 0.83,
278
+ "9": 0.932
279
+ },
280
+ "sae_top_2_test_accuracy": {
281
+ "0": 0.62,
282
+ "1": 0.796,
283
+ "2": 0.899,
284
+ "6": 0.978,
285
+ "9": 0.935
286
+ },
287
+ "sae_top_5_test_accuracy": {
288
+ "0": 0.807,
289
+ "1": 0.841,
290
+ "2": 0.906,
291
+ "6": 0.977,
292
+ "9": 0.929
293
+ }
294
+ },
295
+ "LabHC/bias_in_bios_class_set2_results": {
296
+ "sae_test_accuracy": {
297
+ "11": 0.9600000381469727,
298
+ "13": 0.9530000686645508,
299
+ "14": 0.9550000429153442,
300
+ "18": 0.9270000457763672,
301
+ "19": 0.956000030040741
302
+ },
303
+ "llm_test_accuracy": {
304
+ "11": 0.9550000429153442,
305
+ "13": 0.9550000429153442,
306
+ "14": 0.9550000429153442,
307
+ "18": 0.9330000281333923,
308
+ "19": 0.9530000686645508
309
+ },
310
+ "llm_top_1_test_accuracy": {
311
+ "11": 0.557,
312
+ "13": 0.673,
313
+ "14": 0.645,
314
+ "18": 0.697,
315
+ "19": 0.787
316
+ },
317
+ "llm_top_2_test_accuracy": {
318
+ "11": 0.705,
319
+ "13": 0.718,
320
+ "14": 0.679,
321
+ "18": 0.73,
322
+ "19": 0.783
323
+ },
324
+ "llm_top_5_test_accuracy": {
325
+ "11": 0.794,
326
+ "13": 0.744,
327
+ "14": 0.724,
328
+ "18": 0.713,
329
+ "19": 0.833
330
+ },
331
+ "sae_top_1_test_accuracy": {
332
+ "11": 0.583,
333
+ "13": 0.681,
334
+ "14": 0.638,
335
+ "18": 0.681,
336
+ "19": 0.798
337
+ },
338
+ "sae_top_2_test_accuracy": {
339
+ "11": 0.746,
340
+ "13": 0.673,
341
+ "14": 0.734,
342
+ "18": 0.696,
343
+ "19": 0.825
344
+ },
345
+ "sae_top_5_test_accuracy": {
346
+ "11": 0.859,
347
+ "13": 0.671,
348
+ "14": 0.887,
349
+ "18": 0.722,
350
+ "19": 0.892
351
+ }
352
+ },
353
+ "LabHC/bias_in_bios_class_set3_results": {
354
+ "sae_test_accuracy": {
355
+ "20": 0.956000030040741,
356
+ "21": 0.9300000667572021,
357
+ "22": 0.9100000262260437,
358
+ "25": 0.956000030040741,
359
+ "26": 0.8970000147819519
360
+ },
361
+ "llm_test_accuracy": {
362
+ "20": 0.9570000171661377,
363
+ "21": 0.9150000214576721,
364
+ "22": 0.9230000376701355,
365
+ "25": 0.9610000252723694,
366
+ "26": 0.89000004529953
367
+ },
368
+ "llm_top_1_test_accuracy": {
369
+ "20": 0.716,
370
+ "21": 0.761,
371
+ "22": 0.648,
372
+ "25": 0.692,
373
+ "26": 0.618
374
+ },
375
+ "llm_top_2_test_accuracy": {
376
+ "20": 0.805,
377
+ "21": 0.762,
378
+ "22": 0.649,
379
+ "25": 0.766,
380
+ "26": 0.671
381
+ },
382
+ "llm_top_5_test_accuracy": {
383
+ "20": 0.875,
384
+ "21": 0.783,
385
+ "22": 0.711,
386
+ "25": 0.782,
387
+ "26": 0.671
388
+ },
389
+ "sae_top_1_test_accuracy": {
390
+ "20": 0.893,
391
+ "21": 0.554,
392
+ "22": 0.866,
393
+ "25": 0.691,
394
+ "26": 0.607
395
+ },
396
+ "sae_top_2_test_accuracy": {
397
+ "20": 0.891,
398
+ "21": 0.744,
399
+ "22": 0.859,
400
+ "25": 0.869,
401
+ "26": 0.652
402
+ },
403
+ "sae_top_5_test_accuracy": {
404
+ "20": 0.914,
405
+ "21": 0.799,
406
+ "22": 0.863,
407
+ "25": 0.889,
408
+ "26": 0.701
409
+ }
410
+ },
411
+ "canrager/amazon_reviews_mcauley_1and5_results": {
412
+ "sae_test_accuracy": {
413
+ "1": 0.9470000267028809,
414
+ "2": 0.9410000443458557,
415
+ "3": 0.909000039100647,
416
+ "5": 0.9180000424385071,
417
+ "6": 0.8630000352859497
418
+ },
419
+ "llm_test_accuracy": {
420
+ "1": 0.9460000395774841,
421
+ "2": 0.9330000281333923,
422
+ "3": 0.9130000472068787,
423
+ "5": 0.9160000681877136,
424
+ "6": 0.8500000238418579
425
+ },
426
+ "llm_top_1_test_accuracy": {
427
+ "1": 0.674,
428
+ "2": 0.587,
429
+ "3": 0.601,
430
+ "5": 0.583,
431
+ "6": 0.593
432
+ },
433
+ "llm_top_2_test_accuracy": {
434
+ "1": 0.737,
435
+ "2": 0.632,
436
+ "3": 0.605,
437
+ "5": 0.634,
438
+ "6": 0.638
439
+ },
440
+ "llm_top_5_test_accuracy": {
441
+ "1": 0.763,
442
+ "2": 0.626,
443
+ "3": 0.63,
444
+ "5": 0.656,
445
+ "6": 0.689
446
+ },
447
+ "sae_top_1_test_accuracy": {
448
+ "1": 0.81,
449
+ "2": 0.817,
450
+ "3": 0.579,
451
+ "5": 0.75,
452
+ "6": 0.576
453
+ },
454
+ "sae_top_2_test_accuracy": {
455
+ "1": 0.873,
456
+ "2": 0.825,
457
+ "3": 0.655,
458
+ "5": 0.752,
459
+ "6": 0.611
460
+ },
461
+ "sae_top_5_test_accuracy": {
462
+ "1": 0.906,
463
+ "2": 0.877,
464
+ "3": 0.694,
465
+ "5": 0.875,
466
+ "6": 0.644
467
+ }
468
+ },
469
+ "canrager/amazon_reviews_mcauley_1and5_sentiment_results": {
470
+ "sae_test_accuracy": {
471
+ "1.0": 0.9740000367164612,
472
+ "5.0": 0.9730000495910645
473
+ },
474
+ "llm_test_accuracy": {
475
+ "1.0": 0.9820000529289246,
476
+ "5.0": 0.9800000190734863
477
+ },
478
+ "llm_top_1_test_accuracy": {
479
+ "1.0": 0.673,
480
+ "5.0": 0.673
481
+ },
482
+ "llm_top_2_test_accuracy": {
483
+ "1.0": 0.724,
484
+ "5.0": 0.724
485
+ },
486
+ "llm_top_5_test_accuracy": {
487
+ "1.0": 0.766,
488
+ "5.0": 0.766
489
+ },
490
+ "sae_top_1_test_accuracy": {
491
+ "1.0": 0.916,
492
+ "5.0": 0.916
493
+ },
494
+ "sae_top_2_test_accuracy": {
495
+ "1.0": 0.914,
496
+ "5.0": 0.914
497
+ },
498
+ "sae_top_5_test_accuracy": {
499
+ "1.0": 0.921,
500
+ "5.0": 0.921
501
+ }
502
+ },
503
+ "codeparrot/github-code_results": {
504
+ "sae_test_accuracy": {
505
+ "C": 0.9570000171661377,
506
+ "Python": 0.9800000190734863,
507
+ "HTML": 0.987000048160553,
508
+ "Java": 0.9640000462532043,
509
+ "PHP": 0.956000030040741
510
+ },
511
+ "llm_test_accuracy": {
512
+ "C": 0.9450000524520874,
513
+ "Python": 0.9890000224113464,
514
+ "HTML": 0.987000048160553,
515
+ "Java": 0.9620000720024109,
516
+ "PHP": 0.9530000686645508
517
+ },
518
+ "llm_top_1_test_accuracy": {
519
+ "C": 0.669,
520
+ "Python": 0.638,
521
+ "HTML": 0.788,
522
+ "Java": 0.621,
523
+ "PHP": 0.601
524
+ },
525
+ "llm_top_2_test_accuracy": {
526
+ "C": 0.656,
527
+ "Python": 0.671,
528
+ "HTML": 0.811,
529
+ "Java": 0.678,
530
+ "PHP": 0.631
531
+ },
532
+ "llm_top_5_test_accuracy": {
533
+ "C": 0.744,
534
+ "Python": 0.735,
535
+ "HTML": 0.904,
536
+ "Java": 0.726,
537
+ "PHP": 0.672
538
+ },
539
+ "sae_top_1_test_accuracy": {
540
+ "C": 0.618,
541
+ "Python": 0.64,
542
+ "HTML": 0.688,
543
+ "Java": 0.624,
544
+ "PHP": 0.61
545
+ },
546
+ "sae_top_2_test_accuracy": {
547
+ "C": 0.637,
548
+ "Python": 0.631,
549
+ "HTML": 0.801,
550
+ "Java": 0.655,
551
+ "PHP": 0.604
552
+ },
553
+ "sae_top_5_test_accuracy": {
554
+ "C": 0.664,
555
+ "Python": 0.695,
556
+ "HTML": 0.802,
557
+ "Java": 0.678,
558
+ "PHP": 0.909
559
+ }
560
+ },
561
+ "fancyzhx/ag_news_results": {
562
+ "sae_test_accuracy": {
563
+ "0": 0.9430000185966492,
564
+ "1": 0.9830000400543213,
565
+ "2": 0.9270000457763672,
566
+ "3": 0.9510000348091125
567
+ },
568
+ "llm_test_accuracy": {
569
+ "0": 0.937000036239624,
570
+ "1": 0.987000048160553,
571
+ "2": 0.9300000667572021,
572
+ "3": 0.9520000219345093
573
+ },
574
+ "llm_top_1_test_accuracy": {
575
+ "0": 0.586,
576
+ "1": 0.658,
577
+ "2": 0.669,
578
+ "3": 0.644
579
+ },
580
+ "llm_top_2_test_accuracy": {
581
+ "0": 0.809,
582
+ "1": 0.8,
583
+ "2": 0.691,
584
+ "3": 0.814
585
+ },
586
+ "llm_top_5_test_accuracy": {
587
+ "0": 0.827,
588
+ "1": 0.877,
589
+ "2": 0.748,
590
+ "3": 0.838
591
+ },
592
+ "sae_top_1_test_accuracy": {
593
+ "0": 0.618,
594
+ "1": 0.7,
595
+ "2": 0.773,
596
+ "3": 0.607
597
+ },
598
+ "sae_top_2_test_accuracy": {
599
+ "0": 0.693,
600
+ "1": 0.704,
601
+ "2": 0.776,
602
+ "3": 0.624
603
+ },
604
+ "sae_top_5_test_accuracy": {
605
+ "0": 0.815,
606
+ "1": 0.931,
607
+ "2": 0.79,
608
+ "3": 0.696
609
+ }
610
+ },
611
+ "Helsinki-NLP/europarl_results": {
612
+ "sae_test_accuracy": {
613
+ "en": 0.999000072479248,
614
+ "fr": 0.9980000257492065,
615
+ "de": 0.9980000257492065,
616
+ "es": 1.0,
617
+ "nl": 0.9970000386238098
618
+ },
619
+ "llm_test_accuracy": {
620
+ "en": 1.0,
621
+ "fr": 1.0,
622
+ "de": 1.0,
623
+ "es": 0.9980000257492065,
624
+ "nl": 0.999000072479248
625
+ },
626
+ "llm_top_1_test_accuracy": {
627
+ "en": 0.742,
628
+ "fr": 0.575,
629
+ "de": 0.737,
630
+ "es": 0.504,
631
+ "nl": 0.643
632
+ },
633
+ "llm_top_2_test_accuracy": {
634
+ "en": 0.822,
635
+ "fr": 0.593,
636
+ "de": 0.83,
637
+ "es": 0.905,
638
+ "nl": 0.743
639
+ },
640
+ "llm_top_5_test_accuracy": {
641
+ "en": 0.898,
642
+ "fr": 0.908,
643
+ "de": 0.908,
644
+ "es": 0.982,
645
+ "nl": 0.856
646
+ },
647
+ "sae_top_1_test_accuracy": {
648
+ "en": 0.86,
649
+ "fr": 0.991,
650
+ "de": 0.933,
651
+ "es": 0.925,
652
+ "nl": 0.763
653
+ },
654
+ "sae_top_2_test_accuracy": {
655
+ "en": 0.853,
656
+ "fr": 0.993,
657
+ "de": 0.93,
658
+ "es": 0.994,
659
+ "nl": 0.998
660
+ },
661
+ "sae_top_5_test_accuracy": {
662
+ "en": 0.999,
663
+ "fr": 0.991,
664
+ "de": 0.926,
665
+ "es": 0.995,
666
+ "nl": 0.997
667
+ }
668
+ }
669
+ }
670
+ }
eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_3_custom_sae_eval_results.json ADDED
@@ -0,0 +1,670 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "sparse_probing",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "LabHC/bias_in_bios_class_set2",
8
+ "LabHC/bias_in_bios_class_set3",
9
+ "canrager/amazon_reviews_mcauley_1and5",
10
+ "canrager/amazon_reviews_mcauley_1and5_sentiment",
11
+ "codeparrot/github-code",
12
+ "fancyzhx/ag_news",
13
+ "Helsinki-NLP/europarl"
14
+ ],
15
+ "probe_train_set_size": 4000,
16
+ "probe_test_set_size": 1000,
17
+ "context_length": 128,
18
+ "sae_batch_size": 125,
19
+ "llm_batch_size": 32,
20
+ "llm_dtype": "bfloat16",
21
+ "model_name": "gemma-2-2b",
22
+ "k_values": [
23
+ 1,
24
+ 2,
25
+ 5
26
+ ],
27
+ "lower_vram_usage": false
28
+ },
29
+ "eval_id": "52fe69e1-aae7-4f2a-a60c-95a718b3be70",
30
+ "datetime_epoch_millis": 1740126318798,
31
+ "eval_result_metrics": {
32
+ "llm": {
33
+ "llm_test_accuracy": 0.9571125406771899,
34
+ "llm_top_1_test_accuracy": 0.6527562499999999,
35
+ "llm_top_2_test_accuracy": 0.7210875,
36
+ "llm_top_5_test_accuracy": 0.7801125,
37
+ "llm_top_10_test_accuracy": null,
38
+ "llm_top_20_test_accuracy": null,
39
+ "llm_top_50_test_accuracy": null,
40
+ "llm_top_100_test_accuracy": null
41
+ },
42
+ "sae": {
43
+ "sae_test_accuracy": 0.9576750457286834,
44
+ "sae_top_1_test_accuracy": 0.74648125,
45
+ "sae_top_2_test_accuracy": 0.7996875,
46
+ "sae_top_5_test_accuracy": 0.8541124999999999,
47
+ "sae_top_10_test_accuracy": null,
48
+ "sae_top_20_test_accuracy": null,
49
+ "sae_top_50_test_accuracy": null,
50
+ "sae_top_100_test_accuracy": null
51
+ }
52
+ },
53
+ "eval_result_details": [
54
+ {
55
+ "dataset_name": "LabHC/bias_in_bios_class_set1_results",
56
+ "llm_test_accuracy": 0.966800057888031,
57
+ "llm_top_1_test_accuracy": 0.6397999999999999,
58
+ "llm_top_2_test_accuracy": 0.6954,
59
+ "llm_top_5_test_accuracy": 0.7869999999999999,
60
+ "llm_top_10_test_accuracy": null,
61
+ "llm_top_20_test_accuracy": null,
62
+ "llm_top_50_test_accuracy": null,
63
+ "llm_top_100_test_accuracy": null,
64
+ "sae_test_accuracy": 0.9628000497817993,
65
+ "sae_top_1_test_accuracy": 0.7438,
66
+ "sae_top_2_test_accuracy": 0.8533999999999999,
67
+ "sae_top_5_test_accuracy": 0.9,
68
+ "sae_top_10_test_accuracy": null,
69
+ "sae_top_20_test_accuracy": null,
70
+ "sae_top_50_test_accuracy": null,
71
+ "sae_top_100_test_accuracy": null
72
+ },
73
+ {
74
+ "dataset_name": "LabHC/bias_in_bios_class_set2_results",
75
+ "llm_test_accuracy": 0.9502000451087952,
76
+ "llm_top_1_test_accuracy": 0.6718,
77
+ "llm_top_2_test_accuracy": 0.7230000000000001,
78
+ "llm_top_5_test_accuracy": 0.7615999999999999,
79
+ "llm_top_10_test_accuracy": null,
80
+ "llm_top_20_test_accuracy": null,
81
+ "llm_top_50_test_accuracy": null,
82
+ "llm_top_100_test_accuracy": null,
83
+ "sae_test_accuracy": 0.9506000399589538,
84
+ "sae_top_1_test_accuracy": 0.6769999999999999,
85
+ "sae_top_2_test_accuracy": 0.7562,
86
+ "sae_top_5_test_accuracy": 0.8155999999999999,
87
+ "sae_top_10_test_accuracy": null,
88
+ "sae_top_20_test_accuracy": null,
89
+ "sae_top_50_test_accuracy": null,
90
+ "sae_top_100_test_accuracy": null
91
+ },
92
+ {
93
+ "dataset_name": "LabHC/bias_in_bios_class_set3_results",
94
+ "llm_test_accuracy": 0.9292000293731689,
95
+ "llm_top_1_test_accuracy": 0.687,
96
+ "llm_top_2_test_accuracy": 0.7306000000000001,
97
+ "llm_top_5_test_accuracy": 0.7644,
98
+ "llm_top_10_test_accuracy": null,
99
+ "llm_top_20_test_accuracy": null,
100
+ "llm_top_50_test_accuracy": null,
101
+ "llm_top_100_test_accuracy": null,
102
+ "sae_test_accuracy": 0.934600043296814,
103
+ "sae_top_1_test_accuracy": 0.7738,
104
+ "sae_top_2_test_accuracy": 0.8134,
105
+ "sae_top_5_test_accuracy": 0.8506,
106
+ "sae_top_10_test_accuracy": null,
107
+ "sae_top_20_test_accuracy": null,
108
+ "sae_top_50_test_accuracy": null,
109
+ "sae_top_100_test_accuracy": null
110
+ },
111
+ {
112
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results",
113
+ "llm_test_accuracy": 0.9116000413894654,
114
+ "llm_top_1_test_accuracy": 0.6076,
115
+ "llm_top_2_test_accuracy": 0.6492,
116
+ "llm_top_5_test_accuracy": 0.6728000000000001,
117
+ "llm_top_10_test_accuracy": null,
118
+ "llm_top_20_test_accuracy": null,
119
+ "llm_top_50_test_accuracy": null,
120
+ "llm_top_100_test_accuracy": null,
121
+ "sae_test_accuracy": 0.9228000521659852,
122
+ "sae_top_1_test_accuracy": 0.6838,
123
+ "sae_top_2_test_accuracy": 0.76,
124
+ "sae_top_5_test_accuracy": 0.7978,
125
+ "sae_top_10_test_accuracy": null,
126
+ "sae_top_20_test_accuracy": null,
127
+ "sae_top_50_test_accuracy": null,
128
+ "sae_top_100_test_accuracy": null
129
+ },
130
+ {
131
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results",
132
+ "llm_test_accuracy": 0.9810000360012054,
133
+ "llm_top_1_test_accuracy": 0.673,
134
+ "llm_top_2_test_accuracy": 0.724,
135
+ "llm_top_5_test_accuracy": 0.766,
136
+ "llm_top_10_test_accuracy": null,
137
+ "llm_top_20_test_accuracy": null,
138
+ "llm_top_50_test_accuracy": null,
139
+ "llm_top_100_test_accuracy": null,
140
+ "sae_test_accuracy": 0.9730000495910645,
141
+ "sae_top_1_test_accuracy": 0.922,
142
+ "sae_top_2_test_accuracy": 0.92,
143
+ "sae_top_5_test_accuracy": 0.922,
144
+ "sae_top_10_test_accuracy": null,
145
+ "sae_top_20_test_accuracy": null,
146
+ "sae_top_50_test_accuracy": null,
147
+ "sae_top_100_test_accuracy": null
148
+ },
149
+ {
150
+ "dataset_name": "codeparrot/github-code_results",
151
+ "llm_test_accuracy": 0.9672000527381897,
152
+ "llm_top_1_test_accuracy": 0.6634,
153
+ "llm_top_2_test_accuracy": 0.6894,
154
+ "llm_top_5_test_accuracy": 0.7562,
155
+ "llm_top_10_test_accuracy": null,
156
+ "llm_top_20_test_accuracy": null,
157
+ "llm_top_50_test_accuracy": null,
158
+ "llm_top_100_test_accuracy": null,
159
+ "sae_test_accuracy": 0.969200050830841,
160
+ "sae_top_1_test_accuracy": 0.6379999999999999,
161
+ "sae_top_2_test_accuracy": 0.6563999999999999,
162
+ "sae_top_5_test_accuracy": 0.7608,
163
+ "sae_top_10_test_accuracy": null,
164
+ "sae_top_20_test_accuracy": null,
165
+ "sae_top_50_test_accuracy": null,
166
+ "sae_top_100_test_accuracy": null
167
+ },
168
+ {
169
+ "dataset_name": "fancyzhx/ag_news_results",
170
+ "llm_test_accuracy": 0.9515000432729721,
171
+ "llm_top_1_test_accuracy": 0.63925,
172
+ "llm_top_2_test_accuracy": 0.7785,
173
+ "llm_top_5_test_accuracy": 0.8225,
174
+ "llm_top_10_test_accuracy": null,
175
+ "llm_top_20_test_accuracy": null,
176
+ "llm_top_50_test_accuracy": null,
177
+ "llm_top_100_test_accuracy": null,
178
+ "sae_test_accuracy": 0.9500000476837158,
179
+ "sae_top_1_test_accuracy": 0.66825,
180
+ "sae_top_2_test_accuracy": 0.7095,
181
+ "sae_top_5_test_accuracy": 0.7965,
182
+ "sae_top_10_test_accuracy": null,
183
+ "sae_top_20_test_accuracy": null,
184
+ "sae_top_50_test_accuracy": null,
185
+ "sae_top_100_test_accuracy": null
186
+ },
187
+ {
188
+ "dataset_name": "Helsinki-NLP/europarl_results",
189
+ "llm_test_accuracy": 0.9994000196456909,
190
+ "llm_top_1_test_accuracy": 0.6401999999999999,
191
+ "llm_top_2_test_accuracy": 0.7786000000000001,
192
+ "llm_top_5_test_accuracy": 0.9103999999999999,
193
+ "llm_top_10_test_accuracy": null,
194
+ "llm_top_20_test_accuracy": null,
195
+ "llm_top_50_test_accuracy": null,
196
+ "llm_top_100_test_accuracy": null,
197
+ "sae_test_accuracy": 0.9984000325202942,
198
+ "sae_top_1_test_accuracy": 0.8652000000000001,
199
+ "sae_top_2_test_accuracy": 0.9286,
200
+ "sae_top_5_test_accuracy": 0.9896,
201
+ "sae_top_10_test_accuracy": null,
202
+ "sae_top_20_test_accuracy": null,
203
+ "sae_top_50_test_accuracy": null,
204
+ "sae_top_100_test_accuracy": null
205
+ }
206
+ ],
207
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
208
+ "sae_lens_id": "custom_sae",
209
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_3",
210
+ "sae_lens_version": "5.4.2",
211
+ "sae_cfg_dict": {
212
+ "model_name": "gemma-2-2b",
213
+ "d_in": 2304,
214
+ "d_sae": 65536,
215
+ "hook_layer": 12,
216
+ "hook_name": "blocks.12.hook_resid_post",
217
+ "context_size": null,
218
+ "hook_head_index": null,
219
+ "architecture": "topk",
220
+ "apply_b_dec_to_input": null,
221
+ "finetuning_scaling_factor": null,
222
+ "activation_fn_str": "",
223
+ "prepend_bos": true,
224
+ "normalize_activations": "none",
225
+ "dtype": "bfloat16",
226
+ "device": "",
227
+ "dataset_path": "",
228
+ "dataset_trust_remote_code": true,
229
+ "seqpos_slice": [
230
+ null
231
+ ],
232
+ "training_tokens": -100000,
233
+ "sae_lens_training_version": null,
234
+ "neuronpedia_id": null
235
+ },
236
+ "eval_result_unstructured": {
237
+ "LabHC/bias_in_bios_class_set1_results": {
238
+ "sae_test_accuracy": {
239
+ "0": 0.9450000524520874,
240
+ "1": 0.9590000510215759,
241
+ "2": 0.9450000524520874,
242
+ "6": 0.9900000691413879,
243
+ "9": 0.9750000238418579
244
+ },
245
+ "llm_test_accuracy": {
246
+ "0": 0.9510000348091125,
247
+ "1": 0.9670000672340393,
248
+ "2": 0.9530000686645508,
249
+ "6": 0.987000048160553,
250
+ "9": 0.9760000705718994
251
+ },
252
+ "llm_top_1_test_accuracy": {
253
+ "0": 0.577,
254
+ "1": 0.613,
255
+ "2": 0.662,
256
+ "6": 0.787,
257
+ "9": 0.56
258
+ },
259
+ "llm_top_2_test_accuracy": {
260
+ "0": 0.574,
261
+ "1": 0.66,
262
+ "2": 0.718,
263
+ "6": 0.811,
264
+ "9": 0.714
265
+ },
266
+ "llm_top_5_test_accuracy": {
267
+ "0": 0.713,
268
+ "1": 0.711,
269
+ "2": 0.755,
270
+ "6": 0.895,
271
+ "9": 0.861
272
+ },
273
+ "sae_top_1_test_accuracy": {
274
+ "0": 0.606,
275
+ "1": 0.638,
276
+ "2": 0.729,
277
+ "6": 0.823,
278
+ "9": 0.923
279
+ },
280
+ "sae_top_2_test_accuracy": {
281
+ "0": 0.863,
282
+ "1": 0.646,
283
+ "2": 0.866,
284
+ "6": 0.962,
285
+ "9": 0.93
286
+ },
287
+ "sae_top_5_test_accuracy": {
288
+ "0": 0.871,
289
+ "1": 0.819,
290
+ "2": 0.889,
291
+ "6": 0.985,
292
+ "9": 0.936
293
+ }
294
+ },
295
+ "LabHC/bias_in_bios_class_set2_results": {
296
+ "sae_test_accuracy": {
297
+ "11": 0.9570000171661377,
298
+ "13": 0.9450000524520874,
299
+ "14": 0.9600000381469727,
300
+ "18": 0.9240000247955322,
301
+ "19": 0.9670000672340393
302
+ },
303
+ "llm_test_accuracy": {
304
+ "11": 0.9550000429153442,
305
+ "13": 0.9550000429153442,
306
+ "14": 0.9550000429153442,
307
+ "18": 0.9330000281333923,
308
+ "19": 0.9530000686645508
309
+ },
310
+ "llm_top_1_test_accuracy": {
311
+ "11": 0.557,
312
+ "13": 0.673,
313
+ "14": 0.645,
314
+ "18": 0.697,
315
+ "19": 0.787
316
+ },
317
+ "llm_top_2_test_accuracy": {
318
+ "11": 0.705,
319
+ "13": 0.718,
320
+ "14": 0.679,
321
+ "18": 0.73,
322
+ "19": 0.783
323
+ },
324
+ "llm_top_5_test_accuracy": {
325
+ "11": 0.794,
326
+ "13": 0.744,
327
+ "14": 0.724,
328
+ "18": 0.713,
329
+ "19": 0.833
330
+ },
331
+ "sae_top_1_test_accuracy": {
332
+ "11": 0.569,
333
+ "13": 0.679,
334
+ "14": 0.641,
335
+ "18": 0.697,
336
+ "19": 0.799
337
+ },
338
+ "sae_top_2_test_accuracy": {
339
+ "11": 0.741,
340
+ "13": 0.646,
341
+ "14": 0.86,
342
+ "18": 0.688,
343
+ "19": 0.846
344
+ },
345
+ "sae_top_5_test_accuracy": {
346
+ "11": 0.868,
347
+ "13": 0.743,
348
+ "14": 0.875,
349
+ "18": 0.734,
350
+ "19": 0.858
351
+ }
352
+ },
353
+ "LabHC/bias_in_bios_class_set3_results": {
354
+ "sae_test_accuracy": {
355
+ "20": 0.9650000333786011,
356
+ "21": 0.9340000152587891,
357
+ "22": 0.9070000648498535,
358
+ "25": 0.9650000333786011,
359
+ "26": 0.9020000696182251
360
+ },
361
+ "llm_test_accuracy": {
362
+ "20": 0.9570000171661377,
363
+ "21": 0.9150000214576721,
364
+ "22": 0.9230000376701355,
365
+ "25": 0.9610000252723694,
366
+ "26": 0.89000004529953
367
+ },
368
+ "llm_top_1_test_accuracy": {
369
+ "20": 0.716,
370
+ "21": 0.761,
371
+ "22": 0.648,
372
+ "25": 0.692,
373
+ "26": 0.618
374
+ },
375
+ "llm_top_2_test_accuracy": {
376
+ "20": 0.805,
377
+ "21": 0.762,
378
+ "22": 0.649,
379
+ "25": 0.766,
380
+ "26": 0.671
381
+ },
382
+ "llm_top_5_test_accuracy": {
383
+ "20": 0.875,
384
+ "21": 0.783,
385
+ "22": 0.711,
386
+ "25": 0.782,
387
+ "26": 0.671
388
+ },
389
+ "sae_top_1_test_accuracy": {
390
+ "20": 0.906,
391
+ "21": 0.774,
392
+ "22": 0.868,
393
+ "25": 0.701,
394
+ "26": 0.62
395
+ },
396
+ "sae_top_2_test_accuracy": {
397
+ "20": 0.904,
398
+ "21": 0.777,
399
+ "22": 0.868,
400
+ "25": 0.882,
401
+ "26": 0.636
402
+ },
403
+ "sae_top_5_test_accuracy": {
404
+ "20": 0.925,
405
+ "21": 0.804,
406
+ "22": 0.871,
407
+ "25": 0.883,
408
+ "26": 0.77
409
+ }
410
+ },
411
+ "canrager/amazon_reviews_mcauley_1and5_results": {
412
+ "sae_test_accuracy": {
413
+ "1": 0.9480000734329224,
414
+ "2": 0.9310000538825989,
415
+ "3": 0.9240000247955322,
416
+ "5": 0.9250000715255737,
417
+ "6": 0.8860000371932983
418
+ },
419
+ "llm_test_accuracy": {
420
+ "1": 0.9460000395774841,
421
+ "2": 0.9330000281333923,
422
+ "3": 0.9130000472068787,
423
+ "5": 0.9160000681877136,
424
+ "6": 0.8500000238418579
425
+ },
426
+ "llm_top_1_test_accuracy": {
427
+ "1": 0.674,
428
+ "2": 0.587,
429
+ "3": 0.601,
430
+ "5": 0.583,
431
+ "6": 0.593
432
+ },
433
+ "llm_top_2_test_accuracy": {
434
+ "1": 0.737,
435
+ "2": 0.632,
436
+ "3": 0.605,
437
+ "5": 0.634,
438
+ "6": 0.638
439
+ },
440
+ "llm_top_5_test_accuracy": {
441
+ "1": 0.763,
442
+ "2": 0.626,
443
+ "3": 0.63,
444
+ "5": 0.656,
445
+ "6": 0.689
446
+ },
447
+ "sae_top_1_test_accuracy": {
448
+ "1": 0.845,
449
+ "2": 0.84,
450
+ "3": 0.583,
451
+ "5": 0.55,
452
+ "6": 0.601
453
+ },
454
+ "sae_top_2_test_accuracy": {
455
+ "1": 0.842,
456
+ "2": 0.84,
457
+ "3": 0.676,
458
+ "5": 0.842,
459
+ "6": 0.6
460
+ },
461
+ "sae_top_5_test_accuracy": {
462
+ "1": 0.89,
463
+ "2": 0.88,
464
+ "3": 0.67,
465
+ "5": 0.846,
466
+ "6": 0.703
467
+ }
468
+ },
469
+ "canrager/amazon_reviews_mcauley_1and5_sentiment_results": {
470
+ "sae_test_accuracy": {
471
+ "1.0": 0.9740000367164612,
472
+ "5.0": 0.9720000624656677
473
+ },
474
+ "llm_test_accuracy": {
475
+ "1.0": 0.9820000529289246,
476
+ "5.0": 0.9800000190734863
477
+ },
478
+ "llm_top_1_test_accuracy": {
479
+ "1.0": 0.673,
480
+ "5.0": 0.673
481
+ },
482
+ "llm_top_2_test_accuracy": {
483
+ "1.0": 0.724,
484
+ "5.0": 0.724
485
+ },
486
+ "llm_top_5_test_accuracy": {
487
+ "1.0": 0.766,
488
+ "5.0": 0.766
489
+ },
490
+ "sae_top_1_test_accuracy": {
491
+ "1.0": 0.922,
492
+ "5.0": 0.922
493
+ },
494
+ "sae_top_2_test_accuracy": {
495
+ "1.0": 0.92,
496
+ "5.0": 0.92
497
+ },
498
+ "sae_top_5_test_accuracy": {
499
+ "1.0": 0.922,
500
+ "5.0": 0.922
501
+ }
502
+ },
503
+ "codeparrot/github-code_results": {
504
+ "sae_test_accuracy": {
505
+ "C": 0.9630000591278076,
506
+ "Python": 0.987000048160553,
507
+ "HTML": 0.9860000610351562,
508
+ "Java": 0.9520000219345093,
509
+ "PHP": 0.9580000638961792
510
+ },
511
+ "llm_test_accuracy": {
512
+ "C": 0.9450000524520874,
513
+ "Python": 0.9890000224113464,
514
+ "HTML": 0.987000048160553,
515
+ "Java": 0.9620000720024109,
516
+ "PHP": 0.9530000686645508
517
+ },
518
+ "llm_top_1_test_accuracy": {
519
+ "C": 0.669,
520
+ "Python": 0.638,
521
+ "HTML": 0.788,
522
+ "Java": 0.621,
523
+ "PHP": 0.601
524
+ },
525
+ "llm_top_2_test_accuracy": {
526
+ "C": 0.656,
527
+ "Python": 0.671,
528
+ "HTML": 0.811,
529
+ "Java": 0.678,
530
+ "PHP": 0.631
531
+ },
532
+ "llm_top_5_test_accuracy": {
533
+ "C": 0.744,
534
+ "Python": 0.735,
535
+ "HTML": 0.904,
536
+ "Java": 0.726,
537
+ "PHP": 0.672
538
+ },
539
+ "sae_top_1_test_accuracy": {
540
+ "C": 0.623,
541
+ "Python": 0.651,
542
+ "HTML": 0.695,
543
+ "Java": 0.642,
544
+ "PHP": 0.579
545
+ },
546
+ "sae_top_2_test_accuracy": {
547
+ "C": 0.641,
548
+ "Python": 0.65,
549
+ "HTML": 0.763,
550
+ "Java": 0.647,
551
+ "PHP": 0.581
552
+ },
553
+ "sae_top_5_test_accuracy": {
554
+ "C": 0.667,
555
+ "Python": 0.74,
556
+ "HTML": 0.805,
557
+ "Java": 0.672,
558
+ "PHP": 0.92
559
+ }
560
+ },
561
+ "fancyzhx/ag_news_results": {
562
+ "sae_test_accuracy": {
563
+ "0": 0.9330000281333923,
564
+ "1": 0.9880000352859497,
565
+ "2": 0.9310000538825989,
566
+ "3": 0.9480000734329224
567
+ },
568
+ "llm_test_accuracy": {
569
+ "0": 0.937000036239624,
570
+ "1": 0.987000048160553,
571
+ "2": 0.9300000667572021,
572
+ "3": 0.9520000219345093
573
+ },
574
+ "llm_top_1_test_accuracy": {
575
+ "0": 0.586,
576
+ "1": 0.658,
577
+ "2": 0.669,
578
+ "3": 0.644
579
+ },
580
+ "llm_top_2_test_accuracy": {
581
+ "0": 0.809,
582
+ "1": 0.8,
583
+ "2": 0.691,
584
+ "3": 0.814
585
+ },
586
+ "llm_top_5_test_accuracy": {
587
+ "0": 0.827,
588
+ "1": 0.877,
589
+ "2": 0.748,
590
+ "3": 0.838
591
+ },
592
+ "sae_top_1_test_accuracy": {
593
+ "0": 0.662,
594
+ "1": 0.694,
595
+ "2": 0.656,
596
+ "3": 0.661
597
+ },
598
+ "sae_top_2_test_accuracy": {
599
+ "0": 0.706,
600
+ "1": 0.784,
601
+ "2": 0.687,
602
+ "3": 0.661
603
+ },
604
+ "sae_top_5_test_accuracy": {
605
+ "0": 0.765,
606
+ "1": 0.795,
607
+ "2": 0.819,
608
+ "3": 0.807
609
+ }
610
+ },
611
+ "Helsinki-NLP/europarl_results": {
612
+ "sae_test_accuracy": {
613
+ "en": 0.9980000257492065,
614
+ "fr": 0.9970000386238098,
615
+ "de": 0.999000072479248,
616
+ "es": 1.0,
617
+ "nl": 0.9980000257492065
618
+ },
619
+ "llm_test_accuracy": {
620
+ "en": 1.0,
621
+ "fr": 1.0,
622
+ "de": 1.0,
623
+ "es": 0.9980000257492065,
624
+ "nl": 0.999000072479248
625
+ },
626
+ "llm_top_1_test_accuracy": {
627
+ "en": 0.742,
628
+ "fr": 0.575,
629
+ "de": 0.737,
630
+ "es": 0.504,
631
+ "nl": 0.643
632
+ },
633
+ "llm_top_2_test_accuracy": {
634
+ "en": 0.822,
635
+ "fr": 0.593,
636
+ "de": 0.83,
637
+ "es": 0.905,
638
+ "nl": 0.743
639
+ },
640
+ "llm_top_5_test_accuracy": {
641
+ "en": 0.898,
642
+ "fr": 0.908,
643
+ "de": 0.908,
644
+ "es": 0.982,
645
+ "nl": 0.856
646
+ },
647
+ "sae_top_1_test_accuracy": {
648
+ "en": 0.837,
649
+ "fr": 0.996,
650
+ "de": 0.899,
651
+ "es": 0.87,
652
+ "nl": 0.724
653
+ },
654
+ "sae_top_2_test_accuracy": {
655
+ "en": 0.842,
656
+ "fr": 0.995,
657
+ "de": 0.904,
658
+ "es": 0.905,
659
+ "nl": 0.997
660
+ },
661
+ "sae_top_5_test_accuracy": {
662
+ "en": 0.997,
663
+ "fr": 0.995,
664
+ "de": 0.967,
665
+ "es": 0.991,
666
+ "nl": 0.998
667
+ }
668
+ }
669
+ }
670
+ }
eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_4_custom_sae_eval_results.json ADDED
@@ -0,0 +1,670 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "sparse_probing",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "LabHC/bias_in_bios_class_set2",
8
+ "LabHC/bias_in_bios_class_set3",
9
+ "canrager/amazon_reviews_mcauley_1and5",
10
+ "canrager/amazon_reviews_mcauley_1and5_sentiment",
11
+ "codeparrot/github-code",
12
+ "fancyzhx/ag_news",
13
+ "Helsinki-NLP/europarl"
14
+ ],
15
+ "probe_train_set_size": 4000,
16
+ "probe_test_set_size": 1000,
17
+ "context_length": 128,
18
+ "sae_batch_size": 125,
19
+ "llm_batch_size": 32,
20
+ "llm_dtype": "bfloat16",
21
+ "model_name": "gemma-2-2b",
22
+ "k_values": [
23
+ 1,
24
+ 2,
25
+ 5
26
+ ],
27
+ "lower_vram_usage": false
28
+ },
29
+ "eval_id": "ad2826b0-f869-4567-ade4-ccaeb1459c3e",
30
+ "datetime_epoch_millis": 1740125907677,
31
+ "eval_result_metrics": {
32
+ "llm": {
33
+ "llm_test_accuracy": 0.9571125406771899,
34
+ "llm_top_1_test_accuracy": 0.6527562499999999,
35
+ "llm_top_2_test_accuracy": 0.7210875,
36
+ "llm_top_5_test_accuracy": 0.7801125,
37
+ "llm_top_10_test_accuracy": null,
38
+ "llm_top_20_test_accuracy": null,
39
+ "llm_top_50_test_accuracy": null,
40
+ "llm_top_100_test_accuracy": null
41
+ },
42
+ "sae": {
43
+ "sae_test_accuracy": 0.9573125381022692,
44
+ "sae_top_1_test_accuracy": 0.723675,
45
+ "sae_top_2_test_accuracy": 0.78325,
46
+ "sae_top_5_test_accuracy": 0.8566562499999999,
47
+ "sae_top_10_test_accuracy": null,
48
+ "sae_top_20_test_accuracy": null,
49
+ "sae_top_50_test_accuracy": null,
50
+ "sae_top_100_test_accuracy": null
51
+ }
52
+ },
53
+ "eval_result_details": [
54
+ {
55
+ "dataset_name": "LabHC/bias_in_bios_class_set1_results",
56
+ "llm_test_accuracy": 0.966800057888031,
57
+ "llm_top_1_test_accuracy": 0.6397999999999999,
58
+ "llm_top_2_test_accuracy": 0.6954,
59
+ "llm_top_5_test_accuracy": 0.7869999999999999,
60
+ "llm_top_10_test_accuracy": null,
61
+ "llm_top_20_test_accuracy": null,
62
+ "llm_top_50_test_accuracy": null,
63
+ "llm_top_100_test_accuracy": null,
64
+ "sae_test_accuracy": 0.9648000359535217,
65
+ "sae_top_1_test_accuracy": 0.726,
66
+ "sae_top_2_test_accuracy": 0.7692,
67
+ "sae_top_5_test_accuracy": 0.8936,
68
+ "sae_top_10_test_accuracy": null,
69
+ "sae_top_20_test_accuracy": null,
70
+ "sae_top_50_test_accuracy": null,
71
+ "sae_top_100_test_accuracy": null
72
+ },
73
+ {
74
+ "dataset_name": "LabHC/bias_in_bios_class_set2_results",
75
+ "llm_test_accuracy": 0.9502000451087952,
76
+ "llm_top_1_test_accuracy": 0.6718,
77
+ "llm_top_2_test_accuracy": 0.7230000000000001,
78
+ "llm_top_5_test_accuracy": 0.7615999999999999,
79
+ "llm_top_10_test_accuracy": null,
80
+ "llm_top_20_test_accuracy": null,
81
+ "llm_top_50_test_accuracy": null,
82
+ "llm_top_100_test_accuracy": null,
83
+ "sae_test_accuracy": 0.9500000476837158,
84
+ "sae_top_1_test_accuracy": 0.7068000000000001,
85
+ "sae_top_2_test_accuracy": 0.7484,
86
+ "sae_top_5_test_accuracy": 0.8228,
87
+ "sae_top_10_test_accuracy": null,
88
+ "sae_top_20_test_accuracy": null,
89
+ "sae_top_50_test_accuracy": null,
90
+ "sae_top_100_test_accuracy": null
91
+ },
92
+ {
93
+ "dataset_name": "LabHC/bias_in_bios_class_set3_results",
94
+ "llm_test_accuracy": 0.9292000293731689,
95
+ "llm_top_1_test_accuracy": 0.687,
96
+ "llm_top_2_test_accuracy": 0.7306000000000001,
97
+ "llm_top_5_test_accuracy": 0.7644,
98
+ "llm_top_10_test_accuracy": null,
99
+ "llm_top_20_test_accuracy": null,
100
+ "llm_top_50_test_accuracy": null,
101
+ "llm_top_100_test_accuracy": null,
102
+ "sae_test_accuracy": 0.9308000326156616,
103
+ "sae_top_1_test_accuracy": 0.6934,
104
+ "sae_top_2_test_accuracy": 0.7333999999999999,
105
+ "sae_top_5_test_accuracy": 0.8364,
106
+ "sae_top_10_test_accuracy": null,
107
+ "sae_top_20_test_accuracy": null,
108
+ "sae_top_50_test_accuracy": null,
109
+ "sae_top_100_test_accuracy": null
110
+ },
111
+ {
112
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results",
113
+ "llm_test_accuracy": 0.9116000413894654,
114
+ "llm_top_1_test_accuracy": 0.6076,
115
+ "llm_top_2_test_accuracy": 0.6492,
116
+ "llm_top_5_test_accuracy": 0.6728000000000001,
117
+ "llm_top_10_test_accuracy": null,
118
+ "llm_top_20_test_accuracy": null,
119
+ "llm_top_50_test_accuracy": null,
120
+ "llm_top_100_test_accuracy": null,
121
+ "sae_test_accuracy": 0.9226000428199768,
122
+ "sae_top_1_test_accuracy": 0.6284,
123
+ "sae_top_2_test_accuracy": 0.6893999999999999,
124
+ "sae_top_5_test_accuracy": 0.7689999999999999,
125
+ "sae_top_10_test_accuracy": null,
126
+ "sae_top_20_test_accuracy": null,
127
+ "sae_top_50_test_accuracy": null,
128
+ "sae_top_100_test_accuracy": null
129
+ },
130
+ {
131
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results",
132
+ "llm_test_accuracy": 0.9810000360012054,
133
+ "llm_top_1_test_accuracy": 0.673,
134
+ "llm_top_2_test_accuracy": 0.724,
135
+ "llm_top_5_test_accuracy": 0.766,
136
+ "llm_top_10_test_accuracy": null,
137
+ "llm_top_20_test_accuracy": null,
138
+ "llm_top_50_test_accuracy": null,
139
+ "llm_top_100_test_accuracy": null,
140
+ "sae_test_accuracy": 0.976000040769577,
141
+ "sae_top_1_test_accuracy": 0.746,
142
+ "sae_top_2_test_accuracy": 0.905,
143
+ "sae_top_5_test_accuracy": 0.928,
144
+ "sae_top_10_test_accuracy": null,
145
+ "sae_top_20_test_accuracy": null,
146
+ "sae_top_50_test_accuracy": null,
147
+ "sae_top_100_test_accuracy": null
148
+ },
149
+ {
150
+ "dataset_name": "codeparrot/github-code_results",
151
+ "llm_test_accuracy": 0.9672000527381897,
152
+ "llm_top_1_test_accuracy": 0.6634,
153
+ "llm_top_2_test_accuracy": 0.6894,
154
+ "llm_top_5_test_accuracy": 0.7562,
155
+ "llm_top_10_test_accuracy": null,
156
+ "llm_top_20_test_accuracy": null,
157
+ "llm_top_50_test_accuracy": null,
158
+ "llm_top_100_test_accuracy": null,
159
+ "sae_test_accuracy": 0.9646000385284423,
160
+ "sae_top_1_test_accuracy": 0.6454000000000001,
161
+ "sae_top_2_test_accuracy": 0.6788000000000001,
162
+ "sae_top_5_test_accuracy": 0.7572,
163
+ "sae_top_10_test_accuracy": null,
164
+ "sae_top_20_test_accuracy": null,
165
+ "sae_top_50_test_accuracy": null,
166
+ "sae_top_100_test_accuracy": null
167
+ },
168
+ {
169
+ "dataset_name": "fancyzhx/ag_news_results",
170
+ "llm_test_accuracy": 0.9515000432729721,
171
+ "llm_top_1_test_accuracy": 0.63925,
172
+ "llm_top_2_test_accuracy": 0.7785,
173
+ "llm_top_5_test_accuracy": 0.8225,
174
+ "llm_top_10_test_accuracy": null,
175
+ "llm_top_20_test_accuracy": null,
176
+ "llm_top_50_test_accuracy": null,
177
+ "llm_top_100_test_accuracy": null,
178
+ "sae_test_accuracy": 0.9505000561475754,
179
+ "sae_top_1_test_accuracy": 0.7699999999999999,
180
+ "sae_top_2_test_accuracy": 0.8059999999999999,
181
+ "sae_top_5_test_accuracy": 0.8522500000000001,
182
+ "sae_top_10_test_accuracy": null,
183
+ "sae_top_20_test_accuracy": null,
184
+ "sae_top_50_test_accuracy": null,
185
+ "sae_top_100_test_accuracy": null
186
+ },
187
+ {
188
+ "dataset_name": "Helsinki-NLP/europarl_results",
189
+ "llm_test_accuracy": 0.9994000196456909,
190
+ "llm_top_1_test_accuracy": 0.6401999999999999,
191
+ "llm_top_2_test_accuracy": 0.7786000000000001,
192
+ "llm_top_5_test_accuracy": 0.9103999999999999,
193
+ "llm_top_10_test_accuracy": null,
194
+ "llm_top_20_test_accuracy": null,
195
+ "llm_top_50_test_accuracy": null,
196
+ "llm_top_100_test_accuracy": null,
197
+ "sae_test_accuracy": 0.9992000102996826,
198
+ "sae_top_1_test_accuracy": 0.8734,
199
+ "sae_top_2_test_accuracy": 0.9358000000000001,
200
+ "sae_top_5_test_accuracy": 0.994,
201
+ "sae_top_10_test_accuracy": null,
202
+ "sae_top_20_test_accuracy": null,
203
+ "sae_top_50_test_accuracy": null,
204
+ "sae_top_100_test_accuracy": null
205
+ }
206
+ ],
207
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
208
+ "sae_lens_id": "custom_sae",
209
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_4",
210
+ "sae_lens_version": "5.4.2",
211
+ "sae_cfg_dict": {
212
+ "model_name": "gemma-2-2b",
213
+ "d_in": 2304,
214
+ "d_sae": 65536,
215
+ "hook_layer": 12,
216
+ "hook_name": "blocks.12.hook_resid_post",
217
+ "context_size": null,
218
+ "hook_head_index": null,
219
+ "architecture": "topk",
220
+ "apply_b_dec_to_input": null,
221
+ "finetuning_scaling_factor": null,
222
+ "activation_fn_str": "",
223
+ "prepend_bos": true,
224
+ "normalize_activations": "none",
225
+ "dtype": "bfloat16",
226
+ "device": "",
227
+ "dataset_path": "",
228
+ "dataset_trust_remote_code": true,
229
+ "seqpos_slice": [
230
+ null
231
+ ],
232
+ "training_tokens": -100000,
233
+ "sae_lens_training_version": null,
234
+ "neuronpedia_id": null
235
+ },
236
+ "eval_result_unstructured": {
237
+ "LabHC/bias_in_bios_class_set1_results": {
238
+ "sae_test_accuracy": {
239
+ "0": 0.9470000267028809,
240
+ "1": 0.9610000252723694,
241
+ "2": 0.9540000557899475,
242
+ "6": 0.9880000352859497,
243
+ "9": 0.9740000367164612
244
+ },
245
+ "llm_test_accuracy": {
246
+ "0": 0.9510000348091125,
247
+ "1": 0.9670000672340393,
248
+ "2": 0.9530000686645508,
249
+ "6": 0.987000048160553,
250
+ "9": 0.9760000705718994
251
+ },
252
+ "llm_top_1_test_accuracy": {
253
+ "0": 0.577,
254
+ "1": 0.613,
255
+ "2": 0.662,
256
+ "6": 0.787,
257
+ "9": 0.56
258
+ },
259
+ "llm_top_2_test_accuracy": {
260
+ "0": 0.574,
261
+ "1": 0.66,
262
+ "2": 0.718,
263
+ "6": 0.811,
264
+ "9": 0.714
265
+ },
266
+ "llm_top_5_test_accuracy": {
267
+ "0": 0.713,
268
+ "1": 0.711,
269
+ "2": 0.755,
270
+ "6": 0.895,
271
+ "9": 0.861
272
+ },
273
+ "sae_top_1_test_accuracy": {
274
+ "0": 0.654,
275
+ "1": 0.654,
276
+ "2": 0.737,
277
+ "6": 0.816,
278
+ "9": 0.769
279
+ },
280
+ "sae_top_2_test_accuracy": {
281
+ "0": 0.658,
282
+ "1": 0.657,
283
+ "2": 0.761,
284
+ "6": 0.828,
285
+ "9": 0.942
286
+ },
287
+ "sae_top_5_test_accuracy": {
288
+ "0": 0.869,
289
+ "1": 0.778,
290
+ "2": 0.895,
291
+ "6": 0.983,
292
+ "9": 0.943
293
+ }
294
+ },
295
+ "LabHC/bias_in_bios_class_set2_results": {
296
+ "sae_test_accuracy": {
297
+ "11": 0.968000054359436,
298
+ "13": 0.9460000395774841,
299
+ "14": 0.956000030040741,
300
+ "18": 0.9170000553131104,
301
+ "19": 0.9630000591278076
302
+ },
303
+ "llm_test_accuracy": {
304
+ "11": 0.9550000429153442,
305
+ "13": 0.9550000429153442,
306
+ "14": 0.9550000429153442,
307
+ "18": 0.9330000281333923,
308
+ "19": 0.9530000686645508
309
+ },
310
+ "llm_top_1_test_accuracy": {
311
+ "11": 0.557,
312
+ "13": 0.673,
313
+ "14": 0.645,
314
+ "18": 0.697,
315
+ "19": 0.787
316
+ },
317
+ "llm_top_2_test_accuracy": {
318
+ "11": 0.705,
319
+ "13": 0.718,
320
+ "14": 0.679,
321
+ "18": 0.73,
322
+ "19": 0.783
323
+ },
324
+ "llm_top_5_test_accuracy": {
325
+ "11": 0.794,
326
+ "13": 0.744,
327
+ "14": 0.724,
328
+ "18": 0.713,
329
+ "19": 0.833
330
+ },
331
+ "sae_top_1_test_accuracy": {
332
+ "11": 0.669,
333
+ "13": 0.692,
334
+ "14": 0.661,
335
+ "18": 0.724,
336
+ "19": 0.788
337
+ },
338
+ "sae_top_2_test_accuracy": {
339
+ "11": 0.78,
340
+ "13": 0.686,
341
+ "14": 0.722,
342
+ "18": 0.711,
343
+ "19": 0.843
344
+ },
345
+ "sae_top_5_test_accuracy": {
346
+ "11": 0.878,
347
+ "13": 0.726,
348
+ "14": 0.9,
349
+ "18": 0.748,
350
+ "19": 0.862
351
+ }
352
+ },
353
+ "LabHC/bias_in_bios_class_set3_results": {
354
+ "sae_test_accuracy": {
355
+ "20": 0.956000030040741,
356
+ "21": 0.9330000281333923,
357
+ "22": 0.9180000424385071,
358
+ "25": 0.9550000429153442,
359
+ "26": 0.8920000195503235
360
+ },
361
+ "llm_test_accuracy": {
362
+ "20": 0.9570000171661377,
363
+ "21": 0.9150000214576721,
364
+ "22": 0.9230000376701355,
365
+ "25": 0.9610000252723694,
366
+ "26": 0.89000004529953
367
+ },
368
+ "llm_top_1_test_accuracy": {
369
+ "20": 0.716,
370
+ "21": 0.761,
371
+ "22": 0.648,
372
+ "25": 0.692,
373
+ "26": 0.618
374
+ },
375
+ "llm_top_2_test_accuracy": {
376
+ "20": 0.805,
377
+ "21": 0.762,
378
+ "22": 0.649,
379
+ "25": 0.766,
380
+ "26": 0.671
381
+ },
382
+ "llm_top_5_test_accuracy": {
383
+ "20": 0.875,
384
+ "21": 0.783,
385
+ "22": 0.711,
386
+ "25": 0.782,
387
+ "26": 0.671
388
+ },
389
+ "sae_top_1_test_accuracy": {
390
+ "20": 0.887,
391
+ "21": 0.646,
392
+ "22": 0.562,
393
+ "25": 0.692,
394
+ "26": 0.68
395
+ },
396
+ "sae_top_2_test_accuracy": {
397
+ "20": 0.888,
398
+ "21": 0.75,
399
+ "22": 0.626,
400
+ "25": 0.713,
401
+ "26": 0.69
402
+ },
403
+ "sae_top_5_test_accuracy": {
404
+ "20": 0.91,
405
+ "21": 0.807,
406
+ "22": 0.87,
407
+ "25": 0.881,
408
+ "26": 0.714
409
+ }
410
+ },
411
+ "canrager/amazon_reviews_mcauley_1and5_results": {
412
+ "sae_test_accuracy": {
413
+ "1": 0.9420000314712524,
414
+ "2": 0.9410000443458557,
415
+ "3": 0.9260000586509705,
416
+ "5": 0.9230000376701355,
417
+ "6": 0.8810000419616699
418
+ },
419
+ "llm_test_accuracy": {
420
+ "1": 0.9460000395774841,
421
+ "2": 0.9330000281333923,
422
+ "3": 0.9130000472068787,
423
+ "5": 0.9160000681877136,
424
+ "6": 0.8500000238418579
425
+ },
426
+ "llm_top_1_test_accuracy": {
427
+ "1": 0.674,
428
+ "2": 0.587,
429
+ "3": 0.601,
430
+ "5": 0.583,
431
+ "6": 0.593
432
+ },
433
+ "llm_top_2_test_accuracy": {
434
+ "1": 0.737,
435
+ "2": 0.632,
436
+ "3": 0.605,
437
+ "5": 0.634,
438
+ "6": 0.638
439
+ },
440
+ "llm_top_5_test_accuracy": {
441
+ "1": 0.763,
442
+ "2": 0.626,
443
+ "3": 0.63,
444
+ "5": 0.656,
445
+ "6": 0.689
446
+ },
447
+ "sae_top_1_test_accuracy": {
448
+ "1": 0.666,
449
+ "2": 0.815,
450
+ "3": 0.577,
451
+ "5": 0.545,
452
+ "6": 0.539
453
+ },
454
+ "sae_top_2_test_accuracy": {
455
+ "1": 0.73,
456
+ "2": 0.815,
457
+ "3": 0.59,
458
+ "5": 0.566,
459
+ "6": 0.746
460
+ },
461
+ "sae_top_5_test_accuracy": {
462
+ "1": 0.881,
463
+ "2": 0.819,
464
+ "3": 0.724,
465
+ "5": 0.67,
466
+ "6": 0.751
467
+ }
468
+ },
469
+ "canrager/amazon_reviews_mcauley_1and5_sentiment_results": {
470
+ "sae_test_accuracy": {
471
+ "1.0": 0.9750000238418579,
472
+ "5.0": 0.9770000576972961
473
+ },
474
+ "llm_test_accuracy": {
475
+ "1.0": 0.9820000529289246,
476
+ "5.0": 0.9800000190734863
477
+ },
478
+ "llm_top_1_test_accuracy": {
479
+ "1.0": 0.673,
480
+ "5.0": 0.673
481
+ },
482
+ "llm_top_2_test_accuracy": {
483
+ "1.0": 0.724,
484
+ "5.0": 0.724
485
+ },
486
+ "llm_top_5_test_accuracy": {
487
+ "1.0": 0.766,
488
+ "5.0": 0.766
489
+ },
490
+ "sae_top_1_test_accuracy": {
491
+ "1.0": 0.746,
492
+ "5.0": 0.746
493
+ },
494
+ "sae_top_2_test_accuracy": {
495
+ "1.0": 0.905,
496
+ "5.0": 0.905
497
+ },
498
+ "sae_top_5_test_accuracy": {
499
+ "1.0": 0.928,
500
+ "5.0": 0.928
501
+ }
502
+ },
503
+ "codeparrot/github-code_results": {
504
+ "sae_test_accuracy": {
505
+ "C": 0.9450000524520874,
506
+ "Python": 0.9890000224113464,
507
+ "HTML": 0.9860000610351562,
508
+ "Java": 0.9600000381469727,
509
+ "PHP": 0.9430000185966492
510
+ },
511
+ "llm_test_accuracy": {
512
+ "C": 0.9450000524520874,
513
+ "Python": 0.9890000224113464,
514
+ "HTML": 0.987000048160553,
515
+ "Java": 0.9620000720024109,
516
+ "PHP": 0.9530000686645508
517
+ },
518
+ "llm_top_1_test_accuracy": {
519
+ "C": 0.669,
520
+ "Python": 0.638,
521
+ "HTML": 0.788,
522
+ "Java": 0.621,
523
+ "PHP": 0.601
524
+ },
525
+ "llm_top_2_test_accuracy": {
526
+ "C": 0.656,
527
+ "Python": 0.671,
528
+ "HTML": 0.811,
529
+ "Java": 0.678,
530
+ "PHP": 0.631
531
+ },
532
+ "llm_top_5_test_accuracy": {
533
+ "C": 0.744,
534
+ "Python": 0.735,
535
+ "HTML": 0.904,
536
+ "Java": 0.726,
537
+ "PHP": 0.672
538
+ },
539
+ "sae_top_1_test_accuracy": {
540
+ "C": 0.624,
541
+ "Python": 0.622,
542
+ "HTML": 0.749,
543
+ "Java": 0.632,
544
+ "PHP": 0.6
545
+ },
546
+ "sae_top_2_test_accuracy": {
547
+ "C": 0.646,
548
+ "Python": 0.673,
549
+ "HTML": 0.779,
550
+ "Java": 0.657,
551
+ "PHP": 0.639
552
+ },
553
+ "sae_top_5_test_accuracy": {
554
+ "C": 0.668,
555
+ "Python": 0.691,
556
+ "HTML": 0.848,
557
+ "Java": 0.648,
558
+ "PHP": 0.931
559
+ }
560
+ },
561
+ "fancyzhx/ag_news_results": {
562
+ "sae_test_accuracy": {
563
+ "0": 0.9460000395774841,
564
+ "1": 0.9810000658035278,
565
+ "2": 0.9250000715255737,
566
+ "3": 0.9500000476837158
567
+ },
568
+ "llm_test_accuracy": {
569
+ "0": 0.937000036239624,
570
+ "1": 0.987000048160553,
571
+ "2": 0.9300000667572021,
572
+ "3": 0.9520000219345093
573
+ },
574
+ "llm_top_1_test_accuracy": {
575
+ "0": 0.586,
576
+ "1": 0.658,
577
+ "2": 0.669,
578
+ "3": 0.644
579
+ },
580
+ "llm_top_2_test_accuracy": {
581
+ "0": 0.809,
582
+ "1": 0.8,
583
+ "2": 0.691,
584
+ "3": 0.814
585
+ },
586
+ "llm_top_5_test_accuracy": {
587
+ "0": 0.827,
588
+ "1": 0.877,
589
+ "2": 0.748,
590
+ "3": 0.838
591
+ },
592
+ "sae_top_1_test_accuracy": {
593
+ "0": 0.731,
594
+ "1": 0.976,
595
+ "2": 0.662,
596
+ "3": 0.711
597
+ },
598
+ "sae_top_2_test_accuracy": {
599
+ "0": 0.759,
600
+ "1": 0.98,
601
+ "2": 0.711,
602
+ "3": 0.774
603
+ },
604
+ "sae_top_5_test_accuracy": {
605
+ "0": 0.781,
606
+ "1": 0.98,
607
+ "2": 0.801,
608
+ "3": 0.847
609
+ }
610
+ },
611
+ "Helsinki-NLP/europarl_results": {
612
+ "sae_test_accuracy": {
613
+ "en": 0.9980000257492065,
614
+ "fr": 1.0,
615
+ "de": 1.0,
616
+ "es": 0.9980000257492065,
617
+ "nl": 1.0
618
+ },
619
+ "llm_test_accuracy": {
620
+ "en": 1.0,
621
+ "fr": 1.0,
622
+ "de": 1.0,
623
+ "es": 0.9980000257492065,
624
+ "nl": 0.999000072479248
625
+ },
626
+ "llm_top_1_test_accuracy": {
627
+ "en": 0.742,
628
+ "fr": 0.575,
629
+ "de": 0.737,
630
+ "es": 0.504,
631
+ "nl": 0.643
632
+ },
633
+ "llm_top_2_test_accuracy": {
634
+ "en": 0.822,
635
+ "fr": 0.593,
636
+ "de": 0.83,
637
+ "es": 0.905,
638
+ "nl": 0.743
639
+ },
640
+ "llm_top_5_test_accuracy": {
641
+ "en": 0.898,
642
+ "fr": 0.908,
643
+ "de": 0.908,
644
+ "es": 0.982,
645
+ "nl": 0.856
646
+ },
647
+ "sae_top_1_test_accuracy": {
648
+ "en": 0.846,
649
+ "fr": 0.996,
650
+ "de": 0.903,
651
+ "es": 0.902,
652
+ "nl": 0.72
653
+ },
654
+ "sae_top_2_test_accuracy": {
655
+ "en": 0.822,
656
+ "fr": 0.995,
657
+ "de": 0.929,
658
+ "es": 0.935,
659
+ "nl": 0.998
660
+ },
661
+ "sae_top_5_test_accuracy": {
662
+ "en": 1.0,
663
+ "fr": 0.994,
664
+ "de": 0.983,
665
+ "es": 0.995,
666
+ "nl": 0.998
667
+ }
668
+ }
669
+ }
670
+ }
eval_results_finetunes/sparse_probing/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_5_custom_sae_eval_results.json ADDED
@@ -0,0 +1,670 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "sparse_probing",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "LabHC/bias_in_bios_class_set2",
8
+ "LabHC/bias_in_bios_class_set3",
9
+ "canrager/amazon_reviews_mcauley_1and5",
10
+ "canrager/amazon_reviews_mcauley_1and5_sentiment",
11
+ "codeparrot/github-code",
12
+ "fancyzhx/ag_news",
13
+ "Helsinki-NLP/europarl"
14
+ ],
15
+ "probe_train_set_size": 4000,
16
+ "probe_test_set_size": 1000,
17
+ "context_length": 128,
18
+ "sae_batch_size": 125,
19
+ "llm_batch_size": 32,
20
+ "llm_dtype": "bfloat16",
21
+ "model_name": "gemma-2-2b",
22
+ "k_values": [
23
+ 1,
24
+ 2,
25
+ 5
26
+ ],
27
+ "lower_vram_usage": false
28
+ },
29
+ "eval_id": "8c189893-38dd-4aa5-a42c-0afca502ffe2",
30
+ "datetime_epoch_millis": 1740125762283,
31
+ "eval_result_metrics": {
32
+ "llm": {
33
+ "llm_test_accuracy": 0.9571125406771899,
34
+ "llm_top_1_test_accuracy": 0.6527562499999999,
35
+ "llm_top_2_test_accuracy": 0.7210875,
36
+ "llm_top_5_test_accuracy": 0.7801125,
37
+ "llm_top_10_test_accuracy": null,
38
+ "llm_top_20_test_accuracy": null,
39
+ "llm_top_50_test_accuracy": null,
40
+ "llm_top_100_test_accuracy": null
41
+ },
42
+ "sae": {
43
+ "sae_test_accuracy": 0.9586562950164079,
44
+ "sae_top_1_test_accuracy": 0.7742249999999999,
45
+ "sae_top_2_test_accuracy": 0.8160999999999999,
46
+ "sae_top_5_test_accuracy": 0.8598625,
47
+ "sae_top_10_test_accuracy": null,
48
+ "sae_top_20_test_accuracy": null,
49
+ "sae_top_50_test_accuracy": null,
50
+ "sae_top_100_test_accuracy": null
51
+ }
52
+ },
53
+ "eval_result_details": [
54
+ {
55
+ "dataset_name": "LabHC/bias_in_bios_class_set1_results",
56
+ "llm_test_accuracy": 0.966800057888031,
57
+ "llm_top_1_test_accuracy": 0.6397999999999999,
58
+ "llm_top_2_test_accuracy": 0.6954,
59
+ "llm_top_5_test_accuracy": 0.7869999999999999,
60
+ "llm_top_10_test_accuracy": null,
61
+ "llm_top_20_test_accuracy": null,
62
+ "llm_top_50_test_accuracy": null,
63
+ "llm_top_100_test_accuracy": null,
64
+ "sae_test_accuracy": 0.9666000604629517,
65
+ "sae_top_1_test_accuracy": 0.8216000000000001,
66
+ "sae_top_2_test_accuracy": 0.8404,
67
+ "sae_top_5_test_accuracy": 0.8644000000000001,
68
+ "sae_top_10_test_accuracy": null,
69
+ "sae_top_20_test_accuracy": null,
70
+ "sae_top_50_test_accuracy": null,
71
+ "sae_top_100_test_accuracy": null
72
+ },
73
+ {
74
+ "dataset_name": "LabHC/bias_in_bios_class_set2_results",
75
+ "llm_test_accuracy": 0.9502000451087952,
76
+ "llm_top_1_test_accuracy": 0.6718,
77
+ "llm_top_2_test_accuracy": 0.7230000000000001,
78
+ "llm_top_5_test_accuracy": 0.7615999999999999,
79
+ "llm_top_10_test_accuracy": null,
80
+ "llm_top_20_test_accuracy": null,
81
+ "llm_top_50_test_accuracy": null,
82
+ "llm_top_100_test_accuracy": null,
83
+ "sae_test_accuracy": 0.9500000357627869,
84
+ "sae_top_1_test_accuracy": 0.7466,
85
+ "sae_top_2_test_accuracy": 0.7604,
86
+ "sae_top_5_test_accuracy": 0.7991999999999999,
87
+ "sae_top_10_test_accuracy": null,
88
+ "sae_top_20_test_accuracy": null,
89
+ "sae_top_50_test_accuracy": null,
90
+ "sae_top_100_test_accuracy": null
91
+ },
92
+ {
93
+ "dataset_name": "LabHC/bias_in_bios_class_set3_results",
94
+ "llm_test_accuracy": 0.9292000293731689,
95
+ "llm_top_1_test_accuracy": 0.687,
96
+ "llm_top_2_test_accuracy": 0.7306000000000001,
97
+ "llm_top_5_test_accuracy": 0.7644,
98
+ "llm_top_10_test_accuracy": null,
99
+ "llm_top_20_test_accuracy": null,
100
+ "llm_top_50_test_accuracy": null,
101
+ "llm_top_100_test_accuracy": null,
102
+ "sae_test_accuracy": 0.9310000538825989,
103
+ "sae_top_1_test_accuracy": 0.7194,
104
+ "sae_top_2_test_accuracy": 0.8061999999999999,
105
+ "sae_top_5_test_accuracy": 0.8398,
106
+ "sae_top_10_test_accuracy": null,
107
+ "sae_top_20_test_accuracy": null,
108
+ "sae_top_50_test_accuracy": null,
109
+ "sae_top_100_test_accuracy": null
110
+ },
111
+ {
112
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results",
113
+ "llm_test_accuracy": 0.9116000413894654,
114
+ "llm_top_1_test_accuracy": 0.6076,
115
+ "llm_top_2_test_accuracy": 0.6492,
116
+ "llm_top_5_test_accuracy": 0.6728000000000001,
117
+ "llm_top_10_test_accuracy": null,
118
+ "llm_top_20_test_accuracy": null,
119
+ "llm_top_50_test_accuracy": null,
120
+ "llm_top_100_test_accuracy": null,
121
+ "sae_test_accuracy": 0.9230000495910644,
122
+ "sae_top_1_test_accuracy": 0.7692,
123
+ "sae_top_2_test_accuracy": 0.7677999999999999,
124
+ "sae_top_5_test_accuracy": 0.7866,
125
+ "sae_top_10_test_accuracy": null,
126
+ "sae_top_20_test_accuracy": null,
127
+ "sae_top_50_test_accuracy": null,
128
+ "sae_top_100_test_accuracy": null
129
+ },
130
+ {
131
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results",
132
+ "llm_test_accuracy": 0.9810000360012054,
133
+ "llm_top_1_test_accuracy": 0.673,
134
+ "llm_top_2_test_accuracy": 0.724,
135
+ "llm_top_5_test_accuracy": 0.766,
136
+ "llm_top_10_test_accuracy": null,
137
+ "llm_top_20_test_accuracy": null,
138
+ "llm_top_50_test_accuracy": null,
139
+ "llm_top_100_test_accuracy": null,
140
+ "sae_test_accuracy": 0.9745000302791595,
141
+ "sae_top_1_test_accuracy": 0.724,
142
+ "sae_top_2_test_accuracy": 0.873,
143
+ "sae_top_5_test_accuracy": 0.933,
144
+ "sae_top_10_test_accuracy": null,
145
+ "sae_top_20_test_accuracy": null,
146
+ "sae_top_50_test_accuracy": null,
147
+ "sae_top_100_test_accuracy": null
148
+ },
149
+ {
150
+ "dataset_name": "codeparrot/github-code_results",
151
+ "llm_test_accuracy": 0.9672000527381897,
152
+ "llm_top_1_test_accuracy": 0.6634,
153
+ "llm_top_2_test_accuracy": 0.6894,
154
+ "llm_top_5_test_accuracy": 0.7562,
155
+ "llm_top_10_test_accuracy": null,
156
+ "llm_top_20_test_accuracy": null,
157
+ "llm_top_50_test_accuracy": null,
158
+ "llm_top_100_test_accuracy": null,
159
+ "sae_test_accuracy": 0.9702000498771668,
160
+ "sae_top_1_test_accuracy": 0.6712,
161
+ "sae_top_2_test_accuracy": 0.6881999999999999,
162
+ "sae_top_5_test_accuracy": 0.7924,
163
+ "sae_top_10_test_accuracy": null,
164
+ "sae_top_20_test_accuracy": null,
165
+ "sae_top_50_test_accuracy": null,
166
+ "sae_top_100_test_accuracy": null
167
+ },
168
+ {
169
+ "dataset_name": "fancyzhx/ag_news_results",
170
+ "llm_test_accuracy": 0.9515000432729721,
171
+ "llm_top_1_test_accuracy": 0.63925,
172
+ "llm_top_2_test_accuracy": 0.7785,
173
+ "llm_top_5_test_accuracy": 0.8225,
174
+ "llm_top_10_test_accuracy": null,
175
+ "llm_top_20_test_accuracy": null,
176
+ "llm_top_50_test_accuracy": null,
177
+ "llm_top_100_test_accuracy": null,
178
+ "sae_test_accuracy": 0.9547500461339951,
179
+ "sae_top_1_test_accuracy": 0.821,
180
+ "sae_top_2_test_accuracy": 0.8580000000000001,
181
+ "sae_top_5_test_accuracy": 0.8825,
182
+ "sae_top_10_test_accuracy": null,
183
+ "sae_top_20_test_accuracy": null,
184
+ "sae_top_50_test_accuracy": null,
185
+ "sae_top_100_test_accuracy": null
186
+ },
187
+ {
188
+ "dataset_name": "Helsinki-NLP/europarl_results",
189
+ "llm_test_accuracy": 0.9994000196456909,
190
+ "llm_top_1_test_accuracy": 0.6401999999999999,
191
+ "llm_top_2_test_accuracy": 0.7786000000000001,
192
+ "llm_top_5_test_accuracy": 0.9103999999999999,
193
+ "llm_top_10_test_accuracy": null,
194
+ "llm_top_20_test_accuracy": null,
195
+ "llm_top_50_test_accuracy": null,
196
+ "llm_top_100_test_accuracy": null,
197
+ "sae_test_accuracy": 0.9992000341415406,
198
+ "sae_top_1_test_accuracy": 0.9208000000000001,
199
+ "sae_top_2_test_accuracy": 0.9347999999999999,
200
+ "sae_top_5_test_accuracy": 0.9809999999999999,
201
+ "sae_top_10_test_accuracy": null,
202
+ "sae_top_20_test_accuracy": null,
203
+ "sae_top_50_test_accuracy": null,
204
+ "sae_top_100_test_accuracy": null
205
+ }
206
+ ],
207
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
208
+ "sae_lens_id": "custom_sae",
209
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_5",
210
+ "sae_lens_version": "5.4.2",
211
+ "sae_cfg_dict": {
212
+ "model_name": "gemma-2-2b",
213
+ "d_in": 2304,
214
+ "d_sae": 65536,
215
+ "hook_layer": 12,
216
+ "hook_name": "blocks.12.hook_resid_post",
217
+ "context_size": null,
218
+ "hook_head_index": null,
219
+ "architecture": "topk",
220
+ "apply_b_dec_to_input": null,
221
+ "finetuning_scaling_factor": null,
222
+ "activation_fn_str": "",
223
+ "prepend_bos": true,
224
+ "normalize_activations": "none",
225
+ "dtype": "bfloat16",
226
+ "device": "",
227
+ "dataset_path": "",
228
+ "dataset_trust_remote_code": true,
229
+ "seqpos_slice": [
230
+ null
231
+ ],
232
+ "training_tokens": -100000,
233
+ "sae_lens_training_version": null,
234
+ "neuronpedia_id": null
235
+ },
236
+ "eval_result_unstructured": {
237
+ "LabHC/bias_in_bios_class_set1_results": {
238
+ "sae_test_accuracy": {
239
+ "0": 0.9480000734329224,
240
+ "1": 0.9600000381469727,
241
+ "2": 0.9580000638961792,
242
+ "6": 0.9900000691413879,
243
+ "9": 0.9770000576972961
244
+ },
245
+ "llm_test_accuracy": {
246
+ "0": 0.9510000348091125,
247
+ "1": 0.9670000672340393,
248
+ "2": 0.9530000686645508,
249
+ "6": 0.987000048160553,
250
+ "9": 0.9760000705718994
251
+ },
252
+ "llm_top_1_test_accuracy": {
253
+ "0": 0.577,
254
+ "1": 0.613,
255
+ "2": 0.662,
256
+ "6": 0.787,
257
+ "9": 0.56
258
+ },
259
+ "llm_top_2_test_accuracy": {
260
+ "0": 0.574,
261
+ "1": 0.66,
262
+ "2": 0.718,
263
+ "6": 0.811,
264
+ "9": 0.714
265
+ },
266
+ "llm_top_5_test_accuracy": {
267
+ "0": 0.713,
268
+ "1": 0.711,
269
+ "2": 0.755,
270
+ "6": 0.895,
271
+ "9": 0.861
272
+ },
273
+ "sae_top_1_test_accuracy": {
274
+ "0": 0.666,
275
+ "1": 0.663,
276
+ "2": 0.869,
277
+ "6": 0.981,
278
+ "9": 0.929
279
+ },
280
+ "sae_top_2_test_accuracy": {
281
+ "0": 0.688,
282
+ "1": 0.734,
283
+ "2": 0.864,
284
+ "6": 0.982,
285
+ "9": 0.934
286
+ },
287
+ "sae_top_5_test_accuracy": {
288
+ "0": 0.746,
289
+ "1": 0.764,
290
+ "2": 0.883,
291
+ "6": 0.984,
292
+ "9": 0.945
293
+ }
294
+ },
295
+ "LabHC/bias_in_bios_class_set2_results": {
296
+ "sae_test_accuracy": {
297
+ "11": 0.9540000557899475,
298
+ "13": 0.9510000348091125,
299
+ "14": 0.9520000219345093,
300
+ "18": 0.9320000410079956,
301
+ "19": 0.9610000252723694
302
+ },
303
+ "llm_test_accuracy": {
304
+ "11": 0.9550000429153442,
305
+ "13": 0.9550000429153442,
306
+ "14": 0.9550000429153442,
307
+ "18": 0.9330000281333923,
308
+ "19": 0.9530000686645508
309
+ },
310
+ "llm_top_1_test_accuracy": {
311
+ "11": 0.557,
312
+ "13": 0.673,
313
+ "14": 0.645,
314
+ "18": 0.697,
315
+ "19": 0.787
316
+ },
317
+ "llm_top_2_test_accuracy": {
318
+ "11": 0.705,
319
+ "13": 0.718,
320
+ "14": 0.679,
321
+ "18": 0.73,
322
+ "19": 0.783
323
+ },
324
+ "llm_top_5_test_accuracy": {
325
+ "11": 0.794,
326
+ "13": 0.744,
327
+ "14": 0.724,
328
+ "18": 0.713,
329
+ "19": 0.833
330
+ },
331
+ "sae_top_1_test_accuracy": {
332
+ "11": 0.724,
333
+ "13": 0.731,
334
+ "14": 0.717,
335
+ "18": 0.719,
336
+ "19": 0.842
337
+ },
338
+ "sae_top_2_test_accuracy": {
339
+ "11": 0.74,
340
+ "13": 0.766,
341
+ "14": 0.73,
342
+ "18": 0.72,
343
+ "19": 0.846
344
+ },
345
+ "sae_top_5_test_accuracy": {
346
+ "11": 0.886,
347
+ "13": 0.761,
348
+ "14": 0.748,
349
+ "18": 0.74,
350
+ "19": 0.861
351
+ }
352
+ },
353
+ "LabHC/bias_in_bios_class_set3_results": {
354
+ "sae_test_accuracy": {
355
+ "20": 0.9540000557899475,
356
+ "21": 0.9300000667572021,
357
+ "22": 0.9180000424385071,
358
+ "25": 0.9600000381469727,
359
+ "26": 0.893000066280365
360
+ },
361
+ "llm_test_accuracy": {
362
+ "20": 0.9570000171661377,
363
+ "21": 0.9150000214576721,
364
+ "22": 0.9230000376701355,
365
+ "25": 0.9610000252723694,
366
+ "26": 0.89000004529953
367
+ },
368
+ "llm_top_1_test_accuracy": {
369
+ "20": 0.716,
370
+ "21": 0.761,
371
+ "22": 0.648,
372
+ "25": 0.692,
373
+ "26": 0.618
374
+ },
375
+ "llm_top_2_test_accuracy": {
376
+ "20": 0.805,
377
+ "21": 0.762,
378
+ "22": 0.649,
379
+ "25": 0.766,
380
+ "26": 0.671
381
+ },
382
+ "llm_top_5_test_accuracy": {
383
+ "20": 0.875,
384
+ "21": 0.783,
385
+ "22": 0.711,
386
+ "25": 0.782,
387
+ "26": 0.671
388
+ },
389
+ "sae_top_1_test_accuracy": {
390
+ "20": 0.749,
391
+ "21": 0.671,
392
+ "22": 0.591,
393
+ "25": 0.888,
394
+ "26": 0.698
395
+ },
396
+ "sae_top_2_test_accuracy": {
397
+ "20": 0.796,
398
+ "21": 0.753,
399
+ "22": 0.874,
400
+ "25": 0.895,
401
+ "26": 0.713
402
+ },
403
+ "sae_top_5_test_accuracy": {
404
+ "20": 0.865,
405
+ "21": 0.807,
406
+ "22": 0.877,
407
+ "25": 0.888,
408
+ "26": 0.762
409
+ }
410
+ },
411
+ "canrager/amazon_reviews_mcauley_1and5_results": {
412
+ "sae_test_accuracy": {
413
+ "1": 0.9550000429153442,
414
+ "2": 0.940000057220459,
415
+ "3": 0.9110000729560852,
416
+ "5": 0.9290000200271606,
417
+ "6": 0.8800000548362732
418
+ },
419
+ "llm_test_accuracy": {
420
+ "1": 0.9460000395774841,
421
+ "2": 0.9330000281333923,
422
+ "3": 0.9130000472068787,
423
+ "5": 0.9160000681877136,
424
+ "6": 0.8500000238418579
425
+ },
426
+ "llm_top_1_test_accuracy": {
427
+ "1": 0.674,
428
+ "2": 0.587,
429
+ "3": 0.601,
430
+ "5": 0.583,
431
+ "6": 0.593
432
+ },
433
+ "llm_top_2_test_accuracy": {
434
+ "1": 0.737,
435
+ "2": 0.632,
436
+ "3": 0.605,
437
+ "5": 0.634,
438
+ "6": 0.638
439
+ },
440
+ "llm_top_5_test_accuracy": {
441
+ "1": 0.763,
442
+ "2": 0.626,
443
+ "3": 0.63,
444
+ "5": 0.656,
445
+ "6": 0.689
446
+ },
447
+ "sae_top_1_test_accuracy": {
448
+ "1": 0.879,
449
+ "2": 0.794,
450
+ "3": 0.6,
451
+ "5": 0.819,
452
+ "6": 0.754
453
+ },
454
+ "sae_top_2_test_accuracy": {
455
+ "1": 0.867,
456
+ "2": 0.782,
457
+ "3": 0.611,
458
+ "5": 0.82,
459
+ "6": 0.759
460
+ },
461
+ "sae_top_5_test_accuracy": {
462
+ "1": 0.88,
463
+ "2": 0.812,
464
+ "3": 0.654,
465
+ "5": 0.829,
466
+ "6": 0.758
467
+ }
468
+ },
469
+ "canrager/amazon_reviews_mcauley_1and5_sentiment_results": {
470
+ "sae_test_accuracy": {
471
+ "1.0": 0.9750000238418579,
472
+ "5.0": 0.9740000367164612
473
+ },
474
+ "llm_test_accuracy": {
475
+ "1.0": 0.9820000529289246,
476
+ "5.0": 0.9800000190734863
477
+ },
478
+ "llm_top_1_test_accuracy": {
479
+ "1.0": 0.673,
480
+ "5.0": 0.673
481
+ },
482
+ "llm_top_2_test_accuracy": {
483
+ "1.0": 0.724,
484
+ "5.0": 0.724
485
+ },
486
+ "llm_top_5_test_accuracy": {
487
+ "1.0": 0.766,
488
+ "5.0": 0.766
489
+ },
490
+ "sae_top_1_test_accuracy": {
491
+ "1.0": 0.724,
492
+ "5.0": 0.724
493
+ },
494
+ "sae_top_2_test_accuracy": {
495
+ "1.0": 0.873,
496
+ "5.0": 0.873
497
+ },
498
+ "sae_top_5_test_accuracy": {
499
+ "1.0": 0.933,
500
+ "5.0": 0.933
501
+ }
502
+ },
503
+ "codeparrot/github-code_results": {
504
+ "sae_test_accuracy": {
505
+ "C": 0.9630000591278076,
506
+ "Python": 0.987000048160553,
507
+ "HTML": 0.987000048160553,
508
+ "Java": 0.9610000252723694,
509
+ "PHP": 0.9530000686645508
510
+ },
511
+ "llm_test_accuracy": {
512
+ "C": 0.9450000524520874,
513
+ "Python": 0.9890000224113464,
514
+ "HTML": 0.987000048160553,
515
+ "Java": 0.9620000720024109,
516
+ "PHP": 0.9530000686645508
517
+ },
518
+ "llm_top_1_test_accuracy": {
519
+ "C": 0.669,
520
+ "Python": 0.638,
521
+ "HTML": 0.788,
522
+ "Java": 0.621,
523
+ "PHP": 0.601
524
+ },
525
+ "llm_top_2_test_accuracy": {
526
+ "C": 0.656,
527
+ "Python": 0.671,
528
+ "HTML": 0.811,
529
+ "Java": 0.678,
530
+ "PHP": 0.631
531
+ },
532
+ "llm_top_5_test_accuracy": {
533
+ "C": 0.744,
534
+ "Python": 0.735,
535
+ "HTML": 0.904,
536
+ "Java": 0.726,
537
+ "PHP": 0.672
538
+ },
539
+ "sae_top_1_test_accuracy": {
540
+ "C": 0.641,
541
+ "Python": 0.642,
542
+ "HTML": 0.774,
543
+ "Java": 0.65,
544
+ "PHP": 0.649
545
+ },
546
+ "sae_top_2_test_accuracy": {
547
+ "C": 0.647,
548
+ "Python": 0.664,
549
+ "HTML": 0.813,
550
+ "Java": 0.649,
551
+ "PHP": 0.668
552
+ },
553
+ "sae_top_5_test_accuracy": {
554
+ "C": 0.718,
555
+ "Python": 0.673,
556
+ "HTML": 0.924,
557
+ "Java": 0.721,
558
+ "PHP": 0.926
559
+ }
560
+ },
561
+ "fancyzhx/ag_news_results": {
562
+ "sae_test_accuracy": {
563
+ "0": 0.9450000524520874,
564
+ "1": 0.984000027179718,
565
+ "2": 0.937000036239624,
566
+ "3": 0.9530000686645508
567
+ },
568
+ "llm_test_accuracy": {
569
+ "0": 0.937000036239624,
570
+ "1": 0.987000048160553,
571
+ "2": 0.9300000667572021,
572
+ "3": 0.9520000219345093
573
+ },
574
+ "llm_top_1_test_accuracy": {
575
+ "0": 0.586,
576
+ "1": 0.658,
577
+ "2": 0.669,
578
+ "3": 0.644
579
+ },
580
+ "llm_top_2_test_accuracy": {
581
+ "0": 0.809,
582
+ "1": 0.8,
583
+ "2": 0.691,
584
+ "3": 0.814
585
+ },
586
+ "llm_top_5_test_accuracy": {
587
+ "0": 0.827,
588
+ "1": 0.877,
589
+ "2": 0.748,
590
+ "3": 0.838
591
+ },
592
+ "sae_top_1_test_accuracy": {
593
+ "0": 0.719,
594
+ "1": 0.977,
595
+ "2": 0.845,
596
+ "3": 0.743
597
+ },
598
+ "sae_top_2_test_accuracy": {
599
+ "0": 0.83,
600
+ "1": 0.976,
601
+ "2": 0.85,
602
+ "3": 0.776
603
+ },
604
+ "sae_top_5_test_accuracy": {
605
+ "0": 0.865,
606
+ "1": 0.977,
607
+ "2": 0.86,
608
+ "3": 0.828
609
+ }
610
+ },
611
+ "Helsinki-NLP/europarl_results": {
612
+ "sae_test_accuracy": {
613
+ "en": 0.999000072479248,
614
+ "fr": 1.0,
615
+ "de": 1.0,
616
+ "es": 0.999000072479248,
617
+ "nl": 0.9980000257492065
618
+ },
619
+ "llm_test_accuracy": {
620
+ "en": 1.0,
621
+ "fr": 1.0,
622
+ "de": 1.0,
623
+ "es": 0.9980000257492065,
624
+ "nl": 0.999000072479248
625
+ },
626
+ "llm_top_1_test_accuracy": {
627
+ "en": 0.742,
628
+ "fr": 0.575,
629
+ "de": 0.737,
630
+ "es": 0.504,
631
+ "nl": 0.643
632
+ },
633
+ "llm_top_2_test_accuracy": {
634
+ "en": 0.822,
635
+ "fr": 0.593,
636
+ "de": 0.83,
637
+ "es": 0.905,
638
+ "nl": 0.743
639
+ },
640
+ "llm_top_5_test_accuracy": {
641
+ "en": 0.898,
642
+ "fr": 0.908,
643
+ "de": 0.908,
644
+ "es": 0.982,
645
+ "nl": 0.856
646
+ },
647
+ "sae_top_1_test_accuracy": {
648
+ "en": 1.0,
649
+ "fr": 0.993,
650
+ "de": 0.893,
651
+ "es": 0.883,
652
+ "nl": 0.835
653
+ },
654
+ "sae_top_2_test_accuracy": {
655
+ "en": 1.0,
656
+ "fr": 0.994,
657
+ "de": 0.891,
658
+ "es": 0.95,
659
+ "nl": 0.839
660
+ },
661
+ "sae_top_5_test_accuracy": {
662
+ "en": 0.999,
663
+ "fr": 0.994,
664
+ "de": 0.918,
665
+ "es": 0.998,
666
+ "nl": 0.996
667
+ }
668
+ }
669
+ }
670
+ }
eval_results_finetunes/tpp/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_1_custom_sae_eval_results.json ADDED
@@ -0,0 +1,414 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "tpp",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "canrager/amazon_reviews_mcauley_1and5"
8
+ ],
9
+ "perform_scr": false,
10
+ "early_stopping_patience": 20,
11
+ "train_set_size": 4000,
12
+ "test_set_size": 1000,
13
+ "context_length": 128,
14
+ "probe_train_batch_size": 16,
15
+ "probe_test_batch_size": 500,
16
+ "probe_epochs": 20,
17
+ "probe_lr": 0.001,
18
+ "probe_l1_penalty": 0.001,
19
+ "sae_batch_size": 125,
20
+ "llm_batch_size": 32,
21
+ "llm_dtype": "bfloat16",
22
+ "lower_vram_usage": false,
23
+ "model_name": "gemma-2-2b",
24
+ "n_values": [
25
+ 2,
26
+ 5,
27
+ 10,
28
+ 20,
29
+ 50,
30
+ 100,
31
+ 500
32
+ ],
33
+ "column1_vals_lookup": {
34
+ "LabHC/bias_in_bios_class_set1": [
35
+ [
36
+ "professor",
37
+ "nurse"
38
+ ],
39
+ [
40
+ "architect",
41
+ "journalist"
42
+ ],
43
+ [
44
+ "surgeon",
45
+ "psychologist"
46
+ ],
47
+ [
48
+ "attorney",
49
+ "teacher"
50
+ ]
51
+ ],
52
+ "canrager/amazon_reviews_mcauley_1and5": [
53
+ [
54
+ "Books",
55
+ "CDs_and_Vinyl"
56
+ ],
57
+ [
58
+ "Software",
59
+ "Electronics"
60
+ ],
61
+ [
62
+ "Pet_Supplies",
63
+ "Office_Products"
64
+ ],
65
+ [
66
+ "Industrial_and_Scientific",
67
+ "Toys_and_Games"
68
+ ]
69
+ ]
70
+ }
71
+ },
72
+ "eval_id": "05e5fb66-63ad-4c4d-9bee-8de054f9167b",
73
+ "datetime_epoch_millis": 1740084871219,
74
+ "eval_result_metrics": {
75
+ "tpp_metrics": {
76
+ "tpp_threshold_2_total_metric": 0.007200005650520324,
77
+ "tpp_threshold_2_intended_diff_only": 0.010000008344650268,
78
+ "tpp_threshold_2_unintended_diff_only": 0.002800002694129944,
79
+ "tpp_threshold_5_total_metric": 0.009374988079071046,
80
+ "tpp_threshold_5_intended_diff_only": 0.012799990177154542,
81
+ "tpp_threshold_5_unintended_diff_only": 0.003425002098083496,
82
+ "tpp_threshold_10_total_metric": 0.021299999952316285,
83
+ "tpp_threshold_10_intended_diff_only": 0.025300002098083495,
84
+ "tpp_threshold_10_unintended_diff_only": 0.004000002145767212,
85
+ "tpp_threshold_20_total_metric": 0.034925007820129396,
86
+ "tpp_threshold_20_intended_diff_only": 0.039900004863739014,
87
+ "tpp_threshold_20_unintended_diff_only": 0.0049749970436096195,
88
+ "tpp_threshold_50_total_metric": 0.06687500774860382,
89
+ "tpp_threshold_50_intended_diff_only": 0.07270000576972963,
90
+ "tpp_threshold_50_unintended_diff_only": 0.0058249980211257935,
91
+ "tpp_threshold_100_total_metric": 0.12227500528097152,
92
+ "tpp_threshold_100_intended_diff_only": 0.13190000653266906,
93
+ "tpp_threshold_100_unintended_diff_only": 0.00962500125169754,
94
+ "tpp_threshold_500_total_metric": 0.30722502022981646,
95
+ "tpp_threshold_500_intended_diff_only": 0.3229000210762024,
96
+ "tpp_threshold_500_unintended_diff_only": 0.015675000846385956
97
+ }
98
+ },
99
+ "eval_result_details": [
100
+ {
101
+ "dataset_name": "LabHC/bias_in_bios_class_set1_tpp_results",
102
+ "tpp_threshold_2_total_metric": 0.009200009703636169,
103
+ "tpp_threshold_2_intended_diff_only": 0.011600017547607422,
104
+ "tpp_threshold_2_unintended_diff_only": 0.0024000078439712523,
105
+ "tpp_threshold_5_total_metric": 0.013899984955787658,
106
+ "tpp_threshold_5_intended_diff_only": 0.017199993133544922,
107
+ "tpp_threshold_5_unintended_diff_only": 0.003300008177757263,
108
+ "tpp_threshold_10_total_metric": 0.027149999141693117,
109
+ "tpp_threshold_10_intended_diff_only": 0.029800009727478028,
110
+ "tpp_threshold_10_unintended_diff_only": 0.0026500105857849123,
111
+ "tpp_threshold_20_total_metric": 0.04479999840259552,
112
+ "tpp_threshold_20_intended_diff_only": 0.04860000610351563,
113
+ "tpp_threshold_20_unintended_diff_only": 0.003800007700920105,
114
+ "tpp_threshold_50_total_metric": 0.08785000145435333,
115
+ "tpp_threshold_50_intended_diff_only": 0.09180001020431519,
116
+ "tpp_threshold_50_unintended_diff_only": 0.003950008749961853,
117
+ "tpp_threshold_100_total_metric": 0.15520000755786895,
118
+ "tpp_threshold_100_intended_diff_only": 0.16040002107620238,
119
+ "tpp_threshold_100_unintended_diff_only": 0.005200013518333435,
120
+ "tpp_threshold_500_total_metric": 0.38140002489089964,
121
+ "tpp_threshold_500_intended_diff_only": 0.39200003147125245,
122
+ "tpp_threshold_500_unintended_diff_only": 0.010600006580352784
123
+ },
124
+ {
125
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_tpp_results",
126
+ "tpp_threshold_2_total_metric": 0.00520000159740448,
127
+ "tpp_threshold_2_intended_diff_only": 0.008399999141693116,
128
+ "tpp_threshold_2_unintended_diff_only": 0.0031999975442886354,
129
+ "tpp_threshold_5_total_metric": 0.0048499912023544315,
130
+ "tpp_threshold_5_intended_diff_only": 0.00839998722076416,
131
+ "tpp_threshold_5_unintended_diff_only": 0.003549996018409729,
132
+ "tpp_threshold_10_total_metric": 0.015450000762939453,
133
+ "tpp_threshold_10_intended_diff_only": 0.020799994468688965,
134
+ "tpp_threshold_10_unintended_diff_only": 0.005349993705749512,
135
+ "tpp_threshold_20_total_metric": 0.02505001723766327,
136
+ "tpp_threshold_20_intended_diff_only": 0.031200003623962403,
137
+ "tpp_threshold_20_unintended_diff_only": 0.006149986386299133,
138
+ "tpp_threshold_50_total_metric": 0.045900014042854306,
139
+ "tpp_threshold_50_intended_diff_only": 0.053600001335144046,
140
+ "tpp_threshold_50_unintended_diff_only": 0.007699987292289734,
141
+ "tpp_threshold_100_total_metric": 0.0893500030040741,
142
+ "tpp_threshold_100_intended_diff_only": 0.10339999198913574,
143
+ "tpp_threshold_100_unintended_diff_only": 0.014049988985061646,
144
+ "tpp_threshold_500_total_metric": 0.23305001556873323,
145
+ "tpp_threshold_500_intended_diff_only": 0.2538000106811523,
146
+ "tpp_threshold_500_unintended_diff_only": 0.02074999511241913
147
+ }
148
+ ],
149
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
150
+ "sae_lens_id": "custom_sae",
151
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_1",
152
+ "sae_lens_version": "5.4.2",
153
+ "sae_cfg_dict": {
154
+ "model_name": "gemma-2-2b",
155
+ "d_in": 2304,
156
+ "d_sae": 16384,
157
+ "hook_layer": 12,
158
+ "hook_name": "blocks.12.hook_resid_post",
159
+ "context_size": null,
160
+ "hook_head_index": null,
161
+ "architecture": "topk",
162
+ "apply_b_dec_to_input": null,
163
+ "finetuning_scaling_factor": null,
164
+ "activation_fn_str": "",
165
+ "prepend_bos": true,
166
+ "normalize_activations": "none",
167
+ "dtype": "bfloat16",
168
+ "device": "",
169
+ "dataset_path": "",
170
+ "dataset_trust_remote_code": true,
171
+ "seqpos_slice": [
172
+ null
173
+ ],
174
+ "training_tokens": -100000,
175
+ "sae_lens_training_version": null,
176
+ "neuronpedia_id": null
177
+ },
178
+ "eval_result_unstructured": {
179
+ "LabHC/bias_in_bios_class_set1": {
180
+ "0": {
181
+ "tpp_threshold_2_total_metric": 0.01175004243850708,
182
+ "tpp_threshold_2_intended_diff_only": 0.01500004529953003,
183
+ "tpp_threshold_2_unintended_diff_only": 0.0032500028610229492,
184
+ "tpp_threshold_5_total_metric": 0.01449999213218689,
185
+ "tpp_threshold_5_intended_diff_only": 0.018999993801116943,
186
+ "tpp_threshold_5_unintended_diff_only": 0.004500001668930054,
187
+ "tpp_threshold_10_total_metric": 0.017000004649162292,
188
+ "tpp_threshold_10_intended_diff_only": 0.021000027656555176,
189
+ "tpp_threshold_10_unintended_diff_only": 0.004000023007392883,
190
+ "tpp_threshold_20_total_metric": 0.030750036239624023,
191
+ "tpp_threshold_20_intended_diff_only": 0.03400003910064697,
192
+ "tpp_threshold_20_unintended_diff_only": 0.0032500028610229492,
193
+ "tpp_threshold_50_total_metric": 0.058250024914741516,
194
+ "tpp_threshold_50_intended_diff_only": 0.06200003623962402,
195
+ "tpp_threshold_50_unintended_diff_only": 0.0037500113248825073,
196
+ "tpp_threshold_100_total_metric": 0.14350003004074097,
197
+ "tpp_threshold_100_intended_diff_only": 0.14900004863739014,
198
+ "tpp_threshold_100_unintended_diff_only": 0.00550001859664917,
199
+ "tpp_threshold_500_total_metric": 0.4230000227689743,
200
+ "tpp_threshold_500_intended_diff_only": 0.42900002002716064,
201
+ "tpp_threshold_500_unintended_diff_only": 0.00599999725818634
202
+ },
203
+ "1": {
204
+ "tpp_threshold_2_total_metric": 0.007750019431114197,
205
+ "tpp_threshold_2_intended_diff_only": 0.008000016212463379,
206
+ "tpp_threshold_2_unintended_diff_only": 0.00024999678134918213,
207
+ "tpp_threshold_5_total_metric": 0.009750008583068848,
208
+ "tpp_threshold_5_intended_diff_only": 0.012000024318695068,
209
+ "tpp_threshold_5_unintended_diff_only": 0.0022500157356262207,
210
+ "tpp_threshold_10_total_metric": 0.013250008225440979,
211
+ "tpp_threshold_10_intended_diff_only": 0.013000011444091797,
212
+ "tpp_threshold_10_unintended_diff_only": -0.00024999678134918213,
213
+ "tpp_threshold_20_total_metric": 0.009000003337860107,
214
+ "tpp_threshold_20_intended_diff_only": 0.013000011444091797,
215
+ "tpp_threshold_20_unintended_diff_only": 0.0040000081062316895,
216
+ "tpp_threshold_50_total_metric": 0.04599998891353607,
217
+ "tpp_threshold_50_intended_diff_only": 0.050999999046325684,
218
+ "tpp_threshold_50_unintended_diff_only": 0.005000010132789612,
219
+ "tpp_threshold_100_total_metric": 0.09175002574920654,
220
+ "tpp_threshold_100_intended_diff_only": 0.09900003671646118,
221
+ "tpp_threshold_100_unintended_diff_only": 0.007250010967254639,
222
+ "tpp_threshold_500_total_metric": 0.2900000661611557,
223
+ "tpp_threshold_500_intended_diff_only": 0.29600006341934204,
224
+ "tpp_threshold_500_unintended_diff_only": 0.00599999725818634
225
+ },
226
+ "2": {
227
+ "tpp_threshold_2_total_metric": 0.014249995350837708,
228
+ "tpp_threshold_2_intended_diff_only": 0.018000006675720215,
229
+ "tpp_threshold_2_unintended_diff_only": 0.0037500113248825073,
230
+ "tpp_threshold_5_total_metric": 0.022499993443489075,
231
+ "tpp_threshold_5_intended_diff_only": 0.02799999713897705,
232
+ "tpp_threshold_5_unintended_diff_only": 0.005500003695487976,
233
+ "tpp_threshold_10_total_metric": 0.057249993085861206,
234
+ "tpp_threshold_10_intended_diff_only": 0.06000000238418579,
235
+ "tpp_threshold_10_unintended_diff_only": 0.002750009298324585,
236
+ "tpp_threshold_20_total_metric": 0.08199998736381531,
237
+ "tpp_threshold_20_intended_diff_only": 0.08499997854232788,
238
+ "tpp_threshold_20_unintended_diff_only": 0.0029999911785125732,
239
+ "tpp_threshold_50_total_metric": 0.12174999713897705,
240
+ "tpp_threshold_50_intended_diff_only": 0.12400001287460327,
241
+ "tpp_threshold_50_unintended_diff_only": 0.0022500157356262207,
242
+ "tpp_threshold_100_total_metric": 0.19025000929832458,
243
+ "tpp_threshold_100_intended_diff_only": 0.19300001859664917,
244
+ "tpp_threshold_100_unintended_diff_only": 0.002750009298324585,
245
+ "tpp_threshold_500_total_metric": 0.40925002098083496,
246
+ "tpp_threshold_500_intended_diff_only": 0.4150000214576721,
247
+ "tpp_threshold_500_unintended_diff_only": 0.005750000476837158
248
+ },
249
+ "6": {
250
+ "tpp_threshold_2_total_metric": 0.0015000104904174805,
251
+ "tpp_threshold_2_intended_diff_only": 0.003000020980834961,
252
+ "tpp_threshold_2_unintended_diff_only": 0.0015000104904174805,
253
+ "tpp_threshold_5_total_metric": 0.002249985933303833,
254
+ "tpp_threshold_5_intended_diff_only": 0.0009999871253967285,
255
+ "tpp_threshold_5_unintended_diff_only": -0.0012499988079071045,
256
+ "tpp_threshold_10_total_metric": 0.002499997615814209,
257
+ "tpp_threshold_10_intended_diff_only": 0.0040000081062316895,
258
+ "tpp_threshold_10_unintended_diff_only": 0.0015000104904174805,
259
+ "tpp_threshold_20_total_metric": 0.0037499964237213135,
260
+ "tpp_threshold_20_intended_diff_only": 0.0040000081062316895,
261
+ "tpp_threshold_20_unintended_diff_only": 0.000250011682510376,
262
+ "tpp_threshold_50_total_metric": 0.021250009536743164,
263
+ "tpp_threshold_50_intended_diff_only": 0.023000001907348633,
264
+ "tpp_threshold_50_unintended_diff_only": 0.0017499923706054688,
265
+ "tpp_threshold_100_total_metric": 0.04800000786781311,
266
+ "tpp_threshold_100_intended_diff_only": 0.04900002479553223,
267
+ "tpp_threshold_100_unintended_diff_only": 0.0010000169277191162,
268
+ "tpp_threshold_500_total_metric": 0.3185000419616699,
269
+ "tpp_threshold_500_intended_diff_only": 0.34300005435943604,
270
+ "tpp_threshold_500_unintended_diff_only": 0.024500012397766113
271
+ },
272
+ "9": {
273
+ "tpp_threshold_2_total_metric": 0.010749980807304382,
274
+ "tpp_threshold_2_intended_diff_only": 0.013999998569488525,
275
+ "tpp_threshold_2_unintended_diff_only": 0.003250017762184143,
276
+ "tpp_threshold_5_total_metric": 0.02049994468688965,
277
+ "tpp_threshold_5_intended_diff_only": 0.02599996328353882,
278
+ "tpp_threshold_5_unintended_diff_only": 0.00550001859664917,
279
+ "tpp_threshold_10_total_metric": 0.04574999213218689,
280
+ "tpp_threshold_10_intended_diff_only": 0.050999999046325684,
281
+ "tpp_threshold_10_unintended_diff_only": 0.005250006914138794,
282
+ "tpp_threshold_20_total_metric": 0.09849996864795685,
283
+ "tpp_threshold_20_intended_diff_only": 0.10699999332427979,
284
+ "tpp_threshold_20_unintended_diff_only": 0.008500024676322937,
285
+ "tpp_threshold_50_total_metric": 0.19199998676776886,
286
+ "tpp_threshold_50_intended_diff_only": 0.19900000095367432,
287
+ "tpp_threshold_50_unintended_diff_only": 0.0070000141859054565,
288
+ "tpp_threshold_100_total_metric": 0.3024999648332596,
289
+ "tpp_threshold_100_intended_diff_only": 0.31199997663497925,
290
+ "tpp_threshold_100_unintended_diff_only": 0.009500011801719666,
291
+ "tpp_threshold_500_total_metric": 0.4662499725818634,
292
+ "tpp_threshold_500_intended_diff_only": 0.47699999809265137,
293
+ "tpp_threshold_500_unintended_diff_only": 0.010750025510787964
294
+ }
295
+ },
296
+ "canrager/amazon_reviews_mcauley_1and5": {
297
+ "1": {
298
+ "tpp_threshold_2_total_metric": 0.004250004887580872,
299
+ "tpp_threshold_2_intended_diff_only": 0.009000003337860107,
300
+ "tpp_threshold_2_unintended_diff_only": 0.004749998450279236,
301
+ "tpp_threshold_5_total_metric": 0.0034999698400497437,
302
+ "tpp_threshold_5_intended_diff_only": 0.006999969482421875,
303
+ "tpp_threshold_5_unintended_diff_only": 0.0034999996423721313,
304
+ "tpp_threshold_10_total_metric": 0.0007500052452087402,
305
+ "tpp_threshold_10_intended_diff_only": 0.009000003337860107,
306
+ "tpp_threshold_10_unintended_diff_only": 0.008249998092651367,
307
+ "tpp_threshold_20_total_metric": 0.0037500113248825073,
308
+ "tpp_threshold_20_intended_diff_only": 0.009000003337860107,
309
+ "tpp_threshold_20_unintended_diff_only": 0.0052499920129776,
310
+ "tpp_threshold_50_total_metric": 0.00849999487400055,
311
+ "tpp_threshold_50_intended_diff_only": 0.010999977588653564,
312
+ "tpp_threshold_50_unintended_diff_only": 0.002499982714653015,
313
+ "tpp_threshold_100_total_metric": 0.01499997079372406,
314
+ "tpp_threshold_100_intended_diff_only": 0.02199995517730713,
315
+ "tpp_threshold_100_unintended_diff_only": 0.006999984383583069,
316
+ "tpp_threshold_500_total_metric": 0.10349996387958527,
317
+ "tpp_threshold_500_intended_diff_only": 0.11399996280670166,
318
+ "tpp_threshold_500_unintended_diff_only": 0.010499998927116394
319
+ },
320
+ "2": {
321
+ "tpp_threshold_2_total_metric": 0.0037500113248825073,
322
+ "tpp_threshold_2_intended_diff_only": 0.0040000081062316895,
323
+ "tpp_threshold_2_unintended_diff_only": 0.00024999678134918213,
324
+ "tpp_threshold_5_total_metric": -0.006500020623207092,
325
+ "tpp_threshold_5_intended_diff_only": 0.001999974250793457,
326
+ "tpp_threshold_5_unintended_diff_only": 0.00849999487400055,
327
+ "tpp_threshold_10_total_metric": 0.012249961495399475,
328
+ "tpp_threshold_10_intended_diff_only": 0.01699995994567871,
329
+ "tpp_threshold_10_unintended_diff_only": 0.004749998450279236,
330
+ "tpp_threshold_20_total_metric": 0.013500005006790161,
331
+ "tpp_threshold_20_intended_diff_only": 0.018999993801116943,
332
+ "tpp_threshold_20_unintended_diff_only": 0.005499988794326782,
333
+ "tpp_threshold_50_total_metric": 0.03350001573562622,
334
+ "tpp_threshold_50_intended_diff_only": 0.046000003814697266,
335
+ "tpp_threshold_50_unintended_diff_only": 0.012499988079071045,
336
+ "tpp_threshold_100_total_metric": 0.07224997878074646,
337
+ "tpp_threshold_100_intended_diff_only": 0.0899999737739563,
338
+ "tpp_threshold_100_unintended_diff_only": 0.01774999499320984,
339
+ "tpp_threshold_500_total_metric": 0.23975004255771637,
340
+ "tpp_threshold_500_intended_diff_only": 0.26200002431869507,
341
+ "tpp_threshold_500_unintended_diff_only": 0.0222499817609787
342
+ },
343
+ "3": {
344
+ "tpp_threshold_2_total_metric": -0.0037499815225601196,
345
+ "tpp_threshold_2_intended_diff_only": -0.0009999871253967285,
346
+ "tpp_threshold_2_unintended_diff_only": 0.002749994397163391,
347
+ "tpp_threshold_5_total_metric": 0.0024999678134918213,
348
+ "tpp_threshold_5_intended_diff_only": 0.001999974250793457,
349
+ "tpp_threshold_5_unintended_diff_only": -0.0004999935626983643,
350
+ "tpp_threshold_10_total_metric": 0.01600000262260437,
351
+ "tpp_threshold_10_intended_diff_only": 0.018999993801116943,
352
+ "tpp_threshold_10_unintended_diff_only": 0.0029999911785125732,
353
+ "tpp_threshold_20_total_metric": 0.009499996900558472,
354
+ "tpp_threshold_20_intended_diff_only": 0.014999985694885254,
355
+ "tpp_threshold_20_unintended_diff_only": 0.005499988794326782,
356
+ "tpp_threshold_50_total_metric": 0.028249993920326233,
357
+ "tpp_threshold_50_intended_diff_only": 0.03299999237060547,
358
+ "tpp_threshold_50_unintended_diff_only": 0.004749998450279236,
359
+ "tpp_threshold_100_total_metric": 0.0625,
360
+ "tpp_threshold_100_intended_diff_only": 0.07499998807907104,
361
+ "tpp_threshold_100_unintended_diff_only": 0.012499988079071045,
362
+ "tpp_threshold_500_total_metric": 0.21675001084804535,
363
+ "tpp_threshold_500_intended_diff_only": 0.23900002241134644,
364
+ "tpp_threshold_500_unintended_diff_only": 0.022250011563301086
365
+ },
366
+ "5": {
367
+ "tpp_threshold_2_total_metric": 0.0017500072717666626,
368
+ "tpp_threshold_2_intended_diff_only": 0.004999995231628418,
369
+ "tpp_threshold_2_unintended_diff_only": 0.0032499879598617554,
370
+ "tpp_threshold_5_total_metric": 0.0022500306367874146,
371
+ "tpp_threshold_5_intended_diff_only": 0.008000016212463379,
372
+ "tpp_threshold_5_unintended_diff_only": 0.005749985575675964,
373
+ "tpp_threshold_10_total_metric": 0.006000027060508728,
374
+ "tpp_threshold_10_intended_diff_only": 0.013000011444091797,
375
+ "tpp_threshold_10_unintended_diff_only": 0.006999984383583069,
376
+ "tpp_threshold_20_total_metric": 0.031250059604644775,
377
+ "tpp_threshold_20_intended_diff_only": 0.03900003433227539,
378
+ "tpp_threshold_20_unintended_diff_only": 0.007749974727630615,
379
+ "tpp_threshold_50_total_metric": 0.059250056743621826,
380
+ "tpp_threshold_50_intended_diff_only": 0.06700003147125244,
381
+ "tpp_threshold_50_unintended_diff_only": 0.007749974727630615,
382
+ "tpp_threshold_100_total_metric": 0.11825007200241089,
383
+ "tpp_threshold_100_intended_diff_only": 0.1390000581741333,
384
+ "tpp_threshold_100_unintended_diff_only": 0.020749986171722412,
385
+ "tpp_threshold_500_total_metric": 0.2575000822544098,
386
+ "tpp_threshold_500_intended_diff_only": 0.28700006008148193,
387
+ "tpp_threshold_500_unintended_diff_only": 0.029499977827072144
388
+ },
389
+ "6": {
390
+ "tpp_threshold_2_total_metric": 0.019999966025352478,
391
+ "tpp_threshold_2_intended_diff_only": 0.02499997615814209,
392
+ "tpp_threshold_2_unintended_diff_only": 0.005000010132789612,
393
+ "tpp_threshold_5_total_metric": 0.02250000834465027,
394
+ "tpp_threshold_5_intended_diff_only": 0.023000001907348633,
395
+ "tpp_threshold_5_unintended_diff_only": 0.0004999935626983643,
396
+ "tpp_threshold_10_total_metric": 0.04225000739097595,
397
+ "tpp_threshold_10_intended_diff_only": 0.046000003814697266,
398
+ "tpp_threshold_10_unintended_diff_only": 0.0037499964237213135,
399
+ "tpp_threshold_20_total_metric": 0.06725001335144043,
400
+ "tpp_threshold_20_intended_diff_only": 0.07400000095367432,
401
+ "tpp_threshold_20_unintended_diff_only": 0.006749987602233887,
402
+ "tpp_threshold_50_total_metric": 0.10000000894069672,
403
+ "tpp_threshold_50_intended_diff_only": 0.11100000143051147,
404
+ "tpp_threshold_50_unintended_diff_only": 0.010999992489814758,
405
+ "tpp_threshold_100_total_metric": 0.17874999344348907,
406
+ "tpp_threshold_100_intended_diff_only": 0.19099998474121094,
407
+ "tpp_threshold_100_unintended_diff_only": 0.012249991297721863,
408
+ "tpp_threshold_500_total_metric": 0.3477499783039093,
409
+ "tpp_threshold_500_intended_diff_only": 0.3669999837875366,
410
+ "tpp_threshold_500_unintended_diff_only": 0.01925000548362732
411
+ }
412
+ }
413
+ }
414
+ }