adamkarvonen commited on
Commit
14ef0de
·
verified ·
1 Parent(s): 32ebece

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +36 -0
  2. eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_0_custom_sae_eval_results.json +268 -0
  3. eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_1_custom_sae_eval_results.json +268 -0
  4. eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_2_custom_sae_eval_results.json +268 -0
  5. eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_3_custom_sae_eval_results.json +268 -0
  6. eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_4_custom_sae_eval_results.json +268 -0
  7. eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_5_custom_sae_eval_results.json +268 -0
  8. eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_0_custom_sae_eval_results.json +268 -0
  9. eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_1_custom_sae_eval_results.json +268 -0
  10. eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_2_custom_sae_eval_results.json +268 -0
  11. eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_3_custom_sae_eval_results.json +268 -0
  12. eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_4_custom_sae_eval_results.json +268 -0
  13. eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_5_custom_sae_eval_results.json +268 -0
  14. eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_0_custom_sae_eval_results.json +268 -0
  15. eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_1_custom_sae_eval_results.json +268 -0
  16. eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_2_custom_sae_eval_results.json +268 -0
  17. eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_3_custom_sae_eval_results.json +268 -0
  18. eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_4_custom_sae_eval_results.json +268 -0
  19. eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_5_custom_sae_eval_results.json +268 -0
  20. eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_0_custom_sae_eval_results.json +268 -0
  21. eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_1_custom_sae_eval_results.json +268 -0
  22. eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_2_custom_sae_eval_results.json +268 -0
  23. eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_3_custom_sae_eval_results.json +268 -0
  24. eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_4_custom_sae_eval_results.json +268 -0
  25. eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_5_custom_sae_eval_results.json +268 -0
  26. eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_0_custom_sae_eval_results.json +3 -0
  27. eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_1_custom_sae_eval_results.json +3 -0
  28. eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_2_custom_sae_eval_results.json +3 -0
  29. eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_3_custom_sae_eval_results.json +3 -0
  30. eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_4_custom_sae_eval_results.json +3 -0
  31. eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_5_custom_sae_eval_results.json +3 -0
  32. eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_0_custom_sae_eval_results.json +3 -0
  33. eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_1_custom_sae_eval_results.json +3 -0
  34. eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_2_custom_sae_eval_results.json +3 -0
  35. eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_3_custom_sae_eval_results.json +3 -0
  36. eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_4_custom_sae_eval_results.json +3 -0
  37. eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_5_custom_sae_eval_results.json +3 -0
  38. eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_0_custom_sae_eval_results.json +3 -0
  39. eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_1_custom_sae_eval_results.json +3 -0
  40. eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_2_custom_sae_eval_results.json +3 -0
  41. eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_3_custom_sae_eval_results.json +3 -0
  42. eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_4_custom_sae_eval_results.json +3 -0
  43. eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_5_custom_sae_eval_results.json +3 -0
  44. eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_0_custom_sae_eval_results.json +3 -0
  45. eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_1_custom_sae_eval_results.json +3 -0
  46. eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_2_custom_sae_eval_results.json +3 -0
  47. eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_3_custom_sae_eval_results.json +3 -0
  48. eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_4_custom_sae_eval_results.json +3 -0
  49. eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_5_custom_sae_eval_results.json +3 -0
  50. eval_results_finetunes/core/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_0_custom_sae_eval_results.json +0 -0
.gitattributes CHANGED
@@ -33,3 +33,39 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_4_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
37
+ eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_1_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
38
+ eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_5_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
39
+ eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_5_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
40
+ eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_0_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
41
+ eval_results_finetunes/core/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_3_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
42
+ eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_4_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
43
+ eval_results_finetunes/core/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_1_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
44
+ eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_3_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
45
+ eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_1_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
46
+ eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_4_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
47
+ eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_2_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
48
+ eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_2_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
49
+ eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_5_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
50
+ eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_1_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
51
+ eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_0_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
52
+ eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_0_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
53
+ eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_2_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
54
+ eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_2_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
55
+ eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_3_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
56
+ eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_1_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
57
+ eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_0_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
58
+ eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_5_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
59
+ eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_4_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
60
+ eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_3_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
61
+ eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_3_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
62
+ eval_results_finetunes/core/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_4_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
63
+ eval_results_finetunes/core/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_3_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
64
+ eval_results_finetunes/core/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_0_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
65
+ eval_results_finetunes/core/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_2_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
66
+ eval_results_finetunes/core/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_5_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
67
+ eval_results_finetunes/core/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_0_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
68
+ eval_results_finetunes/core/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_5_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
69
+ eval_results_finetunes/core/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_1_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
70
+ eval_results_finetunes/core/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_2_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
71
+ eval_results_finetunes/core/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_4_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_0_custom_sae_eval_results.json ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "absorption_first_letter",
3
+ "eval_config": {
4
+ "model_name": "gemma-2-2b",
5
+ "random_seed": 42,
6
+ "f1_jump_threshold": 0.03,
7
+ "max_k_value": 10,
8
+ "prompt_template": "{word} has the first letter:",
9
+ "prompt_token_pos": -6,
10
+ "llm_batch_size": 32,
11
+ "llm_dtype": "bfloat16",
12
+ "k_sparse_probe_l1_decay": 0.01,
13
+ "k_sparse_probe_batch_size": 4096,
14
+ "k_sparse_probe_num_epochs": 50
15
+ },
16
+ "eval_id": "26cf7785-b764-4b2f-9cd4-b388819364f4",
17
+ "datetime_epoch_millis": 1740072497386,
18
+ "eval_result_metrics": {
19
+ "mean": {
20
+ "mean_absorption_fraction_score": 0.08756063184284377,
21
+ "mean_full_absorption_score": 0.0026708784824093696,
22
+ "mean_num_split_features": 1.1153846153846154,
23
+ "std_dev_absorption_fraction_score": 0.10310418368085361,
24
+ "std_dev_full_absorption_score": 0.004668474177881613,
25
+ "std_dev_num_split_features": 0.3258125936084211
26
+ }
27
+ },
28
+ "eval_result_details": [
29
+ {
30
+ "first_letter": "a",
31
+ "mean_absorption_fraction": 0.08140821007145073,
32
+ "full_absorption_rate": 0.0,
33
+ "num_full_absorption": 0,
34
+ "num_probe_true_positives": 2508,
35
+ "num_split_features": 1
36
+ },
37
+ {
38
+ "first_letter": "b",
39
+ "mean_absorption_fraction": 0.01681192611231011,
40
+ "full_absorption_rate": 0.0006485084306095979,
41
+ "num_full_absorption": 1,
42
+ "num_probe_true_positives": 1542,
43
+ "num_split_features": 1
44
+ },
45
+ {
46
+ "first_letter": "c",
47
+ "mean_absorption_fraction": 0.35926263607439873,
48
+ "full_absorption_rate": 0.012121212121212121,
49
+ "num_full_absorption": 34,
50
+ "num_probe_true_positives": 2805,
51
+ "num_split_features": 1
52
+ },
53
+ {
54
+ "first_letter": "d",
55
+ "mean_absorption_fraction": 0.028519339166013014,
56
+ "full_absorption_rate": 0.0006024096385542169,
57
+ "num_full_absorption": 1,
58
+ "num_probe_true_positives": 1660,
59
+ "num_split_features": 2
60
+ },
61
+ {
62
+ "first_letter": "e",
63
+ "mean_absorption_fraction": 0.08212543463683289,
64
+ "full_absorption_rate": 0.0024752475247524753,
65
+ "num_full_absorption": 4,
66
+ "num_probe_true_positives": 1616,
67
+ "num_split_features": 2
68
+ },
69
+ {
70
+ "first_letter": "f",
71
+ "mean_absorption_fraction": 0.015080013168122549,
72
+ "full_absorption_rate": 0.0,
73
+ "num_full_absorption": 0,
74
+ "num_probe_true_positives": 1238,
75
+ "num_split_features": 1
76
+ },
77
+ {
78
+ "first_letter": "g",
79
+ "mean_absorption_fraction": 0.030999187614493637,
80
+ "full_absorption_rate": 0.0,
81
+ "num_full_absorption": 0,
82
+ "num_probe_true_positives": 1145,
83
+ "num_split_features": 2
84
+ },
85
+ {
86
+ "first_letter": "h",
87
+ "mean_absorption_fraction": 0.011943369017875526,
88
+ "full_absorption_rate": 0.000966183574879227,
89
+ "num_full_absorption": 1,
90
+ "num_probe_true_positives": 1035,
91
+ "num_split_features": 1
92
+ },
93
+ {
94
+ "first_letter": "i",
95
+ "mean_absorption_fraction": 0.1279149210080362,
96
+ "full_absorption_rate": 0.0,
97
+ "num_full_absorption": 0,
98
+ "num_probe_true_positives": 1638,
99
+ "num_split_features": 1
100
+ },
101
+ {
102
+ "first_letter": "j",
103
+ "mean_absorption_fraction": 0.015672704411350203,
104
+ "full_absorption_rate": 0.0,
105
+ "num_full_absorption": 0,
106
+ "num_probe_true_positives": 412,
107
+ "num_split_features": 1
108
+ },
109
+ {
110
+ "first_letter": "k",
111
+ "mean_absorption_fraction": 0.020683345579076196,
112
+ "full_absorption_rate": 0.0,
113
+ "num_full_absorption": 0,
114
+ "num_probe_true_positives": 675,
115
+ "num_split_features": 1
116
+ },
117
+ {
118
+ "first_letter": "l",
119
+ "mean_absorption_fraction": 0.038069472005201645,
120
+ "full_absorption_rate": 0.0,
121
+ "num_full_absorption": 0,
122
+ "num_probe_true_positives": 1167,
123
+ "num_split_features": 1
124
+ },
125
+ {
126
+ "first_letter": "m",
127
+ "mean_absorption_fraction": 0.017109632297467238,
128
+ "full_absorption_rate": 0.001098297638660077,
129
+ "num_full_absorption": 2,
130
+ "num_probe_true_positives": 1821,
131
+ "num_split_features": 1
132
+ },
133
+ {
134
+ "first_letter": "n",
135
+ "mean_absorption_fraction": 0.0499469606653892,
136
+ "full_absorption_rate": 0.0,
137
+ "num_full_absorption": 0,
138
+ "num_probe_true_positives": 794,
139
+ "num_split_features": 1
140
+ },
141
+ {
142
+ "first_letter": "o",
143
+ "mean_absorption_fraction": 0.19349678964164968,
144
+ "full_absorption_rate": 0.005623242736644799,
145
+ "num_full_absorption": 6,
146
+ "num_probe_true_positives": 1067,
147
+ "num_split_features": 1
148
+ },
149
+ {
150
+ "first_letter": "p",
151
+ "mean_absorption_fraction": 0.18385323993434824,
152
+ "full_absorption_rate": 0.0,
153
+ "num_full_absorption": 0,
154
+ "num_probe_true_positives": 2282,
155
+ "num_split_features": 1
156
+ },
157
+ {
158
+ "first_letter": "q",
159
+ "mean_absorption_fraction": 0.027994167957066447,
160
+ "full_absorption_rate": 0.0,
161
+ "num_full_absorption": 0,
162
+ "num_probe_true_positives": 190,
163
+ "num_split_features": 1
164
+ },
165
+ {
166
+ "first_letter": "r",
167
+ "mean_absorption_fraction": 0.05001762156507236,
168
+ "full_absorption_rate": 0.0,
169
+ "num_full_absorption": 0,
170
+ "num_probe_true_positives": 1701,
171
+ "num_split_features": 1
172
+ },
173
+ {
174
+ "first_letter": "s",
175
+ "mean_absorption_fraction": 0.24798331778260943,
176
+ "full_absorption_rate": 0.008906305664410403,
177
+ "num_full_absorption": 25,
178
+ "num_probe_true_positives": 2807,
179
+ "num_split_features": 1
180
+ },
181
+ {
182
+ "first_letter": "t",
183
+ "mean_absorption_fraction": 0.030864278273271966,
184
+ "full_absorption_rate": 0.0,
185
+ "num_full_absorption": 0,
186
+ "num_probe_true_positives": 1695,
187
+ "num_split_features": 1
188
+ },
189
+ {
190
+ "first_letter": "u",
191
+ "mean_absorption_fraction": 0.058126416439307944,
192
+ "full_absorption_rate": 0.0026490066225165563,
193
+ "num_full_absorption": 2,
194
+ "num_probe_true_positives": 755,
195
+ "num_split_features": 1
196
+ },
197
+ {
198
+ "first_letter": "v",
199
+ "mean_absorption_fraction": 0.02767831174987224,
200
+ "full_absorption_rate": 0.0025348542458808617,
201
+ "num_full_absorption": 2,
202
+ "num_probe_true_positives": 789,
203
+ "num_split_features": 1
204
+ },
205
+ {
206
+ "first_letter": "w",
207
+ "mean_absorption_fraction": 0.04302083964242241,
208
+ "full_absorption_rate": 0.0027548209366391185,
209
+ "num_full_absorption": 2,
210
+ "num_probe_true_positives": 726,
211
+ "num_split_features": 1
212
+ },
213
+ {
214
+ "first_letter": "x",
215
+ "mean_absorption_fraction": 0.3811482605125039,
216
+ "full_absorption_rate": 0.017699115044247787,
217
+ "num_full_absorption": 2,
218
+ "num_probe_true_positives": 113,
219
+ "num_split_features": 1
220
+ },
221
+ {
222
+ "first_letter": "y",
223
+ "mean_absorption_fraction": 0.09440012354552016,
224
+ "full_absorption_rate": 0.011363636363636364,
225
+ "num_full_absorption": 2,
226
+ "num_probe_true_positives": 176,
227
+ "num_split_features": 1
228
+ },
229
+ {
230
+ "first_letter": "z",
231
+ "mean_absorption_fraction": 0.04244590904227524,
232
+ "full_absorption_rate": 0.0,
233
+ "num_full_absorption": 0,
234
+ "num_probe_true_positives": 235,
235
+ "num_split_features": 1
236
+ }
237
+ ],
238
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
239
+ "sae_lens_id": "custom_sae",
240
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_0",
241
+ "sae_lens_version": "5.4.2",
242
+ "sae_cfg_dict": {
243
+ "model_name": "gemma-2-2b",
244
+ "d_in": 2304,
245
+ "d_sae": 16384,
246
+ "hook_layer": 12,
247
+ "hook_name": "blocks.12.hook_resid_post",
248
+ "context_size": null,
249
+ "hook_head_index": null,
250
+ "architecture": "standard_april_update",
251
+ "apply_b_dec_to_input": null,
252
+ "finetuning_scaling_factor": null,
253
+ "activation_fn_str": "",
254
+ "prepend_bos": true,
255
+ "normalize_activations": "none",
256
+ "dtype": "bfloat16",
257
+ "device": "",
258
+ "dataset_path": "",
259
+ "dataset_trust_remote_code": true,
260
+ "seqpos_slice": [
261
+ null
262
+ ],
263
+ "training_tokens": -100000,
264
+ "sae_lens_training_version": null,
265
+ "neuronpedia_id": null
266
+ },
267
+ "eval_result_unstructured": null
268
+ }
eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_1_custom_sae_eval_results.json ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "absorption_first_letter",
3
+ "eval_config": {
4
+ "model_name": "gemma-2-2b",
5
+ "random_seed": 42,
6
+ "f1_jump_threshold": 0.03,
7
+ "max_k_value": 10,
8
+ "prompt_template": "{word} has the first letter:",
9
+ "prompt_token_pos": -6,
10
+ "llm_batch_size": 32,
11
+ "llm_dtype": "bfloat16",
12
+ "k_sparse_probe_l1_decay": 0.01,
13
+ "k_sparse_probe_batch_size": 4096,
14
+ "k_sparse_probe_num_epochs": 50
15
+ },
16
+ "eval_id": "a406d96b-4764-4870-87ad-ffb6f39febf6",
17
+ "datetime_epoch_millis": 1740070455827,
18
+ "eval_result_metrics": {
19
+ "mean": {
20
+ "mean_absorption_fraction_score": 0.10957305234695539,
21
+ "mean_full_absorption_score": 0.009237782749944001,
22
+ "mean_num_split_features": 1.2692307692307692,
23
+ "std_dev_absorption_fraction_score": 0.13197935301164276,
24
+ "std_dev_full_absorption_score": 0.02373718118524806,
25
+ "std_dev_num_split_features": 0.533493565673837
26
+ }
27
+ },
28
+ "eval_result_details": [
29
+ {
30
+ "first_letter": "a",
31
+ "mean_absorption_fraction": 0.13154150609047785,
32
+ "full_absorption_rate": 0.00039872408293460925,
33
+ "num_full_absorption": 1,
34
+ "num_probe_true_positives": 2508,
35
+ "num_split_features": 1
36
+ },
37
+ {
38
+ "first_letter": "b",
39
+ "mean_absorption_fraction": 0.02933484749706098,
40
+ "full_absorption_rate": 0.0006485084306095979,
41
+ "num_full_absorption": 1,
42
+ "num_probe_true_positives": 1542,
43
+ "num_split_features": 1
44
+ },
45
+ {
46
+ "first_letter": "c",
47
+ "mean_absorption_fraction": 0.5181347299930564,
48
+ "full_absorption_rate": 0.057754010695187166,
49
+ "num_full_absorption": 162,
50
+ "num_probe_true_positives": 2805,
51
+ "num_split_features": 1
52
+ },
53
+ {
54
+ "first_letter": "d",
55
+ "mean_absorption_fraction": 0.06592090170151613,
56
+ "full_absorption_rate": 0.0018072289156626507,
57
+ "num_full_absorption": 3,
58
+ "num_probe_true_positives": 1660,
59
+ "num_split_features": 3
60
+ },
61
+ {
62
+ "first_letter": "e",
63
+ "mean_absorption_fraction": 0.1328337800923092,
64
+ "full_absorption_rate": 0.003094059405940594,
65
+ "num_full_absorption": 5,
66
+ "num_probe_true_positives": 1616,
67
+ "num_split_features": 2
68
+ },
69
+ {
70
+ "first_letter": "f",
71
+ "mean_absorption_fraction": 0.02871150615595956,
72
+ "full_absorption_rate": 0.0,
73
+ "num_full_absorption": 0,
74
+ "num_probe_true_positives": 1238,
75
+ "num_split_features": 1
76
+ },
77
+ {
78
+ "first_letter": "g",
79
+ "mean_absorption_fraction": 0.03038662265781444,
80
+ "full_absorption_rate": 0.0008733624454148472,
81
+ "num_full_absorption": 1,
82
+ "num_probe_true_positives": 1145,
83
+ "num_split_features": 2
84
+ },
85
+ {
86
+ "first_letter": "h",
87
+ "mean_absorption_fraction": 0.012250889606148977,
88
+ "full_absorption_rate": 0.001932367149758454,
89
+ "num_full_absorption": 2,
90
+ "num_probe_true_positives": 1035,
91
+ "num_split_features": 1
92
+ },
93
+ {
94
+ "first_letter": "i",
95
+ "mean_absorption_fraction": 0.18872682839441868,
96
+ "full_absorption_rate": 0.008547008547008548,
97
+ "num_full_absorption": 14,
98
+ "num_probe_true_positives": 1638,
99
+ "num_split_features": 2
100
+ },
101
+ {
102
+ "first_letter": "j",
103
+ "mean_absorption_fraction": 0.006006265772572784,
104
+ "full_absorption_rate": 0.0,
105
+ "num_full_absorption": 0,
106
+ "num_probe_true_positives": 412,
107
+ "num_split_features": 1
108
+ },
109
+ {
110
+ "first_letter": "k",
111
+ "mean_absorption_fraction": 0.0139049237721171,
112
+ "full_absorption_rate": 0.0,
113
+ "num_full_absorption": 0,
114
+ "num_probe_true_positives": 675,
115
+ "num_split_features": 1
116
+ },
117
+ {
118
+ "first_letter": "l",
119
+ "mean_absorption_fraction": 0.08373047207179418,
120
+ "full_absorption_rate": 0.000856898029134533,
121
+ "num_full_absorption": 1,
122
+ "num_probe_true_positives": 1167,
123
+ "num_split_features": 1
124
+ },
125
+ {
126
+ "first_letter": "m",
127
+ "mean_absorption_fraction": 0.051899966232708414,
128
+ "full_absorption_rate": 0.003844041735310269,
129
+ "num_full_absorption": 7,
130
+ "num_probe_true_positives": 1821,
131
+ "num_split_features": 1
132
+ },
133
+ {
134
+ "first_letter": "n",
135
+ "mean_absorption_fraction": 0.06348820232865938,
136
+ "full_absorption_rate": 0.0,
137
+ "num_full_absorption": 0,
138
+ "num_probe_true_positives": 794,
139
+ "num_split_features": 1
140
+ },
141
+ {
142
+ "first_letter": "o",
143
+ "mean_absorption_fraction": 0.20622782125334888,
144
+ "full_absorption_rate": 0.015932521087160263,
145
+ "num_full_absorption": 17,
146
+ "num_probe_true_positives": 1067,
147
+ "num_split_features": 1
148
+ },
149
+ {
150
+ "first_letter": "p",
151
+ "mean_absorption_fraction": 0.28848948459991025,
152
+ "full_absorption_rate": 0.004820333041191937,
153
+ "num_full_absorption": 11,
154
+ "num_probe_true_positives": 2282,
155
+ "num_split_features": 1
156
+ },
157
+ {
158
+ "first_letter": "q",
159
+ "mean_absorption_fraction": 0.015337079699260047,
160
+ "full_absorption_rate": 0.0,
161
+ "num_full_absorption": 0,
162
+ "num_probe_true_positives": 190,
163
+ "num_split_features": 1
164
+ },
165
+ {
166
+ "first_letter": "r",
167
+ "mean_absorption_fraction": 0.07880539011960401,
168
+ "full_absorption_rate": 0.0011757789535567313,
169
+ "num_full_absorption": 2,
170
+ "num_probe_true_positives": 1701,
171
+ "num_split_features": 2
172
+ },
173
+ {
174
+ "first_letter": "s",
175
+ "mean_absorption_fraction": 0.47082284356769066,
176
+ "full_absorption_rate": 0.11115069469184183,
177
+ "num_full_absorption": 312,
178
+ "num_probe_true_positives": 2807,
179
+ "num_split_features": 1
180
+ },
181
+ {
182
+ "first_letter": "t",
183
+ "mean_absorption_fraction": 0.05863498588324972,
184
+ "full_absorption_rate": 0.0,
185
+ "num_full_absorption": 0,
186
+ "num_probe_true_positives": 1695,
187
+ "num_split_features": 1
188
+ },
189
+ {
190
+ "first_letter": "u",
191
+ "mean_absorption_fraction": 0.08018276488751244,
192
+ "full_absorption_rate": 0.0026490066225165563,
193
+ "num_full_absorption": 2,
194
+ "num_probe_true_positives": 755,
195
+ "num_split_features": 1
196
+ },
197
+ {
198
+ "first_letter": "v",
199
+ "mean_absorption_fraction": 0.014063794282346212,
200
+ "full_absorption_rate": 0.005069708491761723,
201
+ "num_full_absorption": 4,
202
+ "num_probe_true_positives": 789,
203
+ "num_split_features": 1
204
+ },
205
+ {
206
+ "first_letter": "w",
207
+ "mean_absorption_fraction": 0.05768679269514189,
208
+ "full_absorption_rate": 0.008264462809917356,
209
+ "num_full_absorption": 6,
210
+ "num_probe_true_positives": 726,
211
+ "num_split_features": 1
212
+ },
213
+ {
214
+ "first_letter": "x",
215
+ "mean_absorption_fraction": 0.12267083087401717,
216
+ "full_absorption_rate": 0.0,
217
+ "num_full_absorption": 0,
218
+ "num_probe_true_positives": 113,
219
+ "num_split_features": 2
220
+ },
221
+ {
222
+ "first_letter": "y",
223
+ "mean_absorption_fraction": 0.06181458936192774,
224
+ "full_absorption_rate": 0.011363636363636364,
225
+ "num_full_absorption": 2,
226
+ "num_probe_true_positives": 176,
227
+ "num_split_features": 1
228
+ },
229
+ {
230
+ "first_letter": "z",
231
+ "mean_absorption_fraction": 0.0372915414302168,
232
+ "full_absorption_rate": 0.0,
233
+ "num_full_absorption": 0,
234
+ "num_probe_true_positives": 235,
235
+ "num_split_features": 1
236
+ }
237
+ ],
238
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
239
+ "sae_lens_id": "custom_sae",
240
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_1",
241
+ "sae_lens_version": "5.4.2",
242
+ "sae_cfg_dict": {
243
+ "model_name": "gemma-2-2b",
244
+ "d_in": 2304,
245
+ "d_sae": 16384,
246
+ "hook_layer": 12,
247
+ "hook_name": "blocks.12.hook_resid_post",
248
+ "context_size": null,
249
+ "hook_head_index": null,
250
+ "architecture": "standard_april_update",
251
+ "apply_b_dec_to_input": null,
252
+ "finetuning_scaling_factor": null,
253
+ "activation_fn_str": "",
254
+ "prepend_bos": true,
255
+ "normalize_activations": "none",
256
+ "dtype": "bfloat16",
257
+ "device": "",
258
+ "dataset_path": "",
259
+ "dataset_trust_remote_code": true,
260
+ "seqpos_slice": [
261
+ null
262
+ ],
263
+ "training_tokens": -100000,
264
+ "sae_lens_training_version": null,
265
+ "neuronpedia_id": null
266
+ },
267
+ "eval_result_unstructured": null
268
+ }
eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_2_custom_sae_eval_results.json ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "absorption_first_letter",
3
+ "eval_config": {
4
+ "model_name": "gemma-2-2b",
5
+ "random_seed": 42,
6
+ "f1_jump_threshold": 0.03,
7
+ "max_k_value": 10,
8
+ "prompt_template": "{word} has the first letter:",
9
+ "prompt_token_pos": -6,
10
+ "llm_batch_size": 32,
11
+ "llm_dtype": "bfloat16",
12
+ "k_sparse_probe_l1_decay": 0.01,
13
+ "k_sparse_probe_batch_size": 4096,
14
+ "k_sparse_probe_num_epochs": 50
15
+ },
16
+ "eval_id": "0a428c0f-b6b1-48a7-bb19-d7086f4270d6",
17
+ "datetime_epoch_millis": 1740073177210,
18
+ "eval_result_metrics": {
19
+ "mean": {
20
+ "mean_absorption_fraction_score": 0.2006084021429049,
21
+ "mean_full_absorption_score": 0.04185121640812415,
22
+ "mean_num_split_features": 1.1923076923076923,
23
+ "std_dev_absorption_fraction_score": 0.1932477405174448,
24
+ "std_dev_full_absorption_score": 0.07870423678832646,
25
+ "std_dev_num_split_features": 0.4019184762342502
26
+ }
27
+ },
28
+ "eval_result_details": [
29
+ {
30
+ "first_letter": "a",
31
+ "mean_absorption_fraction": 0.3269017199955651,
32
+ "full_absorption_rate": 0.006778309409888357,
33
+ "num_full_absorption": 17,
34
+ "num_probe_true_positives": 2508,
35
+ "num_split_features": 1
36
+ },
37
+ {
38
+ "first_letter": "b",
39
+ "mean_absorption_fraction": 0.3153361615591299,
40
+ "full_absorption_rate": 0.01621271076523995,
41
+ "num_full_absorption": 25,
42
+ "num_probe_true_positives": 1542,
43
+ "num_split_features": 1
44
+ },
45
+ {
46
+ "first_letter": "c",
47
+ "mean_absorption_fraction": 0.7681073287240997,
48
+ "full_absorption_rate": 0.3679144385026738,
49
+ "num_full_absorption": 1032,
50
+ "num_probe_true_positives": 2805,
51
+ "num_split_features": 1
52
+ },
53
+ {
54
+ "first_letter": "d",
55
+ "mean_absorption_fraction": 0.4844772034986525,
56
+ "full_absorption_rate": 0.15963855421686746,
57
+ "num_full_absorption": 265,
58
+ "num_probe_true_positives": 1660,
59
+ "num_split_features": 1
60
+ },
61
+ {
62
+ "first_letter": "e",
63
+ "mean_absorption_fraction": 0.4189963358401263,
64
+ "full_absorption_rate": 0.08787128712871287,
65
+ "num_full_absorption": 142,
66
+ "num_probe_true_positives": 1616,
67
+ "num_split_features": 1
68
+ },
69
+ {
70
+ "first_letter": "f",
71
+ "mean_absorption_fraction": 0.0906370665145521,
72
+ "full_absorption_rate": 0.0016155088852988692,
73
+ "num_full_absorption": 2,
74
+ "num_probe_true_positives": 1238,
75
+ "num_split_features": 1
76
+ },
77
+ {
78
+ "first_letter": "g",
79
+ "mean_absorption_fraction": 0.05344715412809589,
80
+ "full_absorption_rate": 0.0017467248908296944,
81
+ "num_full_absorption": 2,
82
+ "num_probe_true_positives": 1145,
83
+ "num_split_features": 2
84
+ },
85
+ {
86
+ "first_letter": "h",
87
+ "mean_absorption_fraction": 0.017331113075409323,
88
+ "full_absorption_rate": 0.001932367149758454,
89
+ "num_full_absorption": 2,
90
+ "num_probe_true_positives": 1035,
91
+ "num_split_features": 1
92
+ },
93
+ {
94
+ "first_letter": "i",
95
+ "mean_absorption_fraction": 0.27869038889672915,
96
+ "full_absorption_rate": 0.036019536019536016,
97
+ "num_full_absorption": 59,
98
+ "num_probe_true_positives": 1638,
99
+ "num_split_features": 2
100
+ },
101
+ {
102
+ "first_letter": "j",
103
+ "mean_absorption_fraction": 0.02339215944422728,
104
+ "full_absorption_rate": 0.0048543689320388345,
105
+ "num_full_absorption": 2,
106
+ "num_probe_true_positives": 412,
107
+ "num_split_features": 1
108
+ },
109
+ {
110
+ "first_letter": "k",
111
+ "mean_absorption_fraction": 0.010137989127819028,
112
+ "full_absorption_rate": 0.0014814814814814814,
113
+ "num_full_absorption": 1,
114
+ "num_probe_true_positives": 675,
115
+ "num_split_features": 1
116
+ },
117
+ {
118
+ "first_letter": "l",
119
+ "mean_absorption_fraction": 0.16146520974781348,
120
+ "full_absorption_rate": 0.017994858611825194,
121
+ "num_full_absorption": 21,
122
+ "num_probe_true_positives": 1167,
123
+ "num_split_features": 1
124
+ },
125
+ {
126
+ "first_letter": "m",
127
+ "mean_absorption_fraction": 0.12957510375875406,
128
+ "full_absorption_rate": 0.0060406370126304225,
129
+ "num_full_absorption": 11,
130
+ "num_probe_true_positives": 1821,
131
+ "num_split_features": 2
132
+ },
133
+ {
134
+ "first_letter": "n",
135
+ "mean_absorption_fraction": 0.067508810290958,
136
+ "full_absorption_rate": 0.0,
137
+ "num_full_absorption": 0,
138
+ "num_probe_true_positives": 794,
139
+ "num_split_features": 1
140
+ },
141
+ {
142
+ "first_letter": "o",
143
+ "mean_absorption_fraction": 0.30204240974545166,
144
+ "full_absorption_rate": 0.061855670103092786,
145
+ "num_full_absorption": 66,
146
+ "num_probe_true_positives": 1067,
147
+ "num_split_features": 1
148
+ },
149
+ {
150
+ "first_letter": "p",
151
+ "mean_absorption_fraction": 0.5004589950119995,
152
+ "full_absorption_rate": 0.09640666082383874,
153
+ "num_full_absorption": 220,
154
+ "num_probe_true_positives": 2282,
155
+ "num_split_features": 1
156
+ },
157
+ {
158
+ "first_letter": "q",
159
+ "mean_absorption_fraction": 0.02447369883434256,
160
+ "full_absorption_rate": 0.0,
161
+ "num_full_absorption": 0,
162
+ "num_probe_true_positives": 190,
163
+ "num_split_features": 1
164
+ },
165
+ {
166
+ "first_letter": "r",
167
+ "mean_absorption_fraction": 0.13838726937556015,
168
+ "full_absorption_rate": 0.011757789535567314,
169
+ "num_full_absorption": 20,
170
+ "num_probe_true_positives": 1701,
171
+ "num_split_features": 2
172
+ },
173
+ {
174
+ "first_letter": "s",
175
+ "mean_absorption_fraction": 0.4160579727849388,
176
+ "full_absorption_rate": 0.11364446027787674,
177
+ "num_full_absorption": 319,
178
+ "num_probe_true_positives": 2807,
179
+ "num_split_features": 1
180
+ },
181
+ {
182
+ "first_letter": "t",
183
+ "mean_absorption_fraction": 0.1625731588674795,
184
+ "full_absorption_rate": 0.00471976401179941,
185
+ "num_full_absorption": 8,
186
+ "num_probe_true_positives": 1695,
187
+ "num_split_features": 1
188
+ },
189
+ {
190
+ "first_letter": "u",
191
+ "mean_absorption_fraction": 0.1731853313821131,
192
+ "full_absorption_rate": 0.05695364238410596,
193
+ "num_full_absorption": 43,
194
+ "num_probe_true_positives": 755,
195
+ "num_split_features": 2
196
+ },
197
+ {
198
+ "first_letter": "v",
199
+ "mean_absorption_fraction": 0.041442721515423414,
200
+ "full_absorption_rate": 0.0038022813688212928,
201
+ "num_full_absorption": 3,
202
+ "num_probe_true_positives": 789,
203
+ "num_split_features": 1
204
+ },
205
+ {
206
+ "first_letter": "w",
207
+ "mean_absorption_fraction": 0.06787815048597441,
208
+ "full_absorption_rate": 0.005509641873278237,
209
+ "num_full_absorption": 4,
210
+ "num_probe_true_positives": 726,
211
+ "num_split_features": 1
212
+ },
213
+ {
214
+ "first_letter": "x",
215
+ "mean_absorption_fraction": 0.19401323494686296,
216
+ "full_absorption_rate": 0.017699115044247787,
217
+ "num_full_absorption": 2,
218
+ "num_probe_true_positives": 113,
219
+ "num_split_features": 1
220
+ },
221
+ {
222
+ "first_letter": "y",
223
+ "mean_absorption_fraction": 0.03275891889001838,
224
+ "full_absorption_rate": 0.005681818181818182,
225
+ "num_full_absorption": 1,
226
+ "num_probe_true_positives": 176,
227
+ "num_split_features": 1
228
+ },
229
+ {
230
+ "first_letter": "z",
231
+ "mean_absorption_fraction": 0.016542849273431318,
232
+ "full_absorption_rate": 0.0,
233
+ "num_full_absorption": 0,
234
+ "num_probe_true_positives": 235,
235
+ "num_split_features": 1
236
+ }
237
+ ],
238
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
239
+ "sae_lens_id": "custom_sae",
240
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_2",
241
+ "sae_lens_version": "5.4.2",
242
+ "sae_cfg_dict": {
243
+ "model_name": "gemma-2-2b",
244
+ "d_in": 2304,
245
+ "d_sae": 16384,
246
+ "hook_layer": 12,
247
+ "hook_name": "blocks.12.hook_resid_post",
248
+ "context_size": null,
249
+ "hook_head_index": null,
250
+ "architecture": "standard_april_update",
251
+ "apply_b_dec_to_input": null,
252
+ "finetuning_scaling_factor": null,
253
+ "activation_fn_str": "",
254
+ "prepend_bos": true,
255
+ "normalize_activations": "none",
256
+ "dtype": "bfloat16",
257
+ "device": "",
258
+ "dataset_path": "",
259
+ "dataset_trust_remote_code": true,
260
+ "seqpos_slice": [
261
+ null
262
+ ],
263
+ "training_tokens": -100000,
264
+ "sae_lens_training_version": null,
265
+ "neuronpedia_id": null
266
+ },
267
+ "eval_result_unstructured": null
268
+ }
eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_3_custom_sae_eval_results.json ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "absorption_first_letter",
3
+ "eval_config": {
4
+ "model_name": "gemma-2-2b",
5
+ "random_seed": 42,
6
+ "f1_jump_threshold": 0.03,
7
+ "max_k_value": 10,
8
+ "prompt_template": "{word} has the first letter:",
9
+ "prompt_token_pos": -6,
10
+ "llm_batch_size": 32,
11
+ "llm_dtype": "bfloat16",
12
+ "k_sparse_probe_l1_decay": 0.01,
13
+ "k_sparse_probe_batch_size": 4096,
14
+ "k_sparse_probe_num_epochs": 50
15
+ },
16
+ "eval_id": "cbe48292-6c0e-450f-a703-383be5405b0b",
17
+ "datetime_epoch_millis": 1740073855524,
18
+ "eval_result_metrics": {
19
+ "mean": {
20
+ "mean_absorption_fraction_score": 0.3073508786945687,
21
+ "mean_full_absorption_score": 0.12017129252511788,
22
+ "mean_num_split_features": 1.1923076923076923,
23
+ "std_dev_absorption_fraction_score": 0.23919102364176312,
24
+ "std_dev_full_absorption_score": 0.14148906885538548,
25
+ "std_dev_num_split_features": 0.6336706254344299
26
+ }
27
+ },
28
+ "eval_result_details": [
29
+ {
30
+ "first_letter": "a",
31
+ "mean_absorption_fraction": 0.7535526101510682,
32
+ "full_absorption_rate": 0.3616427432216906,
33
+ "num_full_absorption": 907,
34
+ "num_probe_true_positives": 2508,
35
+ "num_split_features": 1
36
+ },
37
+ {
38
+ "first_letter": "b",
39
+ "mean_absorption_fraction": 0.443139983018839,
40
+ "full_absorption_rate": 0.1556420233463035,
41
+ "num_full_absorption": 240,
42
+ "num_probe_true_positives": 1542,
43
+ "num_split_features": 1
44
+ },
45
+ {
46
+ "first_letter": "c",
47
+ "mean_absorption_fraction": 0.8014968498975634,
48
+ "full_absorption_rate": 0.4859180035650624,
49
+ "num_full_absorption": 1363,
50
+ "num_probe_true_positives": 2805,
51
+ "num_split_features": 1
52
+ },
53
+ {
54
+ "first_letter": "d",
55
+ "mean_absorption_fraction": 0.5862511734750558,
56
+ "full_absorption_rate": 0.27951807228915665,
57
+ "num_full_absorption": 464,
58
+ "num_probe_true_positives": 1660,
59
+ "num_split_features": 1
60
+ },
61
+ {
62
+ "first_letter": "e",
63
+ "mean_absorption_fraction": 0.5525257217578151,
64
+ "full_absorption_rate": 0.27722772277227725,
65
+ "num_full_absorption": 448,
66
+ "num_probe_true_positives": 1616,
67
+ "num_split_features": 1
68
+ },
69
+ {
70
+ "first_letter": "f",
71
+ "mean_absorption_fraction": 0.25453275102142786,
72
+ "full_absorption_rate": 0.04361873990306947,
73
+ "num_full_absorption": 54,
74
+ "num_probe_true_positives": 1238,
75
+ "num_split_features": 1
76
+ },
77
+ {
78
+ "first_letter": "g",
79
+ "mean_absorption_fraction": 0.28917468092769205,
80
+ "full_absorption_rate": 0.10393013100436681,
81
+ "num_full_absorption": 119,
82
+ "num_probe_true_positives": 1145,
83
+ "num_split_features": 1
84
+ },
85
+ {
86
+ "first_letter": "h",
87
+ "mean_absorption_fraction": 0.08357516271680715,
88
+ "full_absorption_rate": 0.004830917874396135,
89
+ "num_full_absorption": 5,
90
+ "num_probe_true_positives": 1035,
91
+ "num_split_features": 1
92
+ },
93
+ {
94
+ "first_letter": "i",
95
+ "mean_absorption_fraction": 0.2977907134368573,
96
+ "full_absorption_rate": 0.09584859584859586,
97
+ "num_full_absorption": 157,
98
+ "num_probe_true_positives": 1638,
99
+ "num_split_features": 1
100
+ },
101
+ {
102
+ "first_letter": "j",
103
+ "mean_absorption_fraction": 0.059716156278724446,
104
+ "full_absorption_rate": 0.0024271844660194173,
105
+ "num_full_absorption": 1,
106
+ "num_probe_true_positives": 412,
107
+ "num_split_features": 1
108
+ },
109
+ {
110
+ "first_letter": "k",
111
+ "mean_absorption_fraction": 0.030590905131696624,
112
+ "full_absorption_rate": 0.005925925925925926,
113
+ "num_full_absorption": 4,
114
+ "num_probe_true_positives": 675,
115
+ "num_split_features": 1
116
+ },
117
+ {
118
+ "first_letter": "l",
119
+ "mean_absorption_fraction": 0.291295481380707,
120
+ "full_absorption_rate": 0.0702656383890317,
121
+ "num_full_absorption": 82,
122
+ "num_probe_true_positives": 1167,
123
+ "num_split_features": 1
124
+ },
125
+ {
126
+ "first_letter": "m",
127
+ "mean_absorption_fraction": 0.41191895007535845,
128
+ "full_absorption_rate": 0.09115870400878638,
129
+ "num_full_absorption": 166,
130
+ "num_probe_true_positives": 1821,
131
+ "num_split_features": 4
132
+ },
133
+ {
134
+ "first_letter": "n",
135
+ "mean_absorption_fraction": 0.15726987522678162,
136
+ "full_absorption_rate": 0.02392947103274559,
137
+ "num_full_absorption": 19,
138
+ "num_probe_true_positives": 794,
139
+ "num_split_features": 1
140
+ },
141
+ {
142
+ "first_letter": "o",
143
+ "mean_absorption_fraction": 0.4208217252218873,
144
+ "full_absorption_rate": 0.14245548266166824,
145
+ "num_full_absorption": 152,
146
+ "num_probe_true_positives": 1067,
147
+ "num_split_features": 1
148
+ },
149
+ {
150
+ "first_letter": "p",
151
+ "mean_absorption_fraction": 0.6289941240335383,
152
+ "full_absorption_rate": 0.27256792287467135,
153
+ "num_full_absorption": 622,
154
+ "num_probe_true_positives": 2282,
155
+ "num_split_features": 1
156
+ },
157
+ {
158
+ "first_letter": "q",
159
+ "mean_absorption_fraction": 0.071798199256888,
160
+ "full_absorption_rate": 0.010526315789473684,
161
+ "num_full_absorption": 2,
162
+ "num_probe_true_positives": 190,
163
+ "num_split_features": 1
164
+ },
165
+ {
166
+ "first_letter": "r",
167
+ "mean_absorption_fraction": 0.19783672547023598,
168
+ "full_absorption_rate": 0.021164021164021163,
169
+ "num_full_absorption": 36,
170
+ "num_probe_true_positives": 1701,
171
+ "num_split_features": 2
172
+ },
173
+ {
174
+ "first_letter": "s",
175
+ "mean_absorption_fraction": 0.6855947838560892,
176
+ "full_absorption_rate": 0.40969006056287854,
177
+ "num_full_absorption": 1150,
178
+ "num_probe_true_positives": 2807,
179
+ "num_split_features": 2
180
+ },
181
+ {
182
+ "first_letter": "t",
183
+ "mean_absorption_fraction": 0.28160576517739044,
184
+ "full_absorption_rate": 0.061946902654867256,
185
+ "num_full_absorption": 105,
186
+ "num_probe_true_positives": 1695,
187
+ "num_split_features": 1
188
+ },
189
+ {
190
+ "first_letter": "u",
191
+ "mean_absorption_fraction": 0.3022976453913539,
192
+ "full_absorption_rate": 0.1629139072847682,
193
+ "num_full_absorption": 123,
194
+ "num_probe_true_positives": 755,
195
+ "num_split_features": 1
196
+ },
197
+ {
198
+ "first_letter": "v",
199
+ "mean_absorption_fraction": 0.06957494713769145,
200
+ "full_absorption_rate": 0.022813688212927757,
201
+ "num_full_absorption": 18,
202
+ "num_probe_true_positives": 789,
203
+ "num_split_features": 1
204
+ },
205
+ {
206
+ "first_letter": "w",
207
+ "mean_absorption_fraction": 0.13656791006773783,
208
+ "full_absorption_rate": 0.009641873278236915,
209
+ "num_full_absorption": 7,
210
+ "num_probe_true_positives": 726,
211
+ "num_split_features": 1
212
+ },
213
+ {
214
+ "first_letter": "x",
215
+ "mean_absorption_fraction": 0.1448744395789566,
216
+ "full_absorption_rate": 0.008849557522123894,
217
+ "num_full_absorption": 1,
218
+ "num_probe_true_positives": 113,
219
+ "num_split_features": 1
220
+ },
221
+ {
222
+ "first_letter": "y",
223
+ "mean_absorption_fraction": 0.01812094705888303,
224
+ "full_absorption_rate": 0.0,
225
+ "num_full_absorption": 0,
226
+ "num_probe_true_positives": 176,
227
+ "num_split_features": 1
228
+ },
229
+ {
230
+ "first_letter": "z",
231
+ "mean_absorption_fraction": 0.020204619311739412,
232
+ "full_absorption_rate": 0.0,
233
+ "num_full_absorption": 0,
234
+ "num_probe_true_positives": 235,
235
+ "num_split_features": 1
236
+ }
237
+ ],
238
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
239
+ "sae_lens_id": "custom_sae",
240
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_3",
241
+ "sae_lens_version": "5.4.2",
242
+ "sae_cfg_dict": {
243
+ "model_name": "gemma-2-2b",
244
+ "d_in": 2304,
245
+ "d_sae": 16384,
246
+ "hook_layer": 12,
247
+ "hook_name": "blocks.12.hook_resid_post",
248
+ "context_size": null,
249
+ "hook_head_index": null,
250
+ "architecture": "standard_april_update",
251
+ "apply_b_dec_to_input": null,
252
+ "finetuning_scaling_factor": null,
253
+ "activation_fn_str": "",
254
+ "prepend_bos": true,
255
+ "normalize_activations": "none",
256
+ "dtype": "bfloat16",
257
+ "device": "",
258
+ "dataset_path": "",
259
+ "dataset_trust_remote_code": true,
260
+ "seqpos_slice": [
261
+ null
262
+ ],
263
+ "training_tokens": -100000,
264
+ "sae_lens_training_version": null,
265
+ "neuronpedia_id": null
266
+ },
267
+ "eval_result_unstructured": null
268
+ }
eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_4_custom_sae_eval_results.json ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "absorption_first_letter",
3
+ "eval_config": {
4
+ "model_name": "gemma-2-2b",
5
+ "random_seed": 42,
6
+ "f1_jump_threshold": 0.03,
7
+ "max_k_value": 10,
8
+ "prompt_template": "{word} has the first letter:",
9
+ "prompt_token_pos": -6,
10
+ "llm_batch_size": 32,
11
+ "llm_dtype": "bfloat16",
12
+ "k_sparse_probe_l1_decay": 0.01,
13
+ "k_sparse_probe_batch_size": 4096,
14
+ "k_sparse_probe_num_epochs": 50
15
+ },
16
+ "eval_id": "825c5398-a5f2-4bdc-aed3-be71f8948c48",
17
+ "datetime_epoch_millis": 1740071810704,
18
+ "eval_result_metrics": {
19
+ "mean": {
20
+ "mean_absorption_fraction_score": 0.31437811121228376,
21
+ "mean_full_absorption_score": 0.14400194476050063,
22
+ "mean_num_split_features": 1.3076923076923077,
23
+ "std_dev_absorption_fraction_score": 0.21692006101845815,
24
+ "std_dev_full_absorption_score": 0.14764320616941257,
25
+ "std_dev_num_split_features": 0.7358929688062399
26
+ }
27
+ },
28
+ "eval_result_details": [
29
+ {
30
+ "first_letter": "a",
31
+ "mean_absorption_fraction": 0.6700946207748417,
32
+ "full_absorption_rate": 0.27910685805422647,
33
+ "num_full_absorption": 700,
34
+ "num_probe_true_positives": 2508,
35
+ "num_split_features": 1
36
+ },
37
+ {
38
+ "first_letter": "b",
39
+ "mean_absorption_fraction": 0.24812878681697556,
40
+ "full_absorption_rate": 0.05966277561608301,
41
+ "num_full_absorption": 92,
42
+ "num_probe_true_positives": 1542,
43
+ "num_split_features": 2
44
+ },
45
+ {
46
+ "first_letter": "c",
47
+ "mean_absorption_fraction": 0.6216311352605427,
48
+ "full_absorption_rate": 0.3479500891265597,
49
+ "num_full_absorption": 976,
50
+ "num_probe_true_positives": 2805,
51
+ "num_split_features": 4
52
+ },
53
+ {
54
+ "first_letter": "d",
55
+ "mean_absorption_fraction": 0.48471932315961974,
56
+ "full_absorption_rate": 0.2246987951807229,
57
+ "num_full_absorption": 373,
58
+ "num_probe_true_positives": 1660,
59
+ "num_split_features": 1
60
+ },
61
+ {
62
+ "first_letter": "e",
63
+ "mean_absorption_fraction": 0.5598971921091018,
64
+ "full_absorption_rate": 0.3100247524752475,
65
+ "num_full_absorption": 501,
66
+ "num_probe_true_positives": 1616,
67
+ "num_split_features": 1
68
+ },
69
+ {
70
+ "first_letter": "f",
71
+ "mean_absorption_fraction": 0.37532803679140847,
72
+ "full_absorption_rate": 0.11550888529886914,
73
+ "num_full_absorption": 143,
74
+ "num_probe_true_positives": 1238,
75
+ "num_split_features": 1
76
+ },
77
+ {
78
+ "first_letter": "g",
79
+ "mean_absorption_fraction": 0.238732932480143,
80
+ "full_absorption_rate": 0.08122270742358079,
81
+ "num_full_absorption": 93,
82
+ "num_probe_true_positives": 1145,
83
+ "num_split_features": 1
84
+ },
85
+ {
86
+ "first_letter": "h",
87
+ "mean_absorption_fraction": 0.14743501683060636,
88
+ "full_absorption_rate": 0.02318840579710145,
89
+ "num_full_absorption": 24,
90
+ "num_probe_true_positives": 1035,
91
+ "num_split_features": 1
92
+ },
93
+ {
94
+ "first_letter": "i",
95
+ "mean_absorption_fraction": 0.35531076845345727,
96
+ "full_absorption_rate": 0.14224664224664224,
97
+ "num_full_absorption": 233,
98
+ "num_probe_true_positives": 1638,
99
+ "num_split_features": 2
100
+ },
101
+ {
102
+ "first_letter": "j",
103
+ "mean_absorption_fraction": 0.06722788757390594,
104
+ "full_absorption_rate": 0.0048543689320388345,
105
+ "num_full_absorption": 2,
106
+ "num_probe_true_positives": 412,
107
+ "num_split_features": 1
108
+ },
109
+ {
110
+ "first_letter": "k",
111
+ "mean_absorption_fraction": 0.025713322600569993,
112
+ "full_absorption_rate": 0.0044444444444444444,
113
+ "num_full_absorption": 3,
114
+ "num_probe_true_positives": 675,
115
+ "num_split_features": 1
116
+ },
117
+ {
118
+ "first_letter": "l",
119
+ "mean_absorption_fraction": 0.2608917646713939,
120
+ "full_absorption_rate": 0.10025706940874037,
121
+ "num_full_absorption": 117,
122
+ "num_probe_true_positives": 1167,
123
+ "num_split_features": 1
124
+ },
125
+ {
126
+ "first_letter": "m",
127
+ "mean_absorption_fraction": 0.5039165322808883,
128
+ "full_absorption_rate": 0.2311916529379462,
129
+ "num_full_absorption": 421,
130
+ "num_probe_true_positives": 1821,
131
+ "num_split_features": 1
132
+ },
133
+ {
134
+ "first_letter": "n",
135
+ "mean_absorption_fraction": 0.16836587854963553,
136
+ "full_absorption_rate": 0.031486146095717885,
137
+ "num_full_absorption": 25,
138
+ "num_probe_true_positives": 794,
139
+ "num_split_features": 1
140
+ },
141
+ {
142
+ "first_letter": "o",
143
+ "mean_absorption_fraction": 0.26251439529076037,
144
+ "full_absorption_rate": 0.08809746954076851,
145
+ "num_full_absorption": 94,
146
+ "num_probe_true_positives": 1067,
147
+ "num_split_features": 1
148
+ },
149
+ {
150
+ "first_letter": "p",
151
+ "mean_absorption_fraction": 0.7250538832113292,
152
+ "full_absorption_rate": 0.42375109553023665,
153
+ "num_full_absorption": 967,
154
+ "num_probe_true_positives": 2282,
155
+ "num_split_features": 1
156
+ },
157
+ {
158
+ "first_letter": "q",
159
+ "mean_absorption_fraction": 0.08274907313028433,
160
+ "full_absorption_rate": 0.005263157894736842,
161
+ "num_full_absorption": 1,
162
+ "num_probe_true_positives": 190,
163
+ "num_split_features": 1
164
+ },
165
+ {
166
+ "first_letter": "r",
167
+ "mean_absorption_fraction": 0.35246887286225786,
168
+ "full_absorption_rate": 0.0993533215755438,
169
+ "num_full_absorption": 169,
170
+ "num_probe_true_positives": 1701,
171
+ "num_split_features": 2
172
+ },
173
+ {
174
+ "first_letter": "s",
175
+ "mean_absorption_fraction": 0.6186526433047977,
176
+ "full_absorption_rate": 0.4324902030637692,
177
+ "num_full_absorption": 1214,
178
+ "num_probe_true_positives": 2807,
179
+ "num_split_features": 3
180
+ },
181
+ {
182
+ "first_letter": "t",
183
+ "mean_absorption_fraction": 0.35549654950125187,
184
+ "full_absorption_rate": 0.13274336283185842,
185
+ "num_full_absorption": 225,
186
+ "num_probe_true_positives": 1695,
187
+ "num_split_features": 1
188
+ },
189
+ {
190
+ "first_letter": "u",
191
+ "mean_absorption_fraction": 0.548820273967667,
192
+ "full_absorption_rate": 0.4728476821192053,
193
+ "num_full_absorption": 357,
194
+ "num_probe_true_positives": 755,
195
+ "num_split_features": 1
196
+ },
197
+ {
198
+ "first_letter": "v",
199
+ "mean_absorption_fraction": 0.09142207483594575,
200
+ "full_absorption_rate": 0.032953105196451206,
201
+ "num_full_absorption": 26,
202
+ "num_probe_true_positives": 789,
203
+ "num_split_features": 1
204
+ },
205
+ {
206
+ "first_letter": "w",
207
+ "mean_absorption_fraction": 0.17764068045934867,
208
+ "full_absorption_rate": 0.03581267217630854,
209
+ "num_full_absorption": 26,
210
+ "num_probe_true_positives": 726,
211
+ "num_split_features": 1
212
+ },
213
+ {
214
+ "first_letter": "x",
215
+ "mean_absorption_fraction": 0.12268279551514197,
216
+ "full_absorption_rate": 0.02654867256637168,
217
+ "num_full_absorption": 3,
218
+ "num_probe_true_positives": 113,
219
+ "num_split_features": 1
220
+ },
221
+ {
222
+ "first_letter": "y",
223
+ "mean_absorption_fraction": 0.08832088254864336,
224
+ "full_absorption_rate": 0.03409090909090909,
225
+ "num_full_absorption": 6,
226
+ "num_probe_true_positives": 176,
227
+ "num_split_features": 1
228
+ },
229
+ {
230
+ "first_letter": "z",
231
+ "mean_absorption_fraction": 0.020615568538858722,
232
+ "full_absorption_rate": 0.00425531914893617,
233
+ "num_full_absorption": 1,
234
+ "num_probe_true_positives": 235,
235
+ "num_split_features": 1
236
+ }
237
+ ],
238
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
239
+ "sae_lens_id": "custom_sae",
240
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_4",
241
+ "sae_lens_version": "5.4.2",
242
+ "sae_cfg_dict": {
243
+ "model_name": "gemma-2-2b",
244
+ "d_in": 2304,
245
+ "d_sae": 16384,
246
+ "hook_layer": 12,
247
+ "hook_name": "blocks.12.hook_resid_post",
248
+ "context_size": null,
249
+ "hook_head_index": null,
250
+ "architecture": "standard_april_update",
251
+ "apply_b_dec_to_input": null,
252
+ "finetuning_scaling_factor": null,
253
+ "activation_fn_str": "",
254
+ "prepend_bos": true,
255
+ "normalize_activations": "none",
256
+ "dtype": "bfloat16",
257
+ "device": "",
258
+ "dataset_path": "",
259
+ "dataset_trust_remote_code": true,
260
+ "seqpos_slice": [
261
+ null
262
+ ],
263
+ "training_tokens": -100000,
264
+ "sae_lens_training_version": null,
265
+ "neuronpedia_id": null
266
+ },
267
+ "eval_result_unstructured": null
268
+ }
eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_5_custom_sae_eval_results.json ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "absorption_first_letter",
3
+ "eval_config": {
4
+ "model_name": "gemma-2-2b",
5
+ "random_seed": 42,
6
+ "f1_jump_threshold": 0.03,
7
+ "max_k_value": 10,
8
+ "prompt_template": "{word} has the first letter:",
9
+ "prompt_token_pos": -6,
10
+ "llm_batch_size": 32,
11
+ "llm_dtype": "bfloat16",
12
+ "k_sparse_probe_l1_decay": 0.01,
13
+ "k_sparse_probe_batch_size": 4096,
14
+ "k_sparse_probe_num_epochs": 50
15
+ },
16
+ "eval_id": "1a765d01-f368-4f32-ac3f-112701f7a594",
17
+ "datetime_epoch_millis": 1740071130337,
18
+ "eval_result_metrics": {
19
+ "mean": {
20
+ "mean_absorption_fraction_score": 0.3576698915735376,
21
+ "mean_full_absorption_score": 0.19249329486644487,
22
+ "mean_num_split_features": 1.6153846153846154,
23
+ "std_dev_absorption_fraction_score": 0.2065944618933617,
24
+ "std_dev_full_absorption_score": 0.1505229817991518,
25
+ "std_dev_num_split_features": 1.061203960675725
26
+ }
27
+ },
28
+ "eval_result_details": [
29
+ {
30
+ "first_letter": "a",
31
+ "mean_absorption_fraction": 0.6340099406185813,
32
+ "full_absorption_rate": 0.3141945773524721,
33
+ "num_full_absorption": 788,
34
+ "num_probe_true_positives": 2508,
35
+ "num_split_features": 2
36
+ },
37
+ {
38
+ "first_letter": "b",
39
+ "mean_absorption_fraction": 0.3637546041985076,
40
+ "full_absorption_rate": 0.17380025940337224,
41
+ "num_full_absorption": 268,
42
+ "num_probe_true_positives": 1542,
43
+ "num_split_features": 2
44
+ },
45
+ {
46
+ "first_letter": "c",
47
+ "mean_absorption_fraction": 0.5699543239021706,
48
+ "full_absorption_rate": 0.33475935828877007,
49
+ "num_full_absorption": 939,
50
+ "num_probe_true_positives": 2805,
51
+ "num_split_features": 5
52
+ },
53
+ {
54
+ "first_letter": "d",
55
+ "mean_absorption_fraction": 0.42378677187369035,
56
+ "full_absorption_rate": 0.23132530120481928,
57
+ "num_full_absorption": 384,
58
+ "num_probe_true_positives": 1660,
59
+ "num_split_features": 2
60
+ },
61
+ {
62
+ "first_letter": "e",
63
+ "mean_absorption_fraction": 0.37917700106716457,
64
+ "full_absorption_rate": 0.1806930693069307,
65
+ "num_full_absorption": 292,
66
+ "num_probe_true_positives": 1616,
67
+ "num_split_features": 3
68
+ },
69
+ {
70
+ "first_letter": "f",
71
+ "mean_absorption_fraction": 0.2694528890929361,
72
+ "full_absorption_rate": 0.09773828756058159,
73
+ "num_full_absorption": 121,
74
+ "num_probe_true_positives": 1238,
75
+ "num_split_features": 1
76
+ },
77
+ {
78
+ "first_letter": "g",
79
+ "mean_absorption_fraction": 0.33762684174409513,
80
+ "full_absorption_rate": 0.2034934497816594,
81
+ "num_full_absorption": 233,
82
+ "num_probe_true_positives": 1145,
83
+ "num_split_features": 1
84
+ },
85
+ {
86
+ "first_letter": "h",
87
+ "mean_absorption_fraction": 0.21553467858627987,
88
+ "full_absorption_rate": 0.05990338164251208,
89
+ "num_full_absorption": 62,
90
+ "num_probe_true_positives": 1035,
91
+ "num_split_features": 1
92
+ },
93
+ {
94
+ "first_letter": "i",
95
+ "mean_absorption_fraction": 0.3133461994941835,
96
+ "full_absorption_rate": 0.1221001221001221,
97
+ "num_full_absorption": 200,
98
+ "num_probe_true_positives": 1638,
99
+ "num_split_features": 2
100
+ },
101
+ {
102
+ "first_letter": "j",
103
+ "mean_absorption_fraction": 0.09022134602155767,
104
+ "full_absorption_rate": 0.014563106796116505,
105
+ "num_full_absorption": 6,
106
+ "num_probe_true_positives": 412,
107
+ "num_split_features": 1
108
+ },
109
+ {
110
+ "first_letter": "k",
111
+ "mean_absorption_fraction": 0.05866300627458284,
112
+ "full_absorption_rate": 0.008888888888888889,
113
+ "num_full_absorption": 6,
114
+ "num_probe_true_positives": 675,
115
+ "num_split_features": 1
116
+ },
117
+ {
118
+ "first_letter": "l",
119
+ "mean_absorption_fraction": 0.4974789370506606,
120
+ "full_absorption_rate": 0.3273350471293916,
121
+ "num_full_absorption": 382,
122
+ "num_probe_true_positives": 1167,
123
+ "num_split_features": 1
124
+ },
125
+ {
126
+ "first_letter": "m",
127
+ "mean_absorption_fraction": 0.5123970181859486,
128
+ "full_absorption_rate": 0.2904997254255903,
129
+ "num_full_absorption": 529,
130
+ "num_probe_true_positives": 1821,
131
+ "num_split_features": 3
132
+ },
133
+ {
134
+ "first_letter": "n",
135
+ "mean_absorption_fraction": 0.26753510683966114,
136
+ "full_absorption_rate": 0.0982367758186398,
137
+ "num_full_absorption": 78,
138
+ "num_probe_true_positives": 794,
139
+ "num_split_features": 1
140
+ },
141
+ {
142
+ "first_letter": "o",
143
+ "mean_absorption_fraction": 0.28532511297821034,
144
+ "full_absorption_rate": 0.1246485473289597,
145
+ "num_full_absorption": 133,
146
+ "num_probe_true_positives": 1067,
147
+ "num_split_features": 1
148
+ },
149
+ {
150
+ "first_letter": "p",
151
+ "mean_absorption_fraction": 0.7526527841045251,
152
+ "full_absorption_rate": 0.46757230499561786,
153
+ "num_full_absorption": 1067,
154
+ "num_probe_true_positives": 2282,
155
+ "num_split_features": 2
156
+ },
157
+ {
158
+ "first_letter": "q",
159
+ "mean_absorption_fraction": 0.2352011783959387,
160
+ "full_absorption_rate": 0.06842105263157895,
161
+ "num_full_absorption": 13,
162
+ "num_probe_true_positives": 190,
163
+ "num_split_features": 1
164
+ },
165
+ {
166
+ "first_letter": "r",
167
+ "mean_absorption_fraction": 0.5935578512060531,
168
+ "full_absorption_rate": 0.38741916519694297,
169
+ "num_full_absorption": 659,
170
+ "num_probe_true_positives": 1701,
171
+ "num_split_features": 1
172
+ },
173
+ {
174
+ "first_letter": "s",
175
+ "mean_absorption_fraction": 0.6336584581511494,
176
+ "full_absorption_rate": 0.4720342002137513,
177
+ "num_full_absorption": 1325,
178
+ "num_probe_true_positives": 2807,
179
+ "num_split_features": 4
180
+ },
181
+ {
182
+ "first_letter": "t",
183
+ "mean_absorption_fraction": 0.6346142754558641,
184
+ "full_absorption_rate": 0.35988200589970504,
185
+ "num_full_absorption": 610,
186
+ "num_probe_true_positives": 1695,
187
+ "num_split_features": 1
188
+ },
189
+ {
190
+ "first_letter": "u",
191
+ "mean_absorption_fraction": 0.5560926658622606,
192
+ "full_absorption_rate": 0.41324503311258276,
193
+ "num_full_absorption": 312,
194
+ "num_probe_true_positives": 755,
195
+ "num_split_features": 1
196
+ },
197
+ {
198
+ "first_letter": "v",
199
+ "mean_absorption_fraction": 0.13204581985673397,
200
+ "full_absorption_rate": 0.03929024081115336,
201
+ "num_full_absorption": 31,
202
+ "num_probe_true_positives": 789,
203
+ "num_split_features": 1
204
+ },
205
+ {
206
+ "first_letter": "w",
207
+ "mean_absorption_fraction": 0.2592258683246165,
208
+ "full_absorption_rate": 0.12258953168044077,
209
+ "num_full_absorption": 89,
210
+ "num_probe_true_positives": 726,
211
+ "num_split_features": 1
212
+ },
213
+ {
214
+ "first_letter": "x",
215
+ "mean_absorption_fraction": 0.14300134539704432,
216
+ "full_absorption_rate": 0.035398230088495575,
217
+ "num_full_absorption": 4,
218
+ "num_probe_true_positives": 113,
219
+ "num_split_features": 1
220
+ },
221
+ {
222
+ "first_letter": "y",
223
+ "mean_absorption_fraction": 0.10338290562531721,
224
+ "full_absorption_rate": 0.03977272727272727,
225
+ "num_full_absorption": 7,
226
+ "num_probe_true_positives": 176,
227
+ "num_split_features": 1
228
+ },
229
+ {
230
+ "first_letter": "z",
231
+ "mean_absorption_fraction": 0.03772025060424493,
232
+ "full_absorption_rate": 0.01702127659574468,
233
+ "num_full_absorption": 4,
234
+ "num_probe_true_positives": 235,
235
+ "num_split_features": 1
236
+ }
237
+ ],
238
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
239
+ "sae_lens_id": "custom_sae",
240
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_5",
241
+ "sae_lens_version": "5.4.2",
242
+ "sae_cfg_dict": {
243
+ "model_name": "gemma-2-2b",
244
+ "d_in": 2304,
245
+ "d_sae": 16384,
246
+ "hook_layer": 12,
247
+ "hook_name": "blocks.12.hook_resid_post",
248
+ "context_size": null,
249
+ "hook_head_index": null,
250
+ "architecture": "standard_april_update",
251
+ "apply_b_dec_to_input": null,
252
+ "finetuning_scaling_factor": null,
253
+ "activation_fn_str": "",
254
+ "prepend_bos": true,
255
+ "normalize_activations": "none",
256
+ "dtype": "bfloat16",
257
+ "device": "",
258
+ "dataset_path": "",
259
+ "dataset_trust_remote_code": true,
260
+ "seqpos_slice": [
261
+ null
262
+ ],
263
+ "training_tokens": -100000,
264
+ "sae_lens_training_version": null,
265
+ "neuronpedia_id": null
266
+ },
267
+ "eval_result_unstructured": null
268
+ }
eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_0_custom_sae_eval_results.json ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "absorption_first_letter",
3
+ "eval_config": {
4
+ "model_name": "gemma-2-2b",
5
+ "random_seed": 42,
6
+ "f1_jump_threshold": 0.03,
7
+ "max_k_value": 10,
8
+ "prompt_template": "{word} has the first letter:",
9
+ "prompt_token_pos": -6,
10
+ "llm_batch_size": 32,
11
+ "llm_dtype": "bfloat16",
12
+ "k_sparse_probe_l1_decay": 0.01,
13
+ "k_sparse_probe_batch_size": 4096,
14
+ "k_sparse_probe_num_epochs": 50
15
+ },
16
+ "eval_id": "ac7d6a5d-0d00-4a5b-abd1-3aeae988bb97",
17
+ "datetime_epoch_millis": 1740106423402,
18
+ "eval_result_metrics": {
19
+ "mean": {
20
+ "mean_absorption_fraction_score": 0.3354590245907481,
21
+ "mean_full_absorption_score": 0.031849435853675334,
22
+ "mean_num_split_features": 1.0,
23
+ "std_dev_absorption_fraction_score": 0.1793908795821037,
24
+ "std_dev_full_absorption_score": 0.07822514491503704,
25
+ "std_dev_num_split_features": 0.0
26
+ }
27
+ },
28
+ "eval_result_details": [
29
+ {
30
+ "first_letter": "a",
31
+ "mean_absorption_fraction": 0.6705902831278018,
32
+ "full_absorption_rate": 0.04824561403508772,
33
+ "num_full_absorption": 121,
34
+ "num_probe_true_positives": 2508,
35
+ "num_split_features": 1
36
+ },
37
+ {
38
+ "first_letter": "b",
39
+ "mean_absorption_fraction": 0.3728156359136282,
40
+ "full_absorption_rate": 0.00648508430609598,
41
+ "num_full_absorption": 10,
42
+ "num_probe_true_positives": 1542,
43
+ "num_split_features": 1
44
+ },
45
+ {
46
+ "first_letter": "c",
47
+ "mean_absorption_fraction": 0.5348743583874349,
48
+ "full_absorption_rate": 0.020320855614973262,
49
+ "num_full_absorption": 57,
50
+ "num_probe_true_positives": 2805,
51
+ "num_split_features": 1
52
+ },
53
+ {
54
+ "first_letter": "d",
55
+ "mean_absorption_fraction": 0.39798907235407177,
56
+ "full_absorption_rate": 0.02469879518072289,
57
+ "num_full_absorption": 41,
58
+ "num_probe_true_positives": 1660,
59
+ "num_split_features": 1
60
+ },
61
+ {
62
+ "first_letter": "e",
63
+ "mean_absorption_fraction": 0.533886743466549,
64
+ "full_absorption_rate": 0.01485148514851485,
65
+ "num_full_absorption": 24,
66
+ "num_probe_true_positives": 1616,
67
+ "num_split_features": 1
68
+ },
69
+ {
70
+ "first_letter": "f",
71
+ "mean_absorption_fraction": 0.27790151084705284,
72
+ "full_absorption_rate": 0.0024232633279483036,
73
+ "num_full_absorption": 3,
74
+ "num_probe_true_positives": 1238,
75
+ "num_split_features": 1
76
+ },
77
+ {
78
+ "first_letter": "g",
79
+ "mean_absorption_fraction": 0.27757546015014967,
80
+ "full_absorption_rate": 0.03668122270742358,
81
+ "num_full_absorption": 42,
82
+ "num_probe_true_positives": 1145,
83
+ "num_split_features": 1
84
+ },
85
+ {
86
+ "first_letter": "h",
87
+ "mean_absorption_fraction": 0.2768691562378332,
88
+ "full_absorption_rate": 0.007729468599033816,
89
+ "num_full_absorption": 8,
90
+ "num_probe_true_positives": 1035,
91
+ "num_split_features": 1
92
+ },
93
+ {
94
+ "first_letter": "i",
95
+ "mean_absorption_fraction": 0.82576509541867,
96
+ "full_absorption_rate": 0.4084249084249084,
97
+ "num_full_absorption": 669,
98
+ "num_probe_true_positives": 1638,
99
+ "num_split_features": 1
100
+ },
101
+ {
102
+ "first_letter": "j",
103
+ "mean_absorption_fraction": 0.17559629722958997,
104
+ "full_absorption_rate": 0.012135922330097087,
105
+ "num_full_absorption": 5,
106
+ "num_probe_true_positives": 412,
107
+ "num_split_features": 1
108
+ },
109
+ {
110
+ "first_letter": "k",
111
+ "mean_absorption_fraction": 0.09880944880998055,
112
+ "full_absorption_rate": 0.005925925925925926,
113
+ "num_full_absorption": 4,
114
+ "num_probe_true_positives": 675,
115
+ "num_split_features": 1
116
+ },
117
+ {
118
+ "first_letter": "l",
119
+ "mean_absorption_fraction": 0.2382625256082734,
120
+ "full_absorption_rate": 0.005998286203941731,
121
+ "num_full_absorption": 7,
122
+ "num_probe_true_positives": 1167,
123
+ "num_split_features": 1
124
+ },
125
+ {
126
+ "first_letter": "m",
127
+ "mean_absorption_fraction": 0.31249057733024366,
128
+ "full_absorption_rate": 0.007138934651290499,
129
+ "num_full_absorption": 13,
130
+ "num_probe_true_positives": 1821,
131
+ "num_split_features": 1
132
+ },
133
+ {
134
+ "first_letter": "n",
135
+ "mean_absorption_fraction": 0.28150837185990335,
136
+ "full_absorption_rate": 0.012594458438287154,
137
+ "num_full_absorption": 10,
138
+ "num_probe_true_positives": 794,
139
+ "num_split_features": 1
140
+ },
141
+ {
142
+ "first_letter": "o",
143
+ "mean_absorption_fraction": 0.480505577799689,
144
+ "full_absorption_rate": 0.05248359887535145,
145
+ "num_full_absorption": 56,
146
+ "num_probe_true_positives": 1067,
147
+ "num_split_features": 1
148
+ },
149
+ {
150
+ "first_letter": "p",
151
+ "mean_absorption_fraction": 0.47784453048938713,
152
+ "full_absorption_rate": 0.018843120070113933,
153
+ "num_full_absorption": 43,
154
+ "num_probe_true_positives": 2282,
155
+ "num_split_features": 1
156
+ },
157
+ {
158
+ "first_letter": "q",
159
+ "mean_absorption_fraction": 0.14627500048163214,
160
+ "full_absorption_rate": 0.0,
161
+ "num_full_absorption": 0,
162
+ "num_probe_true_positives": 190,
163
+ "num_split_features": 1
164
+ },
165
+ {
166
+ "first_letter": "r",
167
+ "mean_absorption_fraction": 0.42047647042346825,
168
+ "full_absorption_rate": 0.02821869488536155,
169
+ "num_full_absorption": 48,
170
+ "num_probe_true_positives": 1701,
171
+ "num_split_features": 1
172
+ },
173
+ {
174
+ "first_letter": "s",
175
+ "mean_absorption_fraction": 0.46815558803513757,
176
+ "full_absorption_rate": 0.032418952618453865,
177
+ "num_full_absorption": 91,
178
+ "num_probe_true_positives": 2807,
179
+ "num_split_features": 1
180
+ },
181
+ {
182
+ "first_letter": "t",
183
+ "mean_absorption_fraction": 0.31041426393272636,
184
+ "full_absorption_rate": 0.008259587020648967,
185
+ "num_full_absorption": 14,
186
+ "num_probe_true_positives": 1695,
187
+ "num_split_features": 1
188
+ },
189
+ {
190
+ "first_letter": "u",
191
+ "mean_absorption_fraction": 0.3376080066325288,
192
+ "full_absorption_rate": 0.04105960264900662,
193
+ "num_full_absorption": 31,
194
+ "num_probe_true_positives": 755,
195
+ "num_split_features": 1
196
+ },
197
+ {
198
+ "first_letter": "v",
199
+ "mean_absorption_fraction": 0.20292887001215182,
200
+ "full_absorption_rate": 0.0063371356147021544,
201
+ "num_full_absorption": 5,
202
+ "num_probe_true_positives": 789,
203
+ "num_split_features": 1
204
+ },
205
+ {
206
+ "first_letter": "w",
207
+ "mean_absorption_fraction": 0.17713092282140977,
208
+ "full_absorption_rate": 0.005509641873278237,
209
+ "num_full_absorption": 4,
210
+ "num_probe_true_positives": 726,
211
+ "num_split_features": 1
212
+ },
213
+ {
214
+ "first_letter": "x",
215
+ "mean_absorption_fraction": 0.15669316359265703,
216
+ "full_absorption_rate": 0.0,
217
+ "num_full_absorption": 0,
218
+ "num_probe_true_positives": 113,
219
+ "num_split_features": 1
220
+ },
221
+ {
222
+ "first_letter": "y",
223
+ "mean_absorption_fraction": 0.183909327242874,
224
+ "full_absorption_rate": 0.017045454545454544,
225
+ "num_full_absorption": 3,
226
+ "num_probe_true_positives": 176,
227
+ "num_split_features": 1
228
+ },
229
+ {
230
+ "first_letter": "z",
231
+ "mean_absorption_fraction": 0.08505838115460661,
232
+ "full_absorption_rate": 0.00425531914893617,
233
+ "num_full_absorption": 1,
234
+ "num_probe_true_positives": 235,
235
+ "num_split_features": 1
236
+ }
237
+ ],
238
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
239
+ "sae_lens_id": "custom_sae",
240
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_0",
241
+ "sae_lens_version": "5.4.2",
242
+ "sae_cfg_dict": {
243
+ "model_name": "gemma-2-2b",
244
+ "d_in": 2304,
245
+ "d_sae": 65536,
246
+ "hook_layer": 12,
247
+ "hook_name": "blocks.12.hook_resid_post",
248
+ "context_size": null,
249
+ "hook_head_index": null,
250
+ "architecture": "standard_april_update",
251
+ "apply_b_dec_to_input": null,
252
+ "finetuning_scaling_factor": null,
253
+ "activation_fn_str": "",
254
+ "prepend_bos": true,
255
+ "normalize_activations": "none",
256
+ "dtype": "bfloat16",
257
+ "device": "",
258
+ "dataset_path": "",
259
+ "dataset_trust_remote_code": true,
260
+ "seqpos_slice": [
261
+ null
262
+ ],
263
+ "training_tokens": -100000,
264
+ "sae_lens_training_version": null,
265
+ "neuronpedia_id": null
266
+ },
267
+ "eval_result_unstructured": null
268
+ }
eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_1_custom_sae_eval_results.json ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "absorption_first_letter",
3
+ "eval_config": {
4
+ "model_name": "gemma-2-2b",
5
+ "random_seed": 42,
6
+ "f1_jump_threshold": 0.03,
7
+ "max_k_value": 10,
8
+ "prompt_template": "{word} has the first letter:",
9
+ "prompt_token_pos": -6,
10
+ "llm_batch_size": 32,
11
+ "llm_dtype": "bfloat16",
12
+ "k_sparse_probe_l1_decay": 0.01,
13
+ "k_sparse_probe_batch_size": 4096,
14
+ "k_sparse_probe_num_epochs": 50
15
+ },
16
+ "eval_id": "44014436-2277-4932-8035-15dfefe49292",
17
+ "datetime_epoch_millis": 1740104119403,
18
+ "eval_result_metrics": {
19
+ "mean": {
20
+ "mean_absorption_fraction_score": 0.347605051511975,
21
+ "mean_full_absorption_score": 0.03857317859306944,
22
+ "mean_num_split_features": 1.0384615384615385,
23
+ "std_dev_absorption_fraction_score": 0.17133887402196588,
24
+ "std_dev_full_absorption_score": 0.046258397055203106,
25
+ "std_dev_num_split_features": 0.19611613513818404
26
+ }
27
+ },
28
+ "eval_result_details": [
29
+ {
30
+ "first_letter": "a",
31
+ "mean_absorption_fraction": 0.7399088796725409,
32
+ "full_absorption_rate": 0.12081339712918661,
33
+ "num_full_absorption": 303,
34
+ "num_probe_true_positives": 2508,
35
+ "num_split_features": 1
36
+ },
37
+ {
38
+ "first_letter": "b",
39
+ "mean_absorption_fraction": 0.41786808469850445,
40
+ "full_absorption_rate": 0.03631647211413749,
41
+ "num_full_absorption": 56,
42
+ "num_probe_true_positives": 1542,
43
+ "num_split_features": 1
44
+ },
45
+ {
46
+ "first_letter": "c",
47
+ "mean_absorption_fraction": 0.5593407106875781,
48
+ "full_absorption_rate": 0.037076648841354726,
49
+ "num_full_absorption": 104,
50
+ "num_probe_true_positives": 2805,
51
+ "num_split_features": 1
52
+ },
53
+ {
54
+ "first_letter": "d",
55
+ "mean_absorption_fraction": 0.45953918169479785,
56
+ "full_absorption_rate": 0.04216867469879518,
57
+ "num_full_absorption": 70,
58
+ "num_probe_true_positives": 1660,
59
+ "num_split_features": 1
60
+ },
61
+ {
62
+ "first_letter": "e",
63
+ "mean_absorption_fraction": 0.5119789949378576,
64
+ "full_absorption_rate": 0.0297029702970297,
65
+ "num_full_absorption": 48,
66
+ "num_probe_true_positives": 1616,
67
+ "num_split_features": 1
68
+ },
69
+ {
70
+ "first_letter": "f",
71
+ "mean_absorption_fraction": 0.37566264645277836,
72
+ "full_absorption_rate": 0.016962843295638127,
73
+ "num_full_absorption": 21,
74
+ "num_probe_true_positives": 1238,
75
+ "num_split_features": 1
76
+ },
77
+ {
78
+ "first_letter": "g",
79
+ "mean_absorption_fraction": 0.22625273738522275,
80
+ "full_absorption_rate": 0.023580786026200874,
81
+ "num_full_absorption": 27,
82
+ "num_probe_true_positives": 1145,
83
+ "num_split_features": 1
84
+ },
85
+ {
86
+ "first_letter": "h",
87
+ "mean_absorption_fraction": 0.37416087626156275,
88
+ "full_absorption_rate": 0.021256038647342997,
89
+ "num_full_absorption": 22,
90
+ "num_probe_true_positives": 1035,
91
+ "num_split_features": 1
92
+ },
93
+ {
94
+ "first_letter": "i",
95
+ "mean_absorption_fraction": 0.6517323393277149,
96
+ "full_absorption_rate": 0.22466422466422467,
97
+ "num_full_absorption": 368,
98
+ "num_probe_true_positives": 1638,
99
+ "num_split_features": 1
100
+ },
101
+ {
102
+ "first_letter": "j",
103
+ "mean_absorption_fraction": 0.15898688303273234,
104
+ "full_absorption_rate": 0.007281553398058253,
105
+ "num_full_absorption": 3,
106
+ "num_probe_true_positives": 412,
107
+ "num_split_features": 1
108
+ },
109
+ {
110
+ "first_letter": "k",
111
+ "mean_absorption_fraction": 0.05484682779040442,
112
+ "full_absorption_rate": 0.0044444444444444444,
113
+ "num_full_absorption": 3,
114
+ "num_probe_true_positives": 675,
115
+ "num_split_features": 1
116
+ },
117
+ {
118
+ "first_letter": "l",
119
+ "mean_absorption_fraction": 0.35469782028287083,
120
+ "full_absorption_rate": 0.027420736932305057,
121
+ "num_full_absorption": 32,
122
+ "num_probe_true_positives": 1167,
123
+ "num_split_features": 1
124
+ },
125
+ {
126
+ "first_letter": "m",
127
+ "mean_absorption_fraction": 0.28887345983707763,
128
+ "full_absorption_rate": 0.0060406370126304225,
129
+ "num_full_absorption": 11,
130
+ "num_probe_true_positives": 1821,
131
+ "num_split_features": 1
132
+ },
133
+ {
134
+ "first_letter": "n",
135
+ "mean_absorption_fraction": 0.30536576036448354,
136
+ "full_absorption_rate": 0.02518891687657431,
137
+ "num_full_absorption": 20,
138
+ "num_probe_true_positives": 794,
139
+ "num_split_features": 1
140
+ },
141
+ {
142
+ "first_letter": "o",
143
+ "mean_absorption_fraction": 0.5108695858931614,
144
+ "full_absorption_rate": 0.08528584817244611,
145
+ "num_full_absorption": 91,
146
+ "num_probe_true_positives": 1067,
147
+ "num_split_features": 1
148
+ },
149
+ {
150
+ "first_letter": "p",
151
+ "mean_absorption_fraction": 0.41906548997444415,
152
+ "full_absorption_rate": 0.007887817703768623,
153
+ "num_full_absorption": 18,
154
+ "num_probe_true_positives": 2282,
155
+ "num_split_features": 1
156
+ },
157
+ {
158
+ "first_letter": "q",
159
+ "mean_absorption_fraction": 0.18782440951973378,
160
+ "full_absorption_rate": 0.02631578947368421,
161
+ "num_full_absorption": 5,
162
+ "num_probe_true_positives": 190,
163
+ "num_split_features": 1
164
+ },
165
+ {
166
+ "first_letter": "r",
167
+ "mean_absorption_fraction": 0.4157013866326478,
168
+ "full_absorption_rate": 0.05584950029394474,
169
+ "num_full_absorption": 95,
170
+ "num_probe_true_positives": 1701,
171
+ "num_split_features": 2
172
+ },
173
+ {
174
+ "first_letter": "s",
175
+ "mean_absorption_fraction": 0.46749630339725584,
176
+ "full_absorption_rate": 0.0441752760954756,
177
+ "num_full_absorption": 124,
178
+ "num_probe_true_positives": 2807,
179
+ "num_split_features": 1
180
+ },
181
+ {
182
+ "first_letter": "t",
183
+ "mean_absorption_fraction": 0.39811444693801784,
184
+ "full_absorption_rate": 0.024778761061946902,
185
+ "num_full_absorption": 42,
186
+ "num_probe_true_positives": 1695,
187
+ "num_split_features": 1
188
+ },
189
+ {
190
+ "first_letter": "u",
191
+ "mean_absorption_fraction": 0.3173131816990261,
192
+ "full_absorption_rate": 0.05033112582781457,
193
+ "num_full_absorption": 38,
194
+ "num_probe_true_positives": 755,
195
+ "num_split_features": 1
196
+ },
197
+ {
198
+ "first_letter": "v",
199
+ "mean_absorption_fraction": 0.1644279320997771,
200
+ "full_absorption_rate": 0.017743979721166033,
201
+ "num_full_absorption": 14,
202
+ "num_probe_true_positives": 789,
203
+ "num_split_features": 1
204
+ },
205
+ {
206
+ "first_letter": "w",
207
+ "mean_absorption_fraction": 0.2168683380240976,
208
+ "full_absorption_rate": 0.01790633608815427,
209
+ "num_full_absorption": 13,
210
+ "num_probe_true_positives": 726,
211
+ "num_split_features": 1
212
+ },
213
+ {
214
+ "first_letter": "x",
215
+ "mean_absorption_fraction": 0.1809079054261136,
216
+ "full_absorption_rate": 0.0,
217
+ "num_full_absorption": 0,
218
+ "num_probe_true_positives": 113,
219
+ "num_split_features": 1
220
+ },
221
+ {
222
+ "first_letter": "y",
223
+ "mean_absorption_fraction": 0.21409290660015243,
224
+ "full_absorption_rate": 0.045454545454545456,
225
+ "num_full_absorption": 8,
226
+ "num_probe_true_positives": 176,
227
+ "num_split_features": 1
228
+ },
229
+ {
230
+ "first_letter": "z",
231
+ "mean_absorption_fraction": 0.0658342506807965,
232
+ "full_absorption_rate": 0.00425531914893617,
233
+ "num_full_absorption": 1,
234
+ "num_probe_true_positives": 235,
235
+ "num_split_features": 1
236
+ }
237
+ ],
238
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
239
+ "sae_lens_id": "custom_sae",
240
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_1",
241
+ "sae_lens_version": "5.4.2",
242
+ "sae_cfg_dict": {
243
+ "model_name": "gemma-2-2b",
244
+ "d_in": 2304,
245
+ "d_sae": 65536,
246
+ "hook_layer": 12,
247
+ "hook_name": "blocks.12.hook_resid_post",
248
+ "context_size": null,
249
+ "hook_head_index": null,
250
+ "architecture": "standard_april_update",
251
+ "apply_b_dec_to_input": null,
252
+ "finetuning_scaling_factor": null,
253
+ "activation_fn_str": "",
254
+ "prepend_bos": true,
255
+ "normalize_activations": "none",
256
+ "dtype": "bfloat16",
257
+ "device": "",
258
+ "dataset_path": "",
259
+ "dataset_trust_remote_code": true,
260
+ "seqpos_slice": [
261
+ null
262
+ ],
263
+ "training_tokens": -100000,
264
+ "sae_lens_training_version": null,
265
+ "neuronpedia_id": null
266
+ },
267
+ "eval_result_unstructured": null
268
+ }
eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_2_custom_sae_eval_results.json ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "absorption_first_letter",
3
+ "eval_config": {
4
+ "model_name": "gemma-2-2b",
5
+ "random_seed": 42,
6
+ "f1_jump_threshold": 0.03,
7
+ "max_k_value": 10,
8
+ "prompt_template": "{word} has the first letter:",
9
+ "prompt_token_pos": -6,
10
+ "llm_batch_size": 32,
11
+ "llm_dtype": "bfloat16",
12
+ "k_sparse_probe_l1_decay": 0.01,
13
+ "k_sparse_probe_batch_size": 4096,
14
+ "k_sparse_probe_num_epochs": 50
15
+ },
16
+ "eval_id": "a25add70-d8fc-4e77-a5ac-f9b20ed47a90",
17
+ "datetime_epoch_millis": 1740107190845,
18
+ "eval_result_metrics": {
19
+ "mean": {
20
+ "mean_absorption_fraction_score": 0.35606277259778024,
21
+ "mean_full_absorption_score": 0.058416307732367695,
22
+ "mean_num_split_features": 1.1538461538461537,
23
+ "std_dev_absorption_fraction_score": 0.20313388269593746,
24
+ "std_dev_full_absorption_score": 0.06309863260264571,
25
+ "std_dev_num_split_features": 0.36794648440311994
26
+ }
27
+ },
28
+ "eval_result_details": [
29
+ {
30
+ "first_letter": "a",
31
+ "mean_absorption_fraction": 0.6914944003282536,
32
+ "full_absorption_rate": 0.09928229665071771,
33
+ "num_full_absorption": 249,
34
+ "num_probe_true_positives": 2508,
35
+ "num_split_features": 1
36
+ },
37
+ {
38
+ "first_letter": "b",
39
+ "mean_absorption_fraction": 0.5346865856027213,
40
+ "full_absorption_rate": 0.12062256809338522,
41
+ "num_full_absorption": 186,
42
+ "num_probe_true_positives": 1542,
43
+ "num_split_features": 1
44
+ },
45
+ {
46
+ "first_letter": "c",
47
+ "mean_absorption_fraction": 0.6022704850600882,
48
+ "full_absorption_rate": 0.10338680926916222,
49
+ "num_full_absorption": 290,
50
+ "num_probe_true_positives": 2805,
51
+ "num_split_features": 1
52
+ },
53
+ {
54
+ "first_letter": "d",
55
+ "mean_absorption_fraction": 0.4691487366121355,
56
+ "full_absorption_rate": 0.05783132530120482,
57
+ "num_full_absorption": 96,
58
+ "num_probe_true_positives": 1660,
59
+ "num_split_features": 1
60
+ },
61
+ {
62
+ "first_letter": "e",
63
+ "mean_absorption_fraction": 0.5822165186131862,
64
+ "full_absorption_rate": 0.09777227722772278,
65
+ "num_full_absorption": 158,
66
+ "num_probe_true_positives": 1616,
67
+ "num_split_features": 1
68
+ },
69
+ {
70
+ "first_letter": "f",
71
+ "mean_absorption_fraction": 0.34109857549209605,
72
+ "full_absorption_rate": 0.027463651050080775,
73
+ "num_full_absorption": 34,
74
+ "num_probe_true_positives": 1238,
75
+ "num_split_features": 1
76
+ },
77
+ {
78
+ "first_letter": "g",
79
+ "mean_absorption_fraction": 0.2975657369760735,
80
+ "full_absorption_rate": 0.0462882096069869,
81
+ "num_full_absorption": 53,
82
+ "num_probe_true_positives": 1145,
83
+ "num_split_features": 1
84
+ },
85
+ {
86
+ "first_letter": "h",
87
+ "mean_absorption_fraction": 0.35129528088332335,
88
+ "full_absorption_rate": 0.035748792270531404,
89
+ "num_full_absorption": 37,
90
+ "num_probe_true_positives": 1035,
91
+ "num_split_features": 1
92
+ },
93
+ {
94
+ "first_letter": "i",
95
+ "mean_absorption_fraction": 0.6614939578585769,
96
+ "full_absorption_rate": 0.2869352869352869,
97
+ "num_full_absorption": 470,
98
+ "num_probe_true_positives": 1638,
99
+ "num_split_features": 1
100
+ },
101
+ {
102
+ "first_letter": "j",
103
+ "mean_absorption_fraction": 0.1501980866745374,
104
+ "full_absorption_rate": 0.007281553398058253,
105
+ "num_full_absorption": 3,
106
+ "num_probe_true_positives": 412,
107
+ "num_split_features": 1
108
+ },
109
+ {
110
+ "first_letter": "k",
111
+ "mean_absorption_fraction": 0.04632368976363547,
112
+ "full_absorption_rate": 0.0044444444444444444,
113
+ "num_full_absorption": 3,
114
+ "num_probe_true_positives": 675,
115
+ "num_split_features": 1
116
+ },
117
+ {
118
+ "first_letter": "l",
119
+ "mean_absorption_fraction": 0.3433001642791041,
120
+ "full_absorption_rate": 0.02570694087403599,
121
+ "num_full_absorption": 30,
122
+ "num_probe_true_positives": 1167,
123
+ "num_split_features": 2
124
+ },
125
+ {
126
+ "first_letter": "m",
127
+ "mean_absorption_fraction": 0.3545827375756091,
128
+ "full_absorption_rate": 0.022515101592531575,
129
+ "num_full_absorption": 41,
130
+ "num_probe_true_positives": 1821,
131
+ "num_split_features": 1
132
+ },
133
+ {
134
+ "first_letter": "n",
135
+ "mean_absorption_fraction": 0.28102067711277673,
136
+ "full_absorption_rate": 0.022670025188916875,
137
+ "num_full_absorption": 18,
138
+ "num_probe_true_positives": 794,
139
+ "num_split_features": 1
140
+ },
141
+ {
142
+ "first_letter": "o",
143
+ "mean_absorption_fraction": 0.3869171083857235,
144
+ "full_absorption_rate": 0.05435801312089972,
145
+ "num_full_absorption": 58,
146
+ "num_probe_true_positives": 1067,
147
+ "num_split_features": 1
148
+ },
149
+ {
150
+ "first_letter": "p",
151
+ "mean_absorption_fraction": 0.6588443437832601,
152
+ "full_absorption_rate": 0.15030674846625766,
153
+ "num_full_absorption": 343,
154
+ "num_probe_true_positives": 2282,
155
+ "num_split_features": 1
156
+ },
157
+ {
158
+ "first_letter": "q",
159
+ "mean_absorption_fraction": 0.1851751902884397,
160
+ "full_absorption_rate": 0.02631578947368421,
161
+ "num_full_absorption": 5,
162
+ "num_probe_true_positives": 190,
163
+ "num_split_features": 1
164
+ },
165
+ {
166
+ "first_letter": "r",
167
+ "mean_absorption_fraction": 0.5037574452444985,
168
+ "full_absorption_rate": 0.08465608465608465,
169
+ "num_full_absorption": 144,
170
+ "num_probe_true_positives": 1701,
171
+ "num_split_features": 2
172
+ },
173
+ {
174
+ "first_letter": "s",
175
+ "mean_absorption_fraction": 0.5589132911026052,
176
+ "full_absorption_rate": 0.11400071250445315,
177
+ "num_full_absorption": 320,
178
+ "num_probe_true_positives": 2807,
179
+ "num_split_features": 1
180
+ },
181
+ {
182
+ "first_letter": "t",
183
+ "mean_absorption_fraction": 0.4614050815777875,
184
+ "full_absorption_rate": 0.06371681415929203,
185
+ "num_full_absorption": 108,
186
+ "num_probe_true_positives": 1695,
187
+ "num_split_features": 1
188
+ },
189
+ {
190
+ "first_letter": "u",
191
+ "mean_absorption_fraction": 0.12407794887818784,
192
+ "full_absorption_rate": 0.006622516556291391,
193
+ "num_full_absorption": 5,
194
+ "num_probe_true_positives": 755,
195
+ "num_split_features": 2
196
+ },
197
+ {
198
+ "first_letter": "v",
199
+ "mean_absorption_fraction": 0.16250522051692146,
200
+ "full_absorption_rate": 0.016476552598225603,
201
+ "num_full_absorption": 13,
202
+ "num_probe_true_positives": 789,
203
+ "num_split_features": 1
204
+ },
205
+ {
206
+ "first_letter": "w",
207
+ "mean_absorption_fraction": 0.25975629765923597,
208
+ "full_absorption_rate": 0.03305785123966942,
209
+ "num_full_absorption": 24,
210
+ "num_probe_true_positives": 726,
211
+ "num_split_features": 1
212
+ },
213
+ {
214
+ "first_letter": "x",
215
+ "mean_absorption_fraction": 0.10259413193221943,
216
+ "full_absorption_rate": 0.0,
217
+ "num_full_absorption": 0,
218
+ "num_probe_true_positives": 113,
219
+ "num_split_features": 1
220
+ },
221
+ {
222
+ "first_letter": "y",
223
+ "mean_absorption_fraction": 0.09755416501625133,
224
+ "full_absorption_rate": 0.011363636363636364,
225
+ "num_full_absorption": 2,
226
+ "num_probe_true_positives": 176,
227
+ "num_split_features": 2
228
+ },
229
+ {
230
+ "first_letter": "z",
231
+ "mean_absorption_fraction": 0.04943623032503822,
232
+ "full_absorption_rate": 0.0,
233
+ "num_full_absorption": 0,
234
+ "num_probe_true_positives": 235,
235
+ "num_split_features": 1
236
+ }
237
+ ],
238
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
239
+ "sae_lens_id": "custom_sae",
240
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_2",
241
+ "sae_lens_version": "5.4.2",
242
+ "sae_cfg_dict": {
243
+ "model_name": "gemma-2-2b",
244
+ "d_in": 2304,
245
+ "d_sae": 65536,
246
+ "hook_layer": 12,
247
+ "hook_name": "blocks.12.hook_resid_post",
248
+ "context_size": null,
249
+ "hook_head_index": null,
250
+ "architecture": "standard_april_update",
251
+ "apply_b_dec_to_input": null,
252
+ "finetuning_scaling_factor": null,
253
+ "activation_fn_str": "",
254
+ "prepend_bos": true,
255
+ "normalize_activations": "none",
256
+ "dtype": "bfloat16",
257
+ "device": "",
258
+ "dataset_path": "",
259
+ "dataset_trust_remote_code": true,
260
+ "seqpos_slice": [
261
+ null
262
+ ],
263
+ "training_tokens": -100000,
264
+ "sae_lens_training_version": null,
265
+ "neuronpedia_id": null
266
+ },
267
+ "eval_result_unstructured": null
268
+ }
eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_3_custom_sae_eval_results.json ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "absorption_first_letter",
3
+ "eval_config": {
4
+ "model_name": "gemma-2-2b",
5
+ "random_seed": 42,
6
+ "f1_jump_threshold": 0.03,
7
+ "max_k_value": 10,
8
+ "prompt_template": "{word} has the first letter:",
9
+ "prompt_token_pos": -6,
10
+ "llm_batch_size": 32,
11
+ "llm_dtype": "bfloat16",
12
+ "k_sparse_probe_l1_decay": 0.01,
13
+ "k_sparse_probe_batch_size": 4096,
14
+ "k_sparse_probe_num_epochs": 50
15
+ },
16
+ "eval_id": "67975d88-3e1e-45d1-9c8f-dfb138e9f448",
17
+ "datetime_epoch_millis": 1740107954355,
18
+ "eval_result_metrics": {
19
+ "mean": {
20
+ "mean_absorption_fraction_score": 0.44380362556359265,
21
+ "mean_full_absorption_score": 0.16020534428445912,
22
+ "mean_num_split_features": 1.0769230769230769,
23
+ "std_dev_absorption_fraction_score": 0.19712103165471032,
24
+ "std_dev_full_absorption_score": 0.0950978390878325,
25
+ "std_dev_num_split_features": 0.271746488194703
26
+ }
27
+ },
28
+ "eval_result_details": [
29
+ {
30
+ "first_letter": "a",
31
+ "mean_absorption_fraction": 0.6839817030406431,
32
+ "full_absorption_rate": 0.19577352472089316,
33
+ "num_full_absorption": 491,
34
+ "num_probe_true_positives": 2508,
35
+ "num_split_features": 1
36
+ },
37
+ {
38
+ "first_letter": "b",
39
+ "mean_absorption_fraction": 0.6290547365203768,
40
+ "full_absorption_rate": 0.2821011673151751,
41
+ "num_full_absorption": 435,
42
+ "num_probe_true_positives": 1542,
43
+ "num_split_features": 1
44
+ },
45
+ {
46
+ "first_letter": "c",
47
+ "mean_absorption_fraction": 0.6674229089281046,
48
+ "full_absorption_rate": 0.2103386809269162,
49
+ "num_full_absorption": 590,
50
+ "num_probe_true_positives": 2805,
51
+ "num_split_features": 1
52
+ },
53
+ {
54
+ "first_letter": "d",
55
+ "mean_absorption_fraction": 0.5200649016554985,
56
+ "full_absorption_rate": 0.12048192771084337,
57
+ "num_full_absorption": 200,
58
+ "num_probe_true_positives": 1660,
59
+ "num_split_features": 1
60
+ },
61
+ {
62
+ "first_letter": "e",
63
+ "mean_absorption_fraction": 0.5727495906700607,
64
+ "full_absorption_rate": 0.20606435643564355,
65
+ "num_full_absorption": 333,
66
+ "num_probe_true_positives": 1616,
67
+ "num_split_features": 1
68
+ },
69
+ {
70
+ "first_letter": "f",
71
+ "mean_absorption_fraction": 0.5293144892122851,
72
+ "full_absorption_rate": 0.15751211631663975,
73
+ "num_full_absorption": 195,
74
+ "num_probe_true_positives": 1238,
75
+ "num_split_features": 1
76
+ },
77
+ {
78
+ "first_letter": "g",
79
+ "mean_absorption_fraction": 0.4212924218838792,
80
+ "full_absorption_rate": 0.15021834061135372,
81
+ "num_full_absorption": 172,
82
+ "num_probe_true_positives": 1145,
83
+ "num_split_features": 1
84
+ },
85
+ {
86
+ "first_letter": "h",
87
+ "mean_absorption_fraction": 0.5288618679423563,
88
+ "full_absorption_rate": 0.1468599033816425,
89
+ "num_full_absorption": 152,
90
+ "num_probe_true_positives": 1035,
91
+ "num_split_features": 1
92
+ },
93
+ {
94
+ "first_letter": "i",
95
+ "mean_absorption_fraction": 0.48231909319996086,
96
+ "full_absorption_rate": 0.26495726495726496,
97
+ "num_full_absorption": 434,
98
+ "num_probe_true_positives": 1638,
99
+ "num_split_features": 2
100
+ },
101
+ {
102
+ "first_letter": "j",
103
+ "mean_absorption_fraction": 0.31945510015807715,
104
+ "full_absorption_rate": 0.09951456310679611,
105
+ "num_full_absorption": 41,
106
+ "num_probe_true_positives": 412,
107
+ "num_split_features": 1
108
+ },
109
+ {
110
+ "first_letter": "k",
111
+ "mean_absorption_fraction": 0.10917495607823761,
112
+ "full_absorption_rate": 0.014814814814814815,
113
+ "num_full_absorption": 10,
114
+ "num_probe_true_positives": 675,
115
+ "num_split_features": 1
116
+ },
117
+ {
118
+ "first_letter": "l",
119
+ "mean_absorption_fraction": 0.5170836823568926,
120
+ "full_absorption_rate": 0.1868037703513282,
121
+ "num_full_absorption": 218,
122
+ "num_probe_true_positives": 1167,
123
+ "num_split_features": 2
124
+ },
125
+ {
126
+ "first_letter": "m",
127
+ "mean_absorption_fraction": 0.47433744153500806,
128
+ "full_absorption_rate": 0.10049423393739704,
129
+ "num_full_absorption": 183,
130
+ "num_probe_true_positives": 1821,
131
+ "num_split_features": 1
132
+ },
133
+ {
134
+ "first_letter": "n",
135
+ "mean_absorption_fraction": 0.456688962722974,
136
+ "full_absorption_rate": 0.13350125944584382,
137
+ "num_full_absorption": 106,
138
+ "num_probe_true_positives": 794,
139
+ "num_split_features": 1
140
+ },
141
+ {
142
+ "first_letter": "o",
143
+ "mean_absorption_fraction": 0.429842671281088,
144
+ "full_absorption_rate": 0.14620431115276475,
145
+ "num_full_absorption": 156,
146
+ "num_probe_true_positives": 1067,
147
+ "num_split_features": 1
148
+ },
149
+ {
150
+ "first_letter": "p",
151
+ "mean_absorption_fraction": 0.7336453968814676,
152
+ "full_absorption_rate": 0.3347940403155127,
153
+ "num_full_absorption": 764,
154
+ "num_probe_true_positives": 2282,
155
+ "num_split_features": 1
156
+ },
157
+ {
158
+ "first_letter": "q",
159
+ "mean_absorption_fraction": 0.21841852715273,
160
+ "full_absorption_rate": 0.05789473684210526,
161
+ "num_full_absorption": 11,
162
+ "num_probe_true_positives": 190,
163
+ "num_split_features": 1
164
+ },
165
+ {
166
+ "first_letter": "r",
167
+ "mean_absorption_fraction": 0.664077195798576,
168
+ "full_absorption_rate": 0.3550852439741329,
169
+ "num_full_absorption": 604,
170
+ "num_probe_true_positives": 1701,
171
+ "num_split_features": 1
172
+ },
173
+ {
174
+ "first_letter": "s",
175
+ "mean_absorption_fraction": 0.6076130947697026,
176
+ "full_absorption_rate": 0.23619522622016387,
177
+ "num_full_absorption": 663,
178
+ "num_probe_true_positives": 2807,
179
+ "num_split_features": 1
180
+ },
181
+ {
182
+ "first_letter": "t",
183
+ "mean_absorption_fraction": 0.6065091469749866,
184
+ "full_absorption_rate": 0.19469026548672566,
185
+ "num_full_absorption": 330,
186
+ "num_probe_true_positives": 1695,
187
+ "num_split_features": 1
188
+ },
189
+ {
190
+ "first_letter": "u",
191
+ "mean_absorption_fraction": 0.3811763776255798,
192
+ "full_absorption_rate": 0.25298013245033113,
193
+ "num_full_absorption": 191,
194
+ "num_probe_true_positives": 755,
195
+ "num_split_features": 1
196
+ },
197
+ {
198
+ "first_letter": "v",
199
+ "mean_absorption_fraction": 0.2316869655239408,
200
+ "full_absorption_rate": 0.06337135614702155,
201
+ "num_full_absorption": 50,
202
+ "num_probe_true_positives": 789,
203
+ "num_split_features": 1
204
+ },
205
+ {
206
+ "first_letter": "w",
207
+ "mean_absorption_fraction": 0.4702754754474782,
208
+ "full_absorption_rate": 0.1887052341597796,
209
+ "num_full_absorption": 137,
210
+ "num_probe_true_positives": 726,
211
+ "num_split_features": 1
212
+ },
213
+ {
214
+ "first_letter": "x",
215
+ "mean_absorption_fraction": 0.13971775461100902,
216
+ "full_absorption_rate": 0.017699115044247787,
217
+ "num_full_absorption": 2,
218
+ "num_probe_true_positives": 113,
219
+ "num_split_features": 1
220
+ },
221
+ {
222
+ "first_letter": "y",
223
+ "mean_absorption_fraction": 0.09684929978197893,
224
+ "full_absorption_rate": 0.03977272727272727,
225
+ "num_full_absorption": 7,
226
+ "num_probe_true_positives": 176,
227
+ "num_split_features": 1
228
+ },
229
+ {
230
+ "first_letter": "z",
231
+ "mean_absorption_fraction": 0.04728050290051698,
232
+ "full_absorption_rate": 0.00851063829787234,
233
+ "num_full_absorption": 2,
234
+ "num_probe_true_positives": 235,
235
+ "num_split_features": 1
236
+ }
237
+ ],
238
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
239
+ "sae_lens_id": "custom_sae",
240
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_3",
241
+ "sae_lens_version": "5.4.2",
242
+ "sae_cfg_dict": {
243
+ "model_name": "gemma-2-2b",
244
+ "d_in": 2304,
245
+ "d_sae": 65536,
246
+ "hook_layer": 12,
247
+ "hook_name": "blocks.12.hook_resid_post",
248
+ "context_size": null,
249
+ "hook_head_index": null,
250
+ "architecture": "standard_april_update",
251
+ "apply_b_dec_to_input": null,
252
+ "finetuning_scaling_factor": null,
253
+ "activation_fn_str": "",
254
+ "prepend_bos": true,
255
+ "normalize_activations": "none",
256
+ "dtype": "bfloat16",
257
+ "device": "",
258
+ "dataset_path": "",
259
+ "dataset_trust_remote_code": true,
260
+ "seqpos_slice": [
261
+ null
262
+ ],
263
+ "training_tokens": -100000,
264
+ "sae_lens_training_version": null,
265
+ "neuronpedia_id": null
266
+ },
267
+ "eval_result_unstructured": null
268
+ }
eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_4_custom_sae_eval_results.json ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "absorption_first_letter",
3
+ "eval_config": {
4
+ "model_name": "gemma-2-2b",
5
+ "random_seed": 42,
6
+ "f1_jump_threshold": 0.03,
7
+ "max_k_value": 10,
8
+ "prompt_template": "{word} has the first letter:",
9
+ "prompt_token_pos": -6,
10
+ "llm_batch_size": 32,
11
+ "llm_dtype": "bfloat16",
12
+ "k_sparse_probe_l1_decay": 0.01,
13
+ "k_sparse_probe_batch_size": 4096,
14
+ "k_sparse_probe_num_epochs": 50
15
+ },
16
+ "eval_id": "c676f0a0-53b5-4849-9ef8-53b1afa1bb75",
17
+ "datetime_epoch_millis": 1740105664938,
18
+ "eval_result_metrics": {
19
+ "mean": {
20
+ "mean_absorption_fraction_score": 0.49673005917063284,
21
+ "mean_full_absorption_score": 0.26388583248007075,
22
+ "mean_num_split_features": 1.2692307692307692,
23
+ "std_dev_absorption_fraction_score": 0.2113161375367725,
24
+ "std_dev_full_absorption_score": 0.14871901551206065,
25
+ "std_dev_num_split_features": 0.7243033788512826
26
+ }
27
+ },
28
+ "eval_result_details": [
29
+ {
30
+ "first_letter": "a",
31
+ "mean_absorption_fraction": 0.7138046192508011,
32
+ "full_absorption_rate": 0.310207336523126,
33
+ "num_full_absorption": 778,
34
+ "num_probe_true_positives": 2508,
35
+ "num_split_features": 1
36
+ },
37
+ {
38
+ "first_letter": "b",
39
+ "mean_absorption_fraction": 0.6674758017576328,
40
+ "full_absorption_rate": 0.4280155642023346,
41
+ "num_full_absorption": 660,
42
+ "num_probe_true_positives": 1542,
43
+ "num_split_features": 1
44
+ },
45
+ {
46
+ "first_letter": "c",
47
+ "mean_absorption_fraction": 0.7629196965454134,
48
+ "full_absorption_rate": 0.46096256684491976,
49
+ "num_full_absorption": 1293,
50
+ "num_probe_true_positives": 2805,
51
+ "num_split_features": 1
52
+ },
53
+ {
54
+ "first_letter": "d",
55
+ "mean_absorption_fraction": 0.5509837850383651,
56
+ "full_absorption_rate": 0.26385542168674697,
57
+ "num_full_absorption": 438,
58
+ "num_probe_true_positives": 1660,
59
+ "num_split_features": 1
60
+ },
61
+ {
62
+ "first_letter": "e",
63
+ "mean_absorption_fraction": 0.6106759626477273,
64
+ "full_absorption_rate": 0.3193069306930693,
65
+ "num_full_absorption": 516,
66
+ "num_probe_true_positives": 1616,
67
+ "num_split_features": 1
68
+ },
69
+ {
70
+ "first_letter": "f",
71
+ "mean_absorption_fraction": 0.6850841851784408,
72
+ "full_absorption_rate": 0.4079159935379645,
73
+ "num_full_absorption": 505,
74
+ "num_probe_true_positives": 1238,
75
+ "num_split_features": 1
76
+ },
77
+ {
78
+ "first_letter": "g",
79
+ "mean_absorption_fraction": 0.5521196093178781,
80
+ "full_absorption_rate": 0.2925764192139738,
81
+ "num_full_absorption": 335,
82
+ "num_probe_true_positives": 1145,
83
+ "num_split_features": 1
84
+ },
85
+ {
86
+ "first_letter": "h",
87
+ "mean_absorption_fraction": 0.5727047223894772,
88
+ "full_absorption_rate": 0.22995169082125605,
89
+ "num_full_absorption": 238,
90
+ "num_probe_true_positives": 1035,
91
+ "num_split_features": 1
92
+ },
93
+ {
94
+ "first_letter": "i",
95
+ "mean_absorption_fraction": 0.5935284275560726,
96
+ "full_absorption_rate": 0.37484737484737485,
97
+ "num_full_absorption": 614,
98
+ "num_probe_true_positives": 1638,
99
+ "num_split_features": 2
100
+ },
101
+ {
102
+ "first_letter": "j",
103
+ "mean_absorption_fraction": 0.36914701836970715,
104
+ "full_absorption_rate": 0.18446601941747573,
105
+ "num_full_absorption": 76,
106
+ "num_probe_true_positives": 412,
107
+ "num_split_features": 1
108
+ },
109
+ {
110
+ "first_letter": "k",
111
+ "mean_absorption_fraction": 0.14630322313929153,
112
+ "full_absorption_rate": 0.028148148148148148,
113
+ "num_full_absorption": 19,
114
+ "num_probe_true_positives": 675,
115
+ "num_split_features": 1
116
+ },
117
+ {
118
+ "first_letter": "l",
119
+ "mean_absorption_fraction": 0.5786165566811846,
120
+ "full_absorption_rate": 0.31362467866323906,
121
+ "num_full_absorption": 366,
122
+ "num_probe_true_positives": 1167,
123
+ "num_split_features": 1
124
+ },
125
+ {
126
+ "first_letter": "m",
127
+ "mean_absorption_fraction": 0.6447573755425703,
128
+ "full_absorption_rate": 0.28610653487095,
129
+ "num_full_absorption": 521,
130
+ "num_probe_true_positives": 1821,
131
+ "num_split_features": 1
132
+ },
133
+ {
134
+ "first_letter": "n",
135
+ "mean_absorption_fraction": 0.4502983371728396,
136
+ "full_absorption_rate": 0.16624685138539042,
137
+ "num_full_absorption": 132,
138
+ "num_probe_true_positives": 794,
139
+ "num_split_features": 1
140
+ },
141
+ {
142
+ "first_letter": "o",
143
+ "mean_absorption_fraction": 0.4333315336903838,
144
+ "full_absorption_rate": 0.1583880037488285,
145
+ "num_full_absorption": 169,
146
+ "num_probe_true_positives": 1067,
147
+ "num_split_features": 1
148
+ },
149
+ {
150
+ "first_letter": "p",
151
+ "mean_absorption_fraction": 0.76016403003186,
152
+ "full_absorption_rate": 0.45267309377738824,
153
+ "num_full_absorption": 1033,
154
+ "num_probe_true_positives": 2282,
155
+ "num_split_features": 2
156
+ },
157
+ {
158
+ "first_letter": "q",
159
+ "mean_absorption_fraction": 0.4128471718435671,
160
+ "full_absorption_rate": 0.19473684210526315,
161
+ "num_full_absorption": 37,
162
+ "num_probe_true_positives": 190,
163
+ "num_split_features": 1
164
+ },
165
+ {
166
+ "first_letter": "r",
167
+ "mean_absorption_fraction": 0.6314906541291778,
168
+ "full_absorption_rate": 0.3985890652557319,
169
+ "num_full_absorption": 678,
170
+ "num_probe_true_positives": 1701,
171
+ "num_split_features": 3
172
+ },
173
+ {
174
+ "first_letter": "s",
175
+ "mean_absorption_fraction": 0.7506050472722071,
176
+ "full_absorption_rate": 0.6034912718204489,
177
+ "num_full_absorption": 1694,
178
+ "num_probe_true_positives": 2807,
179
+ "num_split_features": 4
180
+ },
181
+ {
182
+ "first_letter": "t",
183
+ "mean_absorption_fraction": 0.6150172352711047,
184
+ "full_absorption_rate": 0.2902654867256637,
185
+ "num_full_absorption": 492,
186
+ "num_probe_true_positives": 1695,
187
+ "num_split_features": 1
188
+ },
189
+ {
190
+ "first_letter": "u",
191
+ "mean_absorption_fraction": 0.36322090311483735,
192
+ "full_absorption_rate": 0.24370860927152319,
193
+ "num_full_absorption": 184,
194
+ "num_probe_true_positives": 755,
195
+ "num_split_features": 1
196
+ },
197
+ {
198
+ "first_letter": "v",
199
+ "mean_absorption_fraction": 0.40699263233588356,
200
+ "full_absorption_rate": 0.1761723700887199,
201
+ "num_full_absorption": 139,
202
+ "num_probe_true_positives": 789,
203
+ "num_split_features": 1
204
+ },
205
+ {
206
+ "first_letter": "w",
207
+ "mean_absorption_fraction": 0.39281073164811625,
208
+ "full_absorption_rate": 0.209366391184573,
209
+ "num_full_absorption": 152,
210
+ "num_probe_true_positives": 726,
211
+ "num_split_features": 1
212
+ },
213
+ {
214
+ "first_letter": "x",
215
+ "mean_absorption_fraction": 0.1217508800357222,
216
+ "full_absorption_rate": 0.017699115044247787,
217
+ "num_full_absorption": 2,
218
+ "num_probe_true_positives": 113,
219
+ "num_split_features": 1
220
+ },
221
+ {
222
+ "first_letter": "y",
223
+ "mean_absorption_fraction": 0.0886418130654087,
224
+ "full_absorption_rate": 0.045454545454545456,
225
+ "num_full_absorption": 8,
226
+ "num_probe_true_positives": 176,
227
+ "num_split_features": 1
228
+ },
229
+ {
230
+ "first_letter": "z",
231
+ "mean_absorption_fraction": 0.03968958541078437,
232
+ "full_absorption_rate": 0.00425531914893617,
233
+ "num_full_absorption": 1,
234
+ "num_probe_true_positives": 235,
235
+ "num_split_features": 1
236
+ }
237
+ ],
238
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
239
+ "sae_lens_id": "custom_sae",
240
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_4",
241
+ "sae_lens_version": "5.4.2",
242
+ "sae_cfg_dict": {
243
+ "model_name": "gemma-2-2b",
244
+ "d_in": 2304,
245
+ "d_sae": 65536,
246
+ "hook_layer": 12,
247
+ "hook_name": "blocks.12.hook_resid_post",
248
+ "context_size": null,
249
+ "hook_head_index": null,
250
+ "architecture": "standard_april_update",
251
+ "apply_b_dec_to_input": null,
252
+ "finetuning_scaling_factor": null,
253
+ "activation_fn_str": "",
254
+ "prepend_bos": true,
255
+ "normalize_activations": "none",
256
+ "dtype": "bfloat16",
257
+ "device": "",
258
+ "dataset_path": "",
259
+ "dataset_trust_remote_code": true,
260
+ "seqpos_slice": [
261
+ null
262
+ ],
263
+ "training_tokens": -100000,
264
+ "sae_lens_training_version": null,
265
+ "neuronpedia_id": null
266
+ },
267
+ "eval_result_unstructured": null
268
+ }
eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_5_custom_sae_eval_results.json ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "absorption_first_letter",
3
+ "eval_config": {
4
+ "model_name": "gemma-2-2b",
5
+ "random_seed": 42,
6
+ "f1_jump_threshold": 0.03,
7
+ "max_k_value": 10,
8
+ "prompt_template": "{word} has the first letter:",
9
+ "prompt_token_pos": -6,
10
+ "llm_batch_size": 32,
11
+ "llm_dtype": "bfloat16",
12
+ "k_sparse_probe_l1_decay": 0.01,
13
+ "k_sparse_probe_batch_size": 4096,
14
+ "k_sparse_probe_num_epochs": 50
15
+ },
16
+ "eval_id": "7a23eda6-43f2-42d3-b41f-82aa14ab0bcd",
17
+ "datetime_epoch_millis": 1740104875458,
18
+ "eval_result_metrics": {
19
+ "mean": {
20
+ "mean_absorption_fraction_score": 0.5282848269512149,
21
+ "mean_full_absorption_score": 0.35722963768366117,
22
+ "mean_num_split_features": 1.9230769230769231,
23
+ "std_dev_absorption_fraction_score": 0.20756776737189886,
24
+ "std_dev_full_absorption_score": 0.17409113695625567,
25
+ "std_dev_num_split_features": 1.3834182859302366
26
+ }
27
+ },
28
+ "eval_result_details": [
29
+ {
30
+ "first_letter": "a",
31
+ "mean_absorption_fraction": 0.7070839083555905,
32
+ "full_absorption_rate": 0.36762360446570974,
33
+ "num_full_absorption": 922,
34
+ "num_probe_true_positives": 2508,
35
+ "num_split_features": 1
36
+ },
37
+ {
38
+ "first_letter": "b",
39
+ "mean_absorption_fraction": 0.6648609736653764,
40
+ "full_absorption_rate": 0.4961089494163424,
41
+ "num_full_absorption": 765,
42
+ "num_probe_true_positives": 1542,
43
+ "num_split_features": 4
44
+ },
45
+ {
46
+ "first_letter": "c",
47
+ "mean_absorption_fraction": 0.8005478702573291,
48
+ "full_absorption_rate": 0.6081996434937611,
49
+ "num_full_absorption": 1706,
50
+ "num_probe_true_positives": 2805,
51
+ "num_split_features": 1
52
+ },
53
+ {
54
+ "first_letter": "d",
55
+ "mean_absorption_fraction": 0.6749493296742596,
56
+ "full_absorption_rate": 0.45542168674698796,
57
+ "num_full_absorption": 756,
58
+ "num_probe_true_positives": 1660,
59
+ "num_split_features": 3
60
+ },
61
+ {
62
+ "first_letter": "e",
63
+ "mean_absorption_fraction": 0.6332021290003637,
64
+ "full_absorption_rate": 0.4350247524752475,
65
+ "num_full_absorption": 703,
66
+ "num_probe_true_positives": 1616,
67
+ "num_split_features": 2
68
+ },
69
+ {
70
+ "first_letter": "f",
71
+ "mean_absorption_fraction": 0.6800208255796995,
72
+ "full_absorption_rate": 0.4806138933764136,
73
+ "num_full_absorption": 595,
74
+ "num_probe_true_positives": 1238,
75
+ "num_split_features": 1
76
+ },
77
+ {
78
+ "first_letter": "g",
79
+ "mean_absorption_fraction": 0.6579759079289633,
80
+ "full_absorption_rate": 0.5414847161572053,
81
+ "num_full_absorption": 620,
82
+ "num_probe_true_positives": 1145,
83
+ "num_split_features": 2
84
+ },
85
+ {
86
+ "first_letter": "h",
87
+ "mean_absorption_fraction": 0.6481991163632241,
88
+ "full_absorption_rate": 0.41642512077294686,
89
+ "num_full_absorption": 431,
90
+ "num_probe_true_positives": 1035,
91
+ "num_split_features": 1
92
+ },
93
+ {
94
+ "first_letter": "i",
95
+ "mean_absorption_fraction": 0.5566601715573842,
96
+ "full_absorption_rate": 0.3882783882783883,
97
+ "num_full_absorption": 636,
98
+ "num_probe_true_positives": 1638,
99
+ "num_split_features": 1
100
+ },
101
+ {
102
+ "first_letter": "j",
103
+ "mean_absorption_fraction": 0.41230771985239695,
104
+ "full_absorption_rate": 0.27184466019417475,
105
+ "num_full_absorption": 112,
106
+ "num_probe_true_positives": 412,
107
+ "num_split_features": 1
108
+ },
109
+ {
110
+ "first_letter": "k",
111
+ "mean_absorption_fraction": 0.22952222086566945,
112
+ "full_absorption_rate": 0.09481481481481481,
113
+ "num_full_absorption": 64,
114
+ "num_probe_true_positives": 675,
115
+ "num_split_features": 1
116
+ },
117
+ {
118
+ "first_letter": "l",
119
+ "mean_absorption_fraction": 0.5737482843930954,
120
+ "full_absorption_rate": 0.4087403598971722,
121
+ "num_full_absorption": 477,
122
+ "num_probe_true_positives": 1167,
123
+ "num_split_features": 2
124
+ },
125
+ {
126
+ "first_letter": "m",
127
+ "mean_absorption_fraction": 0.6814245446786736,
128
+ "full_absorption_rate": 0.500274574409665,
129
+ "num_full_absorption": 911,
130
+ "num_probe_true_positives": 1821,
131
+ "num_split_features": 3
132
+ },
133
+ {
134
+ "first_letter": "n",
135
+ "mean_absorption_fraction": 0.5440130265045751,
136
+ "full_absorption_rate": 0.3136020151133501,
137
+ "num_full_absorption": 249,
138
+ "num_probe_true_positives": 794,
139
+ "num_split_features": 1
140
+ },
141
+ {
142
+ "first_letter": "o",
143
+ "mean_absorption_fraction": 0.4040905603924752,
144
+ "full_absorption_rate": 0.19400187441424555,
145
+ "num_full_absorption": 207,
146
+ "num_probe_true_positives": 1067,
147
+ "num_split_features": 1
148
+ },
149
+ {
150
+ "first_letter": "p",
151
+ "mean_absorption_fraction": 0.7797710366668161,
152
+ "full_absorption_rate": 0.5473269062226117,
153
+ "num_full_absorption": 1249,
154
+ "num_probe_true_positives": 2282,
155
+ "num_split_features": 3
156
+ },
157
+ {
158
+ "first_letter": "q",
159
+ "mean_absorption_fraction": 0.3222575918031264,
160
+ "full_absorption_rate": 0.16842105263157894,
161
+ "num_full_absorption": 32,
162
+ "num_probe_true_positives": 190,
163
+ "num_split_features": 2
164
+ },
165
+ {
166
+ "first_letter": "r",
167
+ "mean_absorption_fraction": 0.6852024934359321,
168
+ "full_absorption_rate": 0.5038212815990594,
169
+ "num_full_absorption": 857,
170
+ "num_probe_true_positives": 1701,
171
+ "num_split_features": 3
172
+ },
173
+ {
174
+ "first_letter": "s",
175
+ "mean_absorption_fraction": 0.6981286454286791,
176
+ "full_absorption_rate": 0.5575347345920912,
177
+ "num_full_absorption": 1565,
178
+ "num_probe_true_positives": 2807,
179
+ "num_split_features": 7
180
+ },
181
+ {
182
+ "first_letter": "t",
183
+ "mean_absorption_fraction": 0.7110423532653868,
184
+ "full_absorption_rate": 0.5079646017699115,
185
+ "num_full_absorption": 861,
186
+ "num_probe_true_positives": 1695,
187
+ "num_split_features": 3
188
+ },
189
+ {
190
+ "first_letter": "u",
191
+ "mean_absorption_fraction": 0.3995663241148787,
192
+ "full_absorption_rate": 0.31920529801324504,
193
+ "num_full_absorption": 241,
194
+ "num_probe_true_positives": 755,
195
+ "num_split_features": 2
196
+ },
197
+ {
198
+ "first_letter": "v",
199
+ "mean_absorption_fraction": 0.3717835736899836,
200
+ "full_absorption_rate": 0.20785804816223066,
201
+ "num_full_absorption": 164,
202
+ "num_probe_true_positives": 789,
203
+ "num_split_features": 1
204
+ },
205
+ {
206
+ "first_letter": "w",
207
+ "mean_absorption_fraction": 0.5231441138039113,
208
+ "full_absorption_rate": 0.3760330578512397,
209
+ "num_full_absorption": 273,
210
+ "num_probe_true_positives": 726,
211
+ "num_split_features": 1
212
+ },
213
+ {
214
+ "first_letter": "x",
215
+ "mean_absorption_fraction": 0.11941588125068914,
216
+ "full_absorption_rate": 0.02654867256637168,
217
+ "num_full_absorption": 3,
218
+ "num_probe_true_positives": 113,
219
+ "num_split_features": 1
220
+ },
221
+ {
222
+ "first_letter": "y",
223
+ "mean_absorption_fraction": 0.13165534351732078,
224
+ "full_absorption_rate": 0.0625,
225
+ "num_full_absorption": 11,
226
+ "num_probe_true_positives": 176,
227
+ "num_split_features": 1
228
+ },
229
+ {
230
+ "first_letter": "z",
231
+ "mean_absorption_fraction": 0.12483155468578631,
232
+ "full_absorption_rate": 0.03829787234042553,
233
+ "num_full_absorption": 9,
234
+ "num_probe_true_positives": 235,
235
+ "num_split_features": 1
236
+ }
237
+ ],
238
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
239
+ "sae_lens_id": "custom_sae",
240
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_5",
241
+ "sae_lens_version": "5.4.2",
242
+ "sae_cfg_dict": {
243
+ "model_name": "gemma-2-2b",
244
+ "d_in": 2304,
245
+ "d_sae": 65536,
246
+ "hook_layer": 12,
247
+ "hook_name": "blocks.12.hook_resid_post",
248
+ "context_size": null,
249
+ "hook_head_index": null,
250
+ "architecture": "standard_april_update",
251
+ "apply_b_dec_to_input": null,
252
+ "finetuning_scaling_factor": null,
253
+ "activation_fn_str": "",
254
+ "prepend_bos": true,
255
+ "normalize_activations": "none",
256
+ "dtype": "bfloat16",
257
+ "device": "",
258
+ "dataset_path": "",
259
+ "dataset_trust_remote_code": true,
260
+ "seqpos_slice": [
261
+ null
262
+ ],
263
+ "training_tokens": -100000,
264
+ "sae_lens_training_version": null,
265
+ "neuronpedia_id": null
266
+ },
267
+ "eval_result_unstructured": null
268
+ }
eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_0_custom_sae_eval_results.json ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "absorption_first_letter",
3
+ "eval_config": {
4
+ "model_name": "gemma-2-2b",
5
+ "random_seed": 42,
6
+ "f1_jump_threshold": 0.03,
7
+ "max_k_value": 10,
8
+ "prompt_template": "{word} has the first letter:",
9
+ "prompt_token_pos": -6,
10
+ "llm_batch_size": 32,
11
+ "llm_dtype": "bfloat16",
12
+ "k_sparse_probe_l1_decay": 0.01,
13
+ "k_sparse_probe_batch_size": 4096,
14
+ "k_sparse_probe_num_epochs": 50
15
+ },
16
+ "eval_id": "f9074aa1-0b4f-4b68-9e3c-3bfabf71bc13",
17
+ "datetime_epoch_millis": 1740076749888,
18
+ "eval_result_metrics": {
19
+ "mean": {
20
+ "mean_absorption_fraction_score": 0.37747633872113084,
21
+ "mean_full_absorption_score": 0.4139575465645789,
22
+ "mean_num_split_features": 3.269230769230769,
23
+ "std_dev_absorption_fraction_score": 0.19789018654007454,
24
+ "std_dev_full_absorption_score": 0.18954624360986377,
25
+ "std_dev_num_split_features": 2.42582262018792
26
+ }
27
+ },
28
+ "eval_result_details": [
29
+ {
30
+ "first_letter": "a",
31
+ "mean_absorption_fraction": 0.5971456051033638,
32
+ "full_absorption_rate": 0.4800637958532695,
33
+ "num_full_absorption": 1204,
34
+ "num_probe_true_positives": 2508,
35
+ "num_split_features": 8
36
+ },
37
+ {
38
+ "first_letter": "b",
39
+ "mean_absorption_fraction": 0.3813263071869703,
40
+ "full_absorption_rate": 0.4701686121919585,
41
+ "num_full_absorption": 725,
42
+ "num_probe_true_positives": 1542,
43
+ "num_split_features": 6
44
+ },
45
+ {
46
+ "first_letter": "c",
47
+ "mean_absorption_fraction": 0.7258237506928806,
48
+ "full_absorption_rate": 0.718716577540107,
49
+ "num_full_absorption": 2016,
50
+ "num_probe_true_positives": 2805,
51
+ "num_split_features": 5
52
+ },
53
+ {
54
+ "first_letter": "d",
55
+ "mean_absorption_fraction": 0.48482520471847396,
56
+ "full_absorption_rate": 0.5078313253012048,
57
+ "num_full_absorption": 843,
58
+ "num_probe_true_positives": 1660,
59
+ "num_split_features": 4
60
+ },
61
+ {
62
+ "first_letter": "e",
63
+ "mean_absorption_fraction": 0.5224461002137466,
64
+ "full_absorption_rate": 0.47339108910891087,
65
+ "num_full_absorption": 765,
66
+ "num_probe_true_positives": 1616,
67
+ "num_split_features": 3
68
+ },
69
+ {
70
+ "first_letter": "f",
71
+ "mean_absorption_fraction": 0.5491052342123783,
72
+ "full_absorption_rate": 0.5823909531502424,
73
+ "num_full_absorption": 721,
74
+ "num_probe_true_positives": 1238,
75
+ "num_split_features": 3
76
+ },
77
+ {
78
+ "first_letter": "g",
79
+ "mean_absorption_fraction": 0.27565515017431547,
80
+ "full_absorption_rate": 0.3467248908296943,
81
+ "num_full_absorption": 397,
82
+ "num_probe_true_positives": 1145,
83
+ "num_split_features": 3
84
+ },
85
+ {
86
+ "first_letter": "h",
87
+ "mean_absorption_fraction": 0.3776238034975393,
88
+ "full_absorption_rate": 0.4,
89
+ "num_full_absorption": 414,
90
+ "num_probe_true_positives": 1035,
91
+ "num_split_features": 2
92
+ },
93
+ {
94
+ "first_letter": "i",
95
+ "mean_absorption_fraction": 0.4793555693867835,
96
+ "full_absorption_rate": 0.49633699633699635,
97
+ "num_full_absorption": 813,
98
+ "num_probe_true_positives": 1638,
99
+ "num_split_features": 6
100
+ },
101
+ {
102
+ "first_letter": "j",
103
+ "mean_absorption_fraction": 0.12459906400823838,
104
+ "full_absorption_rate": 0.1553398058252427,
105
+ "num_full_absorption": 64,
106
+ "num_probe_true_positives": 412,
107
+ "num_split_features": 1
108
+ },
109
+ {
110
+ "first_letter": "k",
111
+ "mean_absorption_fraction": 0.07497296105837896,
112
+ "full_absorption_rate": 0.1037037037037037,
113
+ "num_full_absorption": 70,
114
+ "num_probe_true_positives": 675,
115
+ "num_split_features": 1
116
+ },
117
+ {
118
+ "first_letter": "l",
119
+ "mean_absorption_fraction": 0.3189773190709259,
120
+ "full_absorption_rate": 0.38046272493573263,
121
+ "num_full_absorption": 444,
122
+ "num_probe_true_positives": 1167,
123
+ "num_split_features": 4
124
+ },
125
+ {
126
+ "first_letter": "m",
127
+ "mean_absorption_fraction": 0.42581561984285576,
128
+ "full_absorption_rate": 0.4728171334431631,
129
+ "num_full_absorption": 861,
130
+ "num_probe_true_positives": 1821,
131
+ "num_split_features": 8
132
+ },
133
+ {
134
+ "first_letter": "n",
135
+ "mean_absorption_fraction": 0.45881668609062815,
136
+ "full_absorption_rate": 0.45591939546599497,
137
+ "num_full_absorption": 362,
138
+ "num_probe_true_positives": 794,
139
+ "num_split_features": 3
140
+ },
141
+ {
142
+ "first_letter": "o",
143
+ "mean_absorption_fraction": 0.3059020128826346,
144
+ "full_absorption_rate": 0.45454545454545453,
145
+ "num_full_absorption": 485,
146
+ "num_probe_true_positives": 1067,
147
+ "num_split_features": 1
148
+ },
149
+ {
150
+ "first_letter": "p",
151
+ "mean_absorption_fraction": 0.6640428801113221,
152
+ "full_absorption_rate": 0.6305872042068361,
153
+ "num_full_absorption": 1439,
154
+ "num_probe_true_positives": 2282,
155
+ "num_split_features": 7
156
+ },
157
+ {
158
+ "first_letter": "q",
159
+ "mean_absorption_fraction": 0.23857383539688107,
160
+ "full_absorption_rate": 0.3105263157894737,
161
+ "num_full_absorption": 59,
162
+ "num_probe_true_positives": 190,
163
+ "num_split_features": 1
164
+ },
165
+ {
166
+ "first_letter": "r",
167
+ "mean_absorption_fraction": 0.4896607321852771,
168
+ "full_absorption_rate": 0.5537918871252204,
169
+ "num_full_absorption": 942,
170
+ "num_probe_true_positives": 1701,
171
+ "num_split_features": 4
172
+ },
173
+ {
174
+ "first_letter": "s",
175
+ "mean_absorption_fraction": 0.47620660054740455,
176
+ "full_absorption_rate": 0.6294976843605272,
177
+ "num_full_absorption": 1767,
178
+ "num_probe_true_positives": 2807,
179
+ "num_split_features": 7
180
+ },
181
+ {
182
+ "first_letter": "t",
183
+ "mean_absorption_fraction": 0.629838138645014,
184
+ "full_absorption_rate": 0.6005899705014749,
185
+ "num_full_absorption": 1018,
186
+ "num_probe_true_positives": 1695,
187
+ "num_split_features": 1
188
+ },
189
+ {
190
+ "first_letter": "u",
191
+ "mean_absorption_fraction": 0.5066990836428441,
192
+ "full_absorption_rate": 0.6410596026490066,
193
+ "num_full_absorption": 484,
194
+ "num_probe_true_positives": 755,
195
+ "num_split_features": 2
196
+ },
197
+ {
198
+ "first_letter": "v",
199
+ "mean_absorption_fraction": 0.12582986991851342,
200
+ "full_absorption_rate": 0.21166032953105196,
201
+ "num_full_absorption": 167,
202
+ "num_probe_true_positives": 789,
203
+ "num_split_features": 1
204
+ },
205
+ {
206
+ "first_letter": "w",
207
+ "mean_absorption_fraction": 0.3654424833313806,
208
+ "full_absorption_rate": 0.4228650137741047,
209
+ "num_full_absorption": 307,
210
+ "num_probe_true_positives": 726,
211
+ "num_split_features": 1
212
+ },
213
+ {
214
+ "first_letter": "x",
215
+ "mean_absorption_fraction": 0.051502797875670484,
216
+ "full_absorption_rate": 0.07079646017699115,
217
+ "num_full_absorption": 8,
218
+ "num_probe_true_positives": 113,
219
+ "num_split_features": 1
220
+ },
221
+ {
222
+ "first_letter": "y",
223
+ "mean_absorption_fraction": 0.1337897577016078,
224
+ "full_absorption_rate": 0.14204545454545456,
225
+ "num_full_absorption": 25,
226
+ "num_probe_true_positives": 176,
227
+ "num_split_features": 1
228
+ },
229
+ {
230
+ "first_letter": "z",
231
+ "mean_absorption_fraction": 0.030408239253373628,
232
+ "full_absorption_rate": 0.05106382978723404,
233
+ "num_full_absorption": 12,
234
+ "num_probe_true_positives": 235,
235
+ "num_split_features": 1
236
+ }
237
+ ],
238
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
239
+ "sae_lens_id": "custom_sae",
240
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_0",
241
+ "sae_lens_version": "5.4.2",
242
+ "sae_cfg_dict": {
243
+ "model_name": "gemma-2-2b",
244
+ "d_in": 2304,
245
+ "d_sae": 16384,
246
+ "hook_layer": 12,
247
+ "hook_name": "blocks.12.hook_resid_post",
248
+ "context_size": null,
249
+ "hook_head_index": null,
250
+ "architecture": "topk",
251
+ "apply_b_dec_to_input": null,
252
+ "finetuning_scaling_factor": null,
253
+ "activation_fn_str": "",
254
+ "prepend_bos": true,
255
+ "normalize_activations": "none",
256
+ "dtype": "bfloat16",
257
+ "device": "",
258
+ "dataset_path": "",
259
+ "dataset_trust_remote_code": true,
260
+ "seqpos_slice": [
261
+ null
262
+ ],
263
+ "training_tokens": -100000,
264
+ "sae_lens_training_version": null,
265
+ "neuronpedia_id": null
266
+ },
267
+ "eval_result_unstructured": null
268
+ }
eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_1_custom_sae_eval_results.json ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "absorption_first_letter",
3
+ "eval_config": {
4
+ "model_name": "gemma-2-2b",
5
+ "random_seed": 42,
6
+ "f1_jump_threshold": 0.03,
7
+ "max_k_value": 10,
8
+ "prompt_template": "{word} has the first letter:",
9
+ "prompt_token_pos": -6,
10
+ "llm_batch_size": 32,
11
+ "llm_dtype": "bfloat16",
12
+ "k_sparse_probe_l1_decay": 0.01,
13
+ "k_sparse_probe_batch_size": 4096,
14
+ "k_sparse_probe_num_epochs": 50
15
+ },
16
+ "eval_id": "8afc9485-7c23-46a6-a571-8dfcd88b74a0",
17
+ "datetime_epoch_millis": 1740074545743,
18
+ "eval_result_metrics": {
19
+ "mean": {
20
+ "mean_absorption_fraction_score": 0.31839490776468177,
21
+ "mean_full_absorption_score": 0.3314631805182705,
22
+ "mean_num_split_features": 2.730769230769231,
23
+ "std_dev_absorption_fraction_score": 0.18921047426421872,
24
+ "std_dev_full_absorption_score": 0.18421106793113218,
25
+ "std_dev_num_split_features": 2.0505158825562373
26
+ }
27
+ },
28
+ "eval_result_details": [
29
+ {
30
+ "first_letter": "a",
31
+ "mean_absorption_fraction": 0.6049809639558894,
32
+ "full_absorption_rate": 0.49800637958532695,
33
+ "num_full_absorption": 1249,
34
+ "num_probe_true_positives": 2508,
35
+ "num_split_features": 5
36
+ },
37
+ {
38
+ "first_letter": "b",
39
+ "mean_absorption_fraction": 0.26007062348535115,
40
+ "full_absorption_rate": 0.3125810635538262,
41
+ "num_full_absorption": 482,
42
+ "num_probe_true_positives": 1542,
43
+ "num_split_features": 5
44
+ },
45
+ {
46
+ "first_letter": "c",
47
+ "mean_absorption_fraction": 0.5776837402582738,
48
+ "full_absorption_rate": 0.5440285204991088,
49
+ "num_full_absorption": 1526,
50
+ "num_probe_true_positives": 2805,
51
+ "num_split_features": 7
52
+ },
53
+ {
54
+ "first_letter": "d",
55
+ "mean_absorption_fraction": 0.3839260656787951,
56
+ "full_absorption_rate": 0.3493975903614458,
57
+ "num_full_absorption": 580,
58
+ "num_probe_true_positives": 1660,
59
+ "num_split_features": 4
60
+ },
61
+ {
62
+ "first_letter": "e",
63
+ "mean_absorption_fraction": 0.5491400236273614,
64
+ "full_absorption_rate": 0.4975247524752475,
65
+ "num_full_absorption": 804,
66
+ "num_probe_true_positives": 1616,
67
+ "num_split_features": 3
68
+ },
69
+ {
70
+ "first_letter": "f",
71
+ "mean_absorption_fraction": 0.41603058308666035,
72
+ "full_absorption_rate": 0.4087237479806139,
73
+ "num_full_absorption": 506,
74
+ "num_probe_true_positives": 1238,
75
+ "num_split_features": 2
76
+ },
77
+ {
78
+ "first_letter": "g",
79
+ "mean_absorption_fraction": 0.3562791807345914,
80
+ "full_absorption_rate": 0.3868995633187773,
81
+ "num_full_absorption": 443,
82
+ "num_probe_true_positives": 1145,
83
+ "num_split_features": 1
84
+ },
85
+ {
86
+ "first_letter": "h",
87
+ "mean_absorption_fraction": 0.22981263470261395,
88
+ "full_absorption_rate": 0.2289855072463768,
89
+ "num_full_absorption": 237,
90
+ "num_probe_true_positives": 1035,
91
+ "num_split_features": 1
92
+ },
93
+ {
94
+ "first_letter": "i",
95
+ "mean_absorption_fraction": 0.3856963045568308,
96
+ "full_absorption_rate": 0.42918192918192916,
97
+ "num_full_absorption": 703,
98
+ "num_probe_true_positives": 1638,
99
+ "num_split_features": 5
100
+ },
101
+ {
102
+ "first_letter": "j",
103
+ "mean_absorption_fraction": 0.01846723298578015,
104
+ "full_absorption_rate": 0.038834951456310676,
105
+ "num_full_absorption": 16,
106
+ "num_probe_true_positives": 412,
107
+ "num_split_features": 1
108
+ },
109
+ {
110
+ "first_letter": "k",
111
+ "mean_absorption_fraction": 0.04136762125390637,
112
+ "full_absorption_rate": 0.05037037037037037,
113
+ "num_full_absorption": 34,
114
+ "num_probe_true_positives": 675,
115
+ "num_split_features": 1
116
+ },
117
+ {
118
+ "first_letter": "l",
119
+ "mean_absorption_fraction": 0.30929793561492136,
120
+ "full_absorption_rate": 0.3401885175664096,
121
+ "num_full_absorption": 397,
122
+ "num_probe_true_positives": 1167,
123
+ "num_split_features": 3
124
+ },
125
+ {
126
+ "first_letter": "m",
127
+ "mean_absorption_fraction": 0.4736418912102383,
128
+ "full_absorption_rate": 0.5172981878088962,
129
+ "num_full_absorption": 942,
130
+ "num_probe_true_positives": 1821,
131
+ "num_split_features": 6
132
+ },
133
+ {
134
+ "first_letter": "n",
135
+ "mean_absorption_fraction": 0.28225493966228277,
136
+ "full_absorption_rate": 0.26448362720403024,
137
+ "num_full_absorption": 210,
138
+ "num_probe_true_positives": 794,
139
+ "num_split_features": 2
140
+ },
141
+ {
142
+ "first_letter": "o",
143
+ "mean_absorption_fraction": 0.37481096091469307,
144
+ "full_absorption_rate": 0.46485473289597,
145
+ "num_full_absorption": 496,
146
+ "num_probe_true_positives": 1067,
147
+ "num_split_features": 1
148
+ },
149
+ {
150
+ "first_letter": "p",
151
+ "mean_absorption_fraction": 0.6552642696406819,
152
+ "full_absorption_rate": 0.6156879929886064,
153
+ "num_full_absorption": 1405,
154
+ "num_probe_true_positives": 2282,
155
+ "num_split_features": 5
156
+ },
157
+ {
158
+ "first_letter": "q",
159
+ "mean_absorption_fraction": 0.14073819277923014,
160
+ "full_absorption_rate": 0.17894736842105263,
161
+ "num_full_absorption": 34,
162
+ "num_probe_true_positives": 190,
163
+ "num_split_features": 1
164
+ },
165
+ {
166
+ "first_letter": "r",
167
+ "mean_absorption_fraction": 0.45016305961443226,
168
+ "full_absorption_rate": 0.47677836566725457,
169
+ "num_full_absorption": 811,
170
+ "num_probe_true_positives": 1701,
171
+ "num_split_features": 2
172
+ },
173
+ {
174
+ "first_letter": "s",
175
+ "mean_absorption_fraction": 0.45928419865327924,
176
+ "full_absorption_rate": 0.5529034556465978,
177
+ "num_full_absorption": 1552,
178
+ "num_probe_true_positives": 2807,
179
+ "num_split_features": 7
180
+ },
181
+ {
182
+ "first_letter": "t",
183
+ "mean_absorption_fraction": 0.4171686934889313,
184
+ "full_absorption_rate": 0.34926253687315634,
185
+ "num_full_absorption": 592,
186
+ "num_probe_true_positives": 1695,
187
+ "num_split_features": 2
188
+ },
189
+ {
190
+ "first_letter": "u",
191
+ "mean_absorption_fraction": 0.3753684140171086,
192
+ "full_absorption_rate": 0.5311258278145695,
193
+ "num_full_absorption": 401,
194
+ "num_probe_true_positives": 755,
195
+ "num_split_features": 2
196
+ },
197
+ {
198
+ "first_letter": "v",
199
+ "mean_absorption_fraction": 0.14320502373748673,
200
+ "full_absorption_rate": 0.2002534854245881,
201
+ "num_full_absorption": 158,
202
+ "num_probe_true_positives": 789,
203
+ "num_split_features": 1
204
+ },
205
+ {
206
+ "first_letter": "w",
207
+ "mean_absorption_fraction": 0.25757599356476546,
208
+ "full_absorption_rate": 0.2699724517906336,
209
+ "num_full_absorption": 196,
210
+ "num_probe_true_positives": 726,
211
+ "num_split_features": 1
212
+ },
213
+ {
214
+ "first_letter": "x",
215
+ "mean_absorption_fraction": 0.03557787974406646,
216
+ "full_absorption_rate": 0.02654867256637168,
217
+ "num_full_absorption": 3,
218
+ "num_probe_true_positives": 113,
219
+ "num_split_features": 1
220
+ },
221
+ {
222
+ "first_letter": "y",
223
+ "mean_absorption_fraction": 0.06861018177768921,
224
+ "full_absorption_rate": 0.06818181818181818,
225
+ "num_full_absorption": 12,
226
+ "num_probe_true_positives": 176,
227
+ "num_split_features": 1
228
+ },
229
+ {
230
+ "first_letter": "z",
231
+ "mean_absorption_fraction": 0.011850993135864673,
232
+ "full_absorption_rate": 0.01702127659574468,
233
+ "num_full_absorption": 4,
234
+ "num_probe_true_positives": 235,
235
+ "num_split_features": 1
236
+ }
237
+ ],
238
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
239
+ "sae_lens_id": "custom_sae",
240
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_1",
241
+ "sae_lens_version": "5.4.2",
242
+ "sae_cfg_dict": {
243
+ "model_name": "gemma-2-2b",
244
+ "d_in": 2304,
245
+ "d_sae": 16384,
246
+ "hook_layer": 12,
247
+ "hook_name": "blocks.12.hook_resid_post",
248
+ "context_size": null,
249
+ "hook_head_index": null,
250
+ "architecture": "topk",
251
+ "apply_b_dec_to_input": null,
252
+ "finetuning_scaling_factor": null,
253
+ "activation_fn_str": "",
254
+ "prepend_bos": true,
255
+ "normalize_activations": "none",
256
+ "dtype": "bfloat16",
257
+ "device": "",
258
+ "dataset_path": "",
259
+ "dataset_trust_remote_code": true,
260
+ "seqpos_slice": [
261
+ null
262
+ ],
263
+ "training_tokens": -100000,
264
+ "sae_lens_training_version": null,
265
+ "neuronpedia_id": null
266
+ },
267
+ "eval_result_unstructured": null
268
+ }
eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_2_custom_sae_eval_results.json ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "absorption_first_letter",
3
+ "eval_config": {
4
+ "model_name": "gemma-2-2b",
5
+ "random_seed": 42,
6
+ "f1_jump_threshold": 0.03,
7
+ "max_k_value": 10,
8
+ "prompt_template": "{word} has the first letter:",
9
+ "prompt_token_pos": -6,
10
+ "llm_batch_size": 32,
11
+ "llm_dtype": "bfloat16",
12
+ "k_sparse_probe_l1_decay": 0.01,
13
+ "k_sparse_probe_batch_size": 4096,
14
+ "k_sparse_probe_num_epochs": 50
15
+ },
16
+ "eval_id": "7192a0e1-a7da-41bd-a84d-9026148fcf82",
17
+ "datetime_epoch_millis": 1740077437804,
18
+ "eval_result_metrics": {
19
+ "mean": {
20
+ "mean_absorption_fraction_score": 0.17942797907165925,
21
+ "mean_full_absorption_score": 0.15978218783987327,
22
+ "mean_num_split_features": 1.5,
23
+ "std_dev_absorption_fraction_score": 0.15226072446759698,
24
+ "std_dev_full_absorption_score": 0.13533222488741237,
25
+ "std_dev_num_split_features": 0.8602325267042626
26
+ }
27
+ },
28
+ "eval_result_details": [
29
+ {
30
+ "first_letter": "a",
31
+ "mean_absorption_fraction": 0.47564895197099144,
32
+ "full_absorption_rate": 0.31259968102073366,
33
+ "num_full_absorption": 784,
34
+ "num_probe_true_positives": 2508,
35
+ "num_split_features": 1
36
+ },
37
+ {
38
+ "first_letter": "b",
39
+ "mean_absorption_fraction": 0.02365185253295002,
40
+ "full_absorption_rate": 0.02594033722438392,
41
+ "num_full_absorption": 40,
42
+ "num_probe_true_positives": 1542,
43
+ "num_split_features": 1
44
+ },
45
+ {
46
+ "first_letter": "c",
47
+ "mean_absorption_fraction": 0.4598973519414802,
48
+ "full_absorption_rate": 0.37219251336898396,
49
+ "num_full_absorption": 1044,
50
+ "num_probe_true_positives": 2805,
51
+ "num_split_features": 3
52
+ },
53
+ {
54
+ "first_letter": "d",
55
+ "mean_absorption_fraction": 0.2749116720207556,
56
+ "full_absorption_rate": 0.20662650602409638,
57
+ "num_full_absorption": 343,
58
+ "num_probe_true_positives": 1660,
59
+ "num_split_features": 3
60
+ },
61
+ {
62
+ "first_letter": "e",
63
+ "mean_absorption_fraction": 0.2707575688664681,
64
+ "full_absorption_rate": 0.2004950495049505,
65
+ "num_full_absorption": 324,
66
+ "num_probe_true_positives": 1616,
67
+ "num_split_features": 3
68
+ },
69
+ {
70
+ "first_letter": "f",
71
+ "mean_absorption_fraction": 0.1519379314336452,
72
+ "full_absorption_rate": 0.12762520193861066,
73
+ "num_full_absorption": 158,
74
+ "num_probe_true_positives": 1238,
75
+ "num_split_features": 1
76
+ },
77
+ {
78
+ "first_letter": "g",
79
+ "mean_absorption_fraction": 0.08931513277897936,
80
+ "full_absorption_rate": 0.09344978165938865,
81
+ "num_full_absorption": 107,
82
+ "num_probe_true_positives": 1145,
83
+ "num_split_features": 1
84
+ },
85
+ {
86
+ "first_letter": "h",
87
+ "mean_absorption_fraction": 0.04798358268604414,
88
+ "full_absorption_rate": 0.033816425120772944,
89
+ "num_full_absorption": 35,
90
+ "num_probe_true_positives": 1035,
91
+ "num_split_features": 1
92
+ },
93
+ {
94
+ "first_letter": "i",
95
+ "mean_absorption_fraction": 0.41216370077647757,
96
+ "full_absorption_rate": 0.4297924297924298,
97
+ "num_full_absorption": 704,
98
+ "num_probe_true_positives": 1638,
99
+ "num_split_features": 2
100
+ },
101
+ {
102
+ "first_letter": "j",
103
+ "mean_absorption_fraction": 0.0,
104
+ "full_absorption_rate": 0.009708737864077669,
105
+ "num_full_absorption": 4,
106
+ "num_probe_true_positives": 412,
107
+ "num_split_features": 1
108
+ },
109
+ {
110
+ "first_letter": "k",
111
+ "mean_absorption_fraction": 0.010591802895094848,
112
+ "full_absorption_rate": 0.013333333333333334,
113
+ "num_full_absorption": 9,
114
+ "num_probe_true_positives": 675,
115
+ "num_split_features": 1
116
+ },
117
+ {
118
+ "first_letter": "l",
119
+ "mean_absorption_fraction": 0.18751227466897116,
120
+ "full_absorption_rate": 0.20051413881748073,
121
+ "num_full_absorption": 234,
122
+ "num_probe_true_positives": 1167,
123
+ "num_split_features": 1
124
+ },
125
+ {
126
+ "first_letter": "m",
127
+ "mean_absorption_fraction": 0.10548850032097844,
128
+ "full_absorption_rate": 0.10049423393739704,
129
+ "num_full_absorption": 183,
130
+ "num_probe_true_positives": 1821,
131
+ "num_split_features": 1
132
+ },
133
+ {
134
+ "first_letter": "n",
135
+ "mean_absorption_fraction": 0.11818217053319924,
136
+ "full_absorption_rate": 0.08564231738035265,
137
+ "num_full_absorption": 68,
138
+ "num_probe_true_positives": 794,
139
+ "num_split_features": 1
140
+ },
141
+ {
142
+ "first_letter": "o",
143
+ "mean_absorption_fraction": 0.27228559065459534,
144
+ "full_absorption_rate": 0.3064667291471415,
145
+ "num_full_absorption": 327,
146
+ "num_probe_true_positives": 1067,
147
+ "num_split_features": 1
148
+ },
149
+ {
150
+ "first_letter": "p",
151
+ "mean_absorption_fraction": 0.42231676025499576,
152
+ "full_absorption_rate": 0.3299737072743208,
153
+ "num_full_absorption": 753,
154
+ "num_probe_true_positives": 2282,
155
+ "num_split_features": 2
156
+ },
157
+ {
158
+ "first_letter": "q",
159
+ "mean_absorption_fraction": 0.08127272823941951,
160
+ "full_absorption_rate": 0.06842105263157895,
161
+ "num_full_absorption": 13,
162
+ "num_probe_true_positives": 190,
163
+ "num_split_features": 1
164
+ },
165
+ {
166
+ "first_letter": "r",
167
+ "mean_absorption_fraction": 0.2961293111494162,
168
+ "full_absorption_rate": 0.2569077013521458,
169
+ "num_full_absorption": 437,
170
+ "num_probe_true_positives": 1701,
171
+ "num_split_features": 2
172
+ },
173
+ {
174
+ "first_letter": "s",
175
+ "mean_absorption_fraction": 0.3695603008892001,
176
+ "full_absorption_rate": 0.4324902030637692,
177
+ "num_full_absorption": 1214,
178
+ "num_probe_true_positives": 2807,
179
+ "num_split_features": 4
180
+ },
181
+ {
182
+ "first_letter": "t",
183
+ "mean_absorption_fraction": 0.18786499442114749,
184
+ "full_absorption_rate": 0.11209439528023599,
185
+ "num_full_absorption": 190,
186
+ "num_probe_true_positives": 1695,
187
+ "num_split_features": 1
188
+ },
189
+ {
190
+ "first_letter": "u",
191
+ "mean_absorption_fraction": 0.1637863224612186,
192
+ "full_absorption_rate": 0.21589403973509932,
193
+ "num_full_absorption": 163,
194
+ "num_probe_true_positives": 755,
195
+ "num_split_features": 2
196
+ },
197
+ {
198
+ "first_letter": "v",
199
+ "mean_absorption_fraction": 0.02925016974477176,
200
+ "full_absorption_rate": 0.043092522179974654,
201
+ "num_full_absorption": 34,
202
+ "num_probe_true_positives": 789,
203
+ "num_split_features": 1
204
+ },
205
+ {
206
+ "first_letter": "w",
207
+ "mean_absorption_fraction": 0.1275726157048305,
208
+ "full_absorption_rate": 0.09090909090909091,
209
+ "num_full_absorption": 66,
210
+ "num_probe_true_positives": 726,
211
+ "num_split_features": 1
212
+ },
213
+ {
214
+ "first_letter": "x",
215
+ "mean_absorption_fraction": 0.028161295653006697,
216
+ "full_absorption_rate": 0.017699115044247787,
217
+ "num_full_absorption": 2,
218
+ "num_probe_true_positives": 113,
219
+ "num_split_features": 1
220
+ },
221
+ {
222
+ "first_letter": "y",
223
+ "mean_absorption_fraction": 0.048059342038807434,
224
+ "full_absorption_rate": 0.05113636363636364,
225
+ "num_full_absorption": 9,
226
+ "num_probe_true_positives": 176,
227
+ "num_split_features": 1
228
+ },
229
+ {
230
+ "first_letter": "z",
231
+ "mean_absorption_fraction": 0.010825531225695472,
232
+ "full_absorption_rate": 0.01702127659574468,
233
+ "num_full_absorption": 4,
234
+ "num_probe_true_positives": 235,
235
+ "num_split_features": 1
236
+ }
237
+ ],
238
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
239
+ "sae_lens_id": "custom_sae",
240
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_2",
241
+ "sae_lens_version": "5.4.2",
242
+ "sae_cfg_dict": {
243
+ "model_name": "gemma-2-2b",
244
+ "d_in": 2304,
245
+ "d_sae": 16384,
246
+ "hook_layer": 12,
247
+ "hook_name": "blocks.12.hook_resid_post",
248
+ "context_size": null,
249
+ "hook_head_index": null,
250
+ "architecture": "topk",
251
+ "apply_b_dec_to_input": null,
252
+ "finetuning_scaling_factor": null,
253
+ "activation_fn_str": "",
254
+ "prepend_bos": true,
255
+ "normalize_activations": "none",
256
+ "dtype": "bfloat16",
257
+ "device": "",
258
+ "dataset_path": "",
259
+ "dataset_trust_remote_code": true,
260
+ "seqpos_slice": [
261
+ null
262
+ ],
263
+ "training_tokens": -100000,
264
+ "sae_lens_training_version": null,
265
+ "neuronpedia_id": null
266
+ },
267
+ "eval_result_unstructured": null
268
+ }
eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_3_custom_sae_eval_results.json ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "absorption_first_letter",
3
+ "eval_config": {
4
+ "model_name": "gemma-2-2b",
5
+ "random_seed": 42,
6
+ "f1_jump_threshold": 0.03,
7
+ "max_k_value": 10,
8
+ "prompt_template": "{word} has the first letter:",
9
+ "prompt_token_pos": -6,
10
+ "llm_batch_size": 32,
11
+ "llm_dtype": "bfloat16",
12
+ "k_sparse_probe_l1_decay": 0.01,
13
+ "k_sparse_probe_batch_size": 4096,
14
+ "k_sparse_probe_num_epochs": 50
15
+ },
16
+ "eval_id": "2ede38b6-345d-47f1-b639-8da6aa368fbe",
17
+ "datetime_epoch_millis": 1740078140625,
18
+ "eval_result_metrics": {
19
+ "mean": {
20
+ "mean_absorption_fraction_score": 0.04801302443670268,
21
+ "mean_full_absorption_score": 0.03591255684680933,
22
+ "mean_num_split_features": 1.0384615384615385,
23
+ "std_dev_absorption_fraction_score": 0.07947032446059547,
24
+ "std_dev_full_absorption_score": 0.08062070797631418,
25
+ "std_dev_num_split_features": 0.19611613513818404
26
+ }
27
+ },
28
+ "eval_result_details": [
29
+ {
30
+ "first_letter": "a",
31
+ "mean_absorption_fraction": 0.02934165591566395,
32
+ "full_absorption_rate": 0.009170653907496013,
33
+ "num_full_absorption": 23,
34
+ "num_probe_true_positives": 2508,
35
+ "num_split_features": 1
36
+ },
37
+ {
38
+ "first_letter": "b",
39
+ "mean_absorption_fraction": 0.0029935259579446072,
40
+ "full_absorption_rate": 0.004539559014267186,
41
+ "num_full_absorption": 7,
42
+ "num_probe_true_positives": 1542,
43
+ "num_split_features": 1
44
+ },
45
+ {
46
+ "first_letter": "c",
47
+ "mean_absorption_fraction": 0.005569506519538388,
48
+ "full_absorption_rate": 0.0017825311942959,
49
+ "num_full_absorption": 5,
50
+ "num_probe_true_positives": 2805,
51
+ "num_split_features": 1
52
+ },
53
+ {
54
+ "first_letter": "d",
55
+ "mean_absorption_fraction": 0.00922397878267749,
56
+ "full_absorption_rate": 0.005421686746987952,
57
+ "num_full_absorption": 9,
58
+ "num_probe_true_positives": 1660,
59
+ "num_split_features": 1
60
+ },
61
+ {
62
+ "first_letter": "e",
63
+ "mean_absorption_fraction": 0.10168121881529071,
64
+ "full_absorption_rate": 0.03589108910891089,
65
+ "num_full_absorption": 58,
66
+ "num_probe_true_positives": 1616,
67
+ "num_split_features": 1
68
+ },
69
+ {
70
+ "first_letter": "f",
71
+ "mean_absorption_fraction": 0.008827253461870332,
72
+ "full_absorption_rate": 0.0032310177705977385,
73
+ "num_full_absorption": 4,
74
+ "num_probe_true_positives": 1238,
75
+ "num_split_features": 1
76
+ },
77
+ {
78
+ "first_letter": "g",
79
+ "mean_absorption_fraction": 0.012313091462540756,
80
+ "full_absorption_rate": 0.00611353711790393,
81
+ "num_full_absorption": 7,
82
+ "num_probe_true_positives": 1145,
83
+ "num_split_features": 2
84
+ },
85
+ {
86
+ "first_letter": "h",
87
+ "mean_absorption_fraction": 0.012699860300682255,
88
+ "full_absorption_rate": 0.00676328502415459,
89
+ "num_full_absorption": 7,
90
+ "num_probe_true_positives": 1035,
91
+ "num_split_features": 1
92
+ },
93
+ {
94
+ "first_letter": "i",
95
+ "mean_absorption_fraction": 0.09975263121604687,
96
+ "full_absorption_rate": 0.06898656898656899,
97
+ "num_full_absorption": 113,
98
+ "num_probe_true_positives": 1638,
99
+ "num_split_features": 1
100
+ },
101
+ {
102
+ "first_letter": "j",
103
+ "mean_absorption_fraction": 0.0037571585606403575,
104
+ "full_absorption_rate": 0.009708737864077669,
105
+ "num_full_absorption": 4,
106
+ "num_probe_true_positives": 412,
107
+ "num_split_features": 1
108
+ },
109
+ {
110
+ "first_letter": "k",
111
+ "mean_absorption_fraction": 0.0051757245171220635,
112
+ "full_absorption_rate": 0.0044444444444444444,
113
+ "num_full_absorption": 3,
114
+ "num_probe_true_positives": 675,
115
+ "num_split_features": 1
116
+ },
117
+ {
118
+ "first_letter": "l",
119
+ "mean_absorption_fraction": 0.058294956943712124,
120
+ "full_absorption_rate": 0.023993144815766924,
121
+ "num_full_absorption": 28,
122
+ "num_probe_true_positives": 1167,
123
+ "num_split_features": 1
124
+ },
125
+ {
126
+ "first_letter": "m",
127
+ "mean_absorption_fraction": 0.001762598898069777,
128
+ "full_absorption_rate": 0.004942339373970346,
129
+ "num_full_absorption": 9,
130
+ "num_probe_true_positives": 1821,
131
+ "num_split_features": 1
132
+ },
133
+ {
134
+ "first_letter": "n",
135
+ "mean_absorption_fraction": 0.0417621897858296,
136
+ "full_absorption_rate": 0.006297229219143577,
137
+ "num_full_absorption": 5,
138
+ "num_probe_true_positives": 794,
139
+ "num_split_features": 1
140
+ },
141
+ {
142
+ "first_letter": "o",
143
+ "mean_absorption_fraction": 0.1499792308079282,
144
+ "full_absorption_rate": 0.16119962511715089,
145
+ "num_full_absorption": 172,
146
+ "num_probe_true_positives": 1067,
147
+ "num_split_features": 1
148
+ },
149
+ {
150
+ "first_letter": "p",
151
+ "mean_absorption_fraction": 0.07528312050104714,
152
+ "full_absorption_rate": 0.01621384750219106,
153
+ "num_full_absorption": 37,
154
+ "num_probe_true_positives": 2282,
155
+ "num_split_features": 1
156
+ },
157
+ {
158
+ "first_letter": "q",
159
+ "mean_absorption_fraction": 0.011651399259752978,
160
+ "full_absorption_rate": 0.021052631578947368,
161
+ "num_full_absorption": 4,
162
+ "num_probe_true_positives": 190,
163
+ "num_split_features": 1
164
+ },
165
+ {
166
+ "first_letter": "r",
167
+ "mean_absorption_fraction": 0.05169549625768419,
168
+ "full_absorption_rate": 0.026455026455026454,
169
+ "num_full_absorption": 45,
170
+ "num_probe_true_positives": 1701,
171
+ "num_split_features": 1
172
+ },
173
+ {
174
+ "first_letter": "s",
175
+ "mean_absorption_fraction": 0.018073780196159923,
176
+ "full_absorption_rate": 0.0110438190238689,
177
+ "num_full_absorption": 31,
178
+ "num_probe_true_positives": 2807,
179
+ "num_split_features": 1
180
+ },
181
+ {
182
+ "first_letter": "t",
183
+ "mean_absorption_fraction": 0.015837549421101646,
184
+ "full_absorption_rate": 0.0029498525073746312,
185
+ "num_full_absorption": 5,
186
+ "num_probe_true_positives": 1695,
187
+ "num_split_features": 1
188
+ },
189
+ {
190
+ "first_letter": "u",
191
+ "mean_absorption_fraction": 0.39172802729272294,
192
+ "full_absorption_rate": 0.3973509933774834,
193
+ "num_full_absorption": 300,
194
+ "num_probe_true_positives": 755,
195
+ "num_split_features": 1
196
+ },
197
+ {
198
+ "first_letter": "v",
199
+ "mean_absorption_fraction": 0.0007882202694839606,
200
+ "full_absorption_rate": 0.0025348542458808617,
201
+ "num_full_absorption": 2,
202
+ "num_probe_true_positives": 789,
203
+ "num_split_features": 1
204
+ },
205
+ {
206
+ "first_letter": "w",
207
+ "mean_absorption_fraction": 0.03700330112932826,
208
+ "full_absorption_rate": 0.02203856749311295,
209
+ "num_full_absorption": 16,
210
+ "num_probe_true_positives": 726,
211
+ "num_split_features": 1
212
+ },
213
+ {
214
+ "first_letter": "x",
215
+ "mean_absorption_fraction": 0.05503942363659827,
216
+ "full_absorption_rate": 0.017699115044247787,
217
+ "num_full_absorption": 2,
218
+ "num_probe_true_positives": 113,
219
+ "num_split_features": 1
220
+ },
221
+ {
222
+ "first_letter": "y",
223
+ "mean_absorption_fraction": 0.03850760997536622,
224
+ "full_absorption_rate": 0.05113636363636364,
225
+ "num_full_absorption": 9,
226
+ "num_probe_true_positives": 176,
227
+ "num_split_features": 1
228
+ },
229
+ {
230
+ "first_letter": "z",
231
+ "mean_absorption_fraction": 0.009596125469466737,
232
+ "full_absorption_rate": 0.01276595744680851,
233
+ "num_full_absorption": 3,
234
+ "num_probe_true_positives": 235,
235
+ "num_split_features": 1
236
+ }
237
+ ],
238
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
239
+ "sae_lens_id": "custom_sae",
240
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_3",
241
+ "sae_lens_version": "5.4.2",
242
+ "sae_cfg_dict": {
243
+ "model_name": "gemma-2-2b",
244
+ "d_in": 2304,
245
+ "d_sae": 16384,
246
+ "hook_layer": 12,
247
+ "hook_name": "blocks.12.hook_resid_post",
248
+ "context_size": null,
249
+ "hook_head_index": null,
250
+ "architecture": "topk",
251
+ "apply_b_dec_to_input": null,
252
+ "finetuning_scaling_factor": null,
253
+ "activation_fn_str": "",
254
+ "prepend_bos": true,
255
+ "normalize_activations": "none",
256
+ "dtype": "bfloat16",
257
+ "device": "",
258
+ "dataset_path": "",
259
+ "dataset_trust_remote_code": true,
260
+ "seqpos_slice": [
261
+ null
262
+ ],
263
+ "training_tokens": -100000,
264
+ "sae_lens_training_version": null,
265
+ "neuronpedia_id": null
266
+ },
267
+ "eval_result_unstructured": null
268
+ }
eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_4_custom_sae_eval_results.json ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "absorption_first_letter",
3
+ "eval_config": {
4
+ "model_name": "gemma-2-2b",
5
+ "random_seed": 42,
6
+ "f1_jump_threshold": 0.03,
7
+ "max_k_value": 10,
8
+ "prompt_template": "{word} has the first letter:",
9
+ "prompt_token_pos": -6,
10
+ "llm_batch_size": 32,
11
+ "llm_dtype": "bfloat16",
12
+ "k_sparse_probe_l1_decay": 0.01,
13
+ "k_sparse_probe_batch_size": 4096,
14
+ "k_sparse_probe_num_epochs": 50
15
+ },
16
+ "eval_id": "fca0882d-facd-4d4a-829b-88d40a83269a",
17
+ "datetime_epoch_millis": 1740076039093,
18
+ "eval_result_metrics": {
19
+ "mean": {
20
+ "mean_absorption_fraction_score": 0.04725406186343194,
21
+ "mean_full_absorption_score": 0.035376663712700394,
22
+ "mean_num_split_features": 1.1538461538461537,
23
+ "std_dev_absorption_fraction_score": 0.12702547722858154,
24
+ "std_dev_full_absorption_score": 0.1273809249096068,
25
+ "std_dev_num_split_features": 0.36794648440311994
26
+ }
27
+ },
28
+ "eval_result_details": [
29
+ {
30
+ "first_letter": "a",
31
+ "mean_absorption_fraction": 0.009415356574908502,
32
+ "full_absorption_rate": 0.001993620414673046,
33
+ "num_full_absorption": 5,
34
+ "num_probe_true_positives": 2508,
35
+ "num_split_features": 1
36
+ },
37
+ {
38
+ "first_letter": "b",
39
+ "mean_absorption_fraction": 0.0010598118823535877,
40
+ "full_absorption_rate": 0.0006485084306095979,
41
+ "num_full_absorption": 1,
42
+ "num_probe_true_positives": 1542,
43
+ "num_split_features": 1
44
+ },
45
+ {
46
+ "first_letter": "c",
47
+ "mean_absorption_fraction": 2.681482172260622e-05,
48
+ "full_absorption_rate": 0.0,
49
+ "num_full_absorption": 0,
50
+ "num_probe_true_positives": 2805,
51
+ "num_split_features": 1
52
+ },
53
+ {
54
+ "first_letter": "d",
55
+ "mean_absorption_fraction": 0.0,
56
+ "full_absorption_rate": 0.0030120481927710845,
57
+ "num_full_absorption": 5,
58
+ "num_probe_true_positives": 1660,
59
+ "num_split_features": 1
60
+ },
61
+ {
62
+ "first_letter": "e",
63
+ "mean_absorption_fraction": 0.005805138800008344,
64
+ "full_absorption_rate": 0.0,
65
+ "num_full_absorption": 0,
66
+ "num_probe_true_positives": 1616,
67
+ "num_split_features": 1
68
+ },
69
+ {
70
+ "first_letter": "f",
71
+ "mean_absorption_fraction": 0.0002805702876438644,
72
+ "full_absorption_rate": 0.0,
73
+ "num_full_absorption": 0,
74
+ "num_probe_true_positives": 1238,
75
+ "num_split_features": 1
76
+ },
77
+ {
78
+ "first_letter": "g",
79
+ "mean_absorption_fraction": 0.006430103531010465,
80
+ "full_absorption_rate": 0.0017467248908296944,
81
+ "num_full_absorption": 2,
82
+ "num_probe_true_positives": 1145,
83
+ "num_split_features": 2
84
+ },
85
+ {
86
+ "first_letter": "h",
87
+ "mean_absorption_fraction": 0.005265016174444728,
88
+ "full_absorption_rate": 0.001932367149758454,
89
+ "num_full_absorption": 2,
90
+ "num_probe_true_positives": 1035,
91
+ "num_split_features": 1
92
+ },
93
+ {
94
+ "first_letter": "i",
95
+ "mean_absorption_fraction": 0.07734533383832189,
96
+ "full_absorption_rate": 0.014652014652014652,
97
+ "num_full_absorption": 24,
98
+ "num_probe_true_positives": 1638,
99
+ "num_split_features": 2
100
+ },
101
+ {
102
+ "first_letter": "j",
103
+ "mean_absorption_fraction": 0.00013682976302465064,
104
+ "full_absorption_rate": 0.0024271844660194173,
105
+ "num_full_absorption": 1,
106
+ "num_probe_true_positives": 412,
107
+ "num_split_features": 1
108
+ },
109
+ {
110
+ "first_letter": "k",
111
+ "mean_absorption_fraction": 0.09316016842349063,
112
+ "full_absorption_rate": 0.013333333333333334,
113
+ "num_full_absorption": 9,
114
+ "num_probe_true_positives": 675,
115
+ "num_split_features": 1
116
+ },
117
+ {
118
+ "first_letter": "l",
119
+ "mean_absorption_fraction": 0.0027611929011460462,
120
+ "full_absorption_rate": 0.0,
121
+ "num_full_absorption": 0,
122
+ "num_probe_true_positives": 1167,
123
+ "num_split_features": 1
124
+ },
125
+ {
126
+ "first_letter": "m",
127
+ "mean_absorption_fraction": 0.0,
128
+ "full_absorption_rate": 0.007138934651290499,
129
+ "num_full_absorption": 13,
130
+ "num_probe_true_positives": 1821,
131
+ "num_split_features": 1
132
+ },
133
+ {
134
+ "first_letter": "n",
135
+ "mean_absorption_fraction": 0.0018469457675600843,
136
+ "full_absorption_rate": 0.0,
137
+ "num_full_absorption": 0,
138
+ "num_probe_true_positives": 794,
139
+ "num_split_features": 1
140
+ },
141
+ {
142
+ "first_letter": "o",
143
+ "mean_absorption_fraction": 0.018485424239876518,
144
+ "full_absorption_rate": 0.0018744142455482662,
145
+ "num_full_absorption": 2,
146
+ "num_probe_true_positives": 1067,
147
+ "num_split_features": 2
148
+ },
149
+ {
150
+ "first_letter": "p",
151
+ "mean_absorption_fraction": 0.00015561067287864704,
152
+ "full_absorption_rate": 0.0,
153
+ "num_full_absorption": 0,
154
+ "num_probe_true_positives": 2282,
155
+ "num_split_features": 1
156
+ },
157
+ {
158
+ "first_letter": "q",
159
+ "mean_absorption_fraction": 0.014953244171397297,
160
+ "full_absorption_rate": 0.010526315789473684,
161
+ "num_full_absorption": 2,
162
+ "num_probe_true_positives": 190,
163
+ "num_split_features": 1
164
+ },
165
+ {
166
+ "first_letter": "r",
167
+ "mean_absorption_fraction": 0.0004312995824857611,
168
+ "full_absorption_rate": 0.0,
169
+ "num_full_absorption": 0,
170
+ "num_probe_true_positives": 1701,
171
+ "num_split_features": 1
172
+ },
173
+ {
174
+ "first_letter": "s",
175
+ "mean_absorption_fraction": 0.00032280375537369337,
176
+ "full_absorption_rate": 0.0003562522265764161,
177
+ "num_full_absorption": 1,
178
+ "num_probe_true_positives": 2807,
179
+ "num_split_features": 1
180
+ },
181
+ {
182
+ "first_letter": "t",
183
+ "mean_absorption_fraction": 0.008179936000677979,
184
+ "full_absorption_rate": 0.0,
185
+ "num_full_absorption": 0,
186
+ "num_probe_true_positives": 1695,
187
+ "num_split_features": 1
188
+ },
189
+ {
190
+ "first_letter": "u",
191
+ "mean_absorption_fraction": 0.5644845103885536,
192
+ "full_absorption_rate": 0.6251655629139072,
193
+ "num_full_absorption": 472,
194
+ "num_probe_true_positives": 755,
195
+ "num_split_features": 1
196
+ },
197
+ {
198
+ "first_letter": "v",
199
+ "mean_absorption_fraction": 0.001315398452670934,
200
+ "full_absorption_rate": 0.0038022813688212928,
201
+ "num_full_absorption": 3,
202
+ "num_probe_true_positives": 789,
203
+ "num_split_features": 1
204
+ },
205
+ {
206
+ "first_letter": "w",
207
+ "mean_absorption_fraction": 0.0167619192257724,
208
+ "full_absorption_rate": 0.011019283746556474,
209
+ "num_full_absorption": 8,
210
+ "num_probe_true_positives": 726,
211
+ "num_split_features": 1
212
+ },
213
+ {
214
+ "first_letter": "x",
215
+ "mean_absorption_fraction": 0.01788328707276735,
216
+ "full_absorption_rate": 0.0,
217
+ "num_full_absorption": 0,
218
+ "num_probe_true_positives": 113,
219
+ "num_split_features": 2
220
+ },
221
+ {
222
+ "first_letter": "y",
223
+ "mean_absorption_fraction": 0.3549107104349042,
224
+ "full_absorption_rate": 0.2159090909090909,
225
+ "num_full_absorption": 38,
226
+ "num_probe_true_positives": 176,
227
+ "num_split_features": 1
228
+ },
229
+ {
230
+ "first_letter": "z",
231
+ "mean_absorption_fraction": 0.0271881816862366,
232
+ "full_absorption_rate": 0.00425531914893617,
233
+ "num_full_absorption": 1,
234
+ "num_probe_true_positives": 235,
235
+ "num_split_features": 1
236
+ }
237
+ ],
238
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
239
+ "sae_lens_id": "custom_sae",
240
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_4",
241
+ "sae_lens_version": "5.4.2",
242
+ "sae_cfg_dict": {
243
+ "model_name": "gemma-2-2b",
244
+ "d_in": 2304,
245
+ "d_sae": 16384,
246
+ "hook_layer": 12,
247
+ "hook_name": "blocks.12.hook_resid_post",
248
+ "context_size": null,
249
+ "hook_head_index": null,
250
+ "architecture": "topk",
251
+ "apply_b_dec_to_input": null,
252
+ "finetuning_scaling_factor": null,
253
+ "activation_fn_str": "",
254
+ "prepend_bos": true,
255
+ "normalize_activations": "none",
256
+ "dtype": "bfloat16",
257
+ "device": "",
258
+ "dataset_path": "",
259
+ "dataset_trust_remote_code": true,
260
+ "seqpos_slice": [
261
+ null
262
+ ],
263
+ "training_tokens": -100000,
264
+ "sae_lens_training_version": null,
265
+ "neuronpedia_id": null
266
+ },
267
+ "eval_result_unstructured": null
268
+ }
eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_5_custom_sae_eval_results.json ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "absorption_first_letter",
3
+ "eval_config": {
4
+ "model_name": "gemma-2-2b",
5
+ "random_seed": 42,
6
+ "f1_jump_threshold": 0.03,
7
+ "max_k_value": 10,
8
+ "prompt_template": "{word} has the first letter:",
9
+ "prompt_token_pos": -6,
10
+ "llm_batch_size": 32,
11
+ "llm_dtype": "bfloat16",
12
+ "k_sparse_probe_l1_decay": 0.01,
13
+ "k_sparse_probe_batch_size": 4096,
14
+ "k_sparse_probe_num_epochs": 50
15
+ },
16
+ "eval_id": "4b1b8fa0-4e18-48e5-8a0b-ce7bb5b4208b",
17
+ "datetime_epoch_millis": 1740075302374,
18
+ "eval_result_metrics": {
19
+ "mean": {
20
+ "mean_absorption_fraction_score": 0.10691279716460964,
21
+ "mean_full_absorption_score": 0.04496650031498632,
22
+ "mean_num_split_features": 1.3076923076923077,
23
+ "std_dev_absorption_fraction_score": 0.2646622409455047,
24
+ "std_dev_full_absorption_score": 0.15377861098967333,
25
+ "std_dev_num_split_features": 0.5491251783869153
26
+ }
27
+ },
28
+ "eval_result_details": [
29
+ {
30
+ "first_letter": "a",
31
+ "mean_absorption_fraction": 0.0019515094681566653,
32
+ "full_absorption_rate": 0.0,
33
+ "num_full_absorption": 0,
34
+ "num_probe_true_positives": 2508,
35
+ "num_split_features": 2
36
+ },
37
+ {
38
+ "first_letter": "b",
39
+ "mean_absorption_fraction": 0.0015650618455625462,
40
+ "full_absorption_rate": 0.0,
41
+ "num_full_absorption": 0,
42
+ "num_probe_true_positives": 1542,
43
+ "num_split_features": 1
44
+ },
45
+ {
46
+ "first_letter": "c",
47
+ "mean_absorption_fraction": 0.0,
48
+ "full_absorption_rate": 0.0,
49
+ "num_full_absorption": 0,
50
+ "num_probe_true_positives": 2805,
51
+ "num_split_features": 1
52
+ },
53
+ {
54
+ "first_letter": "d",
55
+ "mean_absorption_fraction": 0.0002489021118178152,
56
+ "full_absorption_rate": 0.0,
57
+ "num_full_absorption": 0,
58
+ "num_probe_true_positives": 1660,
59
+ "num_split_features": 1
60
+ },
61
+ {
62
+ "first_letter": "e",
63
+ "mean_absorption_fraction": 0.0017399766884406232,
64
+ "full_absorption_rate": 0.0,
65
+ "num_full_absorption": 0,
66
+ "num_probe_true_positives": 1616,
67
+ "num_split_features": 2
68
+ },
69
+ {
70
+ "first_letter": "f",
71
+ "mean_absorption_fraction": 0.002086390044012097,
72
+ "full_absorption_rate": 0.0,
73
+ "num_full_absorption": 0,
74
+ "num_probe_true_positives": 1238,
75
+ "num_split_features": 1
76
+ },
77
+ {
78
+ "first_letter": "g",
79
+ "mean_absorption_fraction": 0.019592569798226284,
80
+ "full_absorption_rate": 0.0,
81
+ "num_full_absorption": 0,
82
+ "num_probe_true_positives": 1145,
83
+ "num_split_features": 1
84
+ },
85
+ {
86
+ "first_letter": "h",
87
+ "mean_absorption_fraction": 0.010286585437156805,
88
+ "full_absorption_rate": 0.0,
89
+ "num_full_absorption": 0,
90
+ "num_probe_true_positives": 1035,
91
+ "num_split_features": 1
92
+ },
93
+ {
94
+ "first_letter": "i",
95
+ "mean_absorption_fraction": 0.025307686740506496,
96
+ "full_absorption_rate": 0.0,
97
+ "num_full_absorption": 0,
98
+ "num_probe_true_positives": 1638,
99
+ "num_split_features": 2
100
+ },
101
+ {
102
+ "first_letter": "j",
103
+ "mean_absorption_fraction": 0.001778383083826356,
104
+ "full_absorption_rate": 0.0,
105
+ "num_full_absorption": 0,
106
+ "num_probe_true_positives": 412,
107
+ "num_split_features": 2
108
+ },
109
+ {
110
+ "first_letter": "k",
111
+ "mean_absorption_fraction": 0.02774139524886622,
112
+ "full_absorption_rate": 0.0,
113
+ "num_full_absorption": 0,
114
+ "num_probe_true_positives": 675,
115
+ "num_split_features": 1
116
+ },
117
+ {
118
+ "first_letter": "l",
119
+ "mean_absorption_fraction": 0.02970714069454916,
120
+ "full_absorption_rate": 0.0,
121
+ "num_full_absorption": 0,
122
+ "num_probe_true_positives": 1167,
123
+ "num_split_features": 1
124
+ },
125
+ {
126
+ "first_letter": "m",
127
+ "mean_absorption_fraction": 0.005959470315798746,
128
+ "full_absorption_rate": 0.004393190554640308,
129
+ "num_full_absorption": 8,
130
+ "num_probe_true_positives": 1821,
131
+ "num_split_features": 1
132
+ },
133
+ {
134
+ "first_letter": "n",
135
+ "mean_absorption_fraction": 0.0036818374231465958,
136
+ "full_absorption_rate": 0.0,
137
+ "num_full_absorption": 0,
138
+ "num_probe_true_positives": 794,
139
+ "num_split_features": 1
140
+ },
141
+ {
142
+ "first_letter": "o",
143
+ "mean_absorption_fraction": 0.0025381797908818992,
144
+ "full_absorption_rate": 0.0,
145
+ "num_full_absorption": 0,
146
+ "num_probe_true_positives": 1067,
147
+ "num_split_features": 2
148
+ },
149
+ {
150
+ "first_letter": "p",
151
+ "mean_absorption_fraction": 0.002444093867357725,
152
+ "full_absorption_rate": 0.0,
153
+ "num_full_absorption": 0,
154
+ "num_probe_true_positives": 2282,
155
+ "num_split_features": 1
156
+ },
157
+ {
158
+ "first_letter": "q",
159
+ "mean_absorption_fraction": 0.8720913195010103,
160
+ "full_absorption_rate": 0.5157894736842106,
161
+ "num_full_absorption": 98,
162
+ "num_probe_true_positives": 190,
163
+ "num_split_features": 3
164
+ },
165
+ {
166
+ "first_letter": "r",
167
+ "mean_absorption_fraction": 0.0,
168
+ "full_absorption_rate": 0.0,
169
+ "num_full_absorption": 0,
170
+ "num_probe_true_positives": 1701,
171
+ "num_split_features": 1
172
+ },
173
+ {
174
+ "first_letter": "s",
175
+ "mean_absorption_fraction": 0.0001310025196193003,
176
+ "full_absorption_rate": 0.0,
177
+ "num_full_absorption": 0,
178
+ "num_probe_true_positives": 2807,
179
+ "num_split_features": 1
180
+ },
181
+ {
182
+ "first_letter": "t",
183
+ "mean_absorption_fraction": 0.004794285383742862,
184
+ "full_absorption_rate": 0.0,
185
+ "num_full_absorption": 0,
186
+ "num_probe_true_positives": 1695,
187
+ "num_split_features": 1
188
+ },
189
+ {
190
+ "first_letter": "u",
191
+ "mean_absorption_fraction": 0.0082685023894774,
192
+ "full_absorption_rate": 0.0,
193
+ "num_full_absorption": 0,
194
+ "num_probe_true_positives": 755,
195
+ "num_split_features": 2
196
+ },
197
+ {
198
+ "first_letter": "v",
199
+ "mean_absorption_fraction": 0.008778392470257317,
200
+ "full_absorption_rate": 0.0012674271229404308,
201
+ "num_full_absorption": 1,
202
+ "num_probe_true_positives": 789,
203
+ "num_split_features": 1
204
+ },
205
+ {
206
+ "first_letter": "w",
207
+ "mean_absorption_fraction": 0.004039336538578031,
208
+ "full_absorption_rate": 0.0,
209
+ "num_full_absorption": 0,
210
+ "num_probe_true_positives": 726,
211
+ "num_split_features": 1
212
+ },
213
+ {
214
+ "first_letter": "x",
215
+ "mean_absorption_fraction": 0.16255359383165666,
216
+ "full_absorption_rate": 0.0,
217
+ "num_full_absorption": 0,
218
+ "num_probe_true_positives": 113,
219
+ "num_split_features": 1
220
+ },
221
+ {
222
+ "first_letter": "y",
223
+ "mean_absorption_fraction": 0.8471691439845103,
224
+ "full_absorption_rate": 0.6136363636363636,
225
+ "num_full_absorption": 108,
226
+ "num_probe_true_positives": 176,
227
+ "num_split_features": 1
228
+ },
229
+ {
230
+ "first_letter": "z",
231
+ "mean_absorption_fraction": 0.7352779671026924,
232
+ "full_absorption_rate": 0.03404255319148936,
233
+ "num_full_absorption": 8,
234
+ "num_probe_true_positives": 235,
235
+ "num_split_features": 1
236
+ }
237
+ ],
238
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
239
+ "sae_lens_id": "custom_sae",
240
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_5",
241
+ "sae_lens_version": "5.4.2",
242
+ "sae_cfg_dict": {
243
+ "model_name": "gemma-2-2b",
244
+ "d_in": 2304,
245
+ "d_sae": 16384,
246
+ "hook_layer": 12,
247
+ "hook_name": "blocks.12.hook_resid_post",
248
+ "context_size": null,
249
+ "hook_head_index": null,
250
+ "architecture": "topk",
251
+ "apply_b_dec_to_input": null,
252
+ "finetuning_scaling_factor": null,
253
+ "activation_fn_str": "",
254
+ "prepend_bos": true,
255
+ "normalize_activations": "none",
256
+ "dtype": "bfloat16",
257
+ "device": "",
258
+ "dataset_path": "",
259
+ "dataset_trust_remote_code": true,
260
+ "seqpos_slice": [
261
+ null
262
+ ],
263
+ "training_tokens": -100000,
264
+ "sae_lens_training_version": null,
265
+ "neuronpedia_id": null
266
+ },
267
+ "eval_result_unstructured": null
268
+ }
eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_0_custom_sae_eval_results.json ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "absorption_first_letter",
3
+ "eval_config": {
4
+ "model_name": "gemma-2-2b",
5
+ "random_seed": 42,
6
+ "f1_jump_threshold": 0.03,
7
+ "max_k_value": 10,
8
+ "prompt_template": "{word} has the first letter:",
9
+ "prompt_token_pos": -6,
10
+ "llm_batch_size": 32,
11
+ "llm_dtype": "bfloat16",
12
+ "k_sparse_probe_l1_decay": 0.01,
13
+ "k_sparse_probe_batch_size": 4096,
14
+ "k_sparse_probe_num_epochs": 50
15
+ },
16
+ "eval_id": "9c26caea-7ab6-40f7-b9ff-92a9e95bc77b",
17
+ "datetime_epoch_millis": 1740111330754,
18
+ "eval_result_metrics": {
19
+ "mean": {
20
+ "mean_absorption_fraction_score": 0.5571745900551895,
21
+ "mean_full_absorption_score": 0.6205963032975313,
22
+ "mean_num_split_features": 3.3461538461538463,
23
+ "std_dev_absorption_fraction_score": 0.21989389200707546,
24
+ "std_dev_full_absorption_score": 0.22468883182683483,
25
+ "std_dev_num_split_features": 2.115510485765697
26
+ }
27
+ },
28
+ "eval_result_details": [
29
+ {
30
+ "first_letter": "a",
31
+ "mean_absorption_fraction": 0.8328389947558087,
32
+ "full_absorption_rate": 0.7372408293460925,
33
+ "num_full_absorption": 1849,
34
+ "num_probe_true_positives": 2508,
35
+ "num_split_features": 5
36
+ },
37
+ {
38
+ "first_letter": "b",
39
+ "mean_absorption_fraction": 0.5479417963209664,
40
+ "full_absorption_rate": 0.6582360570687419,
41
+ "num_full_absorption": 1015,
42
+ "num_probe_true_positives": 1542,
43
+ "num_split_features": 7
44
+ },
45
+ {
46
+ "first_letter": "c",
47
+ "mean_absorption_fraction": 0.86249459792848,
48
+ "full_absorption_rate": 0.9094474153297682,
49
+ "num_full_absorption": 2551,
50
+ "num_probe_true_positives": 2805,
51
+ "num_split_features": 2
52
+ },
53
+ {
54
+ "first_letter": "d",
55
+ "mean_absorption_fraction": 0.7076732984318926,
56
+ "full_absorption_rate": 0.7518072289156627,
57
+ "num_full_absorption": 1248,
58
+ "num_probe_true_positives": 1660,
59
+ "num_split_features": 4
60
+ },
61
+ {
62
+ "first_letter": "e",
63
+ "mean_absorption_fraction": 0.6826856855395601,
64
+ "full_absorption_rate": 0.7048267326732673,
65
+ "num_full_absorption": 1139,
66
+ "num_probe_true_positives": 1616,
67
+ "num_split_features": 4
68
+ },
69
+ {
70
+ "first_letter": "f",
71
+ "mean_absorption_fraction": 0.6202403786327095,
72
+ "full_absorption_rate": 0.6672051696284329,
73
+ "num_full_absorption": 826,
74
+ "num_probe_true_positives": 1238,
75
+ "num_split_features": 9
76
+ },
77
+ {
78
+ "first_letter": "g",
79
+ "mean_absorption_fraction": 0.6125840391486669,
80
+ "full_absorption_rate": 0.7406113537117904,
81
+ "num_full_absorption": 848,
82
+ "num_probe_true_positives": 1145,
83
+ "num_split_features": 4
84
+ },
85
+ {
86
+ "first_letter": "h",
87
+ "mean_absorption_fraction": 0.5458358390619358,
88
+ "full_absorption_rate": 0.561352657004831,
89
+ "num_full_absorption": 581,
90
+ "num_probe_true_positives": 1035,
91
+ "num_split_features": 6
92
+ },
93
+ {
94
+ "first_letter": "i",
95
+ "mean_absorption_fraction": 0.6988443248524734,
96
+ "full_absorption_rate": 0.7576312576312576,
97
+ "num_full_absorption": 1241,
98
+ "num_probe_true_positives": 1638,
99
+ "num_split_features": 2
100
+ },
101
+ {
102
+ "first_letter": "j",
103
+ "mean_absorption_fraction": 0.3962094458509169,
104
+ "full_absorption_rate": 0.4441747572815534,
105
+ "num_full_absorption": 183,
106
+ "num_probe_true_positives": 412,
107
+ "num_split_features": 4
108
+ },
109
+ {
110
+ "first_letter": "k",
111
+ "mean_absorption_fraction": 0.24225838471380645,
112
+ "full_absorption_rate": 0.2548148148148148,
113
+ "num_full_absorption": 172,
114
+ "num_probe_true_positives": 675,
115
+ "num_split_features": 1
116
+ },
117
+ {
118
+ "first_letter": "l",
119
+ "mean_absorption_fraction": 0.4866409619756762,
120
+ "full_absorption_rate": 0.6101113967437874,
121
+ "num_full_absorption": 712,
122
+ "num_probe_true_positives": 1167,
123
+ "num_split_features": 4
124
+ },
125
+ {
126
+ "first_letter": "m",
127
+ "mean_absorption_fraction": 0.6178430577948258,
128
+ "full_absorption_rate": 0.7040087863811093,
129
+ "num_full_absorption": 1282,
130
+ "num_probe_true_positives": 1821,
131
+ "num_split_features": 6
132
+ },
133
+ {
134
+ "first_letter": "n",
135
+ "mean_absorption_fraction": 0.6032541710977196,
136
+ "full_absorption_rate": 0.6259445843828715,
137
+ "num_full_absorption": 497,
138
+ "num_probe_true_positives": 794,
139
+ "num_split_features": 3
140
+ },
141
+ {
142
+ "first_letter": "o",
143
+ "mean_absorption_fraction": 0.6203477114750064,
144
+ "full_absorption_rate": 0.7806935332708529,
145
+ "num_full_absorption": 833,
146
+ "num_probe_true_positives": 1067,
147
+ "num_split_features": 2
148
+ },
149
+ {
150
+ "first_letter": "p",
151
+ "mean_absorption_fraction": 0.9127210191414626,
152
+ "full_absorption_rate": 0.9189307624890447,
153
+ "num_full_absorption": 2097,
154
+ "num_probe_true_positives": 2282,
155
+ "num_split_features": 1
156
+ },
157
+ {
158
+ "first_letter": "q",
159
+ "mean_absorption_fraction": 0.4269980273815769,
160
+ "full_absorption_rate": 0.45263157894736844,
161
+ "num_full_absorption": 86,
162
+ "num_probe_true_positives": 190,
163
+ "num_split_features": 1
164
+ },
165
+ {
166
+ "first_letter": "r",
167
+ "mean_absorption_fraction": 0.6736719884689889,
168
+ "full_absorption_rate": 0.746031746031746,
169
+ "num_full_absorption": 1269,
170
+ "num_probe_true_positives": 1701,
171
+ "num_split_features": 4
172
+ },
173
+ {
174
+ "first_letter": "s",
175
+ "mean_absorption_fraction": 0.7408915278900652,
176
+ "full_absorption_rate": 0.8589241182757392,
177
+ "num_full_absorption": 2411,
178
+ "num_probe_true_positives": 2807,
179
+ "num_split_features": 3
180
+ },
181
+ {
182
+ "first_letter": "t",
183
+ "mean_absorption_fraction": 0.7782384749711955,
184
+ "full_absorption_rate": 0.7787610619469026,
185
+ "num_full_absorption": 1320,
186
+ "num_probe_true_positives": 1695,
187
+ "num_split_features": 3
188
+ },
189
+ {
190
+ "first_letter": "u",
191
+ "mean_absorption_fraction": 0.44644447032672796,
192
+ "full_absorption_rate": 0.7311258278145696,
193
+ "num_full_absorption": 552,
194
+ "num_probe_true_positives": 755,
195
+ "num_split_features": 1
196
+ },
197
+ {
198
+ "first_letter": "v",
199
+ "mean_absorption_fraction": 0.4525626767954696,
200
+ "full_absorption_rate": 0.5690747782002535,
201
+ "num_full_absorption": 449,
202
+ "num_probe_true_positives": 789,
203
+ "num_split_features": 5
204
+ },
205
+ {
206
+ "first_letter": "w",
207
+ "mean_absorption_fraction": 0.6077924133789215,
208
+ "full_absorption_rate": 0.7107438016528925,
209
+ "num_full_absorption": 516,
210
+ "num_probe_true_positives": 726,
211
+ "num_split_features": 3
212
+ },
213
+ {
214
+ "first_letter": "x",
215
+ "mean_absorption_fraction": 0.07823997577754759,
216
+ "full_absorption_rate": 0.07079646017699115,
217
+ "num_full_absorption": 8,
218
+ "num_probe_true_positives": 113,
219
+ "num_split_features": 1
220
+ },
221
+ {
222
+ "first_letter": "y",
223
+ "mean_absorption_fraction": 0.16327547847243773,
224
+ "full_absorption_rate": 0.2159090909090909,
225
+ "num_full_absorption": 38,
226
+ "num_probe_true_positives": 176,
227
+ "num_split_features": 1
228
+ },
229
+ {
230
+ "first_letter": "z",
231
+ "mean_absorption_fraction": 0.12801060125008876,
232
+ "full_absorption_rate": 0.17446808510638298,
233
+ "num_full_absorption": 41,
234
+ "num_probe_true_positives": 235,
235
+ "num_split_features": 1
236
+ }
237
+ ],
238
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
239
+ "sae_lens_id": "custom_sae",
240
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_0",
241
+ "sae_lens_version": "5.4.2",
242
+ "sae_cfg_dict": {
243
+ "model_name": "gemma-2-2b",
244
+ "d_in": 2304,
245
+ "d_sae": 65536,
246
+ "hook_layer": 12,
247
+ "hook_name": "blocks.12.hook_resid_post",
248
+ "context_size": null,
249
+ "hook_head_index": null,
250
+ "architecture": "topk",
251
+ "apply_b_dec_to_input": null,
252
+ "finetuning_scaling_factor": null,
253
+ "activation_fn_str": "",
254
+ "prepend_bos": true,
255
+ "normalize_activations": "none",
256
+ "dtype": "bfloat16",
257
+ "device": "",
258
+ "dataset_path": "",
259
+ "dataset_trust_remote_code": true,
260
+ "seqpos_slice": [
261
+ null
262
+ ],
263
+ "training_tokens": -100000,
264
+ "sae_lens_training_version": null,
265
+ "neuronpedia_id": null
266
+ },
267
+ "eval_result_unstructured": null
268
+ }
eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_1_custom_sae_eval_results.json ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "absorption_first_letter",
3
+ "eval_config": {
4
+ "model_name": "gemma-2-2b",
5
+ "random_seed": 42,
6
+ "f1_jump_threshold": 0.03,
7
+ "max_k_value": 10,
8
+ "prompt_template": "{word} has the first letter:",
9
+ "prompt_token_pos": -6,
10
+ "llm_batch_size": 32,
11
+ "llm_dtype": "bfloat16",
12
+ "k_sparse_probe_l1_decay": 0.01,
13
+ "k_sparse_probe_batch_size": 4096,
14
+ "k_sparse_probe_num_epochs": 50
15
+ },
16
+ "eval_id": "efd023b3-0586-4aba-8ca4-647050643711",
17
+ "datetime_epoch_millis": 1740108755984,
18
+ "eval_result_metrics": {
19
+ "mean": {
20
+ "mean_absorption_fraction_score": 0.5428689946157796,
21
+ "mean_full_absorption_score": 0.5340971732661811,
22
+ "mean_num_split_features": 3.5384615384615383,
23
+ "std_dev_absorption_fraction_score": 0.24710052044084302,
24
+ "std_dev_full_absorption_score": 0.24258449258739415,
25
+ "std_dev_num_split_features": 1.964296703265965
26
+ }
27
+ },
28
+ "eval_result_details": [
29
+ {
30
+ "first_letter": "a",
31
+ "mean_absorption_fraction": 0.7381686628198274,
32
+ "full_absorption_rate": 0.32137161084529503,
33
+ "num_full_absorption": 806,
34
+ "num_probe_true_positives": 2508,
35
+ "num_split_features": 6
36
+ },
37
+ {
38
+ "first_letter": "b",
39
+ "mean_absorption_fraction": 0.5984969751995518,
40
+ "full_absorption_rate": 0.6368352788586251,
41
+ "num_full_absorption": 982,
42
+ "num_probe_true_positives": 1542,
43
+ "num_split_features": 7
44
+ },
45
+ {
46
+ "first_letter": "c",
47
+ "mean_absorption_fraction": 0.8509632004082076,
48
+ "full_absorption_rate": 0.8281639928698752,
49
+ "num_full_absorption": 2323,
50
+ "num_probe_true_positives": 2805,
51
+ "num_split_features": 3
52
+ },
53
+ {
54
+ "first_letter": "d",
55
+ "mean_absorption_fraction": 0.7385909936977806,
56
+ "full_absorption_rate": 0.6879518072289157,
57
+ "num_full_absorption": 1142,
58
+ "num_probe_true_positives": 1660,
59
+ "num_split_features": 6
60
+ },
61
+ {
62
+ "first_letter": "e",
63
+ "mean_absorption_fraction": 0.7064702427486708,
64
+ "full_absorption_rate": 0.713490099009901,
65
+ "num_full_absorption": 1153,
66
+ "num_probe_true_positives": 1616,
67
+ "num_split_features": 4
68
+ },
69
+ {
70
+ "first_letter": "f",
71
+ "mean_absorption_fraction": 0.8272320803216332,
72
+ "full_absorption_rate": 0.8206785137318255,
73
+ "num_full_absorption": 1016,
74
+ "num_probe_true_positives": 1238,
75
+ "num_split_features": 3
76
+ },
77
+ {
78
+ "first_letter": "g",
79
+ "mean_absorption_fraction": 0.48885130442705776,
80
+ "full_absorption_rate": 0.5048034934497817,
81
+ "num_full_absorption": 578,
82
+ "num_probe_true_positives": 1145,
83
+ "num_split_features": 7
84
+ },
85
+ {
86
+ "first_letter": "h",
87
+ "mean_absorption_fraction": 0.5506681226653267,
88
+ "full_absorption_rate": 0.523671497584541,
89
+ "num_full_absorption": 542,
90
+ "num_probe_true_positives": 1035,
91
+ "num_split_features": 6
92
+ },
93
+ {
94
+ "first_letter": "i",
95
+ "mean_absorption_fraction": 0.5955470934241096,
96
+ "full_absorption_rate": 0.6172161172161172,
97
+ "num_full_absorption": 1011,
98
+ "num_probe_true_positives": 1638,
99
+ "num_split_features": 4
100
+ },
101
+ {
102
+ "first_letter": "j",
103
+ "mean_absorption_fraction": 0.4661751002159926,
104
+ "full_absorption_rate": 0.47572815533980584,
105
+ "num_full_absorption": 196,
106
+ "num_probe_true_positives": 412,
107
+ "num_split_features": 1
108
+ },
109
+ {
110
+ "first_letter": "k",
111
+ "mean_absorption_fraction": 0.08811882770229734,
112
+ "full_absorption_rate": 0.08296296296296296,
113
+ "num_full_absorption": 56,
114
+ "num_probe_true_positives": 675,
115
+ "num_split_features": 1
116
+ },
117
+ {
118
+ "first_letter": "l",
119
+ "mean_absorption_fraction": 0.48978935395668166,
120
+ "full_absorption_rate": 0.5029991431019709,
121
+ "num_full_absorption": 587,
122
+ "num_probe_true_positives": 1167,
123
+ "num_split_features": 6
124
+ },
125
+ {
126
+ "first_letter": "m",
127
+ "mean_absorption_fraction": 0.7243955590490241,
128
+ "full_absorption_rate": 0.7742998352553542,
129
+ "num_full_absorption": 1410,
130
+ "num_probe_true_positives": 1821,
131
+ "num_split_features": 4
132
+ },
133
+ {
134
+ "first_letter": "n",
135
+ "mean_absorption_fraction": 0.5111219581814586,
136
+ "full_absorption_rate": 0.4622166246851385,
137
+ "num_full_absorption": 367,
138
+ "num_probe_true_positives": 794,
139
+ "num_split_features": 5
140
+ },
141
+ {
142
+ "first_letter": "o",
143
+ "mean_absorption_fraction": 0.5677139221288461,
144
+ "full_absorption_rate": 0.5951265229615745,
145
+ "num_full_absorption": 635,
146
+ "num_probe_true_positives": 1067,
147
+ "num_split_features": 3
148
+ },
149
+ {
150
+ "first_letter": "p",
151
+ "mean_absorption_fraction": 0.9270114805234171,
152
+ "full_absorption_rate": 0.8943908851884312,
153
+ "num_full_absorption": 2041,
154
+ "num_probe_true_positives": 2282,
155
+ "num_split_features": 2
156
+ },
157
+ {
158
+ "first_letter": "q",
159
+ "mean_absorption_fraction": 0.3246987177931468,
160
+ "full_absorption_rate": 0.3157894736842105,
161
+ "num_full_absorption": 60,
162
+ "num_probe_true_positives": 190,
163
+ "num_split_features": 2
164
+ },
165
+ {
166
+ "first_letter": "r",
167
+ "mean_absorption_fraction": 0.6958312111104011,
168
+ "full_absorption_rate": 0.7184009406231628,
169
+ "num_full_absorption": 1222,
170
+ "num_probe_true_positives": 1701,
171
+ "num_split_features": 4
172
+ },
173
+ {
174
+ "first_letter": "s",
175
+ "mean_absorption_fraction": 0.7227235278168044,
176
+ "full_absorption_rate": 0.8229426433915212,
177
+ "num_full_absorption": 2310,
178
+ "num_probe_true_positives": 2807,
179
+ "num_split_features": 5
180
+ },
181
+ {
182
+ "first_letter": "t",
183
+ "mean_absorption_fraction": 0.8121763803507712,
184
+ "full_absorption_rate": 0.7457227138643068,
185
+ "num_full_absorption": 1264,
186
+ "num_probe_true_positives": 1695,
187
+ "num_split_features": 3
188
+ },
189
+ {
190
+ "first_letter": "u",
191
+ "mean_absorption_fraction": 0.2571658200652339,
192
+ "full_absorption_rate": 0.36821192052980134,
193
+ "num_full_absorption": 278,
194
+ "num_probe_true_positives": 755,
195
+ "num_split_features": 2
196
+ },
197
+ {
198
+ "first_letter": "v",
199
+ "mean_absorption_fraction": 0.447052814587088,
200
+ "full_absorption_rate": 0.47021546261089986,
201
+ "num_full_absorption": 371,
202
+ "num_probe_true_positives": 789,
203
+ "num_split_features": 3
204
+ },
205
+ {
206
+ "first_letter": "w",
207
+ "mean_absorption_fraction": 0.6457433298421121,
208
+ "full_absorption_rate": 0.6487603305785123,
209
+ "num_full_absorption": 471,
210
+ "num_probe_true_positives": 726,
211
+ "num_split_features": 2
212
+ },
213
+ {
214
+ "first_letter": "x",
215
+ "mean_absorption_fraction": 0.17761250387893723,
216
+ "full_absorption_rate": 0.1415929203539823,
217
+ "num_full_absorption": 16,
218
+ "num_probe_true_positives": 113,
219
+ "num_split_features": 1
220
+ },
221
+ {
222
+ "first_letter": "y",
223
+ "mean_absorption_fraction": 0.12538379998253557,
224
+ "full_absorption_rate": 0.1534090909090909,
225
+ "num_full_absorption": 27,
226
+ "num_probe_true_positives": 176,
227
+ "num_split_features": 1
228
+ },
229
+ {
230
+ "first_letter": "z",
231
+ "mean_absorption_fraction": 0.03689087711335874,
232
+ "full_absorption_rate": 0.059574468085106386,
233
+ "num_full_absorption": 14,
234
+ "num_probe_true_positives": 235,
235
+ "num_split_features": 1
236
+ }
237
+ ],
238
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
239
+ "sae_lens_id": "custom_sae",
240
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_1",
241
+ "sae_lens_version": "5.4.2",
242
+ "sae_cfg_dict": {
243
+ "model_name": "gemma-2-2b",
244
+ "d_in": 2304,
245
+ "d_sae": 65536,
246
+ "hook_layer": 12,
247
+ "hook_name": "blocks.12.hook_resid_post",
248
+ "context_size": null,
249
+ "hook_head_index": null,
250
+ "architecture": "topk",
251
+ "apply_b_dec_to_input": null,
252
+ "finetuning_scaling_factor": null,
253
+ "activation_fn_str": "",
254
+ "prepend_bos": true,
255
+ "normalize_activations": "none",
256
+ "dtype": "bfloat16",
257
+ "device": "",
258
+ "dataset_path": "",
259
+ "dataset_trust_remote_code": true,
260
+ "seqpos_slice": [
261
+ null
262
+ ],
263
+ "training_tokens": -100000,
264
+ "sae_lens_training_version": null,
265
+ "neuronpedia_id": null
266
+ },
267
+ "eval_result_unstructured": null
268
+ }
eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_2_custom_sae_eval_results.json ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "absorption_first_letter",
3
+ "eval_config": {
4
+ "model_name": "gemma-2-2b",
5
+ "random_seed": 42,
6
+ "f1_jump_threshold": 0.03,
7
+ "max_k_value": 10,
8
+ "prompt_template": "{word} has the first letter:",
9
+ "prompt_token_pos": -6,
10
+ "llm_batch_size": 32,
11
+ "llm_dtype": "bfloat16",
12
+ "k_sparse_probe_l1_decay": 0.01,
13
+ "k_sparse_probe_batch_size": 4096,
14
+ "k_sparse_probe_num_epochs": 50
15
+ },
16
+ "eval_id": "15798b98-1844-4735-aaa8-f77bf7522cd9",
17
+ "datetime_epoch_millis": 1740112140202,
18
+ "eval_result_metrics": {
19
+ "mean": {
20
+ "mean_absorption_fraction_score": 0.352325354017403,
21
+ "mean_full_absorption_score": 0.30593847749055475,
22
+ "mean_num_split_features": 1.2307692307692308,
23
+ "std_dev_absorption_fraction_score": 0.22898626550383655,
24
+ "std_dev_full_absorption_score": 0.22145601085815653,
25
+ "std_dev_num_split_features": 0.4296689244236597
26
+ }
27
+ },
28
+ "eval_result_details": [
29
+ {
30
+ "first_letter": "a",
31
+ "mean_absorption_fraction": 0.6216706060419831,
32
+ "full_absorption_rate": 0.4409888357256778,
33
+ "num_full_absorption": 1106,
34
+ "num_probe_true_positives": 2508,
35
+ "num_split_features": 1
36
+ },
37
+ {
38
+ "first_letter": "b",
39
+ "mean_absorption_fraction": 0.3974572887100187,
40
+ "full_absorption_rate": 0.3521400778210117,
41
+ "num_full_absorption": 543,
42
+ "num_probe_true_positives": 1542,
43
+ "num_split_features": 1
44
+ },
45
+ {
46
+ "first_letter": "c",
47
+ "mean_absorption_fraction": 0.6927646146693088,
48
+ "full_absorption_rate": 0.6413547237076649,
49
+ "num_full_absorption": 1799,
50
+ "num_probe_true_positives": 2805,
51
+ "num_split_features": 1
52
+ },
53
+ {
54
+ "first_letter": "d",
55
+ "mean_absorption_fraction": 0.47744418964989493,
56
+ "full_absorption_rate": 0.36927710843373496,
57
+ "num_full_absorption": 613,
58
+ "num_probe_true_positives": 1660,
59
+ "num_split_features": 2
60
+ },
61
+ {
62
+ "first_letter": "e",
63
+ "mean_absorption_fraction": 0.5870694404248965,
64
+ "full_absorption_rate": 0.573019801980198,
65
+ "num_full_absorption": 926,
66
+ "num_probe_true_positives": 1616,
67
+ "num_split_features": 2
68
+ },
69
+ {
70
+ "first_letter": "f",
71
+ "mean_absorption_fraction": 0.5830104351629766,
72
+ "full_absorption_rate": 0.5201938610662359,
73
+ "num_full_absorption": 644,
74
+ "num_probe_true_positives": 1238,
75
+ "num_split_features": 1
76
+ },
77
+ {
78
+ "first_letter": "g",
79
+ "mean_absorption_fraction": 0.34745234590018287,
80
+ "full_absorption_rate": 0.25851528384279476,
81
+ "num_full_absorption": 296,
82
+ "num_probe_true_positives": 1145,
83
+ "num_split_features": 2
84
+ },
85
+ {
86
+ "first_letter": "h",
87
+ "mean_absorption_fraction": 0.22967960561507234,
88
+ "full_absorption_rate": 0.13429951690821257,
89
+ "num_full_absorption": 139,
90
+ "num_probe_true_positives": 1035,
91
+ "num_split_features": 1
92
+ },
93
+ {
94
+ "first_letter": "i",
95
+ "mean_absorption_fraction": 0.619077199673277,
96
+ "full_absorption_rate": 0.6501831501831502,
97
+ "num_full_absorption": 1065,
98
+ "num_probe_true_positives": 1638,
99
+ "num_split_features": 1
100
+ },
101
+ {
102
+ "first_letter": "j",
103
+ "mean_absorption_fraction": 0.00171975497397602,
104
+ "full_absorption_rate": 0.0048543689320388345,
105
+ "num_full_absorption": 2,
106
+ "num_probe_true_positives": 412,
107
+ "num_split_features": 1
108
+ },
109
+ {
110
+ "first_letter": "k",
111
+ "mean_absorption_fraction": 0.018045039728896824,
112
+ "full_absorption_rate": 0.01925925925925926,
113
+ "num_full_absorption": 13,
114
+ "num_probe_true_positives": 675,
115
+ "num_split_features": 1
116
+ },
117
+ {
118
+ "first_letter": "l",
119
+ "mean_absorption_fraction": 0.45296058549713225,
120
+ "full_absorption_rate": 0.4670094258783205,
121
+ "num_full_absorption": 545,
122
+ "num_probe_true_positives": 1167,
123
+ "num_split_features": 1
124
+ },
125
+ {
126
+ "first_letter": "m",
127
+ "mean_absorption_fraction": 0.5811030809298412,
128
+ "full_absorption_rate": 0.5612300933552993,
129
+ "num_full_absorption": 1022,
130
+ "num_probe_true_positives": 1821,
131
+ "num_split_features": 1
132
+ },
133
+ {
134
+ "first_letter": "n",
135
+ "mean_absorption_fraction": 0.22312187000875386,
136
+ "full_absorption_rate": 0.13476070528967254,
137
+ "num_full_absorption": 107,
138
+ "num_probe_true_positives": 794,
139
+ "num_split_features": 1
140
+ },
141
+ {
142
+ "first_letter": "o",
143
+ "mean_absorption_fraction": 0.2300412012388341,
144
+ "full_absorption_rate": 0.1846298031865042,
145
+ "num_full_absorption": 197,
146
+ "num_probe_true_positives": 1067,
147
+ "num_split_features": 1
148
+ },
149
+ {
150
+ "first_letter": "p",
151
+ "mean_absorption_fraction": 0.6751946688382788,
152
+ "full_absorption_rate": 0.5753724802804557,
153
+ "num_full_absorption": 1313,
154
+ "num_probe_true_positives": 2282,
155
+ "num_split_features": 1
156
+ },
157
+ {
158
+ "first_letter": "q",
159
+ "mean_absorption_fraction": 0.05781321882890537,
160
+ "full_absorption_rate": 0.042105263157894736,
161
+ "num_full_absorption": 8,
162
+ "num_probe_true_positives": 190,
163
+ "num_split_features": 1
164
+ },
165
+ {
166
+ "first_letter": "r",
167
+ "mean_absorption_fraction": 0.488805156034051,
168
+ "full_absorption_rate": 0.4262198706643151,
169
+ "num_full_absorption": 725,
170
+ "num_probe_true_positives": 1701,
171
+ "num_split_features": 2
172
+ },
173
+ {
174
+ "first_letter": "s",
175
+ "mean_absorption_fraction": 0.5873766958549872,
176
+ "full_absorption_rate": 0.6127538297114357,
177
+ "num_full_absorption": 1720,
178
+ "num_probe_true_positives": 2807,
179
+ "num_split_features": 2
180
+ },
181
+ {
182
+ "first_letter": "t",
183
+ "mean_absorption_fraction": 0.3832592488071156,
184
+ "full_absorption_rate": 0.25309734513274335,
185
+ "num_full_absorption": 429,
186
+ "num_probe_true_positives": 1695,
187
+ "num_split_features": 1
188
+ },
189
+ {
190
+ "first_letter": "u",
191
+ "mean_absorption_fraction": 0.13962747521360538,
192
+ "full_absorption_rate": 0.09933774834437085,
193
+ "num_full_absorption": 75,
194
+ "num_probe_true_positives": 755,
195
+ "num_split_features": 2
196
+ },
197
+ {
198
+ "first_letter": "v",
199
+ "mean_absorption_fraction": 0.1366646018147327,
200
+ "full_absorption_rate": 0.13688212927756654,
201
+ "num_full_absorption": 108,
202
+ "num_probe_true_positives": 789,
203
+ "num_split_features": 1
204
+ },
205
+ {
206
+ "first_letter": "w",
207
+ "mean_absorption_fraction": 0.3388312082364926,
208
+ "full_absorption_rate": 0.31129476584022037,
209
+ "num_full_absorption": 226,
210
+ "num_probe_true_positives": 726,
211
+ "num_split_features": 1
212
+ },
213
+ {
214
+ "first_letter": "x",
215
+ "mean_absorption_fraction": 0.23041273003221366,
216
+ "full_absorption_rate": 0.1415929203539823,
217
+ "num_full_absorption": 16,
218
+ "num_probe_true_positives": 113,
219
+ "num_split_features": 1
220
+ },
221
+ {
222
+ "first_letter": "y",
223
+ "mean_absorption_fraction": 0.05134630426917764,
224
+ "full_absorption_rate": 0.03977272727272727,
225
+ "num_full_absorption": 7,
226
+ "num_probe_true_positives": 176,
227
+ "num_split_features": 1
228
+ },
229
+ {
230
+ "first_letter": "z",
231
+ "mean_absorption_fraction": 0.00851063829787234,
232
+ "full_absorption_rate": 0.00425531914893617,
233
+ "num_full_absorption": 1,
234
+ "num_probe_true_positives": 235,
235
+ "num_split_features": 1
236
+ }
237
+ ],
238
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
239
+ "sae_lens_id": "custom_sae",
240
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_2",
241
+ "sae_lens_version": "5.4.2",
242
+ "sae_cfg_dict": {
243
+ "model_name": "gemma-2-2b",
244
+ "d_in": 2304,
245
+ "d_sae": 65536,
246
+ "hook_layer": 12,
247
+ "hook_name": "blocks.12.hook_resid_post",
248
+ "context_size": null,
249
+ "hook_head_index": null,
250
+ "architecture": "topk",
251
+ "apply_b_dec_to_input": null,
252
+ "finetuning_scaling_factor": null,
253
+ "activation_fn_str": "",
254
+ "prepend_bos": true,
255
+ "normalize_activations": "none",
256
+ "dtype": "bfloat16",
257
+ "device": "",
258
+ "dataset_path": "",
259
+ "dataset_trust_remote_code": true,
260
+ "seqpos_slice": [
261
+ null
262
+ ],
263
+ "training_tokens": -100000,
264
+ "sae_lens_training_version": null,
265
+ "neuronpedia_id": null
266
+ },
267
+ "eval_result_unstructured": null
268
+ }
eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_3_custom_sae_eval_results.json ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "absorption_first_letter",
3
+ "eval_config": {
4
+ "model_name": "gemma-2-2b",
5
+ "random_seed": 42,
6
+ "f1_jump_threshold": 0.03,
7
+ "max_k_value": 10,
8
+ "prompt_template": "{word} has the first letter:",
9
+ "prompt_token_pos": -6,
10
+ "llm_batch_size": 32,
11
+ "llm_dtype": "bfloat16",
12
+ "k_sparse_probe_l1_decay": 0.01,
13
+ "k_sparse_probe_batch_size": 4096,
14
+ "k_sparse_probe_num_epochs": 50
15
+ },
16
+ "eval_id": "26084e69-c5d9-40cd-b4e5-696f309d64fc",
17
+ "datetime_epoch_millis": 1740112965166,
18
+ "eval_result_metrics": {
19
+ "mean": {
20
+ "mean_absorption_fraction_score": 0.15174524643532927,
21
+ "mean_full_absorption_score": 0.08589398459398576,
22
+ "mean_num_split_features": 1.0384615384615385,
23
+ "std_dev_absorption_fraction_score": 0.16384394369953284,
24
+ "std_dev_full_absorption_score": 0.14338353890417252,
25
+ "std_dev_num_split_features": 0.19611613513818404
26
+ }
27
+ },
28
+ "eval_result_details": [
29
+ {
30
+ "first_letter": "a",
31
+ "mean_absorption_fraction": 0.13921711721546576,
32
+ "full_absorption_rate": 0.03708133971291866,
33
+ "num_full_absorption": 93,
34
+ "num_probe_true_positives": 2508,
35
+ "num_split_features": 1
36
+ },
37
+ {
38
+ "first_letter": "b",
39
+ "mean_absorption_fraction": 0.04310752832952938,
40
+ "full_absorption_rate": 0.014267185473411154,
41
+ "num_full_absorption": 22,
42
+ "num_probe_true_positives": 1542,
43
+ "num_split_features": 1
44
+ },
45
+ {
46
+ "first_letter": "c",
47
+ "mean_absorption_fraction": 0.26743136506353554,
48
+ "full_absorption_rate": 0.1319073083778966,
49
+ "num_full_absorption": 370,
50
+ "num_probe_true_positives": 2805,
51
+ "num_split_features": 1
52
+ },
53
+ {
54
+ "first_letter": "d",
55
+ "mean_absorption_fraction": 0.3161296378836916,
56
+ "full_absorption_rate": 0.16686746987951806,
57
+ "num_full_absorption": 277,
58
+ "num_probe_true_positives": 1660,
59
+ "num_split_features": 1
60
+ },
61
+ {
62
+ "first_letter": "e",
63
+ "mean_absorption_fraction": 0.2612935127831887,
64
+ "full_absorption_rate": 0.11448019801980198,
65
+ "num_full_absorption": 185,
66
+ "num_probe_true_positives": 1616,
67
+ "num_split_features": 1
68
+ },
69
+ {
70
+ "first_letter": "f",
71
+ "mean_absorption_fraction": 0.2231852171454896,
72
+ "full_absorption_rate": 0.09289176090468497,
73
+ "num_full_absorption": 115,
74
+ "num_probe_true_positives": 1238,
75
+ "num_split_features": 1
76
+ },
77
+ {
78
+ "first_letter": "g",
79
+ "mean_absorption_fraction": 0.030756473849768082,
80
+ "full_absorption_rate": 0.00960698689956332,
81
+ "num_full_absorption": 11,
82
+ "num_probe_true_positives": 1145,
83
+ "num_split_features": 2
84
+ },
85
+ {
86
+ "first_letter": "h",
87
+ "mean_absorption_fraction": 0.07832209477769293,
88
+ "full_absorption_rate": 0.025120772946859903,
89
+ "num_full_absorption": 26,
90
+ "num_probe_true_positives": 1035,
91
+ "num_split_features": 1
92
+ },
93
+ {
94
+ "first_letter": "i",
95
+ "mean_absorption_fraction": 0.34438719358668407,
96
+ "full_absorption_rate": 0.27106227106227104,
97
+ "num_full_absorption": 444,
98
+ "num_probe_true_positives": 1638,
99
+ "num_split_features": 1
100
+ },
101
+ {
102
+ "first_letter": "j",
103
+ "mean_absorption_fraction": 0.004930270721614366,
104
+ "full_absorption_rate": 0.0048543689320388345,
105
+ "num_full_absorption": 2,
106
+ "num_probe_true_positives": 412,
107
+ "num_split_features": 1
108
+ },
109
+ {
110
+ "first_letter": "k",
111
+ "mean_absorption_fraction": 0.0038580199123774677,
112
+ "full_absorption_rate": 0.002962962962962963,
113
+ "num_full_absorption": 2,
114
+ "num_probe_true_positives": 675,
115
+ "num_split_features": 1
116
+ },
117
+ {
118
+ "first_letter": "l",
119
+ "mean_absorption_fraction": 0.14294435771033875,
120
+ "full_absorption_rate": 0.06255355612682091,
121
+ "num_full_absorption": 73,
122
+ "num_probe_true_positives": 1167,
123
+ "num_split_features": 1
124
+ },
125
+ {
126
+ "first_letter": "m",
127
+ "mean_absorption_fraction": 0.02202077616866355,
128
+ "full_absorption_rate": 0.011532125205930808,
129
+ "num_full_absorption": 21,
130
+ "num_probe_true_positives": 1821,
131
+ "num_split_features": 1
132
+ },
133
+ {
134
+ "first_letter": "n",
135
+ "mean_absorption_fraction": 0.07854167753708283,
136
+ "full_absorption_rate": 0.02644836272040302,
137
+ "num_full_absorption": 21,
138
+ "num_probe_true_positives": 794,
139
+ "num_split_features": 1
140
+ },
141
+ {
142
+ "first_letter": "o",
143
+ "mean_absorption_fraction": 0.22802217761961677,
144
+ "full_absorption_rate": 0.08341143392689784,
145
+ "num_full_absorption": 89,
146
+ "num_probe_true_positives": 1067,
147
+ "num_split_features": 1
148
+ },
149
+ {
150
+ "first_letter": "p",
151
+ "mean_absorption_fraction": 0.30941729908873966,
152
+ "full_absorption_rate": 0.15731814198071867,
153
+ "num_full_absorption": 359,
154
+ "num_probe_true_positives": 2282,
155
+ "num_split_features": 1
156
+ },
157
+ {
158
+ "first_letter": "q",
159
+ "mean_absorption_fraction": 0.005634211490395127,
160
+ "full_absorption_rate": 0.0,
161
+ "num_full_absorption": 0,
162
+ "num_probe_true_positives": 190,
163
+ "num_split_features": 1
164
+ },
165
+ {
166
+ "first_letter": "r",
167
+ "mean_absorption_fraction": 0.17034694561101904,
168
+ "full_absorption_rate": 0.07818930041152264,
169
+ "num_full_absorption": 133,
170
+ "num_probe_true_positives": 1701,
171
+ "num_split_features": 1
172
+ },
173
+ {
174
+ "first_letter": "s",
175
+ "mean_absorption_fraction": 0.17729512097612715,
176
+ "full_absorption_rate": 0.10972568578553615,
177
+ "num_full_absorption": 308,
178
+ "num_probe_true_positives": 2807,
179
+ "num_split_features": 1
180
+ },
181
+ {
182
+ "first_letter": "t",
183
+ "mean_absorption_fraction": 0.03769830907309684,
184
+ "full_absorption_rate": 0.00943952802359882,
185
+ "num_full_absorption": 16,
186
+ "num_probe_true_positives": 1695,
187
+ "num_split_features": 1
188
+ },
189
+ {
190
+ "first_letter": "u",
191
+ "mean_absorption_fraction": 0.750288739748417,
192
+ "full_absorption_rate": 0.7099337748344371,
193
+ "num_full_absorption": 536,
194
+ "num_probe_true_positives": 755,
195
+ "num_split_features": 1
196
+ },
197
+ {
198
+ "first_letter": "v",
199
+ "mean_absorption_fraction": 0.014844840313461553,
200
+ "full_absorption_rate": 0.012674271229404309,
201
+ "num_full_absorption": 10,
202
+ "num_probe_true_positives": 789,
203
+ "num_split_features": 1
204
+ },
205
+ {
206
+ "first_letter": "w",
207
+ "mean_absorption_fraction": 0.1397234584743241,
208
+ "full_absorption_rate": 0.05371900826446281,
209
+ "num_full_absorption": 39,
210
+ "num_probe_true_positives": 726,
211
+ "num_split_features": 1
212
+ },
213
+ {
214
+ "first_letter": "x",
215
+ "mean_absorption_fraction": 0.013887928657641017,
216
+ "full_absorption_rate": 0.008849557522123894,
217
+ "num_full_absorption": 1,
218
+ "num_probe_true_positives": 113,
219
+ "num_split_features": 1
220
+ },
221
+ {
222
+ "first_letter": "y",
223
+ "mean_absorption_fraction": 0.028506459333482823,
224
+ "full_absorption_rate": 0.03409090909090909,
225
+ "num_full_absorption": 6,
226
+ "num_probe_true_positives": 176,
227
+ "num_split_features": 1
228
+ },
229
+ {
230
+ "first_letter": "z",
231
+ "mean_absorption_fraction": 0.11358567424711703,
232
+ "full_absorption_rate": 0.00425531914893617,
233
+ "num_full_absorption": 1,
234
+ "num_probe_true_positives": 235,
235
+ "num_split_features": 1
236
+ }
237
+ ],
238
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
239
+ "sae_lens_id": "custom_sae",
240
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_3",
241
+ "sae_lens_version": "5.4.2",
242
+ "sae_cfg_dict": {
243
+ "model_name": "gemma-2-2b",
244
+ "d_in": 2304,
245
+ "d_sae": 65536,
246
+ "hook_layer": 12,
247
+ "hook_name": "blocks.12.hook_resid_post",
248
+ "context_size": null,
249
+ "hook_head_index": null,
250
+ "architecture": "topk",
251
+ "apply_b_dec_to_input": null,
252
+ "finetuning_scaling_factor": null,
253
+ "activation_fn_str": "",
254
+ "prepend_bos": true,
255
+ "normalize_activations": "none",
256
+ "dtype": "bfloat16",
257
+ "device": "",
258
+ "dataset_path": "",
259
+ "dataset_trust_remote_code": true,
260
+ "seqpos_slice": [
261
+ null
262
+ ],
263
+ "training_tokens": -100000,
264
+ "sae_lens_training_version": null,
265
+ "neuronpedia_id": null
266
+ },
267
+ "eval_result_unstructured": null
268
+ }
eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_4_custom_sae_eval_results.json ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "absorption_first_letter",
3
+ "eval_config": {
4
+ "model_name": "gemma-2-2b",
5
+ "random_seed": 42,
6
+ "f1_jump_threshold": 0.03,
7
+ "max_k_value": 10,
8
+ "prompt_template": "{word} has the first letter:",
9
+ "prompt_token_pos": -6,
10
+ "llm_batch_size": 32,
11
+ "llm_dtype": "bfloat16",
12
+ "k_sparse_probe_l1_decay": 0.01,
13
+ "k_sparse_probe_batch_size": 4096,
14
+ "k_sparse_probe_num_epochs": 50
15
+ },
16
+ "eval_id": "1a8188af-1e8d-4efc-b36d-0955803fc7b4",
17
+ "datetime_epoch_millis": 1740110494886,
18
+ "eval_result_metrics": {
19
+ "mean": {
20
+ "mean_absorption_fraction_score": 0.03942344512163808,
21
+ "mean_full_absorption_score": 0.0140364892361101,
22
+ "mean_num_split_features": 1.1923076923076923,
23
+ "std_dev_absorption_fraction_score": 0.09934591318238874,
24
+ "std_dev_full_absorption_score": 0.05756098788114222,
25
+ "std_dev_num_split_features": 0.4914656259988704
26
+ }
27
+ },
28
+ "eval_result_details": [
29
+ {
30
+ "first_letter": "a",
31
+ "mean_absorption_fraction": 0.028346535429880127,
32
+ "full_absorption_rate": 0.0007974481658692185,
33
+ "num_full_absorption": 2,
34
+ "num_probe_true_positives": 2508,
35
+ "num_split_features": 1
36
+ },
37
+ {
38
+ "first_letter": "b",
39
+ "mean_absorption_fraction": 0.0022608089680371196,
40
+ "full_absorption_rate": 0.0,
41
+ "num_full_absorption": 0,
42
+ "num_probe_true_positives": 1542,
43
+ "num_split_features": 1
44
+ },
45
+ {
46
+ "first_letter": "c",
47
+ "mean_absorption_fraction": 0.0025546027061933625,
48
+ "full_absorption_rate": 0.00071301247771836,
49
+ "num_full_absorption": 2,
50
+ "num_probe_true_positives": 2805,
51
+ "num_split_features": 1
52
+ },
53
+ {
54
+ "first_letter": "d",
55
+ "mean_absorption_fraction": 0.0019355578730626043,
56
+ "full_absorption_rate": 0.0018072289156626507,
57
+ "num_full_absorption": 3,
58
+ "num_probe_true_positives": 1660,
59
+ "num_split_features": 1
60
+ },
61
+ {
62
+ "first_letter": "e",
63
+ "mean_absorption_fraction": 0.07498623830651696,
64
+ "full_absorption_rate": 0.003094059405940594,
65
+ "num_full_absorption": 5,
66
+ "num_probe_true_positives": 1616,
67
+ "num_split_features": 1
68
+ },
69
+ {
70
+ "first_letter": "f",
71
+ "mean_absorption_fraction": 0.0050345150551234065,
72
+ "full_absorption_rate": 0.0,
73
+ "num_full_absorption": 0,
74
+ "num_probe_true_positives": 1238,
75
+ "num_split_features": 1
76
+ },
77
+ {
78
+ "first_letter": "g",
79
+ "mean_absorption_fraction": 0.010460701096032003,
80
+ "full_absorption_rate": 0.0008733624454148472,
81
+ "num_full_absorption": 1,
82
+ "num_probe_true_positives": 1145,
83
+ "num_split_features": 2
84
+ },
85
+ {
86
+ "first_letter": "h",
87
+ "mean_absorption_fraction": 0.01170635135150821,
88
+ "full_absorption_rate": 0.001932367149758454,
89
+ "num_full_absorption": 2,
90
+ "num_probe_true_positives": 1035,
91
+ "num_split_features": 1
92
+ },
93
+ {
94
+ "first_letter": "i",
95
+ "mean_absorption_fraction": 0.1089880317607272,
96
+ "full_absorption_rate": 0.017704517704517704,
97
+ "num_full_absorption": 29,
98
+ "num_probe_true_positives": 1638,
99
+ "num_split_features": 1
100
+ },
101
+ {
102
+ "first_letter": "j",
103
+ "mean_absorption_fraction": 0.0003029988879400231,
104
+ "full_absorption_rate": 0.0,
105
+ "num_full_absorption": 0,
106
+ "num_probe_true_positives": 412,
107
+ "num_split_features": 1
108
+ },
109
+ {
110
+ "first_letter": "k",
111
+ "mean_absorption_fraction": 0.0029912743470123966,
112
+ "full_absorption_rate": 0.0,
113
+ "num_full_absorption": 0,
114
+ "num_probe_true_positives": 675,
115
+ "num_split_features": 1
116
+ },
117
+ {
118
+ "first_letter": "l",
119
+ "mean_absorption_fraction": 0.024211372038585182,
120
+ "full_absorption_rate": 0.000856898029134533,
121
+ "num_full_absorption": 1,
122
+ "num_probe_true_positives": 1167,
123
+ "num_split_features": 1
124
+ },
125
+ {
126
+ "first_letter": "m",
127
+ "mean_absorption_fraction": 0.006949418441528633,
128
+ "full_absorption_rate": 0.006589785831960461,
129
+ "num_full_absorption": 12,
130
+ "num_probe_true_positives": 1821,
131
+ "num_split_features": 1
132
+ },
133
+ {
134
+ "first_letter": "n",
135
+ "mean_absorption_fraction": 0.025325264607950822,
136
+ "full_absorption_rate": 0.0025188916876574307,
137
+ "num_full_absorption": 2,
138
+ "num_probe_true_positives": 794,
139
+ "num_split_features": 1
140
+ },
141
+ {
142
+ "first_letter": "o",
143
+ "mean_absorption_fraction": 0.06964801703096594,
144
+ "full_absorption_rate": 0.011246485473289597,
145
+ "num_full_absorption": 12,
146
+ "num_probe_true_positives": 1067,
147
+ "num_split_features": 1
148
+ },
149
+ {
150
+ "first_letter": "p",
151
+ "mean_absorption_fraction": 0.0016421080000302358,
152
+ "full_absorption_rate": 0.0,
153
+ "num_full_absorption": 0,
154
+ "num_probe_true_positives": 2282,
155
+ "num_split_features": 1
156
+ },
157
+ {
158
+ "first_letter": "q",
159
+ "mean_absorption_fraction": 0.002114695349444831,
160
+ "full_absorption_rate": 0.0,
161
+ "num_full_absorption": 0,
162
+ "num_probe_true_positives": 190,
163
+ "num_split_features": 1
164
+ },
165
+ {
166
+ "first_letter": "r",
167
+ "mean_absorption_fraction": 0.0031820979727350375,
168
+ "full_absorption_rate": 0.0,
169
+ "num_full_absorption": 0,
170
+ "num_probe_true_positives": 1701,
171
+ "num_split_features": 1
172
+ },
173
+ {
174
+ "first_letter": "s",
175
+ "mean_absorption_fraction": 0.004449262686803511,
176
+ "full_absorption_rate": 0.0003562522265764161,
177
+ "num_full_absorption": 1,
178
+ "num_probe_true_positives": 2807,
179
+ "num_split_features": 1
180
+ },
181
+ {
182
+ "first_letter": "t",
183
+ "mean_absorption_fraction": 0.012846054983407984,
184
+ "full_absorption_rate": 0.0005899705014749262,
185
+ "num_full_absorption": 1,
186
+ "num_probe_true_positives": 1695,
187
+ "num_split_features": 1
188
+ },
189
+ {
190
+ "first_letter": "u",
191
+ "mean_absorption_fraction": 0.04887027767059182,
192
+ "full_absorption_rate": 0.009271523178807948,
193
+ "num_full_absorption": 7,
194
+ "num_probe_true_positives": 755,
195
+ "num_split_features": 2
196
+ },
197
+ {
198
+ "first_letter": "v",
199
+ "mean_absorption_fraction": 0.0008588606945117642,
200
+ "full_absorption_rate": 0.0,
201
+ "num_full_absorption": 0,
202
+ "num_probe_true_positives": 789,
203
+ "num_split_features": 1
204
+ },
205
+ {
206
+ "first_letter": "w",
207
+ "mean_absorption_fraction": 0.04016210362798525,
208
+ "full_absorption_rate": 0.006887052341597796,
209
+ "num_full_absorption": 5,
210
+ "num_probe_true_positives": 726,
211
+ "num_split_features": 1
212
+ },
213
+ {
214
+ "first_letter": "x",
215
+ "mean_absorption_fraction": 0.010852614455368956,
216
+ "full_absorption_rate": 0.0,
217
+ "num_full_absorption": 0,
218
+ "num_probe_true_positives": 113,
219
+ "num_split_features": 3
220
+ },
221
+ {
222
+ "first_letter": "y",
223
+ "mean_absorption_fraction": 0.5077652724254798,
224
+ "full_absorption_rate": 0.29545454545454547,
225
+ "num_full_absorption": 52,
226
+ "num_probe_true_positives": 176,
227
+ "num_split_features": 1
228
+ },
229
+ {
230
+ "first_letter": "z",
231
+ "mean_absorption_fraction": 0.01656453739516708,
232
+ "full_absorption_rate": 0.00425531914893617,
233
+ "num_full_absorption": 1,
234
+ "num_probe_true_positives": 235,
235
+ "num_split_features": 2
236
+ }
237
+ ],
238
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
239
+ "sae_lens_id": "custom_sae",
240
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_4",
241
+ "sae_lens_version": "5.4.2",
242
+ "sae_cfg_dict": {
243
+ "model_name": "gemma-2-2b",
244
+ "d_in": 2304,
245
+ "d_sae": 65536,
246
+ "hook_layer": 12,
247
+ "hook_name": "blocks.12.hook_resid_post",
248
+ "context_size": null,
249
+ "hook_head_index": null,
250
+ "architecture": "topk",
251
+ "apply_b_dec_to_input": null,
252
+ "finetuning_scaling_factor": null,
253
+ "activation_fn_str": "",
254
+ "prepend_bos": true,
255
+ "normalize_activations": "none",
256
+ "dtype": "bfloat16",
257
+ "device": "",
258
+ "dataset_path": "",
259
+ "dataset_trust_remote_code": true,
260
+ "seqpos_slice": [
261
+ null
262
+ ],
263
+ "training_tokens": -100000,
264
+ "sae_lens_training_version": null,
265
+ "neuronpedia_id": null
266
+ },
267
+ "eval_result_unstructured": null
268
+ }
eval_results_finetunes/absorption/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_5_custom_sae_eval_results.json ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "absorption_first_letter",
3
+ "eval_config": {
4
+ "model_name": "gemma-2-2b",
5
+ "random_seed": 42,
6
+ "f1_jump_threshold": 0.03,
7
+ "max_k_value": 10,
8
+ "prompt_template": "{word} has the first letter:",
9
+ "prompt_token_pos": -6,
10
+ "llm_batch_size": 32,
11
+ "llm_dtype": "bfloat16",
12
+ "k_sparse_probe_l1_decay": 0.01,
13
+ "k_sparse_probe_batch_size": 4096,
14
+ "k_sparse_probe_num_epochs": 50
15
+ },
16
+ "eval_id": "b39a2bbf-6894-4756-837a-7757325a495d",
17
+ "datetime_epoch_millis": 1740109628891,
18
+ "eval_result_metrics": {
19
+ "mean": {
20
+ "mean_absorption_fraction_score": 0.06555661212074526,
21
+ "mean_full_absorption_score": 0.028694289243238084,
22
+ "mean_num_split_features": 1.1153846153846154,
23
+ "std_dev_absorption_fraction_score": 0.17874387812611048,
24
+ "std_dev_full_absorption_score": 0.1282236268349608,
25
+ "std_dev_num_split_features": 0.3258125936084211
26
+ }
27
+ },
28
+ "eval_result_details": [
29
+ {
30
+ "first_letter": "a",
31
+ "mean_absorption_fraction": 0.009520576342205286,
32
+ "full_absorption_rate": 0.0,
33
+ "num_full_absorption": 0,
34
+ "num_probe_true_positives": 2508,
35
+ "num_split_features": 2
36
+ },
37
+ {
38
+ "first_letter": "b",
39
+ "mean_absorption_fraction": 0.0025275048111106477,
40
+ "full_absorption_rate": 0.0006485084306095979,
41
+ "num_full_absorption": 1,
42
+ "num_probe_true_positives": 1542,
43
+ "num_split_features": 1
44
+ },
45
+ {
46
+ "first_letter": "c",
47
+ "mean_absorption_fraction": 0.0,
48
+ "full_absorption_rate": 0.0,
49
+ "num_full_absorption": 0,
50
+ "num_probe_true_positives": 2805,
51
+ "num_split_features": 1
52
+ },
53
+ {
54
+ "first_letter": "d",
55
+ "mean_absorption_fraction": 0.0019839170132145943,
56
+ "full_absorption_rate": 0.0006024096385542169,
57
+ "num_full_absorption": 1,
58
+ "num_probe_true_positives": 1660,
59
+ "num_split_features": 1
60
+ },
61
+ {
62
+ "first_letter": "e",
63
+ "mean_absorption_fraction": 0.04418599759645858,
64
+ "full_absorption_rate": 0.0012376237623762376,
65
+ "num_full_absorption": 2,
66
+ "num_probe_true_positives": 1616,
67
+ "num_split_features": 1
68
+ },
69
+ {
70
+ "first_letter": "f",
71
+ "mean_absorption_fraction": 0.003045558610690488,
72
+ "full_absorption_rate": 0.0,
73
+ "num_full_absorption": 0,
74
+ "num_probe_true_positives": 1238,
75
+ "num_split_features": 1
76
+ },
77
+ {
78
+ "first_letter": "g",
79
+ "mean_absorption_fraction": 0.003541220671457806,
80
+ "full_absorption_rate": 0.0,
81
+ "num_full_absorption": 0,
82
+ "num_probe_true_positives": 1145,
83
+ "num_split_features": 2
84
+ },
85
+ {
86
+ "first_letter": "h",
87
+ "mean_absorption_fraction": 0.004566561548265196,
88
+ "full_absorption_rate": 0.0,
89
+ "num_full_absorption": 0,
90
+ "num_probe_true_positives": 1035,
91
+ "num_split_features": 1
92
+ },
93
+ {
94
+ "first_letter": "i",
95
+ "mean_absorption_fraction": 0.03134665350983779,
96
+ "full_absorption_rate": 0.0018315018315018315,
97
+ "num_full_absorption": 3,
98
+ "num_probe_true_positives": 1638,
99
+ "num_split_features": 1
100
+ },
101
+ {
102
+ "first_letter": "j",
103
+ "mean_absorption_fraction": 0.0007627951868082768,
104
+ "full_absorption_rate": 0.0,
105
+ "num_full_absorption": 0,
106
+ "num_probe_true_positives": 412,
107
+ "num_split_features": 1
108
+ },
109
+ {
110
+ "first_letter": "k",
111
+ "mean_absorption_fraction": 0.01549266614777519,
112
+ "full_absorption_rate": 0.0,
113
+ "num_full_absorption": 0,
114
+ "num_probe_true_positives": 675,
115
+ "num_split_features": 1
116
+ },
117
+ {
118
+ "first_letter": "l",
119
+ "mean_absorption_fraction": 0.0005719143487719158,
120
+ "full_absorption_rate": 0.0,
121
+ "num_full_absorption": 0,
122
+ "num_probe_true_positives": 1167,
123
+ "num_split_features": 1
124
+ },
125
+ {
126
+ "first_letter": "m",
127
+ "mean_absorption_fraction": 0.008494294204347433,
128
+ "full_absorption_rate": 0.006589785831960461,
129
+ "num_full_absorption": 12,
130
+ "num_probe_true_positives": 1821,
131
+ "num_split_features": 1
132
+ },
133
+ {
134
+ "first_letter": "n",
135
+ "mean_absorption_fraction": 0.05672687383143593,
136
+ "full_absorption_rate": 0.0,
137
+ "num_full_absorption": 0,
138
+ "num_probe_true_positives": 794,
139
+ "num_split_features": 1
140
+ },
141
+ {
142
+ "first_letter": "o",
143
+ "mean_absorption_fraction": 0.05122861661179144,
144
+ "full_absorption_rate": 0.0028116213683223993,
145
+ "num_full_absorption": 3,
146
+ "num_probe_true_positives": 1067,
147
+ "num_split_features": 1
148
+ },
149
+ {
150
+ "first_letter": "p",
151
+ "mean_absorption_fraction": 0.0019615119799893904,
152
+ "full_absorption_rate": 0.0,
153
+ "num_full_absorption": 0,
154
+ "num_probe_true_positives": 2282,
155
+ "num_split_features": 1
156
+ },
157
+ {
158
+ "first_letter": "q",
159
+ "mean_absorption_fraction": 0.39355620461923463,
160
+ "full_absorption_rate": 0.07368421052631578,
161
+ "num_full_absorption": 14,
162
+ "num_probe_true_positives": 190,
163
+ "num_split_features": 1
164
+ },
165
+ {
166
+ "first_letter": "r",
167
+ "mean_absorption_fraction": 0.0,
168
+ "full_absorption_rate": 0.0,
169
+ "num_full_absorption": 0,
170
+ "num_probe_true_positives": 1701,
171
+ "num_split_features": 1
172
+ },
173
+ {
174
+ "first_letter": "s",
175
+ "mean_absorption_fraction": 0.00032212707935616407,
176
+ "full_absorption_rate": 0.0,
177
+ "num_full_absorption": 0,
178
+ "num_probe_true_positives": 2807,
179
+ "num_split_features": 1
180
+ },
181
+ {
182
+ "first_letter": "t",
183
+ "mean_absorption_fraction": 0.005862875355780282,
184
+ "full_absorption_rate": 0.0,
185
+ "num_full_absorption": 0,
186
+ "num_probe_true_positives": 1695,
187
+ "num_split_features": 1
188
+ },
189
+ {
190
+ "first_letter": "u",
191
+ "mean_absorption_fraction": 0.04422480500553416,
192
+ "full_absorption_rate": 0.0013245033112582781,
193
+ "num_full_absorption": 1,
194
+ "num_probe_true_positives": 755,
195
+ "num_split_features": 1
196
+ },
197
+ {
198
+ "first_letter": "v",
199
+ "mean_absorption_fraction": 0.004069543132561543,
200
+ "full_absorption_rate": 0.0025348542458808617,
201
+ "num_full_absorption": 2,
202
+ "num_probe_true_positives": 789,
203
+ "num_split_features": 1
204
+ },
205
+ {
206
+ "first_letter": "w",
207
+ "mean_absorption_fraction": 0.019203869068399586,
208
+ "full_absorption_rate": 0.0013774104683195593,
209
+ "num_full_absorption": 1,
210
+ "num_probe_true_positives": 726,
211
+ "num_split_features": 1
212
+ },
213
+ {
214
+ "first_letter": "x",
215
+ "mean_absorption_fraction": 0.06325101886841776,
216
+ "full_absorption_rate": 0.0,
217
+ "num_full_absorption": 0,
218
+ "num_probe_true_positives": 113,
219
+ "num_split_features": 1
220
+ },
221
+ {
222
+ "first_letter": "y",
223
+ "mean_absorption_fraction": 0.8563286531653201,
224
+ "full_absorption_rate": 0.6534090909090909,
225
+ "num_full_absorption": 115,
226
+ "num_probe_true_positives": 176,
227
+ "num_split_features": 2
228
+ },
229
+ {
230
+ "first_letter": "z",
231
+ "mean_absorption_fraction": 0.08169615643061263,
232
+ "full_absorption_rate": 0.0,
233
+ "num_full_absorption": 0,
234
+ "num_probe_true_positives": 235,
235
+ "num_split_features": 1
236
+ }
237
+ ],
238
+ "sae_bench_commit_hash": "60579ed19a6281956621283bada7be2e76a7b583",
239
+ "sae_lens_id": "custom_sae",
240
+ "sae_lens_release_id": "kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_5",
241
+ "sae_lens_version": "5.4.2",
242
+ "sae_cfg_dict": {
243
+ "model_name": "gemma-2-2b",
244
+ "d_in": 2304,
245
+ "d_sae": 65536,
246
+ "hook_layer": 12,
247
+ "hook_name": "blocks.12.hook_resid_post",
248
+ "context_size": null,
249
+ "hook_head_index": null,
250
+ "architecture": "topk",
251
+ "apply_b_dec_to_input": null,
252
+ "finetuning_scaling_factor": null,
253
+ "activation_fn_str": "",
254
+ "prepend_bos": true,
255
+ "normalize_activations": "none",
256
+ "dtype": "bfloat16",
257
+ "device": "",
258
+ "dataset_path": "",
259
+ "dataset_trust_remote_code": true,
260
+ "seqpos_slice": [
261
+ null
262
+ ],
263
+ "training_tokens": -100000,
264
+ "sae_lens_training_version": null,
265
+ "neuronpedia_id": null
266
+ },
267
+ "eval_result_unstructured": null
268
+ }
eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_0_custom_sae_eval_results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0d19fc459e1f9c514d0097fae277f0831a61abd03d9e726ff6f4b7a652dea9f
3
+ size 27623781
eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_1_custom_sae_eval_results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fca9a6b43a6677b507b976053fb9e656720e0b39cae2f6626dda6c0882bb58a
3
+ size 27478722
eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_2_custom_sae_eval_results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1887153b4660a98b73df19ca3d6929e0a628dbd019daa0767626cc8a3174e668
3
+ size 27326392
eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_3_custom_sae_eval_results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4639d5faecfe47d93388cd85219ec503ec42c2c3efce501ac7cb4c96ba201587
3
+ size 26972592
eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_4_custom_sae_eval_results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:733a1150854b8898c75ecaba2481408d4d03eedf0d9a176c1a04af2761eb92ce
3
+ size 26685823
eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_5_custom_sae_eval_results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9320040cfecad57406c4aee36f6ee3100cab3f8a7fce767da1f159d90109005e
3
+ size 26334864
eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_0_custom_sae_eval_results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df3613f337e54b63e0feec7c52b072ff8d02ca4e8b2cefccb7ebc7ebb35aeada
3
+ size 26846162
eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_1_custom_sae_eval_results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ba2ffbe45d5a2a16ec7ba867974934e88a66209b036561ece8edc07b56580ec
3
+ size 26732924
eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_2_custom_sae_eval_results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72a689e8599a850a9a8beb0f9c6414ff2403e613b7ccff4c8928b2468e9279e0
3
+ size 26438355
eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_3_custom_sae_eval_results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b04aad0de34d397bd245a06c6ba7c5eb821485360e9cae47bd1ae3b10fbdae5
3
+ size 26285690
eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_4_custom_sae_eval_results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:544cdad19a6c89a8374f44af5f98f1dfb2418b5f9350bc9c02f11d822ce680e3
3
+ size 26155343
eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_standard_new_width-2pow16_date-0107_resid_post_layer_12_trainer_5_custom_sae_eval_results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5304e4005dc5d04c891d20903db02b54c689b76921aa9db3aec1991fa83844f
3
+ size 25984532
eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_0_custom_sae_eval_results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3082c208b6c18c843fc43ee0221547e1a97273f9ec78d78b9290f2030b44453
3
+ size 26444963
eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_1_custom_sae_eval_results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f389b4cbe12e5e559fa5126e6e3ab171d1c979fbf358400600784e750e52c82
3
+ size 26356253
eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_2_custom_sae_eval_results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c276830a7613665611e50bb694a0ea2389c34ce5d52b443daa5d03e60f169b4
3
+ size 26610835
eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_3_custom_sae_eval_results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae1c49b0452d85f33ec36c07e2649801741441c4fa30fc49e1cfa60e6a219ae9
3
+ size 26712914
eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_4_custom_sae_eval_results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:667104ce70a5ec0ef03908c41894b7d592f51fabb11feea7f1119195a2840a4c
3
+ size 26824711
eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_top_k_width-2pow14_date-0107_resid_post_layer_12_trainer_5_custom_sae_eval_results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b49951403d7d345b98f4104f56dbf932ed2bb5285ef62da752faaf5f28e32ff0
3
+ size 26393676
eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_0_custom_sae_eval_results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33c4b6c6bc4a00b7cab365f25798d5059f5b1a8eb73395ea3c8e5ad8d9ae3204
3
+ size 25068488
eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_1_custom_sae_eval_results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c39909390312c51b98e9b9b812343319a32b3ac403dd5f9e39752df62370d828
3
+ size 25597832
eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_2_custom_sae_eval_results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b895a1eb166c2e94c2d10b32032a7b51dac853c542244972590c6ec9889adf5
3
+ size 25403761
eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_3_custom_sae_eval_results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f7f89e595e738eadb4099bcc96a3408f36eb04346a57b53ff0ab8db448785d7
3
+ size 25537327
eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_4_custom_sae_eval_results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2463cff82576d2ff2fe1bb866256d1b6f77401d54ee95611881fa9399cb6bba
3
+ size 25536425
eval_results_finetunes/autointerp/kl_finetunes_gemma-2-2b_top_k_width-2pow16_date-0107_resid_post_layer_12_trainer_5_custom_sae_eval_results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6c13849849e6e2277b075568d296d43d091a547356ff229777c9a4d8228986a
3
+ size 25181253
eval_results_finetunes/core/kl_finetunes_gemma-2-2b_standard_new_width-2pow14_date-0107_resid_post_layer_12_trainer_0_custom_sae_eval_results.json ADDED
The diff for this file is too large to render. See raw diff