{"sae": {"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false}, "batch_size": 2, "grad_acc_steps": 4, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.16.mlp", "layers.17.mlp", "layers.18.mlp", "layers.19.mlp", "layers.20.mlp", "layers.21.mlp", "layers.22.mlp", "layers.23.mlp", "layers.24.mlp", "layers.25.mlp", "layers.26.mlp", "layers.27.mlp"], "init_seeds": [0], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": true, "save_every": 1000, "log_to_wandb": true, "run_name": "sae-R1-1.5B-65k-part-2", "wandb_log_frequency": 1, "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "EleutherAI/fineweb-edu-dedup-10b", "subset": null, "split": "train", "ctx_len": 2048, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": null, "resume": false, "text_column": "text", "finetune": null, "shuffle_seed": 42, "data_preprocessing_num_proc": 48}