Upload config
Browse files- config.json +61 -0
 
    	
        config.json
    ADDED
    
    | 
         @@ -0,0 +1,61 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            {
         
     | 
| 2 | 
         
            +
                "model_class_name": "HookedViT",
         
     | 
| 3 | 
         
            +
                "model_name": "open-clip:laion/CLIP-ViT-B-32-DataComp.XL-s13B-b90K",
         
     | 
| 4 | 
         
            +
                "hook_point_layer": 4,
         
     | 
| 5 | 
         
            +
                "layer_subtype": "hook_mlp_out",
         
     | 
| 6 | 
         
            +
                "hook_point_head_index": null,
         
     | 
| 7 | 
         
            +
                "context_size": 50,
         
     | 
| 8 | 
         
            +
                "use_cached_activations": false,
         
     | 
| 9 | 
         
            +
                "use_patches_only": false,
         
     | 
| 10 | 
         
            +
                "cached_activations_path": "activations/_network_scratch_s_sonia.joseph_datasets_kaggle_datasets/open-clip:laion_CLIP-ViT-B-32-DataComp.XL-s13B-b90K/blocks.9.hook_mlp_out",
         
     | 
| 11 | 
         
            +
                "d_in": 768,
         
     | 
| 12 | 
         
            +
                "activation_fn_str": "relu",
         
     | 
| 13 | 
         
            +
                "activation_fn_kwargs": {},
         
     | 
| 14 | 
         
            +
                "cls_token_only": false,
         
     | 
| 15 | 
         
            +
                "max_grad_norm": 1.0,
         
     | 
| 16 | 
         
            +
                "initialization_method": "encoder_transpose_decoder",
         
     | 
| 17 | 
         
            +
                "normalize_activations": null,
         
     | 
| 18 | 
         
            +
                "n_batches_in_buffer": 20,
         
     | 
| 19 | 
         
            +
                "store_batch_size": 32,
         
     | 
| 20 | 
         
            +
                "num_workers": 16,
         
     | 
| 21 | 
         
            +
                "num_epochs": 10,
         
     | 
| 22 | 
         
            +
                "total_training_images": 13000000,
         
     | 
| 23 | 
         
            +
                "total_training_tokens": 650000000,
         
     | 
| 24 | 
         
            +
                "image_size": 224,
         
     | 
| 25 | 
         
            +
                "device": {
         
     | 
| 26 | 
         
            +
                    "__type__": "torch.device",
         
     | 
| 27 | 
         
            +
                    "value": "cuda"
         
     | 
| 28 | 
         
            +
                },
         
     | 
| 29 | 
         
            +
                "seed": 42,
         
     | 
| 30 | 
         
            +
                "dtype": {
         
     | 
| 31 | 
         
            +
                    "__type__": "torch.dtype",
         
     | 
| 32 | 
         
            +
                    "value": "torch.float32"
         
     | 
| 33 | 
         
            +
                },
         
     | 
| 34 | 
         
            +
                "architecture": "standard",
         
     | 
| 35 | 
         
            +
                "verbose": false,
         
     | 
| 36 | 
         
            +
                "b_dec_init_method": "geometric_median",
         
     | 
| 37 | 
         
            +
                "expansion_factor": 64,
         
     | 
| 38 | 
         
            +
                "from_pretrained_path": null,
         
     | 
| 39 | 
         
            +
                "d_sae": 49152,
         
     | 
| 40 | 
         
            +
                "l1_coefficient": 1e-05,
         
     | 
| 41 | 
         
            +
                "lp_norm": 1,
         
     | 
| 42 | 
         
            +
                "lr": 0.0004,
         
     | 
| 43 | 
         
            +
                "lr_scheduler_name": "cosineannealingwarmup",
         
     | 
| 44 | 
         
            +
                "lr_warm_up_steps": 200,
         
     | 
| 45 | 
         
            +
                "train_batch_size": 4096,
         
     | 
| 46 | 
         
            +
                "dataset_name": "imagenet1k",
         
     | 
| 47 | 
         
            +
                "dataset_path": "/network/scratch/s/sonia.joseph/datasets/kaggle_datasets",
         
     | 
| 48 | 
         
            +
                "dataset_train_path": "/network/scratch/s/sonia.joseph/datasets/kaggle_datasets/ILSVRC/Data/CLS-LOC/train",
         
     | 
| 49 | 
         
            +
                "dataset_val_path": "/network/scratch/s/sonia.joseph/datasets/kaggle_datasets/ILSVRC/Data/CLS-LOC/val",
         
     | 
| 50 | 
         
            +
                "use_ghost_grads": true,
         
     | 
| 51 | 
         
            +
                "feature_sampling_window": 1000,
         
     | 
| 52 | 
         
            +
                "dead_feature_window": 5000,
         
     | 
| 53 | 
         
            +
                "dead_feature_threshold": 1e-08,
         
     | 
| 54 | 
         
            +
                "log_to_wandb": true,
         
     | 
| 55 | 
         
            +
                "wandb_project": "clip_b_mlp_out_sae_hyperparam_sweep",
         
     | 
| 56 | 
         
            +
                "wandb_entity": null,
         
     | 
| 57 | 
         
            +
                "wandb_log_frequency": 100,
         
     | 
| 58 | 
         
            +
                "n_validation_runs": 10,
         
     | 
| 59 | 
         
            +
                "n_checkpoints": 10,
         
     | 
| 60 | 
         
            +
                "checkpoint_path": "/network/scratch/s/sonia.joseph/checkpoints/clip-b/13b6f055-clip_b_mlp_out_sae_hyperparam_sweep"
         
     | 
| 61 | 
         
            +
            }
         
     |