upload SAEs and autointerp
Browse files
 - ef2/autointerp_layer15_res_matryoshka_k256_ef2.csv +0 -0
 - ef2/config.yaml +43 -0
 - ef2/layer15_res_matryoshka_k256_ef2.pt +3 -0
 - ef4/autointerp_layer15_res_matryoshka_k256_ef4.csv +0 -0
 - ef4/config.yaml +43 -0
 - ef4/layer15_res_matryoshka_k256_ef4.pt +3 -0
 - ef8/config.yaml +43 -0
 - ef8/layer15_res_matryoshka_k256_ef8.pt +3 -0
 
    	
        ef2/autointerp_layer15_res_matryoshka_k256_ef2.csv
    ADDED
    
    | 
         The diff for this file is too large to render. See raw diff.
     | 
| 
         | 
    	
        ef2/config.yaml
    ADDED
    
    | 
         @@ -0,0 +1,43 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            sae:
         
     | 
| 2 | 
         
            +
              type: matryoshka_batch_topk
         
     | 
| 3 | 
         
            +
              activation_dim: 4096
         
     | 
| 4 | 
         
            +
              expansion_factor: 2
         
     | 
| 5 | 
         
            +
              layer_id: 15
         
     | 
| 6 | 
         
            +
              hookpoint: residual
         
     | 
| 7 | 
         
            +
              k: 256
         
     | 
| 8 | 
         
            +
              group_fractions:
         
     | 
| 9 | 
         
            +
              - 0.5
         
     | 
| 10 | 
         
            +
              - 0.25
         
     | 
| 11 | 
         
            +
              - 0.125
         
     | 
| 12 | 
         
            +
              - 0.0625
         
     | 
| 13 | 
         
            +
              - 0.0625
         
     | 
| 14 | 
         
            +
              group_weights: null
         
     | 
| 15 | 
         
            +
            trainer:
         
     | 
| 16 | 
         
            +
              epochs: 1
         
     | 
| 17 | 
         
            +
              lr: null
         
     | 
| 18 | 
         
            +
              l1_penalty: 0.1
         
     | 
| 19 | 
         
            +
              warmup_steps: 10
         
     | 
| 20 | 
         
            +
              sparsity_warmup_steps: 0
         
     | 
| 21 | 
         
            +
              decay_start: null
         
     | 
| 22 | 
         
            +
              resample_steps: null
         
     | 
| 23 | 
         
            +
              seed: 42
         
     | 
| 24 | 
         
            +
              device: cuda:0
         
     | 
| 25 | 
         
            +
              log_every_n_steps: 1000
         
     | 
| 26 | 
         
            +
              logger_type: mlflow
         
     | 
| 27 | 
         
            +
              validate: false
         
     | 
| 28 | 
         
            +
              auxk_alpha: 0.03125
         
     | 
| 29 | 
         
            +
              threshold_beta: 0.999
         
     | 
| 30 | 
         
            +
              threshold_start_step: 1000
         
     | 
| 31 | 
         
            +
              threshold_dead_features: 100000
         
     | 
| 32 | 
         
            +
            data:
         
     | 
| 33 | 
         
            +
              dataset_names:
         
     | 
| 34 | 
         
            +
              - mimic_findings_temporal
         
     | 
| 35 | 
         
            +
              activations_type: per_token
         
     | 
| 36 | 
         
            +
              num_workers: 18
         
     | 
| 37 | 
         
            +
              batch_size: 8192
         
     | 
| 38 | 
         
            +
              val_samples: 512000
         
     | 
| 39 | 
         
            +
              train_samples: null
         
     | 
| 40 | 
         
            +
              norm_act: true
         
     | 
| 41 | 
         
            +
              norm_to_sqrt_act_dim: false
         
     | 
| 42 | 
         
            +
              input_unit_norm: false
         
     | 
| 43 | 
         
            +
              filter_dict: null
         
     | 
    	
        ef2/layer15_res_matryoshka_k256_ef2.pt
    ADDED
    
    | 
         @@ -0,0 +1,3 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:ed20976e0043d310f219473b2f20e98c84a4281a5d418a4ab822501fb79e605e
         
     | 
| 3 | 
         
            +
            size 268487386
         
     | 
    	
        ef4/autointerp_layer15_res_matryoshka_k256_ef4.csv
    ADDED
    
    | 
         The diff for this file is too large to render. See raw diff.
     | 
| 
         | 
    	
        ef4/config.yaml
    ADDED
    
    | 
         @@ -0,0 +1,43 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            sae:
         
     | 
| 2 | 
         
            +
              type: matryoshka_batch_topk
         
     | 
| 3 | 
         
            +
              activation_dim: 4096
         
     | 
| 4 | 
         
            +
              expansion_factor: 4
         
     | 
| 5 | 
         
            +
              layer_id: 15
         
     | 
| 6 | 
         
            +
              hookpoint: residual
         
     | 
| 7 | 
         
            +
              k: 256
         
     | 
| 8 | 
         
            +
              group_fractions:
         
     | 
| 9 | 
         
            +
              - 0.5
         
     | 
| 10 | 
         
            +
              - 0.25
         
     | 
| 11 | 
         
            +
              - 0.125
         
     | 
| 12 | 
         
            +
              - 0.0625
         
     | 
| 13 | 
         
            +
              - 0.0625
         
     | 
| 14 | 
         
            +
              group_weights: null
         
     | 
| 15 | 
         
            +
            trainer:
         
     | 
| 16 | 
         
            +
              epochs: 1
         
     | 
| 17 | 
         
            +
              lr: null
         
     | 
| 18 | 
         
            +
              l1_penalty: 0.1
         
     | 
| 19 | 
         
            +
              warmup_steps: 10
         
     | 
| 20 | 
         
            +
              sparsity_warmup_steps: 0
         
     | 
| 21 | 
         
            +
              decay_start: null
         
     | 
| 22 | 
         
            +
              resample_steps: null
         
     | 
| 23 | 
         
            +
              seed: 42
         
     | 
| 24 | 
         
            +
              device: cuda:0
         
     | 
| 25 | 
         
            +
              log_every_n_steps: 1000
         
     | 
| 26 | 
         
            +
              logger_type: mlflow
         
     | 
| 27 | 
         
            +
              validate: false
         
     | 
| 28 | 
         
            +
              auxk_alpha: 0.03125
         
     | 
| 29 | 
         
            +
              threshold_beta: 0.999
         
     | 
| 30 | 
         
            +
              threshold_start_step: 1000
         
     | 
| 31 | 
         
            +
              threshold_dead_features: 100000
         
     | 
| 32 | 
         
            +
            data:
         
     | 
| 33 | 
         
            +
              dataset_names:
         
     | 
| 34 | 
         
            +
              - mimic_findings_temporal
         
     | 
| 35 | 
         
            +
              activations_type: per_token
         
     | 
| 36 | 
         
            +
              num_workers: 18
         
     | 
| 37 | 
         
            +
              batch_size: 8192
         
     | 
| 38 | 
         
            +
              val_samples: 512000
         
     | 
| 39 | 
         
            +
              train_samples: null
         
     | 
| 40 | 
         
            +
              norm_act: true
         
     | 
| 41 | 
         
            +
              norm_to_sqrt_act_dim: false
         
     | 
| 42 | 
         
            +
              input_unit_norm: false
         
     | 
| 43 | 
         
            +
              filter_dict: null
         
     | 
    	
        ef4/layer15_res_matryoshka_k256_ef4.pt
    ADDED
    
    | 
         @@ -0,0 +1,3 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:58bde75f2707637c25bd760007647181875068f0bdffe4752fdeeb71a3034ad5
         
     | 
| 3 | 
         
            +
            size 536955610
         
     | 
    	
        ef8/config.yaml
    ADDED
    
    | 
         @@ -0,0 +1,43 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            sae:
         
     | 
| 2 | 
         
            +
              type: matryoshka_batch_topk
         
     | 
| 3 | 
         
            +
              activation_dim: 4096
         
     | 
| 4 | 
         
            +
              expansion_factor: 8
         
     | 
| 5 | 
         
            +
              layer_id: 15
         
     | 
| 6 | 
         
            +
              hookpoint: residual
         
     | 
| 7 | 
         
            +
              k: 256
         
     | 
| 8 | 
         
            +
              group_fractions:
         
     | 
| 9 | 
         
            +
              - 0.5
         
     | 
| 10 | 
         
            +
              - 0.25
         
     | 
| 11 | 
         
            +
              - 0.125
         
     | 
| 12 | 
         
            +
              - 0.0625
         
     | 
| 13 | 
         
            +
              - 0.0625
         
     | 
| 14 | 
         
            +
              group_weights: null
         
     | 
| 15 | 
         
            +
            trainer:
         
     | 
| 16 | 
         
            +
              epochs: 1
         
     | 
| 17 | 
         
            +
              lr: null
         
     | 
| 18 | 
         
            +
              l1_penalty: 0.1
         
     | 
| 19 | 
         
            +
              warmup_steps: 10
         
     | 
| 20 | 
         
            +
              sparsity_warmup_steps: 0
         
     | 
| 21 | 
         
            +
              decay_start: null
         
     | 
| 22 | 
         
            +
              resample_steps: null
         
     | 
| 23 | 
         
            +
              seed: 42
         
     | 
| 24 | 
         
            +
              device: cuda:0
         
     | 
| 25 | 
         
            +
              log_every_n_steps: 1000
         
     | 
| 26 | 
         
            +
              logger_type: mlflow
         
     | 
| 27 | 
         
            +
              validate: false
         
     | 
| 28 | 
         
            +
              auxk_alpha: 0.03125
         
     | 
| 29 | 
         
            +
              threshold_beta: 0.999
         
     | 
| 30 | 
         
            +
              threshold_start_step: 1000
         
     | 
| 31 | 
         
            +
              threshold_dead_features: 100000
         
     | 
| 32 | 
         
            +
            data:
         
     | 
| 33 | 
         
            +
              dataset_names:
         
     | 
| 34 | 
         
            +
              - mimic_findings_temporal
         
     | 
| 35 | 
         
            +
              activations_type: per_token
         
     | 
| 36 | 
         
            +
              num_workers: 18
         
     | 
| 37 | 
         
            +
              batch_size: 8192
         
     | 
| 38 | 
         
            +
              val_samples: 512000
         
     | 
| 39 | 
         
            +
              train_samples: null
         
     | 
| 40 | 
         
            +
              norm_act: true
         
     | 
| 41 | 
         
            +
              norm_to_sqrt_act_dim: false
         
     | 
| 42 | 
         
            +
              input_unit_norm: false
         
     | 
| 43 | 
         
            +
              filter_dict: null
         
     | 
    	
        ef8/layer15_res_matryoshka_k256_ef8.pt
    ADDED
    
    | 
         @@ -0,0 +1,3 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:81a4f7a3b97b5c31ee1e1b53e791fd50a77cbdfed44e274abbfba3138eed4405
         
     | 
| 3 | 
         
            +
            size 1073892058
         
     |