farukakgul committed on
Commit
b8db39b
·
1 Parent(s): 4ccb8ec

add sae trained from scratch at layer 12

Browse files
config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sae": {"activation": "topk", "expansion_factor": 64, "normalize_decoder": true, "num_latents": 196608, "k": 32, "multi_topk": false, "skip_connection": false, "transcode": false}, "batch_size": 4, "grad_acc_steps": 1, "micro_acc_steps": 1, "optimizer": "signum", "lr": null, "lr_warmup_steps": 1000, "k_decay_steps": 0, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["model.layers.12"], "init_seeds": [42], "layers": [], "layer_stride": 1, "distribute_modules": false, "save_every": 1000, "log_to_wandb": true, "run_name": "Training_SAE_from_scratch_sae-OctoThinker-3B-Long-Base-196k_with_still_base__base_2025_09_10_17_40_20", "output_dir": "/home/omer/shangshang/project/reasoning/reasoning-sae/ckpts/saes/sae-OctoThinker-3B-Long-Base-196k/OctoThinker-3B-Long-Base_base/trained_from_scratch_still", "wandb_log_frequency": 1}
model.layers.12/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"activation": "topk", "expansion_factor": 64, "normalize_decoder": true, "num_latents": 196608, "k": 32, "multi_topk": false, "skip_connection": false, "transcode": false, "d_in": 3072}
model.layers.12/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb1d39bc893dc0d0b87e9c75679d215aad6a37d9869ee0d0ceca007867d9b910
3
+ size 4832637272
optimizer_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b8b80c394647b31e9214a15f2cb9edea15c143c549b6b8730bfea406b3ad10c
3
+ size 4832639132
rank_0_state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:014eb54d4d94149cc2594b650fc8742bbf965c585b407072d38ba6c90532eeca
3
+ size 1574133
state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:649b8683ce2031876a05a051b3feb11823f3a7afbdac43df079231f60c63a5f1
3
+ size 856