farukakgul
committed on
Commit
·
b8db39b
1
Parent(s):
4ccb8ec
add sae trained from scratch at layer 12
Browse files
- config.json +1 -0
- model.layers.12/cfg.json +1 -0
- model.layers.12/sae.safetensors +3 -0
- optimizer_0.pt +3 -0
- rank_0_state.pt +3 -0
- state.pt +3 -0
config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"sae": {"activation": "topk", "expansion_factor": 64, "normalize_decoder": true, "num_latents": 196608, "k": 32, "multi_topk": false, "skip_connection": false, "transcode": false}, "batch_size": 4, "grad_acc_steps": 1, "micro_acc_steps": 1, "optimizer": "signum", "lr": null, "lr_warmup_steps": 1000, "k_decay_steps": 0, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["model.layers.12"], "init_seeds": [42], "layers": [], "layer_stride": 1, "distribute_modules": false, "save_every": 1000, "log_to_wandb": true, "run_name": "Training_SAE_from_scratch_sae-OctoThinker-3B-Long-Base-196k_with_still_base__base_2025_09_10_17_40_20", "output_dir": "/home/omer/shangshang/project/reasoning/reasoning-sae/ckpts/saes/sae-OctoThinker-3B-Long-Base-196k/OctoThinker-3B-Long-Base_base/trained_from_scratch_still", "wandb_log_frequency": 1}
|
model.layers.12/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"activation": "topk", "expansion_factor": 64, "normalize_decoder": true, "num_latents": 196608, "k": 32, "multi_topk": false, "skip_connection": false, "transcode": false, "d_in": 3072}
|
model.layers.12/sae.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bb1d39bc893dc0d0b87e9c75679d215aad6a37d9869ee0d0ceca007867d9b910
|
| 3 |
+
size 4832637272
|
optimizer_0.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1b8b80c394647b31e9214a15f2cb9edea15c143c549b6b8730bfea406b3ad10c
|
| 3 |
+
size 4832639132
|
rank_0_state.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:014eb54d4d94149cc2594b650fc8742bbf965c585b407072d38ba6c90532eeca
|
| 3 |
+
size 1574133
|
state.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:649b8683ce2031876a05a051b3feb11823f3a7afbdac43df079231f60c63a5f1
|
| 3 |
+
size 856
|