{
  "_name_or_path": "super_linear",
  "architectures": [
    "SuperLinearForCausalLM"
  ],
  "auto_map": {
    "AutoConfig": "configuration_super_linear.SuperLinearConfig",
    "AutoModelForCausalLM": "modeling_super_linear.SuperLinearForCausalLM"
  },
  "auto_regressive": 1,
  "con": 0,
  "d_model": 512,
  "dropout": 0.0,
  "fft_len": 10000,
  "freeze_experts": 1,
  "freq_experts": "mean_naive_1/6_1/7_1/8_1/12_1/14_1/16_1/21_1/24_1/28_1/30_1/32_1/36_1/42_1/48_1/52_1/56_1/60_1/72_1/84_1/96_1/120_1/144_1/168_1/180_1/224_1/252_1/288_1/336_1/365_1/504_1/672_1/1008_1/1440_1/2016_1/3600",
  "inf_pred_len": 96,
  "ker_len": 50,
  "layer_type": "RLinear",
  "linear_checkpoints_dir": "checkpoints5",
  "linear_checkpoints_path": "/cs/azencot_fsas/MoE/",
  "load_linear": 0,
  "manual_moe": 0,
  "max_horizon": 96,
  "misc_moe": 1,
  "mlp_gating": 1,
  "model_type": "super_linear",
  "moe": 1,
  "moe_n_experts": 8,
  "moe_temp": 1,
  "noisy_gating_std": 0.1,
  "noisy_gating_std_decay": 1,
  "pred_len": 96,
  "seq_len": 512,
  "top_k_experts": 5,
  "torch_dtype": "float32",
  "transformers_version": "4.40.1",
  "use_fft": 1
}