SuperLinear / config.json
{
  "_name_or_path": "super_linear",
  "architectures": [
    "SuperLinearForCausalLM"
  ],
  "auto_map": {
    "AutoConfig": "configuration_super_linear.SuperLinearConfig",
    "AutoModelForCausalLM": "modeling_super_linear.SuperLinearForCausalLM"
  },
  "auto_regressive": 1,
  "con": 0,
  "d_model": 512,
  "dropout": 0.0,
  "fft_len": 10000,
  "freeze_experts": 1,
  "freq_experts": "mean_naive_1/6_1/7_1/8_1/12_1/14_1/16_1/21_1/24_1/28_1/30_1/32_1/36_1/42_1/48_1/52_1/56_1/60_1/72_1/84_1/96_1/120_1/144_1/168_1/180_1/224_1/252_1/288_1/336_1/365_1/504_1/672_1/1008_1/1440_1/2016_1/3600",
  "inf_pred_len": 96,
  "ker_len": 50,
  "layer_type": "RLinear",
  "linear_checkpoints_dir": "checkpoints5",
  "linear_checkpoints_path": "/cs/azencot_fsas/MoE/",
  "load_linear": 0,
  "manual_moe": 0,
  "max_horizon": 96,
  "misc_moe": 1,
  "mlp_gating": 1,
  "model_type": "super_linear",
  "moe": 1,
  "moe_n_experts": 8,
  "moe_temp": 1,
  "noisy_gating_std": 0.1,
  "noisy_gating_std_decay": 1,
  "pred_len": 96,
  "seq_len": 512,
  "top_k_experts": 5,
  "torch_dtype": "float32",
  "transformers_version": "4.40.1",
  "use_fft": 1
}
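
Because "auto_map" routes AutoConfig and AutoModelForCausalLM to the custom configuration_super_linear.py and modeling_super_linear.py files shipped with the repository, loading this checkpoint through transformers requires trust_remote_code=True. Below is a minimal loading sketch; the repo id "razmars/SuperLinear" is an assumption taken from the page header, so substitute the actual model path.

# Minimal loading sketch for the SuperLinear config/model via transformers.
# NOTE: the repo id below is an assumption; replace with the real model path.
import torch
from transformers import AutoConfig, AutoModelForCausalLM

repo_id = "razmars/SuperLinear"  # hypothetical repo id

# auto_map dispatches to the custom SuperLinearConfig / SuperLinearForCausalLM
# classes in the repo, so remote code must be trusted.
config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    torch_dtype=torch.float32,  # matches "torch_dtype": "float32" above
    trust_remote_code=True,
)

# The config advertises a 512-step input context (seq_len) mapped to a
# 96-step forecast horizon (pred_len), with an 8-expert MoE using top-5 routing.
print(config.seq_len, config.pred_len, config.moe_n_experts, config.top_k_experts)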