{
  "_name_or_path": "super_linear",
  "architectures": [
    "SuperLinearForCausalLM"
  ],
  "auto_map": {
    "AutoConfig": "configuration_super_linear.SuperLinearConfig",
    "AutoModelForCausalLM": "modeling_super_linear.SuperLinearForCausalLM"
  },
  "auto_regressive": 1,
  "con": 0,
  "d_model": 512,
  "dropout": 0.0,
  "fft_len": 10000,
  "freeze_experts": 1,
  "freq_experts": "mean_naive_1/6_1/7_1/8_1/12_1/14_1/16_1/21_1/24_1/28_1/30_1/32_1/36_1/42_1/48_1/52_1/56_1/60_1/72_1/84_1/96_1/120_1/144_1/168_1/180_1/224_1/252_1/288_1/336_1/365_1/504_1/672_1/1008_1/1440_1/2016_1/3600",
  "inf_pred_len": 96,
  "ker_len": 50,
  "layer_type": "RLinear",
  "linear_checkpoints_dir": "checkpoints5",
  "linear_checkpoints_path": "/cs/azencot_fsas/MoE/",
  "load_linear": 0,
  "manual_moe": 0,
  "max_horizon": 96,
  "misc_moe": 1,
  "mlp_gating": 1,
  "model_type": "super_linear",
  "moe": 1,
  "moe_n_experts": 8,
  "moe_temp": 1,
  "noisy_gating_std": 0.1,
  "noisy_gating_std_decay": 1,
  "pred_len": 96,
  "seq_len": 512,
  "top_k_experts": 5,
  "torch_dtype": "float32",
  "transformers_version": "4.40.1",
  "use_fft": 1
}