razmars committed
Commit 7624ca4 · verified · 1 parent: 1a32550

Update configuration_super_linear.py

Files changed (1): configuration_super_linear.py (+29 -0)
configuration_super_linear.py CHANGED
@@ -26,6 +26,35 @@ class SuperLinearConfig(PretrainedConfig):
 
     def __init__(
         self,
+        seq_len=512,
+        pred_len=96,
+        inf_pred_len=96,
+        max_horizon=96,
+        moe_n_experts=8,
+        top_k_experts=5,
+        moe=1,
+        freq_experts='mean_naive_1/6_1/7_1/8_1/12_1/14_1/16_1/21_1/24_1/28_1/30_1/32_1/36_1/42_1/48_1/52_1/56_1/60_1/72_1/84_1/96_1/120_1/144_1/168_1/180_1/224_1/252_1/288_1/336_1/365_1/504_1/672_1/1008_1/1440_1/2016_1/3600',
+        auto_regressive=1,
+        con=0,
+        d_model=128,
+        dropout=0.0,
+        fft_len=10000,
+        freeze_experts=1,
+        ker_len=50,
+        layer_type="RLinear",
+        linear_checkpoints_dir="checkpoints5",
+        linear_checkpoints_path="/cs/azencot_fsas/MoE/",
+        load_linear=1,
+        manual_moe=0,
+        misc_moe=1,
+        mlp_gating=1,
+        model_type="super_linear",
+        moe_temp=1,
+        noisy_gating_std=0.1,
+        noisy_gating_std_decay=1,
+        torch_dtype="float32",
+        transformers_version="4.40.1",
+        use_fft=1,
         **kwargs,  # any extra CLI args
     ):
         self.seq_len = seq_len
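
With these defaults in place, SuperLinearConfig can be constructed without an external args object. Below is a minimal usage sketch, not part of the commit: it assumes configuration_super_linear.py is on the import path and that __init__ assigns each keyword argument to an attribute of the same name (only self.seq_len = seq_len is visible in this hunk, so the remaining assignments are assumed).

# Sketch only: assumes configuration_super_linear.py is importable and that
# __init__ stores each keyword argument as an attribute of the same name
# (only self.seq_len = seq_len appears in this hunk; the rest are assumed).
from configuration_super_linear import SuperLinearConfig

# Build a config with the committed defaults.
cfg = SuperLinearConfig()
print(cfg.seq_len)  # 512, per the new default

# Override selected hyperparameters; anything not passed keeps its new default.
custom = SuperLinearConfig(
    seq_len=1024,
    pred_len=192,
    moe_n_experts=16,
    top_k_experts=4,
)
print(custom.pred_len, custom.d_model)  # expected: 192 128 (assumed attributes)

One practical consequence of moving these values into explicit __init__ defaults is that, as long as each one is set as an attribute, PretrainedConfig serialization will include them in the saved config.json, so downstream loads via from_pretrained pick them up without extra CLI arguments.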