razmars committed
Commit ca53591 · verified · 1 Parent(s): 099fc62

Update configuration_super_linear.py

Files changed (1):
  1. configuration_super_linear.py +25 -5
configuration_super_linear.py CHANGED
@@ -30,11 +30,31 @@ class SuperLinearConfig(PretrainedConfig):
         pred_len=96,
         inf_pred_len=96,
         max_horizon=96,
-        auto_regressive=1,
         moe_n_experts=8,
         top_k_experts=3,
         moe=1,
         freq_experts='mean_naive_1/6_1/7_1/8_1/12_1/14_1/16_1/21_1/24_1/28_1/30_1/32_1/36_1/42_1/48_1/52_1/56_1/60_1/72_1/84_1/96_1/120_1/144_1/168_1/180_1/224_1/252_1/288_1/336_1/365_1/504_1/672_1/1008_1/1440_1/2016_1/3600',
+        auto_regressive=1,
+        con=0,
+        d_model=128,
+        dropout=0.0,
+        fft_len=10000,
+        freeze_experts=1,
+        ker_len=50,
+        layer_type="RLinear",
+        linear_checkpoints_dir="checkpoints5",
+        linear_checkpoints_path="/cs/azencot_fsas/MoE/",
+        load_linear=1,
+        manual_moe=0,
+        misc_moe=1,
+        mlp_gating=1,
+        model_type="super_linear",
+        moe_temp=1,
+        noisy_gating_std=0.1,
+        noisy_gating_std_decay=1,
+        torch_dtype="float32",
+        transformers_version="4.40.1",
+        use_fft=1,
         **kwargs, # any extra CLI args
     ):
         self.seq_len = seq_len
@@ -46,10 +66,10 @@ class SuperLinearConfig(PretrainedConfig):
         self.moe_n_experts = moe_n_experts
         self.top_k_experts = top_k_experts
         self.freq_experts = freq_experts
-        self.freeze_experts = 1
-        self.layer_type = "RLinear"
-        self.linear_checkpoints_path = '/cs/azencot_fsas/MoE/'
-        self.linear_checkpoints_dir = "checkpoints5"
+        self.freeze_experts = freeze_experts
+        self.layer_type = layer_type
+        self.linear_checkpoints_path = linear_checkpoints_path
+        self.linear_checkpoints_dir = linear_checkpoints_dir
         self.load_linear = load_linear
         self.manual_moe = manual_moe
         self.misc_moe = misc_moe
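
With this change, settings that used to be hard-coded inside __init__ (expert freezing, layer type, checkpoint locations, gating options) become ordinary constructor keyword arguments with defaults, and the second hunk assigns each attribute from its argument instead of a literal. A minimal sketch of the effect, assuming configuration_super_linear.py is on the import path; the override value is illustrative, not from the commit:

    from configuration_super_linear import SuperLinearConfig

    # Defaults now live in the signature, so any of them can be
    # overridden per instance instead of being pinned in __init__.
    config = SuperLinearConfig(freeze_experts=0)  # illustrative override

    print(config.layer_type)      # "RLinear" (signature default)
    print(config.freeze_experts)  # 0 (the override now takes effect)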
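
The freq_experts default packs the expert roster into a single underscore-delimited string: two named experts (mean, naive) followed by 35 reciprocal periods such as 1/24 or 1/3600. A minimal decoding sketch; the helper name and the float conversion are assumptions about the format, not code from this repo:

    from fractions import Fraction

    # Default copied verbatim from the diff above.
    FREQ_EXPERTS = (
        "mean_naive_1/6_1/7_1/8_1/12_1/14_1/16_1/21_1/24_1/28_1/30_1/32"
        "_1/36_1/42_1/48_1/52_1/56_1/60_1/72_1/84_1/96_1/120_1/144_1/168"
        "_1/180_1/224_1/252_1/288_1/336_1/365_1/504_1/672_1/1008_1/1440"
        "_1/2016_1/3600"
    )

    def parse_freq_experts(spec):
        """Hypothetical decoder: split on '_', keep name tokens,
        and turn 'p/q' tokens into float frequencies."""
        names, freqs = [], []
        for token in spec.split("_"):
            if "/" in token:
                freqs.append(float(Fraction(token)))  # "1/24" -> 0.0416...
            else:
                names.append(token)
        return names, freqs

    names, freqs = parse_freq_experts(FREQ_EXPERTS)
    print(names)       # ['mean', 'naive']
    print(len(freqs))  # 35 frequency experts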