razmars committed
Commit 4a6928f · 1 Parent(s): 53d4ba3
configuration_super_linear.py CHANGED
@@ -1,21 +1,14 @@
 from typing import Optional, Tuple
 import torch, torch.nn as nn, torch.nn.functional as F
+from configuration_super_linear_base import SuperLinearConfigBase
 
-from transformers import (
-    PretrainedConfig,
-    PreTrainedModel,
-    GenerationMixin,
-    AutoConfig,
-    AutoModelForCausalLM,
-)
-from transformers.modeling_outputs import CausalLMOutputWithCrossAttentions
 
 # 1) --------------------------------------------------------------------------
 # CONFIG
 # -----------------------------------------------------------------------------
 
 
-class SuperLinearConfig(PretrainedConfig):
+class SuperLinearConfig(SuperLinearConfigBase):
     """
     Configuration for the SuperLinear MoE time–series foundation model.
     Only *model_type* must be unique inside transformers; the rest mirrors
configuration_super_linear_base.py ADDED
@@ -0,0 +1,84 @@
+from typing import Optional, Tuple
+import torch, torch.nn as nn, torch.nn.functional as F
+
+from transformers import (
+    PretrainedConfig,
+    PreTrainedModel,
+    GenerationMixin,
+    AutoConfig,
+    AutoModelForCausalLM,
+)
+from transformers.modeling_outputs import CausalLMOutputWithCrossAttentions
+
+# 1) --------------------------------------------------------------------------
+# CONFIG
+# -----------------------------------------------------------------------------
+
+
+class SuperLinearConfigBase(PretrainedConfig):
+    """
+    Configuration for the SuperLinear MoE time–series foundation model.
+    Only *model_type* must be unique inside transformers; the rest mirrors
+    the __init__ arguments of your original Config object.
+    """
+
+    model_type = "super_linear"
+
+    def __init__(
+        self,
+        seq_len=512,
+        pred_len=96,
+        inf_pred_len=96,
+        max_horizon=96,
+        moe_n_experts=12,
+        top_k_experts=5,
+        moe=1,
+        freq_experts='mean_naive_1/4_1/6_1/7_1/8_1/12_1/14_1/16_1/21_1/24_1/28_1/30_1/32_1/36_1/42_1/48_1/52_1/56_1/60_1/72_1/84_1/90_1/96_1/120_1/144_1/168_1/180_1/224_1/252_1/288_1/336_1/365_1/504_1/672_1/1008_1/1440_1/2016_1/3600',
+        auto_regressive=1,
+        d_model=128,
+        dropout=0.0,
+        fft_len=5000,
+        freeze_experts=1,
+        layer_type="RLinear",
+        linear_checkpoints_dir="checkpoints5",
+        linear_checkpoints_path="/cs/azencot_fsas/MoE/",
+        load_linear=0,
+        load_weights=0,
+        misc_moe=10,
+        mlp_gating=0,
+        moe_norm=1,
+        model_type="super_linear",
+        moe_temp=1,
+        noisy_gating_std=0.1,
+        noisy_gating_std_decay=1,
+        torch_dtype="float32",
+        transformers_version="4.40.1",
+        use_fft=1,
+        **kwargs,  # any extra CLI args
+    ):
+        self.seq_len = seq_len
+        self.moe = moe
+        self.pred_len = pred_len
+        self.inf_pred_len = inf_pred_len
+        self.max_horizon = max_horizon
+        self.auto_regressive = auto_regressive
+        self.moe_n_experts = moe_n_experts
+        self.top_k_experts = top_k_experts
+        self.freq_experts = freq_experts
+        self.freeze_experts = freeze_experts
+        self.layer_type = layer_type
+        self.linear_checkpoints_path = linear_checkpoints_path
+        self.linear_checkpoints_dir = linear_checkpoints_dir
+        self.load_linear = load_linear
+        self.load_weights = load_weights
+        self.misc_moe = misc_moe
+        self.noisy_gating_std = noisy_gating_std
+        self.noisy_gating_std_decay = noisy_gating_std_decay
+        self.d_model = d_model
+        self.mlp_gating = mlp_gating
+        self.moe_norm = moe_norm
+        self.moe_temp = moe_temp
+        self.use_fft = use_fft
+        self.fft_len = fft_len
+        self.dropout = dropout
+        super().__init__(**kwargs)
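With every hyperparameter now defined once in SuperLinearConfigBase, the config round-trips through the standard PretrainedConfig serialization API. A minimal usage sketch, not part of the commit; the output directory name is a placeholder:

    from configuration_super_linear_base import SuperLinearConfigBase

    # Override a couple of defaults; everything else falls back to the values above.
    cfg = SuperLinearConfigBase(seq_len=1024, top_k_experts=3)
    cfg.save_pretrained("./super_linear_cfg")            # writes config.json
    reloaded = SuperLinearConfigBase.from_pretrained("./super_linear_cfg")
    assert reloaded.seq_len == 1024 and reloaded.top_k_experts == 3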
configuration_super_linear_fs.py CHANGED
@@ -1,13 +1,7 @@
 from typing import Optional, Tuple
 import torch, torch.nn as nn, torch.nn.functional as F
 
-from transformers import (
-    PretrainedConfig,
-    PreTrainedModel,
-    GenerationMixin,
-    AutoConfig,
-    AutoModelForCausalLM,
-)
+from configuration_super_linear_base import SuperLinearConfigBase
 from transformers.modeling_outputs import CausalLMOutputWithCrossAttentions
 
 # 1) --------------------------------------------------------------------------
@@ -15,7 +9,7 @@ from transformers.modeling_outputs import CausalLMOutputWithCrossAttentions
 # -----------------------------------------------------------------------------
 
 
-class SuperLinearConfigFS(PretrainedConfig):
+class SuperLinearConfigFS(SuperLinearConfigBase):
     """
     Configuration for the SuperLinear MoE time–series foundation model.
     Only *model_type* must be unique inside transformers; the rest mirrors
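Both configuration variants now derive from the same class, so a single check against SuperLinearConfigBase covers either of them, which is what the config_class change in modeling_super_linear.py below relies on. A quick sketch of that invariant, assuming both subclasses keep defaults for every constructor argument (as the base does) and using the flat import style of the two config files above:

    from configuration_super_linear import SuperLinearConfig
    from configuration_super_linear_fs import SuperLinearConfigFS
    from configuration_super_linear_base import SuperLinearConfigBase

    # Either subclass is an instance of the shared base (and of PretrainedConfig).
    for cls in (SuperLinearConfig, SuperLinearConfigFS):
        assert issubclass(cls, SuperLinearConfigBase)
        assert isinstance(cls(), SuperLinearConfigBase)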
modeling_super_linear.py CHANGED
@@ -6,6 +6,7 @@ from transformers import (PreTrainedModel,GenerationMix
 from transformers.modeling_outputs import CausalLMOutputWithCrossAttentions
 from .configuration_super_linear import SuperLinearConfig
 from .configuration_super_linear_fs import SuperLinearConfigFS
+from .configuration_super_linear_base import SuperLinearConfigBase
 from typing import Tuple, Union
 
 
@@ -547,7 +548,7 @@ class superLinear(nn.Module):
 "-------------------------------------------------------------------------------------------------------------------"
 class SuperLinearForCausalLM(PreTrainedModel, GenerationMixin):
 
-    config_class = AutoConfig
+    config_class = SuperLinearConfigBase
 
     def __init__(self, config: Union[SuperLinearConfig, SuperLinearConfigFS]):
        super().__init__(config)
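Pointing config_class at the shared base also changes what from_pretrained does when no config object is passed in: the checkpoint's config.json is parsed by SuperLinearConfigBase rather than routed through AutoConfig, so neither config subclass has to be registered with the Auto classes for that step. A hedged sketch; the module import and checkpoint path are placeholders and assume the repo's files are importable:

    # Assumption: the repo's files are importable; the path below is a placeholder.
    from modeling_super_linear import SuperLinearForCausalLM

    # Any local directory holding config.json plus the model weights.
    model = SuperLinearForCausalLM.from_pretrained("path/to/checkpoint")

    # The attached config was deserialized by SuperLinearConfigBase.
    print(type(model.config).__name__)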