configuration_super_linear.py
CHANGED

@@ -1,21 +1,14 @@
 from typing import Optional, Tuple
 import torch, torch.nn as nn, torch.nn.functional as F
+from configuration_super_linear_base import SuperLinearConfigBase
 
-from transformers import (
-    PretrainedConfig,
-    PreTrainedModel,
-    GenerationMixin,
-    AutoConfig,
-    AutoModelForCausalLM,
-)
-from transformers.modeling_outputs import CausalLMOutputWithCrossAttentions
 
 # 1) --------------------------------------------------------------------------
 # CONFIG
 # -----------------------------------------------------------------------------
 
 
-class SuperLinearConfig(PretrainedConfig):
+class SuperLinearConfig(SuperLinearConfigBase):
     """
     Configuration for the SuperLinear MoE time–series foundation model.
     Only *model_type* must be unique inside transformers; the rest mirrors
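The hunk above swaps the direct `PretrainedConfig` parent for a shared base class. A minimal sketch of what this buys, assuming `SuperLinearConfig` does not override `__init__` (only the class header is visible in this hunk): the subclass inherits the full parameter set from the base, so the config variants stay in sync automatically.

    # Minimal sketch, not part of the commit: the subclass now inherits every
    # default from SuperLinearConfigBase (assumes it does not redefine __init__).
    from configuration_super_linear_base import SuperLinearConfigBase
    from configuration_super_linear import SuperLinearConfig

    cfg = SuperLinearConfig()                      # defaults come from the base __init__
    assert isinstance(cfg, SuperLinearConfigBase)  # holds, which enables a shared config_class
    print(cfg.seq_len, cfg.moe_n_experts)          # 512 12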
configuration_super_linear_base.py
ADDED

@@ -0,0 +1,84 @@
+from typing import Optional, Tuple
+import torch, torch.nn as nn, torch.nn.functional as F
+
+from transformers import (
+    PretrainedConfig,
+    PreTrainedModel,
+    GenerationMixin,
+    AutoConfig,
+    AutoModelForCausalLM,
+)
+from transformers.modeling_outputs import CausalLMOutputWithCrossAttentions
+
+# 1) --------------------------------------------------------------------------
+# CONFIG
+# -----------------------------------------------------------------------------
+
+
+class SuperLinearConfigBase(PretrainedConfig):
+    """
+    Configuration for the SuperLinear MoE time–series foundation model.
+    Only *model_type* must be unique inside transformers; the rest mirrors
+    the __init__ arguments of your original Config object.
+    """
+
+    model_type = "super_linear"
+
+    def __init__(
+        self,
+        seq_len=512,
+        pred_len=96,
+        inf_pred_len=96,
+        max_horizon=96,
+        moe_n_experts=12,
+        top_k_experts=5,
+        moe=1,
+        freq_experts='mean_naive_1/4_1/6_1/7_1/8_1/12_1/14_1/16_1/21_1/24_1/28_1/30_1/32_1/36_1/42_1/48_1/52_1/56_1/60_1/72_1/84_1/90_1/96_1/120_1/144_1/168_1/180_1/224_1/252_1/288_1/336_1/365_1/504_1/672_1/1008_1/1440_1/2016_1/3600',
+        auto_regressive=1,
+        d_model=128,
+        dropout=0.0,
+        fft_len=5000,
+        freeze_experts=1,
+        layer_type="RLinear",
+        linear_checkpoints_dir="checkpoints5",
+        linear_checkpoints_path="/cs/azencot_fsas/MoE/",
+        load_linear=0,
+        load_weights=0,
+        misc_moe=10,
+        mlp_gating=0,
+        moe_norm=1,
+        model_type="super_linear",
+        moe_temp=1,
+        noisy_gating_std=0.1,
+        noisy_gating_std_decay=1,
+        torch_dtype="float32",
+        transformers_version="4.40.1",
+        use_fft=1,
+        **kwargs,  # any extra CLI args
+    ):
+        self.seq_len = seq_len
+        self.moe = moe
+        self.pred_len = pred_len
+        self.inf_pred_len = inf_pred_len
+        self.max_horizon = max_horizon
+        self.auto_regressive = auto_regressive
+        self.moe_n_experts = moe_n_experts
+        self.top_k_experts = top_k_experts
+        self.freq_experts = freq_experts
+        self.freeze_experts = freeze_experts
+        self.layer_type = layer_type
+        self.linear_checkpoints_path = linear_checkpoints_path
+        self.linear_checkpoints_dir = linear_checkpoints_dir
+        self.load_linear = load_linear
+        self.load_weights = load_weights
+        self.misc_moe = misc_moe
+        self.noisy_gating_std = noisy_gating_std
+        self.noisy_gating_std_decay = noisy_gating_std_decay
+        self.d_model = d_model
+        self.mlp_gating = mlp_gating
+        self.moe_norm = moe_norm
+        self.moe_temp = moe_temp
+        self.use_fft = use_fft
+        self.fft_len = fft_len
+        self.dropout = dropout
+        super().__init__(**kwargs)
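The new base class holds the entire attribute set. Note that `model_type`, `torch_dtype`, and `transformers_version` are accepted as named `__init__` parameters but never assigned or forwarded, so they are effectively swallowed; the serialized `model_type` comes from the class attribute instead. A minimal round-trip sketch, assuming the flat import layout shown in the diff (the `./sl_cfg` path is a placeholder):

    # Round-trip sketch (assumed usage, not part of the commit); save_pretrained
    # and from_pretrained are inherited from PretrainedConfig.
    from configuration_super_linear_base import SuperLinearConfigBase

    cfg = SuperLinearConfigBase(seq_len=1024, top_k_experts=3)
    cfg.save_pretrained("./sl_cfg")        # writes config.json with model_type "super_linear"
    cfg2 = SuperLinearConfigBase.from_pretrained("./sl_cfg")
    assert cfg2.seq_len == 1024 and cfg2.top_k_experts == 3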
configuration_super_linear_fs.py
CHANGED

@@ -1,13 +1,7 @@
 from typing import Optional, Tuple
 import torch, torch.nn as nn, torch.nn.functional as F
 
-from transformers import (
-    PretrainedConfig,
-    PreTrainedModel,
-    GenerationMixin,
-    AutoConfig,
-    AutoModelForCausalLM,
-)
+from configuration_super_linear_base import SuperLinearConfigBase
 from transformers.modeling_outputs import CausalLMOutputWithCrossAttentions
 
 # 1) --------------------------------------------------------------------------

@@ -15,7 +9,7 @@ from transformers.modeling_outputs import CausalLMOutputWithCrossAttentions
 # -----------------------------------------------------------------------------
 
 
-class SuperLinearConfigFS(PretrainedConfig):
+class SuperLinearConfigFS(SuperLinearConfigBase):
     """
     Configuration for the SuperLinear MoE time–series foundation model.
     Only *model_type* must be unique inside transformers; the rest mirrors
modeling_super_linear.py
CHANGED

@@ -6,6 +6,7 @@ from transformers import (PreTrainedModel,GenerationMixin
 from transformers.modeling_outputs import CausalLMOutputWithCrossAttentions
 from .configuration_super_linear import SuperLinearConfig
 from .configuration_super_linear_fs import SuperLinearConfigFS
+from .configuration_super_linear_base import SuperLinearConfigBase
 from typing import Tuple, Union
 
 

@@ -547,7 +548,7 @@ class superLinear(nn.Module):
 "-------------------------------------------------------------------------------------------------------------------"
 class SuperLinearForCausalLM(PreTrainedModel, GenerationMixin):
 
-    config_class = SuperLinearConfig
+    config_class = SuperLinearConfigBase
 
     def __init__(self, config: Union[SuperLinearConfig, SuperLinearConfigFS]):
         super().__init__(config)
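Pointing `config_class` at the shared base is what makes the `Union` type hint workable: both variants are now instances of `SuperLinearConfigBase`, so the same model class accepts either config, and `from_pretrained` can resolve a config through the base when none is passed explicitly. A hypothetical construction sketch; the `super_linear` package name is a placeholder matching the relative imports above, not something the commit defines:

    # Hypothetical sketch, not part of the commit: one model class, two config variants.
    from super_linear.modeling_super_linear import SuperLinearForCausalLM
    from super_linear.configuration_super_linear import SuperLinearConfig
    from super_linear.configuration_super_linear_fs import SuperLinearConfigFS

    model_a = SuperLinearForCausalLM(SuperLinearConfig())    # standard variant
    model_b = SuperLinearForCausalLM(SuperLinearConfigFS())  # "FS" variant, same modeling code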