Update modeling_super_linear.py
modeling_super_linear.py (+25 -1)
@@ -597,6 +597,29 @@ class SuperLinearForCausalLM(PreTrainedModel, GenerationMixin):
         return y


+    def fourier_downsample_dim1(self, x, target_len: int):
+        """Resample along dim 1 to `target_len` by truncating the rfft spectrum."""
+
+        # 1. Forward real FFT along dim 1
+        X = torch.fft.rfft(x, dim=1)  # shape (B, L//2 + 1, C)
+
+        # 2. Keep only the low-frequency bins needed for the shorter series
+        keep = target_len // 2 + 1    # rfft size for the target grid
+        X_crop = X[:, :keep]          # ideal brick-wall low-pass along dim 1
+
+        # 3. Inverse FFT onto the shorter grid
+        y = torch.fft.irfft(X_crop, n=target_len, dim=1)
+
+        # 4. Renormalise amplitudes:
+        #    irfft divides by `target_len`, whereas the forward rfft ran over
+        #    length `L`, so multiplying by (target_len / L) would make DC and
+        #    low-frequency amplitudes match the input.
+        # y *= target_len / x.size(1)
+
+        return y
+
+
+
+
     def forward(self,
                 inputs_embeds: torch.Tensor = None,
                 attention_mask: Optional[torch.Tensor] = None,
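The new helper resamples by spectral truncation: keeping only the first `target_len // 2 + 1` rfft bins is an ideal brick-wall low-pass, and the inverse transform then lands directly on the coarser grid. A minimal standalone sketch of the same idea (my own, not repo code; shapes `(B, L, C)` assumed to match the backbone output, and the renormalisation that the commit leaves commented out is applied here so surviving amplitudes match the input):

import torch

def fourier_downsample(x: torch.Tensor, target_len: int) -> torch.Tensor:
    # Crop the spectrum to the bins representable on the target grid,
    # invert onto that grid, and rescale by target_len / L.
    X = torch.fft.rfft(x, dim=1)
    y = torch.fft.irfft(X[:, : target_len // 2 + 1], n=target_len, dim=1)
    return y * (target_len / x.size(1))

n = torch.arange(336)
slow = torch.sin(2 * torch.pi * 3 * n / 336)    # bin 3: below the target Nyquist, kept
fast = torch.sin(2 * torch.pi * 100 * n / 336)  # bin 100: >= 49, filtered out
x = (slow + fast).reshape(1, -1, 1)
y = fourier_downsample(x, 96)
ref = torch.sin(2 * torch.pi * 3 * torch.arange(96) / 96)
print(y.shape, (y.squeeze() - ref).abs().max())  # (1, 96, 1); residual ~1e-6

The fast component vanishes and the slow one survives at its original amplitude, which is exactly the brick-wall behaviour the in-code comments describe.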
@@ -620,11 +643,12 @@ class SuperLinearForCausalLM(PreTrainedModel, GenerationMixin):

         self.backbone.inf_pred_len = 336

-
         # backbone returns (B, pred_len, C)

         preds = self.backbone(x_enc)
         preds = self.revin_layer(preds, 'denorm')
+        preds = self.fourier_downsample_dim1(preds, 96)
+
         return CausalLMOutputWithCrossAttentions(loss=None, logits=preds, past_key_values=None, hidden_states=None, attentions=None)


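One consequence worth noting: with `# y *= target_len / x.size(1)` still disabled, the 96-step logits come back scaled by pred_len / target_len (336 / 96 = 3.5) relative to the denormalised predictions. A quick standalone check of that gain, repeating the helper's exact steps (again a sketch, not repo code):

import torch

n = torch.arange(336)
x = torch.sin(2 * torch.pi * 3 * n / 336).reshape(1, -1, 1)  # exactly 3 cycles over 336 steps
X = torch.fft.rfft(x, dim=1)
y = torch.fft.irfft(X[:, : 96 // 2 + 1], n=96, dim=1)        # same crop + inverse as the helper
print((y.abs().max() / x.abs().max()).item())                # 3.5: the un-renormalised gain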