Update modeling_super_linear.py
modeling_super_linear.py CHANGED (+1 -15)
@@ -526,21 +526,7 @@ class SuperLinearForCausalLM(PreTrainedModel, GenerationMixin):
 
         # backbone returns (B, pred_len, C)
         preds = self.backbone(x_enc)
-
-        #preds = preds[0]
-        print(F"preds shape: {preds.shape}")
-
-        # if we keep continuous values, treat them as logits directly
-        logits = (preds if self.vocab_size is None else self.lm_head(preds).transpose(1, 2))
-
-        loss = None
-        if labels is not None:
-            # shift for causal objective
-            shift_logits = logits[..., :-1, :].contiguous()
-            shift_labels = labels[..., 1:].contiguous()
-            loss = F.cross_entropy(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))
-
-        return CausalLMOutputWithCrossAttentions(loss=loss,logits=logits,past_key_values=None,hidden_states=None,attentions=None,)
+        return CausalLMOutputWithCrossAttentions(loss=None,logits=preds,past_key_values=None,hidden_states=None,attentions=None,)
 
 
     def prepare_inputs_for_generation(self, inputs_embeds, past_key_values=None, **kwargs):
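After this change, forward no longer routes predictions through lm_head and never computes a loss: it surfaces the backbone's continuous forecasts directly in the logits field and always returns loss=None. A minimal sketch of the resulting call pattern, assuming x_enc is a (B, seq_len, C) float tensor as the diff's shape comment suggests (the wrapper function and tensor sizes below are illustrative, not part of the repo):

    import torch
    from transformers.modeling_outputs import CausalLMOutputWithCrossAttentions

    def run_forecast(model, x_enc: torch.Tensor) -> torch.Tensor:
        """Illustrative wrapper: call the simplified forward and unpack its output."""
        out: CausalLMOutputWithCrossAttentions = model(x_enc)
        # After the change, loss is always None and logits holds the raw
        # (B, pred_len, C) backbone forecasts rather than token logits.
        assert out.loss is None
        return out.logits

    # e.g. preds = run_forecast(model, torch.randn(4, 512, 7))  # sizes are illustrative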
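Because the cross-entropy branch was deleted rather than moved, any training code that relied on the model's built-in loss must now compute it on the caller side. A sketch of reinstating the removed shifted-cross-entropy objective externally, assuming logits of shape (B, T, V) and integer labels of shape (B, T) as in the deleted code (the helper below is hypothetical, not part of the repo):

    import torch
    import torch.nn.functional as F

    def causal_lm_loss(logits: torch.Tensor, labels: torch.Tensor) -> torch.Tensor:
        """Hypothetical helper mirroring the deleted in-model loss computation."""
        # Shift so position t predicts token t+1, as the removed code did.
        shift_logits = logits[..., :-1, :].contiguous()
        shift_labels = labels[..., 1:].contiguous()
        return F.cross_entropy(shift_logits.view(-1, shift_logits.size(-1)),
                               shift_labels.view(-1))

Note that with this commit the model's logits are continuous forecasts, so this helper only applies when a vocabulary head is reintroduced upstream of it.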