Update configuration_super_linear.py
Browse files
configuration_super_linear.py
CHANGED
|
@@ -45,6 +45,7 @@ class SuperLinearConfig(PretrainedConfig):
|
|
| 45 |
load_linear = 1,
|
| 46 |
misc_moe = 10,
|
| 47 |
mlp_gating = 0,
|
|
|
|
| 48 |
model_type= "super_linear",
|
| 49 |
moe_temp = 1,
|
| 50 |
noisy_gating_std = 0.1,
|
|
@@ -73,6 +74,7 @@ class SuperLinearConfig(PretrainedConfig):
|
|
| 73 |
self.noisy_gating_std_decay = noisy_gating_std_decay
|
| 74 |
self.d_model = d_model
|
| 75 |
self.mlp_gating = mlp_gating
|
|
|
|
| 76 |
self.moe_temp = moe_temp
|
| 77 |
self.use_fft = use_fft
|
| 78 |
self.fft_len = fft_len
|
|
|
|
| 45 |
load_linear = 1,
|
| 46 |
misc_moe = 10,
|
| 47 |
mlp_gating = 0,
|
| 48 |
+
moe_norm = 1,
|
| 49 |
model_type= "super_linear",
|
| 50 |
moe_temp = 1,
|
| 51 |
noisy_gating_std = 0.1,
|
|
|
|
| 74 |
self.noisy_gating_std_decay = noisy_gating_std_decay
|
| 75 |
self.d_model = d_model
|
| 76 |
self.mlp_gating = mlp_gating
|
| 77 |
+
self.moe_norm = moe_norm
|
| 78 |
self.moe_temp = moe_temp
|
| 79 |
self.use_fft = use_fft
|
| 80 |
self.fft_len = fft_len
|