Update configuration_xlm_roberta.py
configuration_xlm_roberta.py
CHANGED
@@ -21,6 +21,7 @@ class XLMRobertaFlashConfig(PretrainedConfig):
         position_embedding_type="absolute",
         use_cache=True,
         classifier_dropout=None,
+        use_flash_attn=True,
         **kwargs,
     ):
         super().__init__(pad_token_id=pad_token_id, bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)
@@ -39,4 +40,6 @@ class XLMRobertaFlashConfig(PretrainedConfig):
         self.layer_norm_eps = layer_norm_eps
         self.position_embedding_type = position_embedding_type
         self.use_cache = use_cache
-        self.classifier_dropout = classifier_dropout
+        self.classifier_dropout = classifier_dropout
+        self.use_flash_attn = use_flash_attn
+
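The new `use_flash_attn` flag defaults to `True` and is stored as a plain attribute on the config, so downstream model code can branch on it. Below is a minimal sketch of how the flag behaves, assuming `configuration_xlm_roberta.py` is on the import path and that the remaining constructor arguments keep their usual defaults; the model-side code that actually consumes the flag is not part of this commit.

# Minimal sketch, not part of this commit: exercises the new config flag.
from configuration_xlm_roberta import XLMRobertaFlashConfig

# Flash attention is enabled by default.
config = XLMRobertaFlashConfig()
assert config.use_flash_attn is True

# It can be switched off explicitly, e.g. on hardware without the
# flash-attn package; since it is a regular attribute, it round-trips
# through serialization like any other PretrainedConfig field.
config = XLMRobertaFlashConfig(use_flash_attn=False)
assert config.to_dict()["use_flash_attn"] is False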