| from packaging import version | |
| import transformers | |
# Fail fast at import time when the installed transformers release is older
# than the minimum this module supports, so the user gets an actionable message.
if version.parse("4.31.0") > version.parse(transformers.__version__):
    raise ImportError(
        f"You are using transformers=={transformers.__version__}, but transformers>=4.31.0 is required to use DeciCoder. Please upgrade transformers."
    )
| from transformers.models.llama.configuration_llama import LlamaConfig | |
| from transformers.utils import logging | |
# Module-level logger obtained through the transformers logging utilities.
logger = logging.get_logger(__name__)
# Empty archive map; nothing in the visible part of this file reads it.
LLAMA_PRETRAINED_CONFIG_ARCHIVE_MAP = {}
class DeciCoderConfig(LlamaConfig):
    r"""
    Configuration class for DeciCoder, built on top of [`LlamaConfig`].

    It stores three boolean attention switches on the configuration object and forwards every other keyword
    argument unchanged to [`LlamaConfig`]. Configuration objects inherit from [`PretrainedConfig`]; read the
    documentation from [`PretrainedConfig`] for more information.

    Args:
        naive_attention_prefill (`bool`, *optional*, defaults to `False`):
            Whether to use naive matmul or scaled dot product attention during prefill.
        naive_attention_decode_batched (`bool`, *optional*, defaults to `True`):
            Whether to use naive matmul or scaled dot product attention during decode for batch_size > 1.
        naive_attention_decode_single (`bool`, *optional*, defaults to `False`):
            Whether to use naive matmul or scaled dot product attention during decode for batch_size == 1.
        **kwargs:
            Remaining keyword arguments, passed as-is to `LlamaConfig.__init__`.
    """

    keys_to_ignore_at_inference = ["past_key_values"]

    def __init__(
        self,
        naive_attention_prefill: bool = False,
        naive_attention_decode_batched: bool = True,
        naive_attention_decode_single: bool = False,
        **kwargs,
    ):
        """Record the attention switches, then delegate the remaining arguments to the parent config."""
        # The switches are stored before the parent initializer runs, in
        # declaration order (dicts preserve insertion order).
        attention_switches = {
            "naive_attention_prefill": naive_attention_prefill,
            "naive_attention_decode_batched": naive_attention_decode_batched,
            "naive_attention_decode_single": naive_attention_decode_single,
        }
        for switch_name, switch_value in attention_switches.items():
            setattr(self, switch_name, switch_value)

        super().__init__(**kwargs)