# configuration_binaryllm.py

from transformers import PretrainedConfig

class BinaryLLMConfig(PretrainedConfig):
    """Configuration for the BinaryLLM model.

    The vocabulary spans the full radix-65536 token space (ids 0-65535)
    plus two special tokens, BOS (65536) and EOS (65537), for a total
    vocab_size of 65538; the pad token shares the EOS id.
    """

    model_type = "binaryllm"

    def __init__(
        self,
        vocab_size: int = 65538,
        hidden_size: int = 512,
        num_hidden_layers: int = 4,
        num_attention_heads: int = 4,
        intermediate_size: int = 2048,
        max_position_embeddings: int = 2048,
        dropout: float = 0.1,
        activation: str = "gelu",
        bos_token_id: int = 65536,
        eos_token_id: int = 65537,
        pad_token_id: int = 65537,
        **kwargs,
    ):
        self.vocab_size = int(vocab_size)
        self.hidden_size = int(hidden_size)
        self.num_hidden_layers = int(num_hidden_layers)
        self.num_attention_heads = int(num_attention_heads)
        self.intermediate_size = int(intermediate_size)
        self.max_position_embeddings = int(max_position_embeddings)
        self.dropout = float(dropout)
        self.activation = str(activation)
        # Forward the special token ids to PretrainedConfig.__init__ instead of
        # assigning them directly: the base __init__ pops them from kwargs and
        # would otherwise reset the attributes to None.
        super().__init__(
            bos_token_id=int(bos_token_id),
            eos_token_id=int(eos_token_id),
            pad_token_id=int(pad_token_id),
            **kwargs,
        )
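

# A minimal usage sketch, assuming the module is run directly: it registers
# the config with AutoConfig so model_type "binaryllm" resolves to
# BinaryLLMConfig, then round-trips it through save/load. The save directory
# name is hypothetical.
if __name__ == "__main__":
    from transformers import AutoConfig

    AutoConfig.register("binaryllm", BinaryLLMConfig)

    config = BinaryLLMConfig()
    print(config.vocab_size)    # 65538 = 65536 radix tokens + BOS + EOS
    print(config.bos_token_id)  # 65536
    print(config.eos_token_id)  # 65537 (shared with pad_token_id)

    config.save_pretrained("binaryllm-config")  # writes config.json
    reloaded = AutoConfig.from_pretrained("binaryllm-config")
    assert isinstance(reloaded, BinaryLLMConfig)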