from transformers import PretrainedConfig


class BinaryLLMConfig(PretrainedConfig):
    """Configuration for BinaryLLM: 2**16 base tokens plus BOS and EOS (EOS doubles as PAD)."""

    model_type = "binaryllm"

    def __init__(
        self,
        vocab_size: int = 65538,
        hidden_size: int = 512,
        num_hidden_layers: int = 4,
        num_attention_heads: int = 4,
        intermediate_size: int = 2048,
        max_position_embeddings: int = 2048,
        dropout: float = 0.1,
        activation: str = "gelu",
        bos_token_id: int = 65536,
        eos_token_id: int = 65537,
        pad_token_id: int = 65537,
        **kwargs,
    ):
        self.vocab_size = int(vocab_size)
        self.hidden_size = int(hidden_size)
        self.num_hidden_layers = int(num_hidden_layers)
        self.num_attention_heads = int(num_attention_heads)
        self.intermediate_size = int(intermediate_size)
        self.max_position_embeddings = int(max_position_embeddings)
        self.dropout = float(dropout)
        self.activation = str(activation)
        # Pass the special-token ids through to PretrainedConfig instead of assigning
        # them before super().__init__(): the base class pops them from kwargs
        # (defaulting to None) and re-assigns them, which would silently overwrite
        # values set directly on self beforehand.
        super().__init__(
            bos_token_id=int(bos_token_id),
            eos_token_id=int(eos_token_id),
            pad_token_id=int(pad_token_id),
            **kwargs,
        )
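
# Minimal usage sketch: serialize the config and reload it via the standard
# `transformers` round-trip. The "binaryllm-config" directory name is an
# illustrative assumption, and the model class itself is defined elsewhere.
if __name__ == "__main__":
    config = BinaryLLMConfig()
    config.save_pretrained("binaryllm-config")  # writes config.json to the directory
    reloaded = BinaryLLMConfig.from_pretrained("binaryllm-config")
    assert reloaded.vocab_size == 65538
    assert reloaded.pad_token_id == reloaded.eos_token_id == 65537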