from transformers import PretrainedConfig


class ByteGPTConfig(PretrainedConfig):
    """Configuration for ByteGPT, a small byte-level GPT."""

    model_type = "ijk_byte_gpt"

    def __init__(
        self,
        vocab_size: int = 259,  # presumably 256 raw byte values plus 3 special tokens
        block_size: int = 128,
        n_embd: int = 64,
        n_head: int = 4,
        n_layer: int = 4,
        dropout: float = 0.1,
        use_flash_attention: bool = False,
        # Accepted (and intentionally unused here) so configs serialized with
        # this flag by newer transformers versions reload without a TypeError.
        _attn_implementation_autoset: bool = False,
        **kwargs,
    ):
        super().__init__(**kwargs)
        # Point the Auto* classes at this repo's custom code so loads with
        # trust_remote_code=True resolve to the right modules.
        self.auto_map = {
            "AutoConfig": "configuration_bytegpt.ByteGPTConfig",
            "AutoModelForCausalLM": "modeling_bytegpt.ByteGPTForCausalLM",
        }
        self.vocab_size = vocab_size
        self.block_size = block_size
        self.n_embd = n_embd
        self.n_head = n_head
        self.n_layer = n_layer
        self.dropout = dropout
        self.use_flash_attention = use_flash_attention
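

# Minimal usage sketch (an illustration, not part of the original file):
# instantiate the config with overrides, round-trip it through a plain dict
# via the PretrainedConfig to_dict/from_dict API, and check that the custom
# fields survive serialization.
if __name__ == "__main__":
    cfg = ByteGPTConfig(n_layer=6, dropout=0.0)
    restored = ByteGPTConfig.from_dict(cfg.to_dict())
    assert restored.n_layer == 6
    assert restored.dropout == 0.0
    print(restored)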