{ "RoPE": 1, "act_method": "swiglu", "architectures": [ "GPT" ], "block_size": 1024, "group_size": 2, "model_type": "custom-gpt2", "n_embd": 1024, "n_head": 16, "n_layer": 24, "norm_method": "rmsnorm", "torch_dtype": "float32", "transformers_version": "4.45.2", "vocab_size": 50257 }