{"name": "pythia-410m-deduped", "hf_config": {"org": "EleutherAI", "name": "pythia-410m-deduped"}, "block_size": 2048, "vocab_size": 50254, "padding_multiple": 128, "padded_vocab_size": 50304, "n_layer": 24, "n_head": 16, "n_embd": 1024, "rotary_percentage": 0.25, "parallel_residual": true, "bias": true, "lm_head_bias": false, "n_query_groups": 16, "shared_attention_norm": false, "_norm_class": "LayerNorm", "norm_eps": 1e-05, "_mlp_class": "GptNeoxMLP", "gelu_approximate": "none", "intermediate_size": 4096, "rope_condense_ratio": 1, "rope_base": 10000}