stickbreaking-3b / config.json
Shawn Tan
New model files with new dolomite-engine.
8c1b2ba
raw
history blame contribute delete
874 Bytes
{
"activation_function": "swiglu",
"add_bias": false,
"add_qkv_bias": false,
"attention_head_type": "mha",
"attention_multiplier": 0.015625,
"attention_softmax_in_fp32": true,
"attn_pdrop": 0,
"bos_token_id": 0,
"embd_pdrop": 0,
"eos_token_id": 0,
"init_method": "mup",
"initializer_range": 0.1,
"layer_norm_epsilon": 1e-05,
"m_emb": 12,
"m_residual": 0.22,
"m_width": 9,
"model_type": "stickbreaking",
"multi_query": false,
"n_embd": 2304,
"n_head": 36,
"n_inner": 9216,
"n_layer": 40,
"n_positions": 4096,
"normalization_function": "rmsnorm",
"num_key_value_heads": 36,
"pad_token_id": 50256,
"position_embedding_type": "nope",
"resid_pdrop": 0,
"sb_remainder": false,
"scale_attn_weights": true,
"transformers_version": "4.45.0",
"upcast_logits_for_loss": true,
"use_cache": true,
"vocab_size": 50304
}