{
  "dropout": 0.1,
  "hidden_dim": 96,
  "intermediate_dim": 256,
  "max_seq_len": 128,
  "n_head": 4,
  "n_kv_head": 2,
  "n_layer": 3,
  "vocab_size": 1024
}
{
  "dropout": 0.1,
  "hidden_dim": 96,
  "intermediate_dim": 256,
  "max_seq_len": 128,
  "n_head": 4,
  "n_kv_head": 2,
  "n_layer": 3,
  "vocab_size": 1024
}