slices:
  - sources:
      - model: m-a-p/neo_7b
        layer_range: [0, 0]
      - model: DewEfresh/neo_7b
        layer_range: [3, 3]
  - sources:
      - model: m-a-p/neo_7b
        layer_range: [1, 1]
      - model: DewEfresh/neo_7b
        layer_range: [3, 3]
  - sources:
      - model: m-a-p/neo_7b
        layer_range: [2, 2]
      - model: DewEfresh/neo_7b
        layer_range: [3, 3]

  - sources:
      - model: m-a-p/neo_7b
        layer_range: [3, 3]
      - model: DewEfresh/neo_7b
        layer_range: [7, 7]
  - sources:
      - model: m-a-p/neo_7b
        layer_range: [4, 4]
      - model: DewEfresh/neo_7b
        layer_range: [7, 7]
  - sources:
      - model: m-a-p/neo_7b
        layer_range: [5, 5]
      - model: DewEfresh/neo_7b
        layer_range: [7, 7]

  - sources:
      - model: m-a-p/neo_7b
        layer_range: [6, 6]
      - model: DewEfresh/neo_7b
        layer_range: [11, 11]
  - sources:
      - model: m-a-p/neo_7b
        layer_range: [7, 7]
      - model: DewEfresh/neo_7b
        layer_range: [11, 11]
  - sources:
      - model: m-a-p/neo_7b
        layer_range: [8, 8]
      - model: DewEfresh/neo_7b
        layer_range: [11, 11]

  - sources:
      - model: m-a-p/neo_7b
        layer_range: [9, 9]
      - model: DewEfresh/neo_7b
        layer_range: [15, 15]
  - sources:
      - model: m-a-p/neo_7b
        layer_range: [10, 10]
      - model: DewEfresh/neo_7b
        layer_range: [15, 15]
  - sources:
      - model: m-a-p/neo_7b
        layer_range: [11, 11]
      - model: DewEfresh/neo_7b
        layer_range: [15, 15]

  - sources:
      - model: m-a-p/neo_7b
        layer_range: [12, 12]
      - model: DewEfresh/neo_7b
        layer_range: [19, 19]
  - sources:
      - model: m-a-p/neo_7b
        layer_range: [13, 13]
      - model: DewEfresh/neo_7b
        layer_range: [19, 19]
  - sources:
      - model: m-a-p/neo_7b
        layer_range: [14, 14]
      - model: DewEfresh/neo_7b
        layer_range: [19, 19]

  - sources:
      - model: m-a-p/neo_7b
        layer_range: [15, 15]
      - model: DewEfresh/neo_7b
        layer_range: [23, 23]
  - sources:
      - model: m-a-p/neo_7b
        layer_range: [16, 16]
      - model: DewEfresh/neo_7b
        layer_range: [23, 23]
  - sources:
      - model: m-a-p/neo_7b
        layer_range: [17, 17]
      - model: DewEfresh/neo_7b
        layer_range: [23, 23]

  - sources:
      - model: m-a-p/neo_7b
        layer_range: [18, 18]
      - model: DewEfresh/neo_7b
        layer_range: [27, 27]
  - sources:
      - model: m-a-p/neo_7b
        layer_range: [19, 19]
      - model: DewEfresh/neo_7b
        layer_range: [27, 27]
  - sources:
      - model: m-a-p/neo_7b
        layer_range: [20, 20]
      - model: DewEfresh/neo_7b
        layer_range: [27, 27]

merge_method: slerp
base_model: m-a-p/neo_7b
parameters:
  t:
    - 0.75
    - 0.25
dtype: bfloat16
output_path: ./merged_reduced_map_dewefresh_neo_7b
model_config:
  architectures: ["LlamaForCausalLM"]
  attention_bias: false
  attention_dropout: 0.0
  hidden_act: "silu"
  hidden_size: 3072
  intermediate_size: 24576
  max_position_embeddings: 8192
  model_type: "llama"
  num_attention_heads: 16
  num_hidden_layers: 21
  num_key_value_heads: 16
  rms_norm_eps: 1e-05
  rope_theta: 10000.0
  use_cache: true
  vocab_size: 64256
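Read as a whole, the slice list walks through the first 21 layers of m-a-p/neo_7b (0-20) in groups of three and pairs each group with a single, evenly spaced DewEfresh/neo_7b layer (3, 7, 11, 15, 19, 23, 27), which is consistent with `num_hidden_layers: 21` in the `model_config` block. Each paired slice is blended with slerp using the `t` values above (0.75 and 0.25); if these follow mergekit's usual convention, `t: 0` returns the `base_model` weights, `t: 1` the other source, and a list of values is read as a gradient across the layer stack. The snippet below is only a sketch that regenerates the slice list to make the 3-to-1 grouping explicit; `build_slices` and its parameters are hypothetical names, and nothing here calls mergekit itself.

```python
# Hypothetical helper (not part of mergekit): regenerate the slice list above
# to make the 3-to-1 grouping explicit. Layers 0-20 of m-a-p/neo_7b are taken
# in groups of three, and group g is paired with DewEfresh/neo_7b layer 4*g + 3.
import yaml


def build_slices(num_groups: int = 7, group_size: int = 3) -> list:
    slices = []
    for g in range(num_groups):
        anchor = 4 * g + 3  # DewEfresh/neo_7b layer: 3, 7, 11, 15, 19, 23, 27
        for i in range(group_size):
            layer = g * group_size + i  # m-a-p/neo_7b layer: 0 .. 20
            slices.append({
                "sources": [
                    {"model": "m-a-p/neo_7b", "layer_range": [layer, layer]},
                    {"model": "DewEfresh/neo_7b", "layer_range": [anchor, anchor]},
                ]
            })
    return slices


# Prints YAML equivalent to the `slices:` section of the config above.
print(yaml.safe_dump({"slices": build_slices()}, sort_keys=False))
```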
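For reference, slerp (spherical linear interpolation) blends two weight tensors along the arc between them rather than along a straight line, which is what `merge_method: slerp` selects above. The function below is an illustrative re-implementation of that idea, not mergekit's internal code; the flattening, `eps` guard, and linear-interpolation fallback are assumptions made for the sketch.

```python
# Illustrative slerp between two weight tensors; t=0 returns w0, t=1 returns w1.
# This is a sketch of the idea behind `merge_method: slerp`, not mergekit's code.
import torch


def slerp(w0: torch.Tensor, w1: torch.Tensor, t: float, eps: float = 1e-8) -> torch.Tensor:
    v0, v1 = w0.flatten().float(), w1.flatten().float()
    # Angle between the two flattened weight vectors.
    cos_theta = torch.dot(v0, v1) / (v0.norm() * v1.norm() + eps)
    theta = torch.acos(cos_theta.clamp(-1.0, 1.0))
    if theta < eps:  # nearly colinear: fall back to plain linear interpolation
        return (1 - t) * w0 + t * w1
    s0 = torch.sin((1 - t) * theta) / torch.sin(theta)
    s1 = torch.sin(t * theta) / torch.sin(theta)
    return (s0 * v0 + s1 * v1).reshape(w0.shape).to(w0.dtype)


# Example: blend two random "layers" with the first t value from the config.
a, b = torch.randn(16, 16), torch.randn(16, 16)
merged = slerp(a, b, t=0.75)
```

A config like this is typically saved to a file and executed with mergekit's command-line entry point (for example `mergekit-yaml config.yml ./merged_reduced_map_dewefresh_neo_7b`); the resulting folder can then be loaded like any other Llama-architecture checkpoint, e.g. with `AutoModelForCausalLM.from_pretrained(..., torch_dtype=torch.bfloat16)` from transformers.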