{ "metadata": { "ParamSize": 1037, "ParamBytes": 1795522624.0, "BitsPerParam": 4.507973154540893 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 65536000, "records": [ { "name": "language_model.lm_head.linear.q_weight", "shape": [ 51200, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 65536000, "byteOffset": 0 } ], "md5sum": "fadf435e1cd14c20e2ba7b405c8601ce" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 32179040, "records": [ { "name": "language_model.lm_head.linear.bias", "shape": [ 51200 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 102400, "byteOffset": 0 }, { "name": "language_model.lm_head.linear.q_scale", "shape": [ 51200, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192000, "byteOffset": 102400 }, { "name": "language_model.lm_head.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 8294400 }, { "name": "language_model.lm_head.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 8299520 }, { "name": "multi_modal_projector.linear_1.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 8304640 }, { "name": "multi_modal_projector.linear_1.q_weight", "shape": [ 2560, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1474560, "byteOffset": 8309760 }, { "name": "multi_modal_projector.linear_1.q_scale", "shape": [ 2560, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 184320, "byteOffset": 9784320 }, { "name": "multi_modal_projector.linear_2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9968640 }, { "name": "multi_modal_projector.linear_2.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9973760 }, { "name": "multi_modal_projector.linear_2.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13250560 }, { "name": "vision_tower.vision_model.encoder.layers.23.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 13660160 }, { "name": "vision_tower.vision_model.encoder.layers.23.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 13662464 }, { "name": "vision_tower.vision_model.encoder.layers.23.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 13664768 }, { "name": "vision_tower.vision_model.encoder.layers.23.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 13667072 }, { "name": "vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 13669376 }, { "name": "vision_tower.vision_model.encoder.layers.23.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 13677984 }, { "name": "vision_tower.vision_model.encoder.layers.23.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 16157088 }, { "name": "vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 16466976 }, { "name": "vision_tower.vision_model.encoder.layers.23.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 16469280 }, { "name": "vision_tower.vision_model.encoder.layers.23.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 18957600 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 19268640 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 19270944 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 19934496 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 20017440 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 20019744 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 20683296 }, { "name": "vision_tower.vision_model.encoder.layers.24.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 20766240 }, { "name": "vision_tower.vision_model.encoder.layers.24.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 20768544 }, { "name": "vision_tower.vision_model.encoder.layers.24.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 20770848 }, { "name": "vision_tower.vision_model.encoder.layers.24.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 20773152 }, { "name": "vision_tower.vision_model.encoder.layers.24.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 20775456 }, { "name": "vision_tower.vision_model.encoder.layers.24.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 20784064 }, { "name": "vision_tower.vision_model.encoder.layers.24.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 23263168 }, { "name": "vision_tower.vision_model.encoder.layers.24.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 23573056 }, { "name": "vision_tower.vision_model.encoder.layers.24.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 23575360 }, { "name": "vision_tower.vision_model.encoder.layers.24.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 26063680 }, { "name": "vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 26374720 }, { "name": "vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 26377024 }, { "name": "vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 27040576 }, { "name": "vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 27123520 }, { "name": "vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 27125824 }, { "name": "vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 27789376 }, { "name": "vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 27872320 }, { "name": "vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 27874624 }, { "name": "vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 28538176 }, { "name": "vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 28621120 }, { "name": "vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 28623424 }, { "name": "vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 29286976 }, { "name": "vision_tower.vision_model.encoder.layers.25.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 29369920 }, { "name": "vision_tower.vision_model.encoder.layers.25.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 29372224 }, { "name": "vision_tower.vision_model.encoder.layers.25.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 29374528 }, { "name": "vision_tower.vision_model.encoder.layers.25.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 29376832 }, { "name": "vision_tower.vision_model.encoder.layers.25.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 29379136 }, { "name": "vision_tower.vision_model.encoder.layers.25.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 29387744 }, { "name": "vision_tower.vision_model.encoder.layers.25.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 31866848 }, { "name": "vision_tower.vision_model.encoder.layers.25.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 32176736 } ], "md5sum": "dec6f41f4a62d1565aa2d5b9c069b4a4" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 65536000, "records": [ { "name": "language_model.transformer.embd.q_weight", "shape": [ 51200, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 65536000, "byteOffset": 0 } ], "md5sum": "21a3f3014f0bb1e817c0d8fde5a20645" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 28783360, "records": [ { "name": "vision_tower.vision_model.encoder.layers.25.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 0 }, { "name": "vision_tower.vision_model.encoder.layers.25.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 2488320 }, { "name": "vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 2799360 }, { "name": "vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 2801664 }, { "name": "vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 3465216 }, { "name": "vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 3548160 }, { "name": "vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 3550464 }, { "name": "vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 4214016 }, { "name": "vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 4296960 }, { "name": "vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 4299264 }, { "name": "vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 4962816 }, { "name": "vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 5045760 }, { "name": "vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 5048064 }, { "name": "vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 5711616 }, { "name": "language_model.transformer.embd.q_scale", "shape": [ 51200, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192000, "byteOffset": 5794560 }, { "name": "language_model.transformer.h.0.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13986560 }, { "name": "language_model.transformer.h.0.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13991680 }, { "name": "language_model.transformer.h.0.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 13996800 }, { "name": "language_model.transformer.h.0.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14012160 }, { "name": "language_model.transformer.h.0.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 23842560 }, { "name": "language_model.transformer.h.0.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25071360 }, { "name": "language_model.transformer.h.0.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25076480 }, { "name": "language_model.transformer.h.0.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 28353280 }, { "name": "language_model.transformer.h.0.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 28762880 } ], "md5sum": "73b52c2f33960234875bb964046dff22" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.0.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.0.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.0.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.0.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.0.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.1.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.1.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.1.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29506560 } ], "md5sum": "3a04d0bec6bc32979c95889f06afa609" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.1.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.1.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.1.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.1.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.1.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.1.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.1.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.1.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.1.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "90fc612002721e54adc549d5d0595f74" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.1.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.1.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.2.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.2.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.2.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.2.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.2.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.2.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.2.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.2.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.2.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "35252afd67e9272268389ea88b6acff3" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.2.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.2.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.2.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.2.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.2.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.3.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.3.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.3.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29506560 } ], "md5sum": "7efd55f81ed2f390c08a0e6e56485cb2" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.3.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.3.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.3.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.3.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.3.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.3.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.3.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.3.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.3.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "740aee48340d4dbfd1a39072eadb1d99" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.3.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.3.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.4.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.4.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.4.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.4.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.4.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.4.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.4.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.4.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.4.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "02a469ce5f3c5ad5a20e7753ccc5d350" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.4.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.4.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.10.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.10.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.10.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.10.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.10.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.10.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.10.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.10.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.10.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "8e82aa68f97432a0a849ec6ca622376d" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 29511680, "records": [ { "name": "language_model.transformer.h.10.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.10.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.10.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.10.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.10.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.11.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.11.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.4.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29506560 } ], "md5sum": "32c14038f7043018e702add472e24ff9" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.4.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.4.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.5.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.5.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.5.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.5.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.5.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.5.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.5.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.5.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.5.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "9caa0988986109d0cc249a229247f9c7" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.5.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.5.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.5.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.5.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.5.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.6.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.6.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.6.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29506560 } ], "md5sum": "bef5a994f97c09bdda90d0df4b112316" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.6.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.6.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.6.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.6.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.6.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.6.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.6.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.6.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.6.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "c638dcecc57d9f1ae790d3626a4ea55a" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.6.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.6.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.7.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.7.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.7.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.7.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.7.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.7.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.7.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.7.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.7.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "c33c9b116e483a40242758fdd003a97d" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.7.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.7.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.7.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.7.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.7.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.8.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.8.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.8.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29506560 } ], "md5sum": "244d40f5ad7ac03ac9eb99465486861c" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.8.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.8.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.8.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.8.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.8.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.8.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.8.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.8.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.8.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "27321f471a336d51bbb4171bdbfe192b" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.8.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.8.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.9.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.9.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.9.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.9.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.9.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.9.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.9.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.9.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.9.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "56c0e526484d57b9d7addb69eec7564d" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 29511680, "records": [ { "name": "language_model.transformer.h.9.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.9.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.9.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.9.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.9.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.11.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29496320 } ], "md5sum": "6dd910dd52768f7c81f551115c9abe1a" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.11.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.11.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.11.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.11.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.11.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.11.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.11.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.11.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.11.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "62d123a640b8873dca516471c6feb8d6" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.11.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.11.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.12.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.12.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.12.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.12.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.12.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.12.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.12.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.12.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.12.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "5c8a11846b4a726a1b5af8f9d3b5796b" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.12.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.12.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.12.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.12.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.12.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.13.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.13.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.13.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29506560 } ], "md5sum": "8b192b1a89f6308f6a7c7a8c25e6adaa" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.13.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.13.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.13.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.13.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.13.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.13.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.13.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.13.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.13.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "89060d94809a74f50df1743de1826eef" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.13.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.13.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.14.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.14.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.14.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.14.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.14.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.14.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.14.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.14.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.14.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "f3e6c653fdbd458f28b735fc1efb97d4" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.14.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.14.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.14.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.14.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.14.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.15.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.15.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.15.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29506560 } ], "md5sum": "ce6d2fcf8d66c6b3953e4bf09c874993" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.15.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.15.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.15.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.15.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.15.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.15.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.15.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.15.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.15.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "d43e493976a3db8b6391c019ae153560" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.15.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.15.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.16.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.16.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.16.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.16.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.16.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.16.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.16.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.16.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.16.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "5f685d617a44b9fcc46c3fdf1fffce2a" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.16.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.16.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.16.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.16.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.16.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.17.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.17.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.17.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29506560 } ], "md5sum": "ef6f522ee72a61ee7254fb6a66d63b89" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.17.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.17.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.17.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.17.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.17.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.17.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.17.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.17.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.17.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "8a2d236fb7c43b8c064ad918c11334a7" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.17.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.17.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.18.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.18.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.18.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.18.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.18.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.18.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.18.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.18.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.18.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "7b96c83598ede1a902e941eb8bca9200" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.18.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.18.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.18.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.18.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.18.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.19.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.19.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.19.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29506560 } ], "md5sum": "c45627d997e47970cbe5d0a1fd408d3c" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.19.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.19.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.19.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.19.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.19.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.19.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.19.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.19.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.19.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "fc8440038dadd8e500eba9a52d79581a" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.19.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.19.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.20.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.20.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.20.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.20.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.20.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.20.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.20.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.20.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.20.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "eb457ee0497d6145363a78d7cbc7354a" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.20.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.20.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.20.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.20.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.20.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.21.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.21.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.21.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29506560 } ], "md5sum": "246916a0dae41d9c1582a3c265168b43" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.21.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.21.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.21.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.21.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.21.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.21.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.21.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.21.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.21.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "c987f1238d665627a4200c38b9abdc4a" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.21.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.21.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.22.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.22.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.22.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.22.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.22.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.22.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.22.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.22.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.22.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "8b9fe909e9d752d1761ec5fd919fbeff" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.22.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.22.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.22.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.22.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.22.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.23.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.23.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.23.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29506560 } ], "md5sum": "cfda446123ecfd69721edd6d8480635d" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.23.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.23.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.23.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.23.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.23.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.23.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.23.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.23.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.23.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "81260c7eca1369d16714ea5b2b98674e" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.23.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.23.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.24.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.24.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.24.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.24.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.24.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.24.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.24.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.24.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.24.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "cd81f286493bdb08701941abdde07c9f" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.24.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.24.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.24.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.24.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.24.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.25.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.25.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.25.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29506560 } ], "md5sum": "ebf75ad168aa6ceabad07620f6f3d81a" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.25.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.25.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.25.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.25.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.25.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.25.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.25.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.25.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.25.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "f4c5fb04cba717fc29e7ca93c362ec81" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.25.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.25.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.26.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.26.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.26.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.26.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.26.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.26.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.26.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.26.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.26.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "0670b438ad5d757a9583f1e3d36c4112" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.26.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.26.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.26.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.26.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.26.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.27.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.27.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.27.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29506560 } ], "md5sum": "1f6633ab003d9d7412e7f64ff1e0ae3e" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.27.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.27.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.27.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.27.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.27.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.27.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.27.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.27.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.27.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "c4be7a752495e084a88b2ed4426ccd54" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.27.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.27.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.28.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.28.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.28.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.28.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.28.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.28.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.28.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.28.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.28.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "528d818163ff27fe182c03138ef79cbe" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.28.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.28.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.28.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.28.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.28.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.29.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.29.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.29.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29506560 } ], "md5sum": "b17a3ab0c7f8695be6286fcab094fff8" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.29.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.29.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.29.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.29.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.29.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.29.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.29.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.29.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.29.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "65d5e9742585c1f31b2165786104adf6" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.29.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.29.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.30.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.30.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.30.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.30.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.30.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.30.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.30.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.30.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.30.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "61fe182613f6af812f3290cfcf186eea" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.30.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.30.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.30.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.30.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.30.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.31.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.31.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.31.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29506560 } ], "md5sum": "4c467689949d1c4c6f693907446cdfe8" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.31.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.31.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.31.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.31.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.31.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.31.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.31.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.31.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.31.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "79689e042e62cd3f5bd5aa6eb636485d" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 33493728, "records": [ { "name": "language_model.transformer.h.31.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.31.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "vision_tower.vision_model.embeddings.patch_embedding.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 14745600 }, { "name": "vision_tower.vision_model.embeddings.patch_embedding.weight", "shape": [ 1152, 3, 14, 14 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1354752, "byteOffset": 14747904 }, { "name": "vision_tower.vision_model.embeddings.position_embedding.q_weight", "shape": [ 256, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 16102656 }, { "name": "vision_tower.vision_model.embeddings.position_embedding.q_scale", "shape": [ 256, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18432, "byteOffset": 16250112 }, { "name": "vision_tower.vision_model.encoder.layers.0.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 16268544 }, { "name": "vision_tower.vision_model.encoder.layers.0.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 16270848 }, { "name": "vision_tower.vision_model.encoder.layers.0.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 16273152 }, { "name": "vision_tower.vision_model.encoder.layers.0.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 16275456 }, { "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 16277760 }, { "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 16286368 }, { "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 18765472 }, { "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 19075360 }, { "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 19077664 }, { "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 21565984 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 21877024 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 21879328 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 22542880 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 22625824 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 22628128 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 23291680 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 23374624 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 23376928 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 24040480 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 24123424 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 24125728 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 24789280 }, { "name": "vision_tower.vision_model.encoder.layers.1.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 24872224 }, { "name": "vision_tower.vision_model.encoder.layers.1.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 24874528 }, { "name": "vision_tower.vision_model.encoder.layers.1.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 24876832 }, { "name": "vision_tower.vision_model.encoder.layers.1.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 24879136 }, { "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 24881440 }, { "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 24890048 }, { "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 27369152 }, { "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 27679040 }, { "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 27681344 }, { "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 30169664 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 30480704 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 30483008 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 31146560 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 31229504 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 31231808 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 31895360 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 31978304 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 31980608 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 32644160 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 32727104 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 32729408 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 33392960 }, { "name": "vision_tower.vision_model.encoder.layers.10.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 33475904 }, { "name": "vision_tower.vision_model.encoder.layers.10.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 33478208 }, { "name": "vision_tower.vision_model.encoder.layers.10.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 33480512 }, { "name": "vision_tower.vision_model.encoder.layers.10.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 33482816 }, { "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 33485120 } ], "md5sum": "c1bdcf018c4450b60a79489027584354" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 32901600, "records": [ { "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 0 }, { "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 2479104 }, { "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 2788992 }, { "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 2791296 }, { "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 5279616 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 5590656 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 5592960 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 6256512 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 6339456 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 6341760 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 7005312 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 7088256 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 7090560 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 7754112 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 7837056 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 7839360 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 8502912 }, { "name": "vision_tower.vision_model.encoder.layers.11.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 8585856 }, { "name": "vision_tower.vision_model.encoder.layers.11.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 8588160 }, { "name": "vision_tower.vision_model.encoder.layers.11.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 8590464 }, { "name": "vision_tower.vision_model.encoder.layers.11.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 8592768 }, { "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 8595072 }, { "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 8603680 }, { "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 11082784 }, { "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 11392672 }, { "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 11394976 }, { "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 13883296 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 14194336 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 14196640 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 14860192 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 14943136 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 14945440 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 15608992 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 15691936 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 15694240 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 16357792 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 16440736 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 16443040 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 17106592 }, { "name": "vision_tower.vision_model.encoder.layers.12.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17189536 }, { "name": "vision_tower.vision_model.encoder.layers.12.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17191840 }, { "name": "vision_tower.vision_model.encoder.layers.12.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17194144 }, { "name": "vision_tower.vision_model.encoder.layers.12.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17196448 }, { "name": "vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 17198752 }, { "name": "vision_tower.vision_model.encoder.layers.12.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 17207360 }, { "name": "vision_tower.vision_model.encoder.layers.12.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 19686464 }, { "name": "vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 19996352 }, { "name": "vision_tower.vision_model.encoder.layers.12.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 19998656 }, { "name": "vision_tower.vision_model.encoder.layers.12.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 22486976 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 22798016 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 22800320 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 23463872 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 23546816 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 23549120 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 24212672 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 24295616 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 24297920 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 24961472 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 25044416 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 25046720 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 25710272 }, { "name": "vision_tower.vision_model.encoder.layers.13.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 25793216 }, { "name": "vision_tower.vision_model.encoder.layers.13.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 25795520 }, { "name": "vision_tower.vision_model.encoder.layers.13.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 25797824 }, { "name": "vision_tower.vision_model.encoder.layers.13.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 25800128 }, { "name": "vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 25802432 }, { "name": "vision_tower.vision_model.encoder.layers.13.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 25811040 }, { "name": "vision_tower.vision_model.encoder.layers.13.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 28290144 }, { "name": "vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 28600032 }, { "name": "vision_tower.vision_model.encoder.layers.13.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 28602336 }, { "name": "vision_tower.vision_model.encoder.layers.13.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 31090656 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 31401696 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 31404000 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 32067552 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 32150496 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 32152800 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 32816352 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 32899296 } ], "md5sum": "ba17beee7dcff6649ea089918d143c55" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 32917120, "records": [ { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 0 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 663552 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 746496 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 748800 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 1412352 }, { "name": "vision_tower.vision_model.encoder.layers.14.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 1495296 }, { "name": "vision_tower.vision_model.encoder.layers.14.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 1497600 }, { "name": "vision_tower.vision_model.encoder.layers.14.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 1499904 }, { "name": "vision_tower.vision_model.encoder.layers.14.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 1502208 }, { "name": "vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 1504512 }, { "name": "vision_tower.vision_model.encoder.layers.14.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 1513120 }, { "name": "vision_tower.vision_model.encoder.layers.14.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 3992224 }, { "name": "vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 4302112 }, { "name": "vision_tower.vision_model.encoder.layers.14.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 4304416 }, { "name": "vision_tower.vision_model.encoder.layers.14.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 6792736 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 7103776 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 7106080 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 7769632 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 7852576 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 7854880 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 8518432 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 8601376 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 8603680 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 9267232 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 9350176 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 9352480 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 10016032 }, { "name": "vision_tower.vision_model.encoder.layers.15.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 10098976 }, { "name": "vision_tower.vision_model.encoder.layers.15.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 10101280 }, { "name": "vision_tower.vision_model.encoder.layers.15.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 10103584 }, { "name": "vision_tower.vision_model.encoder.layers.15.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 10105888 }, { "name": "vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 10108192 }, { "name": "vision_tower.vision_model.encoder.layers.15.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 10116800 }, { "name": "vision_tower.vision_model.encoder.layers.15.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 12595904 }, { "name": "vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 12905792 }, { "name": "vision_tower.vision_model.encoder.layers.15.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 12908096 }, { "name": "vision_tower.vision_model.encoder.layers.15.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 15396416 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 15707456 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 15709760 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 16373312 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 16456256 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 16458560 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 17122112 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17205056 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 17207360 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 17870912 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17953856 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 17956160 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 18619712 }, { "name": "vision_tower.vision_model.encoder.layers.16.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 18702656 }, { "name": "vision_tower.vision_model.encoder.layers.16.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 18704960 }, { "name": "vision_tower.vision_model.encoder.layers.16.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 18707264 }, { "name": "vision_tower.vision_model.encoder.layers.16.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 18709568 }, { "name": "vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 18711872 }, { "name": "vision_tower.vision_model.encoder.layers.16.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 18720480 }, { "name": "vision_tower.vision_model.encoder.layers.16.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 21199584 }, { "name": "vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 21509472 }, { "name": "vision_tower.vision_model.encoder.layers.16.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 21511776 }, { "name": "vision_tower.vision_model.encoder.layers.16.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 24000096 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 24311136 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 24313440 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 24976992 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 25059936 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 25062240 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 25725792 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 25808736 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 25811040 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 26474592 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 26557536 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 26559840 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 27223392 }, { "name": "vision_tower.vision_model.encoder.layers.17.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 27306336 }, { "name": "vision_tower.vision_model.encoder.layers.17.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 27308640 }, { "name": "vision_tower.vision_model.encoder.layers.17.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 27310944 }, { "name": "vision_tower.vision_model.encoder.layers.17.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 27313248 }, { "name": "vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 27315552 }, { "name": "vision_tower.vision_model.encoder.layers.17.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 27324160 }, { "name": "vision_tower.vision_model.encoder.layers.17.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 29803264 }, { "name": "vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 30113152 }, { "name": "vision_tower.vision_model.encoder.layers.17.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 30115456 }, { "name": "vision_tower.vision_model.encoder.layers.17.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 32603776 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 32914816 } ], "md5sum": "6e196d63f48895def405a7d43ad0e3bd" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 31613056, "records": [ { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 0 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 663552 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 746496 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 748800 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 1412352 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 1495296 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 1497600 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 2161152 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 2244096 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 2246400 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 2909952 }, { "name": "vision_tower.vision_model.encoder.layers.18.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 2992896 }, { "name": "vision_tower.vision_model.encoder.layers.18.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 2995200 }, { "name": "vision_tower.vision_model.encoder.layers.18.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 2997504 }, { "name": "vision_tower.vision_model.encoder.layers.18.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 2999808 }, { "name": "vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 3002112 }, { "name": "vision_tower.vision_model.encoder.layers.18.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 3010720 }, { "name": "vision_tower.vision_model.encoder.layers.18.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 5489824 }, { "name": "vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 5799712 }, { "name": "vision_tower.vision_model.encoder.layers.18.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 5802016 }, { "name": "vision_tower.vision_model.encoder.layers.18.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 8290336 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 8601376 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 8603680 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 9267232 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 9350176 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 9352480 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 10016032 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 10098976 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 10101280 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 10764832 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 10847776 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 10850080 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 11513632 }, { "name": "vision_tower.vision_model.encoder.layers.19.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 11596576 }, { "name": "vision_tower.vision_model.encoder.layers.19.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 11598880 }, { "name": "vision_tower.vision_model.encoder.layers.19.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 11601184 }, { "name": "vision_tower.vision_model.encoder.layers.19.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 11603488 }, { "name": "vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 11605792 }, { "name": "vision_tower.vision_model.encoder.layers.19.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 11614400 }, { "name": "vision_tower.vision_model.encoder.layers.19.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 14093504 }, { "name": "vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 14403392 }, { "name": "vision_tower.vision_model.encoder.layers.19.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 14405696 }, { "name": "vision_tower.vision_model.encoder.layers.19.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 16894016 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17205056 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 17207360 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 17870912 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17953856 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 17956160 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 18619712 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 18702656 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 18704960 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 19368512 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 19451456 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 19453760 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 20117312 }, { "name": "vision_tower.vision_model.encoder.layers.2.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 20200256 }, { "name": "vision_tower.vision_model.encoder.layers.2.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 20202560 }, { "name": "vision_tower.vision_model.encoder.layers.2.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 20204864 }, { "name": "vision_tower.vision_model.encoder.layers.2.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 20207168 }, { "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 20209472 }, { "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 20218080 }, { "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 22697184 }, { "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 23007072 }, { "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 23009376 }, { "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 25497696 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 25808736 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 25811040 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 26474592 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 26557536 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 26559840 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 27223392 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 27306336 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 27308640 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 27972192 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 28055136 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 28057440 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 28720992 }, { "name": "vision_tower.vision_model.encoder.layers.20.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 28803936 }, { "name": "vision_tower.vision_model.encoder.layers.20.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 28806240 }, { "name": "vision_tower.vision_model.encoder.layers.20.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 28808544 }, { "name": "vision_tower.vision_model.encoder.layers.20.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 28810848 }, { "name": "vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 28813152 }, { "name": "vision_tower.vision_model.encoder.layers.20.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 28821760 }, { "name": "vision_tower.vision_model.encoder.layers.20.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 31300864 }, { "name": "vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 31610752 } ], "md5sum": "d864ea3d1a8877f30ea4d857773d4765" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 33121024, "records": [ { "name": "vision_tower.vision_model.encoder.layers.20.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 0 }, { "name": "vision_tower.vision_model.encoder.layers.20.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 2488320 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 2799360 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 2801664 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 3465216 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 3548160 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 3550464 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 4214016 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 4296960 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 4299264 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 4962816 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 5045760 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 5048064 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 5711616 }, { "name": "vision_tower.vision_model.encoder.layers.21.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 5794560 }, { "name": "vision_tower.vision_model.encoder.layers.21.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 5796864 }, { "name": "vision_tower.vision_model.encoder.layers.21.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 5799168 }, { "name": "vision_tower.vision_model.encoder.layers.21.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 5801472 }, { "name": "vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 5803776 }, { "name": "vision_tower.vision_model.encoder.layers.21.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 5812384 }, { "name": "vision_tower.vision_model.encoder.layers.21.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 8291488 }, { "name": "vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 8601376 }, { "name": "vision_tower.vision_model.encoder.layers.21.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 8603680 }, { "name": "vision_tower.vision_model.encoder.layers.21.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 11092000 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 11403040 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 11405344 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 12068896 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 12151840 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 12154144 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 12817696 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 12900640 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 12902944 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 13566496 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 13649440 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 13651744 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 14315296 }, { "name": "vision_tower.vision_model.encoder.layers.22.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 14398240 }, { "name": "vision_tower.vision_model.encoder.layers.22.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 14400544 }, { "name": "vision_tower.vision_model.encoder.layers.22.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 14402848 }, { "name": "vision_tower.vision_model.encoder.layers.22.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 14405152 }, { "name": "vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 14407456 }, { "name": "vision_tower.vision_model.encoder.layers.22.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 14416064 }, { "name": "vision_tower.vision_model.encoder.layers.22.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 16895168 }, { "name": "vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17205056 }, { "name": "vision_tower.vision_model.encoder.layers.22.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 17207360 }, { "name": "vision_tower.vision_model.encoder.layers.22.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 19695680 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 20006720 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 20009024 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 20672576 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 20755520 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 20757824 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 21421376 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 21504320 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 21506624 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 22170176 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 22253120 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 22255424 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 22918976 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 23001920 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 23004224 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 23667776 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 23750720 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 23753024 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 24416576 }, { "name": "vision_tower.vision_model.encoder.layers.3.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 24499520 }, { "name": "vision_tower.vision_model.encoder.layers.3.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 24501824 }, { "name": "vision_tower.vision_model.encoder.layers.3.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 24504128 }, { "name": "vision_tower.vision_model.encoder.layers.3.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 24506432 }, { "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 24508736 }, { "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 24517344 }, { "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 26996448 }, { "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 27306336 }, { "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 27308640 }, { "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 29796960 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 30108000 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 30110304 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 30773856 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 30856800 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 30859104 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 31522656 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 31605600 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 31607904 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 32271456 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 32354400 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 32356704 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 33020256 }, { "name": "vision_tower.vision_model.encoder.layers.4.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 33103200 }, { "name": "vision_tower.vision_model.encoder.layers.4.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 33105504 }, { "name": "vision_tower.vision_model.encoder.layers.4.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 33107808 }, { "name": "vision_tower.vision_model.encoder.layers.4.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 33110112 }, { "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 33112416 } ], "md5sum": "0a29e36bbdabb1f50b2eae09a70a3c06" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 32901600, "records": [ { "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 0 }, { "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 2479104 }, { "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 2788992 }, { "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 2791296 }, { "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 5279616 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 5590656 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 5592960 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 6256512 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 6339456 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 6341760 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 7005312 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 7088256 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 7090560 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 7754112 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 7837056 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 7839360 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 8502912 }, { "name": "vision_tower.vision_model.encoder.layers.5.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 8585856 }, { "name": "vision_tower.vision_model.encoder.layers.5.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 8588160 }, { "name": "vision_tower.vision_model.encoder.layers.5.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 8590464 }, { "name": "vision_tower.vision_model.encoder.layers.5.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 8592768 }, { "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 8595072 }, { "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 8603680 }, { "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 11082784 }, { "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 11392672 }, { "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 11394976 }, { "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 13883296 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 14194336 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 14196640 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 14860192 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 14943136 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 14945440 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 15608992 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 15691936 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 15694240 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 16357792 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 16440736 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 16443040 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 17106592 }, { "name": "vision_tower.vision_model.encoder.layers.6.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17189536 }, { "name": "vision_tower.vision_model.encoder.layers.6.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17191840 }, { "name": "vision_tower.vision_model.encoder.layers.6.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17194144 }, { "name": "vision_tower.vision_model.encoder.layers.6.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17196448 }, { "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 17198752 }, { "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 17207360 }, { "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 19686464 }, { "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 19996352 }, { "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 19998656 }, { "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 22486976 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 22798016 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 22800320 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 23463872 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 23546816 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 23549120 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 24212672 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 24295616 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 24297920 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 24961472 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 25044416 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 25046720 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 25710272 }, { "name": "vision_tower.vision_model.encoder.layers.7.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 25793216 }, { "name": "vision_tower.vision_model.encoder.layers.7.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 25795520 }, { "name": "vision_tower.vision_model.encoder.layers.7.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 25797824 }, { "name": "vision_tower.vision_model.encoder.layers.7.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 25800128 }, { "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 25802432 }, { "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 25811040 }, { "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 28290144 }, { "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 28600032 }, { "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 28602336 }, { "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 31090656 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 31401696 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 31404000 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 32067552 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 32150496 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 32152800 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 32816352 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 32899296 } ], "md5sum": "73999b155901a73b57825b4a62bf5363" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 18702656, "records": [ { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 0 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 663552 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 746496 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 748800 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 1412352 }, { "name": "vision_tower.vision_model.encoder.layers.8.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 1495296 }, { "name": "vision_tower.vision_model.encoder.layers.8.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 1497600 }, { "name": "vision_tower.vision_model.encoder.layers.8.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 1499904 }, { "name": "vision_tower.vision_model.encoder.layers.8.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 1502208 }, { "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 1504512 }, { "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 1513120 }, { "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 3992224 }, { "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 4302112 }, { "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 4304416 }, { "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 6792736 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 7103776 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 7106080 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 7769632 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 7852576 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 7854880 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 8518432 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 8601376 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 8603680 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 9267232 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 9350176 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 9352480 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 10016032 }, { "name": "vision_tower.vision_model.encoder.layers.9.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 10098976 }, { "name": "vision_tower.vision_model.encoder.layers.9.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 10101280 }, { "name": "vision_tower.vision_model.encoder.layers.9.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 10103584 }, { "name": "vision_tower.vision_model.encoder.layers.9.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 10105888 }, { "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 10108192 }, { "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 10116800 }, { "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 12595904 }, { "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 12905792 }, { "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 12908096 }, { "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 15396416 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 15707456 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 15709760 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 16373312 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 16456256 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 16458560 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 17122112 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17205056 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 17207360 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 17870912 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17953856 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 17956160 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 18619712 } ], "md5sum": "8610acfcb3bf679640fdb61a35cf2b21" } ] }