diff --git "a/DeepSeek_FFN_PF_lut6_chunk_01of08.mlmodelc/model.mil" "b/DeepSeek_FFN_PF_lut6_chunk_01of08.mlmodelc/model.mil" new file mode 100644--- /dev/null +++ "b/DeepSeek_FFN_PF_lut6_chunk_01of08.mlmodelc/model.mil" @@ -0,0 +1,1687 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3404.16.1"}, {"coremlc-version", "3404.23.1"}})] +{ + func infer(tensor causal_mask, tensor current_pos, tensor hidden_states, state> model_model_kv_cache_0, tensor position_ids) { + tensor model_model_layers_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12583040))))[name = string("model_model_layers_0_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_0_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12648640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15794432))))[name = string("model_model_layers_0_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_0_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15810880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18956672))))[name = string("model_model_layers_0_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_0_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18973120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63013376))))[name = string("model_model_layers_0_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_0_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63242816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107283072))))[name = string("model_model_layers_0_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_0_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107512512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151552768))))[name = string("model_model_layers_0_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151618368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(164201344))))[name = string("model_model_layers_1_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_1_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(164266944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167412736))))[name = string("model_model_layers_1_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_1_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167429184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170574976))))[name = string("model_model_layers_1_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_1_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170591424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214631680))))[name = string("model_model_layers_1_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_1_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214861120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(258901376))))[name = string("model_model_layers_1_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_1_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(259130816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303171072))))[name = string("model_model_layers_1_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303236672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315819648))))[name = string("model_model_layers_2_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_2_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315885248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319031040))))[name = string("model_model_layers_2_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_2_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319047488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322193280))))[name = string("model_model_layers_2_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_2_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322209728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366249984))))[name = string("model_model_layers_2_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_2_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366479424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410519680))))[name = string("model_model_layers_2_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_2_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410749120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(454789376))))[name = string("model_model_layers_2_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(454854976))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(467437952))))[name = string("model_model_layers_3_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_3_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(467503552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470649344))))[name = string("model_model_layers_3_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_3_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470665792))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473811584))))[name = string("model_model_layers_3_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_3_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473828032))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(517868288))))[name = string("model_model_layers_3_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_3_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518097728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562137984))))[name = string("model_model_layers_3_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_3_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562367424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(606407680))))[name = string("model_model_layers_3_mlp_down_proj_weight_palettized")]; + int32 var_35 = const()[name = string("op_35"), val = int32(-1)]; + int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; + tensor greater_equal_0 = greater_equal(x = current_pos, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; + int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(131072)]; + tensor add_0 = add(x = current_pos, y = slice_by_index_0)[name = string("add_0")]; + tensor select_0 = select(a = current_pos, b = add_0, cond = greater_equal_0)[name = string("select_0")]; + int32 var_144_axis_0 = const()[name = string("op_144_axis_0"), val = int32(1)]; + int32 var_144_batch_dims_0 = const()[name = string("op_144_batch_dims_0"), val = int32(0)]; + bool var_144_validate_indices_0 = const()[name = string("op_144_validate_indices_0"), val = bool(false)]; + tensor var_40_to_fp16 = const()[name = string("op_40_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(606473280)))]; + tensor var_144_cast_fp16 = gather(axis = var_144_axis_0, batch_dims = var_144_batch_dims_0, indices = select_0, validate_indices = var_144_validate_indices_0, x = var_40_to_fp16)[name = string("op_144_cast_fp16")]; + tensor var_145 = const()[name = string("op_145"), val = tensor([1, 1, 1, -1])]; + tensor sin_1_cast_fp16 = reshape(shape = var_145, x = var_144_cast_fp16)[name = string("sin_1_cast_fp16")]; + int32 var_149_axis_0 = const()[name = string("op_149_axis_0"), val = int32(1)]; + int32 var_149_batch_dims_0 = const()[name = string("op_149_batch_dims_0"), val = int32(0)]; + bool var_149_validate_indices_0 = const()[name = string("op_149_validate_indices_0"), val = bool(false)]; + tensor var_34_to_fp16 = const()[name = string("op_34_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(640027776)))]; + tensor var_149_cast_fp16 = gather(axis = var_149_axis_0, batch_dims = var_149_batch_dims_0, indices = select_0, validate_indices = var_149_validate_indices_0, x = var_34_to_fp16)[name = string("op_149_cast_fp16")]; + tensor var_150 = const()[name = string("op_150"), val = tensor([1, 1, 1, -1])]; + tensor cos_1_cast_fp16 = reshape(shape = var_150, x = var_149_cast_fp16)[name = string("cos_1_cast_fp16")]; + tensor mean_1_axes_0 = const()[name = string("mean_1_axes_0"), val = tensor([-1])]; + bool mean_1_keep_dims_0 = const()[name = string("mean_1_keep_dims_0"), val = bool(true)]; + tensor mean_1_cast_fp16 = reduce_mean(axes = mean_1_axes_0, keep_dims = mean_1_keep_dims_0, x = hidden_states)[name = string("mean_1_cast_fp16")]; + tensor input_1_cast_fp16 = sub(x = hidden_states, y = mean_1_cast_fp16)[name = string("input_1_cast_fp16")]; + tensor var_158_axes_0 = const()[name = string("op_158_axes_0"), val = tensor([-1])]; + tensor model_model_layers_0_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(673582272)))]; + fp16 var_30_to_fp16 = const()[name = string("op_30_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_158_cast_fp16 = layer_norm(axes = var_158_axes_0, epsilon = var_30_to_fp16, gamma = model_model_layers_0_input_layernorm_weight_to_fp16, x = input_1_cast_fp16)[name = string("op_158_cast_fp16")]; + tensor var_161 = const()[name = string("op_161"), val = tensor([0, 2, 1])]; + tensor var_163_axes_0 = const()[name = string("op_163_axes_0"), val = tensor([2])]; + tensor var_162 = transpose(perm = var_161, x = var_158_cast_fp16)[name = string("transpose_15")]; + tensor var_163 = expand_dims(axes = var_163_axes_0, x = var_162)[name = string("op_163")]; + string var_170_pad_type_0 = const()[name = string("op_170_pad_type_0"), val = string("valid")]; + tensor var_170_strides_0 = const()[name = string("op_170_strides_0"), val = tensor([1, 1])]; + tensor var_170_pad_0 = const()[name = string("op_170_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_170_dilations_0 = const()[name = string("op_170_dilations_0"), val = tensor([1, 1])]; + int32 var_170_groups_0 = const()[name = string("op_170_groups_0"), val = int32(1)]; + tensor var_170 = conv(dilations = var_170_dilations_0, groups = var_170_groups_0, pad = var_170_pad_0, pad_type = var_170_pad_type_0, strides = var_170_strides_0, weight = model_model_layers_0_self_attn_q_proj_weight_palettized, x = var_163)[name = string("op_170")]; + tensor var_171 = const()[name = string("op_171"), val = tensor([1, 32, 1, 128])]; + tensor var_172 = reshape(shape = var_171, x = var_170)[name = string("op_172")]; + string var_179_pad_type_0 = const()[name = string("op_179_pad_type_0"), val = string("valid")]; + tensor var_179_strides_0 = const()[name = string("op_179_strides_0"), val = tensor([1, 1])]; + tensor var_179_pad_0 = const()[name = string("op_179_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_179_dilations_0 = const()[name = string("op_179_dilations_0"), val = tensor([1, 1])]; + int32 var_179_groups_0 = const()[name = string("op_179_groups_0"), val = int32(1)]; + tensor var_179 = conv(dilations = var_179_dilations_0, groups = var_179_groups_0, pad = var_179_pad_0, pad_type = var_179_pad_type_0, strides = var_179_strides_0, weight = model_model_layers_0_self_attn_k_proj_weight_palettized, x = var_163)[name = string("op_179")]; + tensor var_180 = const()[name = string("op_180"), val = tensor([1, 8, 1, 128])]; + tensor var_181 = reshape(shape = var_180, x = var_179)[name = string("op_181")]; + string var_188_pad_type_0 = const()[name = string("op_188_pad_type_0"), val = string("valid")]; + tensor var_188_strides_0 = const()[name = string("op_188_strides_0"), val = tensor([1, 1])]; + tensor var_188_pad_0 = const()[name = string("op_188_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_188_dilations_0 = const()[name = string("op_188_dilations_0"), val = tensor([1, 1])]; + int32 var_188_groups_0 = const()[name = string("op_188_groups_0"), val = int32(1)]; + tensor var_188 = conv(dilations = var_188_dilations_0, groups = var_188_groups_0, pad = var_188_pad_0, pad_type = var_188_pad_type_0, strides = var_188_strides_0, weight = model_model_layers_0_self_attn_v_proj_weight_palettized, x = var_163)[name = string("op_188")]; + tensor var_189 = const()[name = string("op_189"), val = tensor([1, 8, 1, 128])]; + tensor var_190 = reshape(shape = var_189, x = var_188)[name = string("op_190")]; + tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 32, 1, 64])]; + tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_1 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = var_172)[name = string("x1_1")]; + tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 32, 1, 128])]; + tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_1 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = var_172)[name = string("x2_1")]; + tensor cos_3_begin_0 = const()[name = string("cos_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor cos_3_end_0 = const()[name = string("cos_3_end_0"), val = tensor([1, 1, 1, 64])]; + tensor cos_3_end_mask_0 = const()[name = string("cos_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor cos_3_cast_fp16 = slice_by_index(begin = cos_3_begin_0, end = cos_3_end_0, end_mask = cos_3_end_mask_0, x = cos_1_cast_fp16)[name = string("cos_3_cast_fp16")]; + tensor sin_3_begin_0 = const()[name = string("sin_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor sin_3_end_0 = const()[name = string("sin_3_end_0"), val = tensor([1, 1, 1, 64])]; + tensor sin_3_end_mask_0 = const()[name = string("sin_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor sin_3_cast_fp16 = slice_by_index(begin = sin_3_begin_0, end = sin_3_end_0, end_mask = sin_3_end_mask_0, x = sin_1_cast_fp16)[name = string("sin_3_cast_fp16")]; + tensor var_204_cast_fp16 = mul(x = x1_1, y = cos_3_cast_fp16)[name = string("op_204_cast_fp16")]; + tensor var_205_cast_fp16 = mul(x = x2_1, y = sin_3_cast_fp16)[name = string("op_205_cast_fp16")]; + tensor var_206_cast_fp16 = sub(x = var_204_cast_fp16, y = var_205_cast_fp16)[name = string("op_206_cast_fp16")]; + tensor var_207_cast_fp16 = mul(x = x2_1, y = cos_3_cast_fp16)[name = string("op_207_cast_fp16")]; + tensor var_208_cast_fp16 = mul(x = x1_1, y = sin_3_cast_fp16)[name = string("op_208_cast_fp16")]; + tensor var_209_cast_fp16 = add(x = var_207_cast_fp16, y = var_208_cast_fp16)[name = string("op_209_cast_fp16")]; + bool rotated_1_interleave_0 = const()[name = string("rotated_1_interleave_0"), val = bool(false)]; + tensor rotated_1_cast_fp16 = concat(axis = var_35, interleave = rotated_1_interleave_0, values = (var_206_cast_fp16, var_209_cast_fp16))[name = string("rotated_1_cast_fp16")]; + tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_3 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = var_181)[name = string("x1_3")]; + tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_3 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = var_181)[name = string("x2_3")]; + tensor var_225_cast_fp16 = mul(x = x1_3, y = cos_3_cast_fp16)[name = string("op_225_cast_fp16")]; + tensor var_226_cast_fp16 = mul(x = x2_3, y = sin_3_cast_fp16)[name = string("op_226_cast_fp16")]; + tensor var_227_cast_fp16 = sub(x = var_225_cast_fp16, y = var_226_cast_fp16)[name = string("op_227_cast_fp16")]; + tensor var_228_cast_fp16 = mul(x = x2_3, y = cos_3_cast_fp16)[name = string("op_228_cast_fp16")]; + tensor var_229_cast_fp16 = mul(x = x1_3, y = sin_3_cast_fp16)[name = string("op_229_cast_fp16")]; + tensor var_230_cast_fp16 = add(x = var_228_cast_fp16, y = var_229_cast_fp16)[name = string("op_230_cast_fp16")]; + bool rotated_3_interleave_0 = const()[name = string("rotated_3_interleave_0"), val = bool(false)]; + tensor rotated_3_cast_fp16 = concat(axis = var_35, interleave = rotated_3_interleave_0, values = (var_227_cast_fp16, var_230_cast_fp16))[name = string("rotated_3_cast_fp16")]; + int32 var_234 = const()[name = string("op_234"), val = int32(1)]; + tensor var_235 = add(x = current_pos, y = var_234)[name = string("op_235")]; + tensor read_state_0 = read_state(input = model_model_kv_cache_0)[name = string("read_state_0")]; + tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([0])]; + tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; + tensor expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor([0])]; + tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([1])]; + int32 concat_2_axis_0 = const()[name = string("concat_2_axis_0"), val = int32(0)]; + bool concat_2_interleave_0 = const()[name = string("concat_2_interleave_0"), val = bool(false)]; + tensor concat_2 = concat(axis = concat_2_axis_0, interleave = concat_2_interleave_0, values = (expand_dims_0, expand_dims_1, current_pos, expand_dims_3))[name = string("concat_2")]; + tensor concat_3_values1_0 = const()[name = string("concat_3_values1_0"), val = tensor([0])]; + tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; + int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)]; + bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; + tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_4, concat_3_values1_0, var_235, concat_3_values3_0))[name = string("concat_3")]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_1_stride_0, update = rotated_3_cast_fp16, x = read_state_0)[name = string("model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_0_write_state")]; + tensor coreml_update_state_8 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_0")]; + tensor expand_dims_6 = const()[name = string("expand_dims_6"), val = tensor([32])]; + tensor expand_dims_7 = const()[name = string("expand_dims_7"), val = tensor([0])]; + tensor expand_dims_9 = const()[name = string("expand_dims_9"), val = tensor([0])]; + tensor expand_dims_10 = const()[name = string("expand_dims_10"), val = tensor([33])]; + int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; + bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; + tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (expand_dims_6, expand_dims_7, current_pos, expand_dims_9))[name = string("concat_6")]; + tensor concat_7_values1_0 = const()[name = string("concat_7_values1_0"), val = tensor([0])]; + tensor concat_7_values3_0 = const()[name = string("concat_7_values3_0"), val = tensor([0])]; + int32 concat_7_axis_0 = const()[name = string("concat_7_axis_0"), val = int32(0)]; + bool concat_7_interleave_0 = const()[name = string("concat_7_interleave_0"), val = bool(false)]; + tensor concat_7 = concat(axis = concat_7_axis_0, interleave = concat_7_interleave_0, values = (expand_dims_10, concat_7_values1_0, var_235, concat_7_values3_0))[name = string("concat_7")]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_6, begin_mask = model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0, end = concat_7, end_mask = model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_2_stride_0, update = var_190, x = coreml_update_state_8)[name = string("model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_1_write_state")]; + tensor coreml_update_state_9 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_1")]; + tensor var_250_begin_0 = const()[name = string("op_250_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_250_end_0 = const()[name = string("op_250_end_0"), val = tensor([1, 8, 1024, 128])]; + tensor var_250_end_mask_0 = const()[name = string("op_250_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_250_cast_fp16 = slice_by_index(begin = var_250_begin_0, end = var_250_end_0, end_mask = var_250_end_mask_0, x = coreml_update_state_9)[name = string("op_250_cast_fp16")]; + tensor K_layer_cache_1_axes_0 = const()[name = string("K_layer_cache_1_axes_0"), val = tensor([0])]; + tensor K_layer_cache_1_cast_fp16 = squeeze(axes = K_layer_cache_1_axes_0, x = var_250_cast_fp16)[name = string("K_layer_cache_1_cast_fp16")]; + tensor var_252_begin_0 = const()[name = string("op_252_begin_0"), val = tensor([32, 0, 0, 0])]; + tensor var_252_end_0 = const()[name = string("op_252_end_0"), val = tensor([33, 8, 1024, 128])]; + tensor var_252_end_mask_0 = const()[name = string("op_252_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_252_cast_fp16 = slice_by_index(begin = var_252_begin_0, end = var_252_end_0, end_mask = var_252_end_mask_0, x = coreml_update_state_9)[name = string("op_252_cast_fp16")]; + tensor V_layer_cache_1_axes_0 = const()[name = string("V_layer_cache_1_axes_0"), val = tensor([0])]; + tensor V_layer_cache_1_cast_fp16 = squeeze(axes = V_layer_cache_1_axes_0, x = var_252_cast_fp16)[name = string("V_layer_cache_1_cast_fp16")]; + tensor x_11_axes_0 = const()[name = string("x_11_axes_0"), val = tensor([1])]; + tensor x_11_cast_fp16 = expand_dims(axes = x_11_axes_0, x = K_layer_cache_1_cast_fp16)[name = string("x_11_cast_fp16")]; + tensor var_261 = const()[name = string("op_261"), val = tensor([1, 4, 1, 1])]; + tensor x_13_cast_fp16 = tile(reps = var_261, x = x_11_cast_fp16)[name = string("x_13_cast_fp16")]; + tensor var_265 = const()[name = string("op_265"), val = tensor([1, -1, 1024, 128])]; + tensor key_states_3_cast_fp16 = reshape(shape = var_265, x = x_13_cast_fp16)[name = string("key_states_3_cast_fp16")]; + tensor x_17_axes_0 = const()[name = string("x_17_axes_0"), val = tensor([1])]; + tensor x_17_cast_fp16 = expand_dims(axes = x_17_axes_0, x = V_layer_cache_1_cast_fp16)[name = string("x_17_cast_fp16")]; + tensor var_268 = const()[name = string("op_268"), val = tensor([1, 4, 1, 1])]; + tensor x_19_cast_fp16 = tile(reps = var_268, x = x_17_cast_fp16)[name = string("x_19_cast_fp16")]; + tensor var_272 = const()[name = string("op_272"), val = tensor([1, -1, 1024, 128])]; + tensor value_states_3_cast_fp16 = reshape(shape = var_272, x = x_19_cast_fp16)[name = string("value_states_3_cast_fp16")]; + bool var_275_transpose_x_1 = const()[name = string("op_275_transpose_x_1"), val = bool(false)]; + bool var_275_transpose_y_1 = const()[name = string("op_275_transpose_y_1"), val = bool(true)]; + tensor var_275_cast_fp16 = matmul(transpose_x = var_275_transpose_x_1, transpose_y = var_275_transpose_y_1, x = rotated_1_cast_fp16, y = key_states_3_cast_fp16)[name = string("op_275_cast_fp16")]; + fp16 var_276_to_fp16 = const()[name = string("op_276_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_1_cast_fp16 = mul(x = var_275_cast_fp16, y = var_276_to_fp16)[name = string("attn_weights_1_cast_fp16")]; + tensor x_21_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask)[name = string("x_21_cast_fp16")]; + tensor reduce_max_0_axes_0 = const()[name = string("reduce_max_0_axes_0"), val = tensor([-1])]; + bool reduce_max_0_keep_dims_0 = const()[name = string("reduce_max_0_keep_dims_0"), val = bool(true)]; + tensor reduce_max_0_cast_fp16 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = x_21_cast_fp16)[name = string("reduce_max_0_cast_fp16")]; + tensor x_23_cast_fp16 = sub(x = x_21_cast_fp16, y = reduce_max_0_cast_fp16)[name = string("x_23_cast_fp16")]; + tensor exp_x_1_cast_fp16 = exp(x = x_23_cast_fp16)[name = string("exp_x_1_cast_fp16")]; + tensor var_287_axes_0 = const()[name = string("op_287_axes_0"), val = tensor([-1])]; + bool var_287_keep_dims_0 = const()[name = string("op_287_keep_dims_0"), val = bool(true)]; + tensor var_287_cast_fp16 = reduce_sum(axes = var_287_axes_0, keep_dims = var_287_keep_dims_0, x = exp_x_1_cast_fp16)[name = string("op_287_cast_fp16")]; + tensor attn_weights_3_cast_fp16 = real_div(x = exp_x_1_cast_fp16, y = var_287_cast_fp16)[name = string("attn_weights_3_cast_fp16")]; + bool attn_output_1_transpose_x_0 = const()[name = string("attn_output_1_transpose_x_0"), val = bool(false)]; + bool attn_output_1_transpose_y_0 = const()[name = string("attn_output_1_transpose_y_0"), val = bool(false)]; + tensor attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_0, transpose_y = attn_output_1_transpose_y_0, x = attn_weights_3_cast_fp16, y = value_states_3_cast_fp16)[name = string("attn_output_1_cast_fp16")]; + tensor var_290_perm_0 = const()[name = string("op_290_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_292 = const()[name = string("op_292"), val = tensor([1, 1, 4096])]; + tensor var_290_cast_fp16 = transpose(perm = var_290_perm_0, x = attn_output_1_cast_fp16)[name = string("transpose_14")]; + tensor input_5_cast_fp16 = reshape(shape = var_292, x = var_290_cast_fp16)[name = string("input_5_cast_fp16")]; + tensor model_model_layers_0_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(673590528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686173504))))[name = string("model_model_layers_0_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686239104)))]; + tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_0_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_5_cast_fp16)[name = string("linear_0_cast_fp16")]; + tensor hidden_states_5_cast_fp16 = add(x = hidden_states, y = linear_0_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; + tensor mean_3_axes_0 = const()[name = string("mean_3_axes_0"), val = tensor([-1])]; + bool mean_3_keep_dims_0 = const()[name = string("mean_3_keep_dims_0"), val = bool(true)]; + tensor mean_3_cast_fp16 = reduce_mean(axes = mean_3_axes_0, keep_dims = mean_3_keep_dims_0, x = hidden_states_5_cast_fp16)[name = string("mean_3_cast_fp16")]; + tensor input_7_cast_fp16 = sub(x = hidden_states_5_cast_fp16, y = mean_3_cast_fp16)[name = string("input_7_cast_fp16")]; + tensor var_303_axes_0 = const()[name = string("op_303_axes_0"), val = tensor([-1])]; + tensor model_model_layers_0_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686247360)))]; + tensor var_303_cast_fp16 = layer_norm(axes = var_303_axes_0, epsilon = var_30_to_fp16, gamma = model_model_layers_0_post_attention_layernorm_weight_to_fp16, x = input_7_cast_fp16)[name = string("op_303_cast_fp16")]; + tensor var_310 = const()[name = string("op_310"), val = tensor([0, 2, 1])]; + tensor input_9_axes_0 = const()[name = string("input_9_axes_0"), val = tensor([2])]; + tensor var_311 = transpose(perm = var_310, x = var_303_cast_fp16)[name = string("transpose_13")]; + tensor input_9 = expand_dims(axes = input_9_axes_0, x = var_311)[name = string("input_9")]; + string input_11_pad_type_0 = const()[name = string("input_11_pad_type_0"), val = string("valid")]; + tensor input_11_strides_0 = const()[name = string("input_11_strides_0"), val = tensor([1, 1])]; + tensor input_11_pad_0 = const()[name = string("input_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_11_dilations_0 = const()[name = string("input_11_dilations_0"), val = tensor([1, 1])]; + int32 input_11_groups_0 = const()[name = string("input_11_groups_0"), val = int32(1)]; + tensor input_11 = conv(dilations = input_11_dilations_0, groups = input_11_groups_0, pad = input_11_pad_0, pad_type = input_11_pad_type_0, strides = input_11_strides_0, weight = model_model_layers_0_mlp_gate_proj_weight_palettized, x = input_9)[name = string("input_11")]; + string up_states_1_pad_type_0 = const()[name = string("up_states_1_pad_type_0"), val = string("valid")]; + tensor up_states_1_strides_0 = const()[name = string("up_states_1_strides_0"), val = tensor([1, 1])]; + tensor up_states_1_pad_0 = const()[name = string("up_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_1_dilations_0 = const()[name = string("up_states_1_dilations_0"), val = tensor([1, 1])]; + int32 up_states_1_groups_0 = const()[name = string("up_states_1_groups_0"), val = int32(1)]; + tensor up_states_1 = conv(dilations = up_states_1_dilations_0, groups = up_states_1_groups_0, pad = up_states_1_pad_0, pad_type = up_states_1_pad_type_0, strides = up_states_1_strides_0, weight = model_model_layers_0_mlp_up_proj_weight_palettized, x = input_9)[name = string("up_states_1")]; + tensor gate_states_1 = silu(x = input_11)[name = string("gate_states_1")]; + tensor input_13 = mul(x = gate_states_1, y = up_states_1)[name = string("input_13")]; + string hidden_states_7_pad_type_0 = const()[name = string("hidden_states_7_pad_type_0"), val = string("valid")]; + tensor hidden_states_7_strides_0 = const()[name = string("hidden_states_7_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_7_pad_0 = const()[name = string("hidden_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_7_dilations_0 = const()[name = string("hidden_states_7_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_7_groups_0 = const()[name = string("hidden_states_7_groups_0"), val = int32(1)]; + tensor hidden_states_7 = conv(dilations = hidden_states_7_dilations_0, groups = hidden_states_7_groups_0, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = hidden_states_7_strides_0, weight = model_model_layers_0_mlp_down_proj_weight_palettized, x = input_13)[name = string("hidden_states_7")]; + tensor var_333_axes_0 = const()[name = string("op_333_axes_0"), val = tensor([2])]; + tensor var_333 = squeeze(axes = var_333_axes_0, x = hidden_states_7)[name = string("op_333")]; + tensor var_334 = const()[name = string("op_334"), val = tensor([0, 2, 1])]; + tensor var_335 = transpose(perm = var_334, x = var_333)[name = string("transpose_12")]; + tensor hidden_states_9_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = var_335)[name = string("hidden_states_9_cast_fp16")]; + tensor mean_5_axes_0 = const()[name = string("mean_5_axes_0"), val = tensor([-1])]; + bool mean_5_keep_dims_0 = const()[name = string("mean_5_keep_dims_0"), val = bool(true)]; + tensor mean_5_cast_fp16 = reduce_mean(axes = mean_5_axes_0, keep_dims = mean_5_keep_dims_0, x = hidden_states_9_cast_fp16)[name = string("mean_5_cast_fp16")]; + tensor input_15_cast_fp16 = sub(x = hidden_states_9_cast_fp16, y = mean_5_cast_fp16)[name = string("input_15_cast_fp16")]; + tensor var_343_axes_0 = const()[name = string("op_343_axes_0"), val = tensor([-1])]; + tensor model_model_layers_1_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686255616)))]; + tensor var_343_cast_fp16 = layer_norm(axes = var_343_axes_0, epsilon = var_30_to_fp16, gamma = model_model_layers_1_input_layernorm_weight_to_fp16, x = input_15_cast_fp16)[name = string("op_343_cast_fp16")]; + tensor var_346 = const()[name = string("op_346"), val = tensor([0, 2, 1])]; + tensor var_348_axes_0 = const()[name = string("op_348_axes_0"), val = tensor([2])]; + tensor var_347 = transpose(perm = var_346, x = var_343_cast_fp16)[name = string("transpose_11")]; + tensor var_348 = expand_dims(axes = var_348_axes_0, x = var_347)[name = string("op_348")]; + string var_355_pad_type_0 = const()[name = string("op_355_pad_type_0"), val = string("valid")]; + tensor var_355_strides_0 = const()[name = string("op_355_strides_0"), val = tensor([1, 1])]; + tensor var_355_pad_0 = const()[name = string("op_355_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_355_dilations_0 = const()[name = string("op_355_dilations_0"), val = tensor([1, 1])]; + int32 var_355_groups_0 = const()[name = string("op_355_groups_0"), val = int32(1)]; + tensor var_355 = conv(dilations = var_355_dilations_0, groups = var_355_groups_0, pad = var_355_pad_0, pad_type = var_355_pad_type_0, strides = var_355_strides_0, weight = model_model_layers_1_self_attn_q_proj_weight_palettized, x = var_348)[name = string("op_355")]; + tensor var_356 = const()[name = string("op_356"), val = tensor([1, 32, 1, 128])]; + tensor var_357 = reshape(shape = var_356, x = var_355)[name = string("op_357")]; + string var_364_pad_type_0 = const()[name = string("op_364_pad_type_0"), val = string("valid")]; + tensor var_364_strides_0 = const()[name = string("op_364_strides_0"), val = tensor([1, 1])]; + tensor var_364_pad_0 = const()[name = string("op_364_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_364_dilations_0 = const()[name = string("op_364_dilations_0"), val = tensor([1, 1])]; + int32 var_364_groups_0 = const()[name = string("op_364_groups_0"), val = int32(1)]; + tensor var_364 = conv(dilations = var_364_dilations_0, groups = var_364_groups_0, pad = var_364_pad_0, pad_type = var_364_pad_type_0, strides = var_364_strides_0, weight = model_model_layers_1_self_attn_k_proj_weight_palettized, x = var_348)[name = string("op_364")]; + tensor var_365 = const()[name = string("op_365"), val = tensor([1, 8, 1, 128])]; + tensor var_366 = reshape(shape = var_365, x = var_364)[name = string("op_366")]; + string var_373_pad_type_0 = const()[name = string("op_373_pad_type_0"), val = string("valid")]; + tensor var_373_strides_0 = const()[name = string("op_373_strides_0"), val = tensor([1, 1])]; + tensor var_373_pad_0 = const()[name = string("op_373_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_373_dilations_0 = const()[name = string("op_373_dilations_0"), val = tensor([1, 1])]; + int32 var_373_groups_0 = const()[name = string("op_373_groups_0"), val = int32(1)]; + tensor var_373 = conv(dilations = var_373_dilations_0, groups = var_373_groups_0, pad = var_373_pad_0, pad_type = var_373_pad_type_0, strides = var_373_strides_0, weight = model_model_layers_1_self_attn_v_proj_weight_palettized, x = var_348)[name = string("op_373")]; + tensor var_374 = const()[name = string("op_374"), val = tensor([1, 8, 1, 128])]; + tensor var_375 = reshape(shape = var_374, x = var_373)[name = string("op_375")]; + tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 32, 1, 64])]; + tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_5 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = var_357)[name = string("x1_5")]; + tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 32, 1, 128])]; + tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_5 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = var_357)[name = string("x2_5")]; + tensor var_389_cast_fp16 = mul(x = x1_5, y = cos_3_cast_fp16)[name = string("op_389_cast_fp16")]; + tensor var_390_cast_fp16 = mul(x = x2_5, y = sin_3_cast_fp16)[name = string("op_390_cast_fp16")]; + tensor var_391_cast_fp16 = sub(x = var_389_cast_fp16, y = var_390_cast_fp16)[name = string("op_391_cast_fp16")]; + tensor var_392_cast_fp16 = mul(x = x2_5, y = cos_3_cast_fp16)[name = string("op_392_cast_fp16")]; + tensor var_393_cast_fp16 = mul(x = x1_5, y = sin_3_cast_fp16)[name = string("op_393_cast_fp16")]; + tensor var_394_cast_fp16 = add(x = var_392_cast_fp16, y = var_393_cast_fp16)[name = string("op_394_cast_fp16")]; + bool rotated_5_interleave_0 = const()[name = string("rotated_5_interleave_0"), val = bool(false)]; + tensor rotated_5_cast_fp16 = concat(axis = var_35, interleave = rotated_5_interleave_0, values = (var_391_cast_fp16, var_394_cast_fp16))[name = string("rotated_5_cast_fp16")]; + tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_7 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = var_366)[name = string("x1_7")]; + tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_7 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = var_366)[name = string("x2_7")]; + tensor var_410_cast_fp16 = mul(x = x1_7, y = cos_3_cast_fp16)[name = string("op_410_cast_fp16")]; + tensor var_411_cast_fp16 = mul(x = x2_7, y = sin_3_cast_fp16)[name = string("op_411_cast_fp16")]; + tensor var_412_cast_fp16 = sub(x = var_410_cast_fp16, y = var_411_cast_fp16)[name = string("op_412_cast_fp16")]; + tensor var_413_cast_fp16 = mul(x = x2_7, y = cos_3_cast_fp16)[name = string("op_413_cast_fp16")]; + tensor var_414_cast_fp16 = mul(x = x1_7, y = sin_3_cast_fp16)[name = string("op_414_cast_fp16")]; + tensor var_415_cast_fp16 = add(x = var_413_cast_fp16, y = var_414_cast_fp16)[name = string("op_415_cast_fp16")]; + bool rotated_7_interleave_0 = const()[name = string("rotated_7_interleave_0"), val = bool(false)]; + tensor rotated_7_cast_fp16 = concat(axis = var_35, interleave = rotated_7_interleave_0, values = (var_412_cast_fp16, var_415_cast_fp16))[name = string("rotated_7_cast_fp16")]; + tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([1])]; + tensor expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor([0])]; + tensor expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor([0])]; + tensor expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor([2])]; + int32 concat_10_axis_0 = const()[name = string("concat_10_axis_0"), val = int32(0)]; + bool concat_10_interleave_0 = const()[name = string("concat_10_interleave_0"), val = bool(false)]; + tensor concat_10 = concat(axis = concat_10_axis_0, interleave = concat_10_interleave_0, values = (expand_dims_12, expand_dims_13, current_pos, expand_dims_15))[name = string("concat_10")]; + tensor concat_11_values1_0 = const()[name = string("concat_11_values1_0"), val = tensor([0])]; + tensor concat_11_values3_0 = const()[name = string("concat_11_values3_0"), val = tensor([0])]; + int32 concat_11_axis_0 = const()[name = string("concat_11_axis_0"), val = int32(0)]; + bool concat_11_interleave_0 = const()[name = string("concat_11_interleave_0"), val = bool(false)]; + tensor concat_11 = concat(axis = concat_11_axis_0, interleave = concat_11_interleave_0, values = (expand_dims_16, concat_11_values1_0, var_235, concat_11_values3_0))[name = string("concat_11")]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_10, begin_mask = model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0, end = concat_11, end_mask = model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_3_stride_0, update = rotated_7_cast_fp16, x = coreml_update_state_9)[name = string("model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_2_write_state")]; + tensor coreml_update_state_10 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_2")]; + tensor expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor([33])]; + tensor expand_dims_19 = const()[name = string("expand_dims_19"), val = tensor([0])]; + tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([0])]; + tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([34])]; + int32 concat_14_axis_0 = const()[name = string("concat_14_axis_0"), val = int32(0)]; + bool concat_14_interleave_0 = const()[name = string("concat_14_interleave_0"), val = bool(false)]; + tensor concat_14 = concat(axis = concat_14_axis_0, interleave = concat_14_interleave_0, values = (expand_dims_18, expand_dims_19, current_pos, expand_dims_21))[name = string("concat_14")]; + tensor concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = tensor([0])]; + tensor concat_15_values3_0 = const()[name = string("concat_15_values3_0"), val = tensor([0])]; + int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)]; + bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)]; + tensor concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (expand_dims_22, concat_15_values1_0, var_235, concat_15_values3_0))[name = string("concat_15")]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_14, begin_mask = model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0, end = concat_15, end_mask = model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_4_stride_0, update = var_375, x = coreml_update_state_10)[name = string("model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_3_write_state")]; + tensor coreml_update_state_11 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_3")]; + tensor var_435_begin_0 = const()[name = string("op_435_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_435_end_0 = const()[name = string("op_435_end_0"), val = tensor([2, 8, 1024, 128])]; + tensor var_435_end_mask_0 = const()[name = string("op_435_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_435_cast_fp16 = slice_by_index(begin = var_435_begin_0, end = var_435_end_0, end_mask = var_435_end_mask_0, x = coreml_update_state_11)[name = string("op_435_cast_fp16")]; + tensor K_layer_cache_3_axes_0 = const()[name = string("K_layer_cache_3_axes_0"), val = tensor([0])]; + tensor K_layer_cache_3_cast_fp16 = squeeze(axes = K_layer_cache_3_axes_0, x = var_435_cast_fp16)[name = string("K_layer_cache_3_cast_fp16")]; + tensor var_437_begin_0 = const()[name = string("op_437_begin_0"), val = tensor([33, 0, 0, 0])]; + tensor var_437_end_0 = const()[name = string("op_437_end_0"), val = tensor([34, 8, 1024, 128])]; + tensor var_437_end_mask_0 = const()[name = string("op_437_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_437_cast_fp16 = slice_by_index(begin = var_437_begin_0, end = var_437_end_0, end_mask = var_437_end_mask_0, x = coreml_update_state_11)[name = string("op_437_cast_fp16")]; + tensor V_layer_cache_3_axes_0 = const()[name = string("V_layer_cache_3_axes_0"), val = tensor([0])]; + tensor V_layer_cache_3_cast_fp16 = squeeze(axes = V_layer_cache_3_axes_0, x = var_437_cast_fp16)[name = string("V_layer_cache_3_cast_fp16")]; + tensor x_39_axes_0 = const()[name = string("x_39_axes_0"), val = tensor([1])]; + tensor x_39_cast_fp16 = expand_dims(axes = x_39_axes_0, x = K_layer_cache_3_cast_fp16)[name = string("x_39_cast_fp16")]; + tensor var_446 = const()[name = string("op_446"), val = tensor([1, 4, 1, 1])]; + tensor x_41_cast_fp16 = tile(reps = var_446, x = x_39_cast_fp16)[name = string("x_41_cast_fp16")]; + tensor var_450 = const()[name = string("op_450"), val = tensor([1, -1, 1024, 128])]; + tensor key_states_7_cast_fp16 = reshape(shape = var_450, x = x_41_cast_fp16)[name = string("key_states_7_cast_fp16")]; + tensor x_45_axes_0 = const()[name = string("x_45_axes_0"), val = tensor([1])]; + tensor x_45_cast_fp16 = expand_dims(axes = x_45_axes_0, x = V_layer_cache_3_cast_fp16)[name = string("x_45_cast_fp16")]; + tensor var_453 = const()[name = string("op_453"), val = tensor([1, 4, 1, 1])]; + tensor x_47_cast_fp16 = tile(reps = var_453, x = x_45_cast_fp16)[name = string("x_47_cast_fp16")]; + tensor var_457 = const()[name = string("op_457"), val = tensor([1, -1, 1024, 128])]; + tensor value_states_7_cast_fp16 = reshape(shape = var_457, x = x_47_cast_fp16)[name = string("value_states_7_cast_fp16")]; + bool var_460_transpose_x_1 = const()[name = string("op_460_transpose_x_1"), val = bool(false)]; + bool var_460_transpose_y_1 = const()[name = string("op_460_transpose_y_1"), val = bool(true)]; + tensor var_460_cast_fp16 = matmul(transpose_x = var_460_transpose_x_1, transpose_y = var_460_transpose_y_1, x = rotated_5_cast_fp16, y = key_states_7_cast_fp16)[name = string("op_460_cast_fp16")]; + fp16 var_461_to_fp16 = const()[name = string("op_461_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_5_cast_fp16 = mul(x = var_460_cast_fp16, y = var_461_to_fp16)[name = string("attn_weights_5_cast_fp16")]; + tensor x_49_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = causal_mask)[name = string("x_49_cast_fp16")]; + tensor reduce_max_1_axes_0 = const()[name = string("reduce_max_1_axes_0"), val = tensor([-1])]; + bool reduce_max_1_keep_dims_0 = const()[name = string("reduce_max_1_keep_dims_0"), val = bool(true)]; + tensor reduce_max_1_cast_fp16 = reduce_max(axes = reduce_max_1_axes_0, keep_dims = reduce_max_1_keep_dims_0, x = x_49_cast_fp16)[name = string("reduce_max_1_cast_fp16")]; + tensor x_51_cast_fp16 = sub(x = x_49_cast_fp16, y = reduce_max_1_cast_fp16)[name = string("x_51_cast_fp16")]; + tensor exp_x_3_cast_fp16 = exp(x = x_51_cast_fp16)[name = string("exp_x_3_cast_fp16")]; + tensor var_472_axes_0 = const()[name = string("op_472_axes_0"), val = tensor([-1])]; + bool var_472_keep_dims_0 = const()[name = string("op_472_keep_dims_0"), val = bool(true)]; + tensor var_472_cast_fp16 = reduce_sum(axes = var_472_axes_0, keep_dims = var_472_keep_dims_0, x = exp_x_3_cast_fp16)[name = string("op_472_cast_fp16")]; + tensor attn_weights_7_cast_fp16 = real_div(x = exp_x_3_cast_fp16, y = var_472_cast_fp16)[name = string("attn_weights_7_cast_fp16")]; + bool attn_output_7_transpose_x_0 = const()[name = string("attn_output_7_transpose_x_0"), val = bool(false)]; + bool attn_output_7_transpose_y_0 = const()[name = string("attn_output_7_transpose_y_0"), val = bool(false)]; + tensor attn_output_7_cast_fp16 = matmul(transpose_x = attn_output_7_transpose_x_0, transpose_y = attn_output_7_transpose_y_0, x = attn_weights_7_cast_fp16, y = value_states_7_cast_fp16)[name = string("attn_output_7_cast_fp16")]; + tensor var_475_perm_0 = const()[name = string("op_475_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_477 = const()[name = string("op_477"), val = tensor([1, 1, 4096])]; + tensor var_475_cast_fp16 = transpose(perm = var_475_perm_0, x = attn_output_7_cast_fp16)[name = string("transpose_10")]; + tensor input_19_cast_fp16 = reshape(shape = var_477, x = var_475_cast_fp16)[name = string("input_19_cast_fp16")]; + tensor model_model_layers_1_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686263872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698846848))))[name = string("model_model_layers_1_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_1_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_1_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_19_cast_fp16)[name = string("linear_1_cast_fp16")]; + tensor hidden_states_13_cast_fp16 = add(x = hidden_states_9_cast_fp16, y = linear_1_cast_fp16)[name = string("hidden_states_13_cast_fp16")]; + tensor mean_7_axes_0 = const()[name = string("mean_7_axes_0"), val = tensor([-1])]; + bool mean_7_keep_dims_0 = const()[name = string("mean_7_keep_dims_0"), val = bool(true)]; + tensor mean_7_cast_fp16 = reduce_mean(axes = mean_7_axes_0, keep_dims = mean_7_keep_dims_0, x = hidden_states_13_cast_fp16)[name = string("mean_7_cast_fp16")]; + tensor input_21_cast_fp16 = sub(x = hidden_states_13_cast_fp16, y = mean_7_cast_fp16)[name = string("input_21_cast_fp16")]; + tensor var_488_axes_0 = const()[name = string("op_488_axes_0"), val = tensor([-1])]; + tensor model_model_layers_1_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698912448)))]; + tensor var_488_cast_fp16 = layer_norm(axes = var_488_axes_0, epsilon = var_30_to_fp16, gamma = model_model_layers_1_post_attention_layernorm_weight_to_fp16, x = input_21_cast_fp16)[name = string("op_488_cast_fp16")]; + tensor var_495 = const()[name = string("op_495"), val = tensor([0, 2, 1])]; + tensor input_23_axes_0 = const()[name = string("input_23_axes_0"), val = tensor([2])]; + tensor var_496 = transpose(perm = var_495, x = var_488_cast_fp16)[name = string("transpose_9")]; + tensor input_23 = expand_dims(axes = input_23_axes_0, x = var_496)[name = string("input_23")]; + string input_25_pad_type_0 = const()[name = string("input_25_pad_type_0"), val = string("valid")]; + tensor input_25_strides_0 = const()[name = string("input_25_strides_0"), val = tensor([1, 1])]; + tensor input_25_pad_0 = const()[name = string("input_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_25_dilations_0 = const()[name = string("input_25_dilations_0"), val = tensor([1, 1])]; + int32 input_25_groups_0 = const()[name = string("input_25_groups_0"), val = int32(1)]; + tensor input_25 = conv(dilations = input_25_dilations_0, groups = input_25_groups_0, pad = input_25_pad_0, pad_type = input_25_pad_type_0, strides = input_25_strides_0, weight = model_model_layers_1_mlp_gate_proj_weight_palettized, x = input_23)[name = string("input_25")]; + string up_states_3_pad_type_0 = const()[name = string("up_states_3_pad_type_0"), val = string("valid")]; + tensor up_states_3_strides_0 = const()[name = string("up_states_3_strides_0"), val = tensor([1, 1])]; + tensor up_states_3_pad_0 = const()[name = string("up_states_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_3_dilations_0 = const()[name = string("up_states_3_dilations_0"), val = tensor([1, 1])]; + int32 up_states_3_groups_0 = const()[name = string("up_states_3_groups_0"), val = int32(1)]; + tensor up_states_3 = conv(dilations = up_states_3_dilations_0, groups = up_states_3_groups_0, pad = up_states_3_pad_0, pad_type = up_states_3_pad_type_0, strides = up_states_3_strides_0, weight = model_model_layers_1_mlp_up_proj_weight_palettized, x = input_23)[name = string("up_states_3")]; + tensor gate_states_3 = silu(x = input_25)[name = string("gate_states_3")]; + tensor input_27 = mul(x = gate_states_3, y = up_states_3)[name = string("input_27")]; + string hidden_states_15_pad_type_0 = const()[name = string("hidden_states_15_pad_type_0"), val = string("valid")]; + tensor hidden_states_15_strides_0 = const()[name = string("hidden_states_15_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_15_pad_0 = const()[name = string("hidden_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_15_dilations_0 = const()[name = string("hidden_states_15_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_15_groups_0 = const()[name = string("hidden_states_15_groups_0"), val = int32(1)]; + tensor hidden_states_15 = conv(dilations = hidden_states_15_dilations_0, groups = hidden_states_15_groups_0, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = hidden_states_15_strides_0, weight = model_model_layers_1_mlp_down_proj_weight_palettized, x = input_27)[name = string("hidden_states_15")]; + tensor var_518_axes_0 = const()[name = string("op_518_axes_0"), val = tensor([2])]; + tensor var_518 = squeeze(axes = var_518_axes_0, x = hidden_states_15)[name = string("op_518")]; + tensor var_519 = const()[name = string("op_519"), val = tensor([0, 2, 1])]; + tensor var_520 = transpose(perm = var_519, x = var_518)[name = string("transpose_8")]; + tensor hidden_states_17_cast_fp16 = add(x = hidden_states_13_cast_fp16, y = var_520)[name = string("hidden_states_17_cast_fp16")]; + tensor mean_9_axes_0 = const()[name = string("mean_9_axes_0"), val = tensor([-1])]; + bool mean_9_keep_dims_0 = const()[name = string("mean_9_keep_dims_0"), val = bool(true)]; + tensor mean_9_cast_fp16 = reduce_mean(axes = mean_9_axes_0, keep_dims = mean_9_keep_dims_0, x = hidden_states_17_cast_fp16)[name = string("mean_9_cast_fp16")]; + tensor input_29_cast_fp16 = sub(x = hidden_states_17_cast_fp16, y = mean_9_cast_fp16)[name = string("input_29_cast_fp16")]; + tensor var_528_axes_0 = const()[name = string("op_528_axes_0"), val = tensor([-1])]; + tensor model_model_layers_2_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698920704)))]; + tensor var_528_cast_fp16 = layer_norm(axes = var_528_axes_0, epsilon = var_30_to_fp16, gamma = model_model_layers_2_input_layernorm_weight_to_fp16, x = input_29_cast_fp16)[name = string("op_528_cast_fp16")]; + tensor var_531 = const()[name = string("op_531"), val = tensor([0, 2, 1])]; + tensor var_533_axes_0 = const()[name = string("op_533_axes_0"), val = tensor([2])]; + tensor var_532 = transpose(perm = var_531, x = var_528_cast_fp16)[name = string("transpose_7")]; + tensor var_533 = expand_dims(axes = var_533_axes_0, x = var_532)[name = string("op_533")]; + string var_540_pad_type_0 = const()[name = string("op_540_pad_type_0"), val = string("valid")]; + tensor var_540_strides_0 = const()[name = string("op_540_strides_0"), val = tensor([1, 1])]; + tensor var_540_pad_0 = const()[name = string("op_540_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_540_dilations_0 = const()[name = string("op_540_dilations_0"), val = tensor([1, 1])]; + int32 var_540_groups_0 = const()[name = string("op_540_groups_0"), val = int32(1)]; + tensor var_540 = conv(dilations = var_540_dilations_0, groups = var_540_groups_0, pad = var_540_pad_0, pad_type = var_540_pad_type_0, strides = var_540_strides_0, weight = model_model_layers_2_self_attn_q_proj_weight_palettized, x = var_533)[name = string("op_540")]; + tensor var_541 = const()[name = string("op_541"), val = tensor([1, 32, 1, 128])]; + tensor var_542 = reshape(shape = var_541, x = var_540)[name = string("op_542")]; + string var_549_pad_type_0 = const()[name = string("op_549_pad_type_0"), val = string("valid")]; + tensor var_549_strides_0 = const()[name = string("op_549_strides_0"), val = tensor([1, 1])]; + tensor var_549_pad_0 = const()[name = string("op_549_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_549_dilations_0 = const()[name = string("op_549_dilations_0"), val = tensor([1, 1])]; + int32 var_549_groups_0 = const()[name = string("op_549_groups_0"), val = int32(1)]; + tensor var_549 = conv(dilations = var_549_dilations_0, groups = var_549_groups_0, pad = var_549_pad_0, pad_type = var_549_pad_type_0, strides = var_549_strides_0, weight = model_model_layers_2_self_attn_k_proj_weight_palettized, x = var_533)[name = string("op_549")]; + tensor var_550 = const()[name = string("op_550"), val = tensor([1, 8, 1, 128])]; + tensor var_551 = reshape(shape = var_550, x = var_549)[name = string("op_551")]; + string var_558_pad_type_0 = const()[name = string("op_558_pad_type_0"), val = string("valid")]; + tensor var_558_strides_0 = const()[name = string("op_558_strides_0"), val = tensor([1, 1])]; + tensor var_558_pad_0 = const()[name = string("op_558_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_558_dilations_0 = const()[name = string("op_558_dilations_0"), val = tensor([1, 1])]; + int32 var_558_groups_0 = const()[name = string("op_558_groups_0"), val = int32(1)]; + tensor var_558 = conv(dilations = var_558_dilations_0, groups = var_558_groups_0, pad = var_558_pad_0, pad_type = var_558_pad_type_0, strides = var_558_strides_0, weight = model_model_layers_2_self_attn_v_proj_weight_palettized, x = var_533)[name = string("op_558")]; + tensor var_559 = const()[name = string("op_559"), val = tensor([1, 8, 1, 128])]; + tensor var_560 = reshape(shape = var_559, x = var_558)[name = string("op_560")]; + tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 32, 1, 64])]; + tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_9 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = var_542)[name = string("x1_9")]; + tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 32, 1, 128])]; + tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_9 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = var_542)[name = string("x2_9")]; + tensor var_574_cast_fp16 = mul(x = x1_9, y = cos_3_cast_fp16)[name = string("op_574_cast_fp16")]; + tensor var_575_cast_fp16 = mul(x = x2_9, y = sin_3_cast_fp16)[name = string("op_575_cast_fp16")]; + tensor var_576_cast_fp16 = sub(x = var_574_cast_fp16, y = var_575_cast_fp16)[name = string("op_576_cast_fp16")]; + tensor var_577_cast_fp16 = mul(x = x2_9, y = cos_3_cast_fp16)[name = string("op_577_cast_fp16")]; + tensor var_578_cast_fp16 = mul(x = x1_9, y = sin_3_cast_fp16)[name = string("op_578_cast_fp16")]; + tensor var_579_cast_fp16 = add(x = var_577_cast_fp16, y = var_578_cast_fp16)[name = string("op_579_cast_fp16")]; + bool rotated_9_interleave_0 = const()[name = string("rotated_9_interleave_0"), val = bool(false)]; + tensor rotated_9_cast_fp16 = concat(axis = var_35, interleave = rotated_9_interleave_0, values = (var_576_cast_fp16, var_579_cast_fp16))[name = string("rotated_9_cast_fp16")]; + tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_11 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = var_551)[name = string("x1_11")]; + tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_11 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = var_551)[name = string("x2_11")]; + tensor var_595_cast_fp16 = mul(x = x1_11, y = cos_3_cast_fp16)[name = string("op_595_cast_fp16")]; + tensor var_596_cast_fp16 = mul(x = x2_11, y = sin_3_cast_fp16)[name = string("op_596_cast_fp16")]; + tensor var_597_cast_fp16 = sub(x = var_595_cast_fp16, y = var_596_cast_fp16)[name = string("op_597_cast_fp16")]; + tensor var_598_cast_fp16 = mul(x = x2_11, y = cos_3_cast_fp16)[name = string("op_598_cast_fp16")]; + tensor var_599_cast_fp16 = mul(x = x1_11, y = sin_3_cast_fp16)[name = string("op_599_cast_fp16")]; + tensor var_600_cast_fp16 = add(x = var_598_cast_fp16, y = var_599_cast_fp16)[name = string("op_600_cast_fp16")]; + bool rotated_11_interleave_0 = const()[name = string("rotated_11_interleave_0"), val = bool(false)]; + tensor rotated_11_cast_fp16 = concat(axis = var_35, interleave = rotated_11_interleave_0, values = (var_597_cast_fp16, var_600_cast_fp16))[name = string("rotated_11_cast_fp16")]; + tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([2])]; + tensor expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor([0])]; + tensor expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor([0])]; + tensor expand_dims_28 = const()[name = string("expand_dims_28"), val = tensor([3])]; + int32 concat_18_axis_0 = const()[name = string("concat_18_axis_0"), val = int32(0)]; + bool concat_18_interleave_0 = const()[name = string("concat_18_interleave_0"), val = bool(false)]; + tensor concat_18 = concat(axis = concat_18_axis_0, interleave = concat_18_interleave_0, values = (expand_dims_24, expand_dims_25, current_pos, expand_dims_27))[name = string("concat_18")]; + tensor concat_19_values1_0 = const()[name = string("concat_19_values1_0"), val = tensor([0])]; + tensor concat_19_values3_0 = const()[name = string("concat_19_values3_0"), val = tensor([0])]; + int32 concat_19_axis_0 = const()[name = string("concat_19_axis_0"), val = int32(0)]; + bool concat_19_interleave_0 = const()[name = string("concat_19_interleave_0"), val = bool(false)]; + tensor concat_19 = concat(axis = concat_19_axis_0, interleave = concat_19_interleave_0, values = (expand_dims_28, concat_19_values1_0, var_235, concat_19_values3_0))[name = string("concat_19")]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_18, begin_mask = model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0, end = concat_19, end_mask = model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_5_stride_0, update = rotated_11_cast_fp16, x = coreml_update_state_11)[name = string("model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_4_write_state")]; + tensor coreml_update_state_12 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_4")]; + tensor expand_dims_30 = const()[name = string("expand_dims_30"), val = tensor([34])]; + tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([0])]; + tensor expand_dims_33 = const()[name = string("expand_dims_33"), val = tensor([0])]; + tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([35])]; + int32 concat_22_axis_0 = const()[name = string("concat_22_axis_0"), val = int32(0)]; + bool concat_22_interleave_0 = const()[name = string("concat_22_interleave_0"), val = bool(false)]; + tensor concat_22 = concat(axis = concat_22_axis_0, interleave = concat_22_interleave_0, values = (expand_dims_30, expand_dims_31, current_pos, expand_dims_33))[name = string("concat_22")]; + tensor concat_23_values1_0 = const()[name = string("concat_23_values1_0"), val = tensor([0])]; + tensor concat_23_values3_0 = const()[name = string("concat_23_values3_0"), val = tensor([0])]; + int32 concat_23_axis_0 = const()[name = string("concat_23_axis_0"), val = int32(0)]; + bool concat_23_interleave_0 = const()[name = string("concat_23_interleave_0"), val = bool(false)]; + tensor concat_23 = concat(axis = concat_23_axis_0, interleave = concat_23_interleave_0, values = (expand_dims_34, concat_23_values1_0, var_235, concat_23_values3_0))[name = string("concat_23")]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_22, begin_mask = model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0, end = concat_23, end_mask = model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_6_stride_0, update = var_560, x = coreml_update_state_12)[name = string("model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_5_write_state")]; + tensor coreml_update_state_13 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_5")]; + tensor var_620_begin_0 = const()[name = string("op_620_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor var_620_end_0 = const()[name = string("op_620_end_0"), val = tensor([3, 8, 1024, 128])]; + tensor var_620_end_mask_0 = const()[name = string("op_620_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_620_cast_fp16 = slice_by_index(begin = var_620_begin_0, end = var_620_end_0, end_mask = var_620_end_mask_0, x = coreml_update_state_13)[name = string("op_620_cast_fp16")]; + tensor K_layer_cache_5_axes_0 = const()[name = string("K_layer_cache_5_axes_0"), val = tensor([0])]; + tensor K_layer_cache_5_cast_fp16 = squeeze(axes = K_layer_cache_5_axes_0, x = var_620_cast_fp16)[name = string("K_layer_cache_5_cast_fp16")]; + tensor var_622_begin_0 = const()[name = string("op_622_begin_0"), val = tensor([34, 0, 0, 0])]; + tensor var_622_end_0 = const()[name = string("op_622_end_0"), val = tensor([35, 8, 1024, 128])]; + tensor var_622_end_mask_0 = const()[name = string("op_622_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_622_cast_fp16 = slice_by_index(begin = var_622_begin_0, end = var_622_end_0, end_mask = var_622_end_mask_0, x = coreml_update_state_13)[name = string("op_622_cast_fp16")]; + tensor V_layer_cache_5_axes_0 = const()[name = string("V_layer_cache_5_axes_0"), val = tensor([0])]; + tensor V_layer_cache_5_cast_fp16 = squeeze(axes = V_layer_cache_5_axes_0, x = var_622_cast_fp16)[name = string("V_layer_cache_5_cast_fp16")]; + tensor x_67_axes_0 = const()[name = string("x_67_axes_0"), val = tensor([1])]; + tensor x_67_cast_fp16 = expand_dims(axes = x_67_axes_0, x = K_layer_cache_5_cast_fp16)[name = string("x_67_cast_fp16")]; + tensor var_631 = const()[name = string("op_631"), val = tensor([1, 4, 1, 1])]; + tensor x_69_cast_fp16 = tile(reps = var_631, x = x_67_cast_fp16)[name = string("x_69_cast_fp16")]; + tensor var_635 = const()[name = string("op_635"), val = tensor([1, -1, 1024, 128])]; + tensor key_states_11_cast_fp16 = reshape(shape = var_635, x = x_69_cast_fp16)[name = string("key_states_11_cast_fp16")]; + tensor x_73_axes_0 = const()[name = string("x_73_axes_0"), val = tensor([1])]; + tensor x_73_cast_fp16 = expand_dims(axes = x_73_axes_0, x = V_layer_cache_5_cast_fp16)[name = string("x_73_cast_fp16")]; + tensor var_638 = const()[name = string("op_638"), val = tensor([1, 4, 1, 1])]; + tensor x_75_cast_fp16 = tile(reps = var_638, x = x_73_cast_fp16)[name = string("x_75_cast_fp16")]; + tensor var_642 = const()[name = string("op_642"), val = tensor([1, -1, 1024, 128])]; + tensor value_states_11_cast_fp16 = reshape(shape = var_642, x = x_75_cast_fp16)[name = string("value_states_11_cast_fp16")]; + bool var_645_transpose_x_1 = const()[name = string("op_645_transpose_x_1"), val = bool(false)]; + bool var_645_transpose_y_1 = const()[name = string("op_645_transpose_y_1"), val = bool(true)]; + tensor var_645_cast_fp16 = matmul(transpose_x = var_645_transpose_x_1, transpose_y = var_645_transpose_y_1, x = rotated_9_cast_fp16, y = key_states_11_cast_fp16)[name = string("op_645_cast_fp16")]; + fp16 var_646_to_fp16 = const()[name = string("op_646_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_9_cast_fp16 = mul(x = var_645_cast_fp16, y = var_646_to_fp16)[name = string("attn_weights_9_cast_fp16")]; + tensor x_77_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = causal_mask)[name = string("x_77_cast_fp16")]; + tensor reduce_max_2_axes_0 = const()[name = string("reduce_max_2_axes_0"), val = tensor([-1])]; + bool reduce_max_2_keep_dims_0 = const()[name = string("reduce_max_2_keep_dims_0"), val = bool(true)]; + tensor reduce_max_2_cast_fp16 = reduce_max(axes = reduce_max_2_axes_0, keep_dims = reduce_max_2_keep_dims_0, x = x_77_cast_fp16)[name = string("reduce_max_2_cast_fp16")]; + tensor x_79_cast_fp16 = sub(x = x_77_cast_fp16, y = reduce_max_2_cast_fp16)[name = string("x_79_cast_fp16")]; + tensor exp_x_5_cast_fp16 = exp(x = x_79_cast_fp16)[name = string("exp_x_5_cast_fp16")]; + tensor var_657_axes_0 = const()[name = string("op_657_axes_0"), val = tensor([-1])]; + bool var_657_keep_dims_0 = const()[name = string("op_657_keep_dims_0"), val = bool(true)]; + tensor var_657_cast_fp16 = reduce_sum(axes = var_657_axes_0, keep_dims = var_657_keep_dims_0, x = exp_x_5_cast_fp16)[name = string("op_657_cast_fp16")]; + tensor attn_weights_11_cast_fp16 = real_div(x = exp_x_5_cast_fp16, y = var_657_cast_fp16)[name = string("attn_weights_11_cast_fp16")]; + bool attn_output_13_transpose_x_0 = const()[name = string("attn_output_13_transpose_x_0"), val = bool(false)]; + bool attn_output_13_transpose_y_0 = const()[name = string("attn_output_13_transpose_y_0"), val = bool(false)]; + tensor attn_output_13_cast_fp16 = matmul(transpose_x = attn_output_13_transpose_x_0, transpose_y = attn_output_13_transpose_y_0, x = attn_weights_11_cast_fp16, y = value_states_11_cast_fp16)[name = string("attn_output_13_cast_fp16")]; + tensor var_660_perm_0 = const()[name = string("op_660_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_662 = const()[name = string("op_662"), val = tensor([1, 1, 4096])]; + tensor var_660_cast_fp16 = transpose(perm = var_660_perm_0, x = attn_output_13_cast_fp16)[name = string("transpose_6")]; + tensor input_33_cast_fp16 = reshape(shape = var_662, x = var_660_cast_fp16)[name = string("input_33_cast_fp16")]; + tensor model_model_layers_2_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698928960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(711511936))))[name = string("model_model_layers_2_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_2_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_2_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_33_cast_fp16)[name = string("linear_2_cast_fp16")]; + tensor hidden_states_21_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = linear_2_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; + tensor mean_11_axes_0 = const()[name = string("mean_11_axes_0"), val = tensor([-1])]; + bool mean_11_keep_dims_0 = const()[name = string("mean_11_keep_dims_0"), val = bool(true)]; + tensor mean_11_cast_fp16 = reduce_mean(axes = mean_11_axes_0, keep_dims = mean_11_keep_dims_0, x = hidden_states_21_cast_fp16)[name = string("mean_11_cast_fp16")]; + tensor input_35_cast_fp16 = sub(x = hidden_states_21_cast_fp16, y = mean_11_cast_fp16)[name = string("input_35_cast_fp16")]; + tensor var_673_axes_0 = const()[name = string("op_673_axes_0"), val = tensor([-1])]; + tensor model_model_layers_2_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(711577536)))]; + tensor var_673_cast_fp16 = layer_norm(axes = var_673_axes_0, epsilon = var_30_to_fp16, gamma = model_model_layers_2_post_attention_layernorm_weight_to_fp16, x = input_35_cast_fp16)[name = string("op_673_cast_fp16")]; + tensor var_680 = const()[name = string("op_680"), val = tensor([0, 2, 1])]; + tensor input_37_axes_0 = const()[name = string("input_37_axes_0"), val = tensor([2])]; + tensor var_681 = transpose(perm = var_680, x = var_673_cast_fp16)[name = string("transpose_5")]; + tensor input_37 = expand_dims(axes = input_37_axes_0, x = var_681)[name = string("input_37")]; + string input_39_pad_type_0 = const()[name = string("input_39_pad_type_0"), val = string("valid")]; + tensor input_39_strides_0 = const()[name = string("input_39_strides_0"), val = tensor([1, 1])]; + tensor input_39_pad_0 = const()[name = string("input_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_39_dilations_0 = const()[name = string("input_39_dilations_0"), val = tensor([1, 1])]; + int32 input_39_groups_0 = const()[name = string("input_39_groups_0"), val = int32(1)]; + tensor input_39 = conv(dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = model_model_layers_2_mlp_gate_proj_weight_palettized, x = input_37)[name = string("input_39")]; + string up_states_5_pad_type_0 = const()[name = string("up_states_5_pad_type_0"), val = string("valid")]; + tensor up_states_5_strides_0 = const()[name = string("up_states_5_strides_0"), val = tensor([1, 1])]; + tensor up_states_5_pad_0 = const()[name = string("up_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_5_dilations_0 = const()[name = string("up_states_5_dilations_0"), val = tensor([1, 1])]; + int32 up_states_5_groups_0 = const()[name = string("up_states_5_groups_0"), val = int32(1)]; + tensor up_states_5 = conv(dilations = up_states_5_dilations_0, groups = up_states_5_groups_0, pad = up_states_5_pad_0, pad_type = up_states_5_pad_type_0, strides = up_states_5_strides_0, weight = model_model_layers_2_mlp_up_proj_weight_palettized, x = input_37)[name = string("up_states_5")]; + tensor gate_states_5 = silu(x = input_39)[name = string("gate_states_5")]; + tensor input_41 = mul(x = gate_states_5, y = up_states_5)[name = string("input_41")]; + string hidden_states_23_pad_type_0 = const()[name = string("hidden_states_23_pad_type_0"), val = string("valid")]; + tensor hidden_states_23_strides_0 = const()[name = string("hidden_states_23_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_23_pad_0 = const()[name = string("hidden_states_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_23_dilations_0 = const()[name = string("hidden_states_23_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_23_groups_0 = const()[name = string("hidden_states_23_groups_0"), val = int32(1)]; + tensor hidden_states_23 = conv(dilations = hidden_states_23_dilations_0, groups = hidden_states_23_groups_0, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = hidden_states_23_strides_0, weight = model_model_layers_2_mlp_down_proj_weight_palettized, x = input_41)[name = string("hidden_states_23")]; + tensor var_703_axes_0 = const()[name = string("op_703_axes_0"), val = tensor([2])]; + tensor var_703 = squeeze(axes = var_703_axes_0, x = hidden_states_23)[name = string("op_703")]; + tensor var_704 = const()[name = string("op_704"), val = tensor([0, 2, 1])]; + tensor var_705 = transpose(perm = var_704, x = var_703)[name = string("transpose_4")]; + tensor hidden_states_25_cast_fp16 = add(x = hidden_states_21_cast_fp16, y = var_705)[name = string("hidden_states_25_cast_fp16")]; + tensor mean_13_axes_0 = const()[name = string("mean_13_axes_0"), val = tensor([-1])]; + bool mean_13_keep_dims_0 = const()[name = string("mean_13_keep_dims_0"), val = bool(true)]; + tensor mean_13_cast_fp16 = reduce_mean(axes = mean_13_axes_0, keep_dims = mean_13_keep_dims_0, x = hidden_states_25_cast_fp16)[name = string("mean_13_cast_fp16")]; + tensor input_43_cast_fp16 = sub(x = hidden_states_25_cast_fp16, y = mean_13_cast_fp16)[name = string("input_43_cast_fp16")]; + tensor var_713_axes_0 = const()[name = string("op_713_axes_0"), val = tensor([-1])]; + tensor model_model_layers_3_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(711585792)))]; + tensor var_713_cast_fp16 = layer_norm(axes = var_713_axes_0, epsilon = var_30_to_fp16, gamma = model_model_layers_3_input_layernorm_weight_to_fp16, x = input_43_cast_fp16)[name = string("op_713_cast_fp16")]; + tensor var_716 = const()[name = string("op_716"), val = tensor([0, 2, 1])]; + tensor var_718_axes_0 = const()[name = string("op_718_axes_0"), val = tensor([2])]; + tensor var_717 = transpose(perm = var_716, x = var_713_cast_fp16)[name = string("transpose_3")]; + tensor var_718 = expand_dims(axes = var_718_axes_0, x = var_717)[name = string("op_718")]; + string var_725_pad_type_0 = const()[name = string("op_725_pad_type_0"), val = string("valid")]; + tensor var_725_strides_0 = const()[name = string("op_725_strides_0"), val = tensor([1, 1])]; + tensor var_725_pad_0 = const()[name = string("op_725_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_725_dilations_0 = const()[name = string("op_725_dilations_0"), val = tensor([1, 1])]; + int32 var_725_groups_0 = const()[name = string("op_725_groups_0"), val = int32(1)]; + tensor var_725 = conv(dilations = var_725_dilations_0, groups = var_725_groups_0, pad = var_725_pad_0, pad_type = var_725_pad_type_0, strides = var_725_strides_0, weight = model_model_layers_3_self_attn_q_proj_weight_palettized, x = var_718)[name = string("op_725")]; + tensor var_726 = const()[name = string("op_726"), val = tensor([1, 32, 1, 128])]; + tensor var_727 = reshape(shape = var_726, x = var_725)[name = string("op_727")]; + string var_734_pad_type_0 = const()[name = string("op_734_pad_type_0"), val = string("valid")]; + tensor var_734_strides_0 = const()[name = string("op_734_strides_0"), val = tensor([1, 1])]; + tensor var_734_pad_0 = const()[name = string("op_734_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_734_dilations_0 = const()[name = string("op_734_dilations_0"), val = tensor([1, 1])]; + int32 var_734_groups_0 = const()[name = string("op_734_groups_0"), val = int32(1)]; + tensor var_734 = conv(dilations = var_734_dilations_0, groups = var_734_groups_0, pad = var_734_pad_0, pad_type = var_734_pad_type_0, strides = var_734_strides_0, weight = model_model_layers_3_self_attn_k_proj_weight_palettized, x = var_718)[name = string("op_734")]; + tensor var_735 = const()[name = string("op_735"), val = tensor([1, 8, 1, 128])]; + tensor var_736 = reshape(shape = var_735, x = var_734)[name = string("op_736")]; + string var_743_pad_type_0 = const()[name = string("op_743_pad_type_0"), val = string("valid")]; + tensor var_743_strides_0 = const()[name = string("op_743_strides_0"), val = tensor([1, 1])]; + tensor var_743_pad_0 = const()[name = string("op_743_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_743_dilations_0 = const()[name = string("op_743_dilations_0"), val = tensor([1, 1])]; + int32 var_743_groups_0 = const()[name = string("op_743_groups_0"), val = int32(1)]; + tensor var_743 = conv(dilations = var_743_dilations_0, groups = var_743_groups_0, pad = var_743_pad_0, pad_type = var_743_pad_type_0, strides = var_743_strides_0, weight = model_model_layers_3_self_attn_v_proj_weight_palettized, x = var_718)[name = string("op_743")]; + tensor var_744 = const()[name = string("op_744"), val = tensor([1, 8, 1, 128])]; + tensor var_745 = reshape(shape = var_744, x = var_743)[name = string("op_745")]; + tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 32, 1, 64])]; + tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_13 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = var_727)[name = string("x1_13")]; + tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 32, 1, 128])]; + tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_13 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = var_727)[name = string("x2_13")]; + tensor var_759_cast_fp16 = mul(x = x1_13, y = cos_3_cast_fp16)[name = string("op_759_cast_fp16")]; + tensor var_760_cast_fp16 = mul(x = x2_13, y = sin_3_cast_fp16)[name = string("op_760_cast_fp16")]; + tensor var_761_cast_fp16 = sub(x = var_759_cast_fp16, y = var_760_cast_fp16)[name = string("op_761_cast_fp16")]; + tensor var_762_cast_fp16 = mul(x = x2_13, y = cos_3_cast_fp16)[name = string("op_762_cast_fp16")]; + tensor var_763_cast_fp16 = mul(x = x1_13, y = sin_3_cast_fp16)[name = string("op_763_cast_fp16")]; + tensor var_764_cast_fp16 = add(x = var_762_cast_fp16, y = var_763_cast_fp16)[name = string("op_764_cast_fp16")]; + bool rotated_13_interleave_0 = const()[name = string("rotated_13_interleave_0"), val = bool(false)]; + tensor rotated_13_cast_fp16 = concat(axis = var_35, interleave = rotated_13_interleave_0, values = (var_761_cast_fp16, var_764_cast_fp16))[name = string("rotated_13_cast_fp16")]; + tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = var_736)[name = string("x1")]; + tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 8, 1, 128])]; + tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = var_736)[name = string("x2")]; + tensor var_780_cast_fp16 = mul(x = x1, y = cos_3_cast_fp16)[name = string("op_780_cast_fp16")]; + tensor var_781_cast_fp16 = mul(x = x2, y = sin_3_cast_fp16)[name = string("op_781_cast_fp16")]; + tensor var_782_cast_fp16 = sub(x = var_780_cast_fp16, y = var_781_cast_fp16)[name = string("op_782_cast_fp16")]; + tensor var_783_cast_fp16 = mul(x = x2, y = cos_3_cast_fp16)[name = string("op_783_cast_fp16")]; + tensor var_784_cast_fp16 = mul(x = x1, y = sin_3_cast_fp16)[name = string("op_784_cast_fp16")]; + tensor var_785_cast_fp16 = add(x = var_783_cast_fp16, y = var_784_cast_fp16)[name = string("op_785_cast_fp16")]; + bool rotated_interleave_0 = const()[name = string("rotated_interleave_0"), val = bool(false)]; + tensor rotated_cast_fp16 = concat(axis = var_35, interleave = rotated_interleave_0, values = (var_782_cast_fp16, var_785_cast_fp16))[name = string("rotated_cast_fp16")]; + tensor expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor([3])]; + tensor expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor([0])]; + tensor expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor([0])]; + tensor expand_dims_40 = const()[name = string("expand_dims_40"), val = tensor([4])]; + int32 concat_26_axis_0 = const()[name = string("concat_26_axis_0"), val = int32(0)]; + bool concat_26_interleave_0 = const()[name = string("concat_26_interleave_0"), val = bool(false)]; + tensor concat_26 = concat(axis = concat_26_axis_0, interleave = concat_26_interleave_0, values = (expand_dims_36, expand_dims_37, current_pos, expand_dims_39))[name = string("concat_26")]; + tensor concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor([0])]; + tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; + int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; + bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; + tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (expand_dims_40, concat_27_values1_0, var_235, concat_27_values3_0))[name = string("concat_27")]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_26, begin_mask = model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0, end = concat_27, end_mask = model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_7_stride_0, update = rotated_cast_fp16, x = coreml_update_state_13)[name = string("model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_6_write_state")]; + tensor coreml_update_state_14 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_6")]; + tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([35])]; + tensor expand_dims_43 = const()[name = string("expand_dims_43"), val = tensor([0])]; + tensor expand_dims_45 = const()[name = string("expand_dims_45"), val = tensor([0])]; + tensor expand_dims_46 = const()[name = string("expand_dims_46"), val = tensor([36])]; + int32 concat_30_axis_0 = const()[name = string("concat_30_axis_0"), val = int32(0)]; + bool concat_30_interleave_0 = const()[name = string("concat_30_interleave_0"), val = bool(false)]; + tensor concat_30 = concat(axis = concat_30_axis_0, interleave = concat_30_interleave_0, values = (expand_dims_42, expand_dims_43, current_pos, expand_dims_45))[name = string("concat_30")]; + tensor concat_31_values1_0 = const()[name = string("concat_31_values1_0"), val = tensor([0])]; + tensor concat_31_values3_0 = const()[name = string("concat_31_values3_0"), val = tensor([0])]; + int32 concat_31_axis_0 = const()[name = string("concat_31_axis_0"), val = int32(0)]; + bool concat_31_interleave_0 = const()[name = string("concat_31_interleave_0"), val = bool(false)]; + tensor concat_31 = concat(axis = concat_31_axis_0, interleave = concat_31_interleave_0, values = (expand_dims_46, concat_31_values1_0, var_235, concat_31_values3_0))[name = string("concat_31")]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_30, begin_mask = model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0, end = concat_31, end_mask = model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_8_stride_0, update = var_745, x = coreml_update_state_14)[name = string("model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_7_write_state")]; + tensor coreml_update_state_15 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_7")]; + tensor var_805_begin_0 = const()[name = string("op_805_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor var_805_end_0 = const()[name = string("op_805_end_0"), val = tensor([4, 8, 1024, 128])]; + tensor var_805_end_mask_0 = const()[name = string("op_805_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_805_cast_fp16 = slice_by_index(begin = var_805_begin_0, end = var_805_end_0, end_mask = var_805_end_mask_0, x = coreml_update_state_15)[name = string("op_805_cast_fp16")]; + tensor K_layer_cache_axes_0 = const()[name = string("K_layer_cache_axes_0"), val = tensor([0])]; + tensor K_layer_cache_cast_fp16 = squeeze(axes = K_layer_cache_axes_0, x = var_805_cast_fp16)[name = string("K_layer_cache_cast_fp16")]; + tensor var_807_begin_0 = const()[name = string("op_807_begin_0"), val = tensor([35, 0, 0, 0])]; + tensor var_807_end_0 = const()[name = string("op_807_end_0"), val = tensor([36, 8, 1024, 128])]; + tensor var_807_end_mask_0 = const()[name = string("op_807_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_807_cast_fp16 = slice_by_index(begin = var_807_begin_0, end = var_807_end_0, end_mask = var_807_end_mask_0, x = coreml_update_state_15)[name = string("op_807_cast_fp16")]; + tensor V_layer_cache_axes_0 = const()[name = string("V_layer_cache_axes_0"), val = tensor([0])]; + tensor V_layer_cache_cast_fp16 = squeeze(axes = V_layer_cache_axes_0, x = var_807_cast_fp16)[name = string("V_layer_cache_cast_fp16")]; + tensor x_95_axes_0 = const()[name = string("x_95_axes_0"), val = tensor([1])]; + tensor x_95_cast_fp16 = expand_dims(axes = x_95_axes_0, x = K_layer_cache_cast_fp16)[name = string("x_95_cast_fp16")]; + tensor var_816 = const()[name = string("op_816"), val = tensor([1, 4, 1, 1])]; + tensor x_97_cast_fp16 = tile(reps = var_816, x = x_95_cast_fp16)[name = string("x_97_cast_fp16")]; + tensor var_820 = const()[name = string("op_820"), val = tensor([1, -1, 1024, 128])]; + tensor key_states_cast_fp16 = reshape(shape = var_820, x = x_97_cast_fp16)[name = string("key_states_cast_fp16")]; + tensor x_101_axes_0 = const()[name = string("x_101_axes_0"), val = tensor([1])]; + tensor x_101_cast_fp16 = expand_dims(axes = x_101_axes_0, x = V_layer_cache_cast_fp16)[name = string("x_101_cast_fp16")]; + tensor var_823 = const()[name = string("op_823"), val = tensor([1, 4, 1, 1])]; + tensor x_103_cast_fp16 = tile(reps = var_823, x = x_101_cast_fp16)[name = string("x_103_cast_fp16")]; + tensor var_827 = const()[name = string("op_827"), val = tensor([1, -1, 1024, 128])]; + tensor value_states_cast_fp16 = reshape(shape = var_827, x = x_103_cast_fp16)[name = string("value_states_cast_fp16")]; + bool var_830_transpose_x_1 = const()[name = string("op_830_transpose_x_1"), val = bool(false)]; + bool var_830_transpose_y_1 = const()[name = string("op_830_transpose_y_1"), val = bool(true)]; + tensor var_830_cast_fp16 = matmul(transpose_x = var_830_transpose_x_1, transpose_y = var_830_transpose_y_1, x = rotated_13_cast_fp16, y = key_states_cast_fp16)[name = string("op_830_cast_fp16")]; + fp16 var_831_to_fp16 = const()[name = string("op_831_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_13_cast_fp16 = mul(x = var_830_cast_fp16, y = var_831_to_fp16)[name = string("attn_weights_13_cast_fp16")]; + tensor x_105_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = causal_mask)[name = string("x_105_cast_fp16")]; + tensor reduce_max_3_axes_0 = const()[name = string("reduce_max_3_axes_0"), val = tensor([-1])]; + bool reduce_max_3_keep_dims_0 = const()[name = string("reduce_max_3_keep_dims_0"), val = bool(true)]; + tensor reduce_max_3_cast_fp16 = reduce_max(axes = reduce_max_3_axes_0, keep_dims = reduce_max_3_keep_dims_0, x = x_105_cast_fp16)[name = string("reduce_max_3_cast_fp16")]; + tensor x_107_cast_fp16 = sub(x = x_105_cast_fp16, y = reduce_max_3_cast_fp16)[name = string("x_107_cast_fp16")]; + tensor exp_x_cast_fp16 = exp(x = x_107_cast_fp16)[name = string("exp_x_cast_fp16")]; + tensor var_842_axes_0 = const()[name = string("op_842_axes_0"), val = tensor([-1])]; + bool var_842_keep_dims_0 = const()[name = string("op_842_keep_dims_0"), val = bool(true)]; + tensor var_842_cast_fp16 = reduce_sum(axes = var_842_axes_0, keep_dims = var_842_keep_dims_0, x = exp_x_cast_fp16)[name = string("op_842_cast_fp16")]; + tensor attn_weights_cast_fp16 = real_div(x = exp_x_cast_fp16, y = var_842_cast_fp16)[name = string("attn_weights_cast_fp16")]; + bool attn_output_19_transpose_x_0 = const()[name = string("attn_output_19_transpose_x_0"), val = bool(false)]; + bool attn_output_19_transpose_y_0 = const()[name = string("attn_output_19_transpose_y_0"), val = bool(false)]; + tensor attn_output_19_cast_fp16 = matmul(transpose_x = attn_output_19_transpose_x_0, transpose_y = attn_output_19_transpose_y_0, x = attn_weights_cast_fp16, y = value_states_cast_fp16)[name = string("attn_output_19_cast_fp16")]; + tensor var_845_perm_0 = const()[name = string("op_845_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_847 = const()[name = string("op_847"), val = tensor([1, 1, 4096])]; + tensor var_845_cast_fp16 = transpose(perm = var_845_perm_0, x = attn_output_19_cast_fp16)[name = string("transpose_2")]; + tensor input_47_cast_fp16 = reshape(shape = var_847, x = var_845_cast_fp16)[name = string("input_47_cast_fp16")]; + tensor model_model_layers_3_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(711594048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(724177024))))[name = string("model_model_layers_3_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_3_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_3_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_47_cast_fp16)[name = string("linear_3_cast_fp16")]; + tensor hidden_states_29_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = linear_3_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; + tensor mean_axes_0 = const()[name = string("mean_axes_0"), val = tensor([-1])]; + bool mean_keep_dims_0 = const()[name = string("mean_keep_dims_0"), val = bool(true)]; + tensor mean_cast_fp16 = reduce_mean(axes = mean_axes_0, keep_dims = mean_keep_dims_0, x = hidden_states_29_cast_fp16)[name = string("mean_cast_fp16")]; + tensor input_49_cast_fp16 = sub(x = hidden_states_29_cast_fp16, y = mean_cast_fp16)[name = string("input_49_cast_fp16")]; + tensor var_858_axes_0 = const()[name = string("op_858_axes_0"), val = tensor([-1])]; + tensor model_model_layers_3_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(724242624)))]; + tensor var_858_cast_fp16 = layer_norm(axes = var_858_axes_0, epsilon = var_30_to_fp16, gamma = model_model_layers_3_post_attention_layernorm_weight_to_fp16, x = input_49_cast_fp16)[name = string("op_858_cast_fp16")]; + tensor var_865 = const()[name = string("op_865"), val = tensor([0, 2, 1])]; + tensor input_51_axes_0 = const()[name = string("input_51_axes_0"), val = tensor([2])]; + tensor var_866 = transpose(perm = var_865, x = var_858_cast_fp16)[name = string("transpose_1")]; + tensor input_51 = expand_dims(axes = input_51_axes_0, x = var_866)[name = string("input_51")]; + string input_53_pad_type_0 = const()[name = string("input_53_pad_type_0"), val = string("valid")]; + tensor input_53_strides_0 = const()[name = string("input_53_strides_0"), val = tensor([1, 1])]; + tensor input_53_pad_0 = const()[name = string("input_53_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_53_dilations_0 = const()[name = string("input_53_dilations_0"), val = tensor([1, 1])]; + int32 input_53_groups_0 = const()[name = string("input_53_groups_0"), val = int32(1)]; + tensor input_53 = conv(dilations = input_53_dilations_0, groups = input_53_groups_0, pad = input_53_pad_0, pad_type = input_53_pad_type_0, strides = input_53_strides_0, weight = model_model_layers_3_mlp_gate_proj_weight_palettized, x = input_51)[name = string("input_53")]; + string up_states_pad_type_0 = const()[name = string("up_states_pad_type_0"), val = string("valid")]; + tensor up_states_strides_0 = const()[name = string("up_states_strides_0"), val = tensor([1, 1])]; + tensor up_states_pad_0 = const()[name = string("up_states_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_dilations_0 = const()[name = string("up_states_dilations_0"), val = tensor([1, 1])]; + int32 up_states_groups_0 = const()[name = string("up_states_groups_0"), val = int32(1)]; + tensor up_states = conv(dilations = up_states_dilations_0, groups = up_states_groups_0, pad = up_states_pad_0, pad_type = up_states_pad_type_0, strides = up_states_strides_0, weight = model_model_layers_3_mlp_up_proj_weight_palettized, x = input_51)[name = string("up_states")]; + tensor gate_states = silu(x = input_53)[name = string("gate_states")]; + tensor input = mul(x = gate_states, y = up_states)[name = string("input")]; + string hidden_states_pad_type_0 = const()[name = string("hidden_states_pad_type_0"), val = string("valid")]; + tensor hidden_states_strides_0 = const()[name = string("hidden_states_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_pad_0 = const()[name = string("hidden_states_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_dilations_0 = const()[name = string("hidden_states_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_groups_0 = const()[name = string("hidden_states_groups_0"), val = int32(1)]; + tensor hidden_states_1 = conv(dilations = hidden_states_dilations_0, groups = hidden_states_groups_0, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = hidden_states_strides_0, weight = model_model_layers_3_mlp_down_proj_weight_palettized, x = input)[name = string("hidden_states")]; + tensor var_888_axes_0 = const()[name = string("op_888_axes_0"), val = tensor([2])]; + tensor var_888 = squeeze(axes = var_888_axes_0, x = hidden_states_1)[name = string("op_888")]; + tensor var_889 = const()[name = string("op_889"), val = tensor([0, 2, 1])]; + tensor var_890 = transpose(perm = var_889, x = var_888)[name = string("transpose_0")]; + tensor output_hidden_states = add(x = hidden_states_29_cast_fp16, y = var_890)[name = string("op_891_cast_fp16")]; + tensor position_ids_tmp = identity(x = position_ids)[name = string("position_ids_tmp")]; + } -> (output_hidden_states); + func prefill(tensor causal_mask, tensor current_pos, tensor hidden_states, state> model_model_kv_cache_0, tensor position_ids) { + tensor model_model_layers_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12583040))))[name = string("model_model_layers_0_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_0_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12648640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15794432))))[name = string("model_model_layers_0_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_0_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15810880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18956672))))[name = string("model_model_layers_0_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_0_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18973120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63013376))))[name = string("model_model_layers_0_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_0_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63242816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107283072))))[name = string("model_model_layers_0_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_0_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107512512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151552768))))[name = string("model_model_layers_0_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151618368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(164201344))))[name = string("model_model_layers_1_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_1_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(164266944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167412736))))[name = string("model_model_layers_1_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_1_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167429184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170574976))))[name = string("model_model_layers_1_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_1_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170591424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214631680))))[name = string("model_model_layers_1_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_1_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214861120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(258901376))))[name = string("model_model_layers_1_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_1_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(259130816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303171072))))[name = string("model_model_layers_1_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303236672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315819648))))[name = string("model_model_layers_2_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_2_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315885248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319031040))))[name = string("model_model_layers_2_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_2_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319047488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322193280))))[name = string("model_model_layers_2_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_2_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322209728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366249984))))[name = string("model_model_layers_2_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_2_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366479424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410519680))))[name = string("model_model_layers_2_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_2_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410749120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(454789376))))[name = string("model_model_layers_2_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(454854976))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(467437952))))[name = string("model_model_layers_3_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_3_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(467503552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470649344))))[name = string("model_model_layers_3_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_3_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470665792))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473811584))))[name = string("model_model_layers_3_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_3_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473828032))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(517868288))))[name = string("model_model_layers_3_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_3_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518097728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562137984))))[name = string("model_model_layers_3_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_3_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562367424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(606407680))))[name = string("model_model_layers_3_mlp_down_proj_weight_palettized")]; + int32 var_30 = const()[name = string("op_30"), val = int32(-1)]; + int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; + tensor greater_equal_0 = greater_equal(x = position_ids, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; + int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(131072)]; + tensor add_0 = add(x = position_ids, y = slice_by_index_0)[name = string("add_0")]; + tensor select_0 = select(a = position_ids, b = add_0, cond = greater_equal_0)[name = string("select_0")]; + int32 var_147_axis_0 = const()[name = string("op_147_axis_0"), val = int32(1)]; + int32 var_147_batch_dims_0 = const()[name = string("op_147_batch_dims_0"), val = int32(0)]; + bool var_147_validate_indices_0 = const()[name = string("op_147_validate_indices_0"), val = bool(false)]; + tensor var_41_to_fp16 = const()[name = string("op_41_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(640027776)))]; + tensor var_147_cast_fp16 = gather(axis = var_147_axis_0, batch_dims = var_147_batch_dims_0, indices = select_0, validate_indices = var_147_validate_indices_0, x = var_41_to_fp16)[name = string("op_147_cast_fp16")]; + tensor var_148 = const()[name = string("op_148"), val = tensor([1, 256, 1, 128])]; + tensor cos_1_cast_fp16 = reshape(shape = var_148, x = var_147_cast_fp16)[name = string("cos_1_cast_fp16")]; + int32 var_152_axis_0 = const()[name = string("op_152_axis_0"), val = int32(1)]; + int32 var_152_batch_dims_0 = const()[name = string("op_152_batch_dims_0"), val = int32(0)]; + bool var_152_validate_indices_0 = const()[name = string("op_152_validate_indices_0"), val = bool(false)]; + tensor var_36_to_fp16 = const()[name = string("op_36_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(606473280)))]; + tensor var_152_cast_fp16 = gather(axis = var_152_axis_0, batch_dims = var_152_batch_dims_0, indices = select_0, validate_indices = var_152_validate_indices_0, x = var_36_to_fp16)[name = string("op_152_cast_fp16")]; + tensor var_153 = const()[name = string("op_153"), val = tensor([1, 256, 1, 128])]; + tensor sin_1_cast_fp16 = reshape(shape = var_153, x = var_152_cast_fp16)[name = string("sin_1_cast_fp16")]; + tensor mean_1_axes_0 = const()[name = string("mean_1_axes_0"), val = tensor([-1])]; + bool mean_1_keep_dims_0 = const()[name = string("mean_1_keep_dims_0"), val = bool(true)]; + tensor mean_1_cast_fp16 = reduce_mean(axes = mean_1_axes_0, keep_dims = mean_1_keep_dims_0, x = hidden_states)[name = string("mean_1_cast_fp16")]; + tensor input_1_cast_fp16 = sub(x = hidden_states, y = mean_1_cast_fp16)[name = string("input_1_cast_fp16")]; + tensor var_163_axes_0 = const()[name = string("op_163_axes_0"), val = tensor([-1])]; + tensor model_model_layers_0_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(673582272)))]; + fp16 var_32_to_fp16 = const()[name = string("op_32_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_163_cast_fp16 = layer_norm(axes = var_163_axes_0, epsilon = var_32_to_fp16, gamma = model_model_layers_0_input_layernorm_weight_to_fp16, x = input_1_cast_fp16)[name = string("op_163_cast_fp16")]; + tensor var_167 = const()[name = string("op_167"), val = tensor([0, 2, 1])]; + tensor var_169_axes_0 = const()[name = string("op_169_axes_0"), val = tensor([2])]; + tensor var_168 = transpose(perm = var_167, x = var_163_cast_fp16)[name = string("transpose_29")]; + tensor var_169 = expand_dims(axes = var_169_axes_0, x = var_168)[name = string("op_169")]; + string query_states_1_pad_type_0 = const()[name = string("query_states_1_pad_type_0"), val = string("valid")]; + tensor query_states_1_strides_0 = const()[name = string("query_states_1_strides_0"), val = tensor([1, 1])]; + tensor query_states_1_pad_0 = const()[name = string("query_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_1_dilations_0 = const()[name = string("query_states_1_dilations_0"), val = tensor([1, 1])]; + int32 query_states_1_groups_0 = const()[name = string("query_states_1_groups_0"), val = int32(1)]; + tensor query_states_1 = conv(dilations = query_states_1_dilations_0, groups = query_states_1_groups_0, pad = query_states_1_pad_0, pad_type = query_states_1_pad_type_0, strides = query_states_1_strides_0, weight = model_model_layers_0_self_attn_q_proj_weight_palettized, x = var_169)[name = string("query_states_1")]; + string key_states_1_pad_type_0 = const()[name = string("key_states_1_pad_type_0"), val = string("valid")]; + tensor key_states_1_strides_0 = const()[name = string("key_states_1_strides_0"), val = tensor([1, 1])]; + tensor key_states_1_pad_0 = const()[name = string("key_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_1_dilations_0 = const()[name = string("key_states_1_dilations_0"), val = tensor([1, 1])]; + int32 key_states_1_groups_0 = const()[name = string("key_states_1_groups_0"), val = int32(1)]; + tensor key_states_1 = conv(dilations = key_states_1_dilations_0, groups = key_states_1_groups_0, pad = key_states_1_pad_0, pad_type = key_states_1_pad_type_0, strides = key_states_1_strides_0, weight = model_model_layers_0_self_attn_k_proj_weight_palettized, x = var_169)[name = string("key_states_1")]; + string value_states_1_pad_type_0 = const()[name = string("value_states_1_pad_type_0"), val = string("valid")]; + tensor value_states_1_strides_0 = const()[name = string("value_states_1_strides_0"), val = tensor([1, 1])]; + tensor value_states_1_pad_0 = const()[name = string("value_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_1_dilations_0 = const()[name = string("value_states_1_dilations_0"), val = tensor([1, 1])]; + int32 value_states_1_groups_0 = const()[name = string("value_states_1_groups_0"), val = int32(1)]; + tensor value_states_1 = conv(dilations = value_states_1_dilations_0, groups = value_states_1_groups_0, pad = value_states_1_pad_0, pad_type = value_states_1_pad_type_0, strides = value_states_1_strides_0, weight = model_model_layers_0_self_attn_v_proj_weight_palettized, x = var_169)[name = string("value_states_1")]; + tensor var_189 = const()[name = string("op_189"), val = tensor([1, 32, 128, 256])]; + tensor var_190 = reshape(shape = var_189, x = query_states_1)[name = string("op_190")]; + tensor var_191 = const()[name = string("op_191"), val = tensor([0, 1, 3, 2])]; + tensor var_193 = const()[name = string("op_193"), val = tensor([1, 8, 128, 256])]; + tensor var_194 = reshape(shape = var_193, x = key_states_1)[name = string("op_194")]; + tensor var_195 = const()[name = string("op_195"), val = tensor([0, 1, 3, 2])]; + tensor var_197 = const()[name = string("op_197"), val = tensor([1, 8, 128, 256])]; + tensor var_198 = reshape(shape = var_197, x = value_states_1)[name = string("op_198")]; + tensor var_199 = const()[name = string("op_199"), val = tensor([0, 1, 3, 2])]; + tensor var_201 = const()[name = string("op_201"), val = tensor([0, 2, 1, 3])]; + tensor var_203 = const()[name = string("op_203"), val = tensor([0, 2, 1, 3])]; + tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 32, 256, 64])]; + tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_1 = transpose(perm = var_191, x = var_190)[name = string("transpose_28")]; + tensor x1_1 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = x_1)[name = string("x1_1")]; + tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 32, 256, 128])]; + tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_1 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = x_1)[name = string("x2_1")]; + tensor cos_7_begin_0 = const()[name = string("cos_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor cos_7_end_0 = const()[name = string("cos_7_end_0"), val = tensor([1, 1, 256, 64])]; + tensor cos_7_end_mask_0 = const()[name = string("cos_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor cos_5 = transpose(perm = var_201, x = cos_1_cast_fp16)[name = string("transpose_27")]; + tensor cos_7 = slice_by_index(begin = cos_7_begin_0, end = cos_7_end_0, end_mask = cos_7_end_mask_0, x = cos_5)[name = string("cos_7")]; + tensor sin_7_begin_0 = const()[name = string("sin_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor sin_7_end_0 = const()[name = string("sin_7_end_0"), val = tensor([1, 1, 256, 64])]; + tensor sin_7_end_mask_0 = const()[name = string("sin_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor sin_5 = transpose(perm = var_203, x = sin_1_cast_fp16)[name = string("transpose_26")]; + tensor sin_7 = slice_by_index(begin = sin_7_begin_0, end = sin_7_end_0, end_mask = sin_7_end_mask_0, x = sin_5)[name = string("sin_7")]; + tensor var_217 = mul(x = x1_1, y = cos_7)[name = string("op_217")]; + tensor var_218 = mul(x = x2_1, y = sin_7)[name = string("op_218")]; + tensor var_219 = sub(x = var_217, y = var_218)[name = string("op_219")]; + tensor var_220 = mul(x = x2_1, y = cos_7)[name = string("op_220")]; + tensor var_221 = mul(x = x1_1, y = sin_7)[name = string("op_221")]; + tensor var_222 = add(x = var_220, y = var_221)[name = string("op_222")]; + bool rotated_1_interleave_0 = const()[name = string("rotated_1_interleave_0"), val = bool(false)]; + tensor rotated_1 = concat(axis = var_30, interleave = rotated_1_interleave_0, values = (var_219, var_222))[name = string("rotated_1")]; + tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 8, 256, 64])]; + tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_5 = transpose(perm = var_195, x = var_194)[name = string("transpose_25")]; + tensor x1_3 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = x_5)[name = string("x1_3")]; + tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 8, 256, 128])]; + tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_3 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = x_5)[name = string("x2_3")]; + tensor var_238 = mul(x = x1_3, y = cos_7)[name = string("op_238")]; + tensor var_239 = mul(x = x2_3, y = sin_7)[name = string("op_239")]; + tensor var_240 = sub(x = var_238, y = var_239)[name = string("op_240")]; + tensor var_241 = mul(x = x2_3, y = cos_7)[name = string("op_241")]; + tensor var_242 = mul(x = x1_3, y = sin_7)[name = string("op_242")]; + tensor var_243 = add(x = var_241, y = var_242)[name = string("op_243")]; + bool rotated_3_interleave_0 = const()[name = string("rotated_3_interleave_0"), val = bool(false)]; + tensor rotated_3 = concat(axis = var_30, interleave = rotated_3_interleave_0, values = (var_240, var_243))[name = string("rotated_3")]; + tensor seq_length_1 = const()[name = string("seq_length_1"), val = tensor([256])]; + tensor var_252 = add(x = current_pos, y = seq_length_1)[name = string("op_252")]; + tensor read_state_0 = read_state(input = model_model_kv_cache_0)[name = string("read_state_0")]; + tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([0])]; + tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; + tensor expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor([0])]; + tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([1])]; + int32 concat_2_axis_0 = const()[name = string("concat_2_axis_0"), val = int32(0)]; + bool concat_2_interleave_0 = const()[name = string("concat_2_interleave_0"), val = bool(false)]; + tensor concat_2 = concat(axis = concat_2_axis_0, interleave = concat_2_interleave_0, values = (expand_dims_0, expand_dims_1, current_pos, expand_dims_3))[name = string("concat_2")]; + tensor concat_3_values1_0 = const()[name = string("concat_3_values1_0"), val = tensor([0])]; + tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; + int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)]; + bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; + tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_4, concat_3_values1_0, var_252, concat_3_values3_0))[name = string("concat_3")]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_1_stride_0, update = rotated_3, x = read_state_0)[name = string("model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_8_write_state")]; + tensor coreml_update_state_8 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_8")]; + tensor expand_dims_6 = const()[name = string("expand_dims_6"), val = tensor([32])]; + tensor expand_dims_7 = const()[name = string("expand_dims_7"), val = tensor([0])]; + tensor expand_dims_9 = const()[name = string("expand_dims_9"), val = tensor([0])]; + tensor expand_dims_10 = const()[name = string("expand_dims_10"), val = tensor([33])]; + int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; + bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; + tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (expand_dims_6, expand_dims_7, current_pos, expand_dims_9))[name = string("concat_6")]; + tensor concat_7_values1_0 = const()[name = string("concat_7_values1_0"), val = tensor([0])]; + tensor concat_7_values3_0 = const()[name = string("concat_7_values3_0"), val = tensor([0])]; + int32 concat_7_axis_0 = const()[name = string("concat_7_axis_0"), val = int32(0)]; + bool concat_7_interleave_0 = const()[name = string("concat_7_interleave_0"), val = bool(false)]; + tensor concat_7 = concat(axis = concat_7_axis_0, interleave = concat_7_interleave_0, values = (expand_dims_10, concat_7_values1_0, var_252, concat_7_values3_0))[name = string("concat_7")]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_3 = transpose(perm = var_199, x = var_198)[name = string("transpose_24")]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_6, begin_mask = model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0, end = concat_7, end_mask = model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_2_stride_0, update = value_states_3, x = coreml_update_state_8)[name = string("model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_9_write_state")]; + tensor coreml_update_state_9 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_9")]; + tensor var_266_begin_0 = const()[name = string("op_266_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_266_end_0 = const()[name = string("op_266_end_0"), val = tensor([1, 8, 1024, 128])]; + tensor var_266_end_mask_0 = const()[name = string("op_266_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_266_cast_fp16 = slice_by_index(begin = var_266_begin_0, end = var_266_end_0, end_mask = var_266_end_mask_0, x = coreml_update_state_9)[name = string("op_266_cast_fp16")]; + tensor K_layer_cache_1_axes_0 = const()[name = string("K_layer_cache_1_axes_0"), val = tensor([0])]; + tensor K_layer_cache_1_cast_fp16 = squeeze(axes = K_layer_cache_1_axes_0, x = var_266_cast_fp16)[name = string("K_layer_cache_1_cast_fp16")]; + tensor var_268_begin_0 = const()[name = string("op_268_begin_0"), val = tensor([32, 0, 0, 0])]; + tensor var_268_end_0 = const()[name = string("op_268_end_0"), val = tensor([33, 8, 1024, 128])]; + tensor var_268_end_mask_0 = const()[name = string("op_268_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_268_cast_fp16 = slice_by_index(begin = var_268_begin_0, end = var_268_end_0, end_mask = var_268_end_mask_0, x = coreml_update_state_9)[name = string("op_268_cast_fp16")]; + tensor V_layer_cache_1_axes_0 = const()[name = string("V_layer_cache_1_axes_0"), val = tensor([0])]; + tensor V_layer_cache_1_cast_fp16 = squeeze(axes = V_layer_cache_1_axes_0, x = var_268_cast_fp16)[name = string("V_layer_cache_1_cast_fp16")]; + tensor x_11_axes_0 = const()[name = string("x_11_axes_0"), val = tensor([1])]; + tensor x_11_cast_fp16 = expand_dims(axes = x_11_axes_0, x = K_layer_cache_1_cast_fp16)[name = string("x_11_cast_fp16")]; + tensor var_277 = const()[name = string("op_277"), val = tensor([1, 4, 1, 1])]; + tensor x_13_cast_fp16 = tile(reps = var_277, x = x_11_cast_fp16)[name = string("x_13_cast_fp16")]; + tensor var_281 = const()[name = string("op_281"), val = tensor([1, -1, 1024, 128])]; + tensor var_282_cast_fp16 = reshape(shape = var_281, x = x_13_cast_fp16)[name = string("op_282_cast_fp16")]; + tensor x_17_axes_0 = const()[name = string("x_17_axes_0"), val = tensor([1])]; + tensor x_17_cast_fp16 = expand_dims(axes = x_17_axes_0, x = V_layer_cache_1_cast_fp16)[name = string("x_17_cast_fp16")]; + tensor var_284 = const()[name = string("op_284"), val = tensor([1, 4, 1, 1])]; + tensor x_19_cast_fp16 = tile(reps = var_284, x = x_17_cast_fp16)[name = string("x_19_cast_fp16")]; + bool var_291_transpose_x_0 = const()[name = string("op_291_transpose_x_0"), val = bool(false)]; + bool var_291_transpose_y_0 = const()[name = string("op_291_transpose_y_0"), val = bool(true)]; + tensor var_291_cast_fp16 = matmul(transpose_x = var_291_transpose_x_0, transpose_y = var_291_transpose_y_0, x = rotated_1, y = var_282_cast_fp16)[name = string("op_291_cast_fp16")]; + fp16 var_292_to_fp16 = const()[name = string("op_292_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_1_cast_fp16 = mul(x = var_291_cast_fp16, y = var_292_to_fp16)[name = string("attn_weights_1_cast_fp16")]; + tensor x_21_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask)[name = string("x_21_cast_fp16")]; + tensor reduce_max_0_axes_0 = const()[name = string("reduce_max_0_axes_0"), val = tensor([-1])]; + bool reduce_max_0_keep_dims_0 = const()[name = string("reduce_max_0_keep_dims_0"), val = bool(true)]; + tensor reduce_max_0_cast_fp16 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = x_21_cast_fp16)[name = string("reduce_max_0_cast_fp16")]; + tensor x_23_cast_fp16 = sub(x = x_21_cast_fp16, y = reduce_max_0_cast_fp16)[name = string("x_23_cast_fp16")]; + tensor exp_x_1_cast_fp16 = exp(x = x_23_cast_fp16)[name = string("exp_x_1_cast_fp16")]; + tensor var_303_axes_0 = const()[name = string("op_303_axes_0"), val = tensor([-1])]; + bool var_303_keep_dims_0 = const()[name = string("op_303_keep_dims_0"), val = bool(true)]; + tensor var_303_cast_fp16 = reduce_sum(axes = var_303_axes_0, keep_dims = var_303_keep_dims_0, x = exp_x_1_cast_fp16)[name = string("op_303_cast_fp16")]; + tensor var_304_cast_fp16 = real_div(x = exp_x_1_cast_fp16, y = var_303_cast_fp16)[name = string("op_304_cast_fp16")]; + tensor concat_12 = const()[name = string("concat_12"), val = tensor([32, 256, 1024])]; + tensor reshape_0_cast_fp16 = reshape(shape = concat_12, x = var_304_cast_fp16)[name = string("reshape_0_cast_fp16")]; + tensor concat_13 = const()[name = string("concat_13"), val = tensor([32, 1024, 128])]; + tensor reshape_1_cast_fp16 = reshape(shape = concat_13, x = x_19_cast_fp16)[name = string("reshape_1_cast_fp16")]; + bool matmul_0_transpose_x_0 = const()[name = string("matmul_0_transpose_x_0"), val = bool(false)]; + bool matmul_0_transpose_y_0 = const()[name = string("matmul_0_transpose_y_0"), val = bool(false)]; + tensor matmul_0_cast_fp16 = matmul(transpose_x = matmul_0_transpose_x_0, transpose_y = matmul_0_transpose_y_0, x = reshape_0_cast_fp16, y = reshape_1_cast_fp16)[name = string("matmul_0_cast_fp16")]; + tensor concat_17 = const()[name = string("concat_17"), val = tensor([1, 32, 256, 128])]; + tensor reshape_2_cast_fp16 = reshape(shape = concat_17, x = matmul_0_cast_fp16)[name = string("reshape_2_cast_fp16")]; + tensor var_307_perm_0 = const()[name = string("op_307_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_309 = const()[name = string("op_309"), val = tensor([1, 256, 4096])]; + tensor var_307_cast_fp16 = transpose(perm = var_307_perm_0, x = reshape_2_cast_fp16)[name = string("transpose_23")]; + tensor input_5_cast_fp16 = reshape(shape = var_309, x = var_307_cast_fp16)[name = string("input_5_cast_fp16")]; + tensor model_model_layers_0_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(673590528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686173504))))[name = string("model_model_layers_0_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686239104)))]; + tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_0_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_5_cast_fp16)[name = string("linear_0_cast_fp16")]; + tensor hidden_states_5_cast_fp16 = add(x = hidden_states, y = linear_0_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; + tensor mean_3_axes_0 = const()[name = string("mean_3_axes_0"), val = tensor([-1])]; + bool mean_3_keep_dims_0 = const()[name = string("mean_3_keep_dims_0"), val = bool(true)]; + tensor mean_3_cast_fp16 = reduce_mean(axes = mean_3_axes_0, keep_dims = mean_3_keep_dims_0, x = hidden_states_5_cast_fp16)[name = string("mean_3_cast_fp16")]; + tensor input_7_cast_fp16 = sub(x = hidden_states_5_cast_fp16, y = mean_3_cast_fp16)[name = string("input_7_cast_fp16")]; + tensor var_320_axes_0 = const()[name = string("op_320_axes_0"), val = tensor([-1])]; + tensor model_model_layers_0_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686247360)))]; + tensor var_320_cast_fp16 = layer_norm(axes = var_320_axes_0, epsilon = var_32_to_fp16, gamma = model_model_layers_0_post_attention_layernorm_weight_to_fp16, x = input_7_cast_fp16)[name = string("op_320_cast_fp16")]; + tensor var_327 = const()[name = string("op_327"), val = tensor([0, 2, 1])]; + tensor input_9_axes_0 = const()[name = string("input_9_axes_0"), val = tensor([2])]; + tensor var_328 = transpose(perm = var_327, x = var_320_cast_fp16)[name = string("transpose_22")]; + tensor input_9 = expand_dims(axes = input_9_axes_0, x = var_328)[name = string("input_9")]; + string input_11_pad_type_0 = const()[name = string("input_11_pad_type_0"), val = string("valid")]; + tensor input_11_strides_0 = const()[name = string("input_11_strides_0"), val = tensor([1, 1])]; + tensor input_11_pad_0 = const()[name = string("input_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_11_dilations_0 = const()[name = string("input_11_dilations_0"), val = tensor([1, 1])]; + int32 input_11_groups_0 = const()[name = string("input_11_groups_0"), val = int32(1)]; + tensor input_11 = conv(dilations = input_11_dilations_0, groups = input_11_groups_0, pad = input_11_pad_0, pad_type = input_11_pad_type_0, strides = input_11_strides_0, weight = model_model_layers_0_mlp_gate_proj_weight_palettized, x = input_9)[name = string("input_11")]; + string up_states_1_pad_type_0 = const()[name = string("up_states_1_pad_type_0"), val = string("valid")]; + tensor up_states_1_strides_0 = const()[name = string("up_states_1_strides_0"), val = tensor([1, 1])]; + tensor up_states_1_pad_0 = const()[name = string("up_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_1_dilations_0 = const()[name = string("up_states_1_dilations_0"), val = tensor([1, 1])]; + int32 up_states_1_groups_0 = const()[name = string("up_states_1_groups_0"), val = int32(1)]; + tensor up_states_1 = conv(dilations = up_states_1_dilations_0, groups = up_states_1_groups_0, pad = up_states_1_pad_0, pad_type = up_states_1_pad_type_0, strides = up_states_1_strides_0, weight = model_model_layers_0_mlp_up_proj_weight_palettized, x = input_9)[name = string("up_states_1")]; + tensor gate_states_1 = silu(x = input_11)[name = string("gate_states_1")]; + tensor input_13 = mul(x = gate_states_1, y = up_states_1)[name = string("input_13")]; + string hidden_states_7_pad_type_0 = const()[name = string("hidden_states_7_pad_type_0"), val = string("valid")]; + tensor hidden_states_7_strides_0 = const()[name = string("hidden_states_7_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_7_pad_0 = const()[name = string("hidden_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_7_dilations_0 = const()[name = string("hidden_states_7_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_7_groups_0 = const()[name = string("hidden_states_7_groups_0"), val = int32(1)]; + tensor hidden_states_7 = conv(dilations = hidden_states_7_dilations_0, groups = hidden_states_7_groups_0, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = hidden_states_7_strides_0, weight = model_model_layers_0_mlp_down_proj_weight_palettized, x = input_13)[name = string("hidden_states_7")]; + tensor var_350_axes_0 = const()[name = string("op_350_axes_0"), val = tensor([2])]; + tensor var_350 = squeeze(axes = var_350_axes_0, x = hidden_states_7)[name = string("op_350")]; + tensor var_351 = const()[name = string("op_351"), val = tensor([0, 2, 1])]; + tensor var_352 = transpose(perm = var_351, x = var_350)[name = string("transpose_21")]; + tensor hidden_states_9_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = var_352)[name = string("hidden_states_9_cast_fp16")]; + tensor mean_5_axes_0 = const()[name = string("mean_5_axes_0"), val = tensor([-1])]; + bool mean_5_keep_dims_0 = const()[name = string("mean_5_keep_dims_0"), val = bool(true)]; + tensor mean_5_cast_fp16 = reduce_mean(axes = mean_5_axes_0, keep_dims = mean_5_keep_dims_0, x = hidden_states_9_cast_fp16)[name = string("mean_5_cast_fp16")]; + tensor input_15_cast_fp16 = sub(x = hidden_states_9_cast_fp16, y = mean_5_cast_fp16)[name = string("input_15_cast_fp16")]; + tensor var_360_axes_0 = const()[name = string("op_360_axes_0"), val = tensor([-1])]; + tensor model_model_layers_1_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686255616)))]; + tensor var_360_cast_fp16 = layer_norm(axes = var_360_axes_0, epsilon = var_32_to_fp16, gamma = model_model_layers_1_input_layernorm_weight_to_fp16, x = input_15_cast_fp16)[name = string("op_360_cast_fp16")]; + tensor var_364 = const()[name = string("op_364"), val = tensor([0, 2, 1])]; + tensor var_366_axes_0 = const()[name = string("op_366_axes_0"), val = tensor([2])]; + tensor var_365 = transpose(perm = var_364, x = var_360_cast_fp16)[name = string("transpose_20")]; + tensor var_366 = expand_dims(axes = var_366_axes_0, x = var_365)[name = string("op_366")]; + string query_states_5_pad_type_0 = const()[name = string("query_states_5_pad_type_0"), val = string("valid")]; + tensor query_states_5_strides_0 = const()[name = string("query_states_5_strides_0"), val = tensor([1, 1])]; + tensor query_states_5_pad_0 = const()[name = string("query_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_5_dilations_0 = const()[name = string("query_states_5_dilations_0"), val = tensor([1, 1])]; + int32 query_states_5_groups_0 = const()[name = string("query_states_5_groups_0"), val = int32(1)]; + tensor query_states_5 = conv(dilations = query_states_5_dilations_0, groups = query_states_5_groups_0, pad = query_states_5_pad_0, pad_type = query_states_5_pad_type_0, strides = query_states_5_strides_0, weight = model_model_layers_1_self_attn_q_proj_weight_palettized, x = var_366)[name = string("query_states_5")]; + string key_states_7_pad_type_0 = const()[name = string("key_states_7_pad_type_0"), val = string("valid")]; + tensor key_states_7_strides_0 = const()[name = string("key_states_7_strides_0"), val = tensor([1, 1])]; + tensor key_states_7_pad_0 = const()[name = string("key_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_7_dilations_0 = const()[name = string("key_states_7_dilations_0"), val = tensor([1, 1])]; + int32 key_states_7_groups_0 = const()[name = string("key_states_7_groups_0"), val = int32(1)]; + tensor key_states_7 = conv(dilations = key_states_7_dilations_0, groups = key_states_7_groups_0, pad = key_states_7_pad_0, pad_type = key_states_7_pad_type_0, strides = key_states_7_strides_0, weight = model_model_layers_1_self_attn_k_proj_weight_palettized, x = var_366)[name = string("key_states_7")]; + string value_states_7_pad_type_0 = const()[name = string("value_states_7_pad_type_0"), val = string("valid")]; + tensor value_states_7_strides_0 = const()[name = string("value_states_7_strides_0"), val = tensor([1, 1])]; + tensor value_states_7_pad_0 = const()[name = string("value_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_7_dilations_0 = const()[name = string("value_states_7_dilations_0"), val = tensor([1, 1])]; + int32 value_states_7_groups_0 = const()[name = string("value_states_7_groups_0"), val = int32(1)]; + tensor value_states_7 = conv(dilations = value_states_7_dilations_0, groups = value_states_7_groups_0, pad = value_states_7_pad_0, pad_type = value_states_7_pad_type_0, strides = value_states_7_strides_0, weight = model_model_layers_1_self_attn_v_proj_weight_palettized, x = var_366)[name = string("value_states_7")]; + tensor var_386 = const()[name = string("op_386"), val = tensor([1, 32, 128, 256])]; + tensor var_387 = reshape(shape = var_386, x = query_states_5)[name = string("op_387")]; + tensor var_388 = const()[name = string("op_388"), val = tensor([0, 1, 3, 2])]; + tensor var_390 = const()[name = string("op_390"), val = tensor([1, 8, 128, 256])]; + tensor var_391 = reshape(shape = var_390, x = key_states_7)[name = string("op_391")]; + tensor var_392 = const()[name = string("op_392"), val = tensor([0, 1, 3, 2])]; + tensor var_394 = const()[name = string("op_394"), val = tensor([1, 8, 128, 256])]; + tensor var_395 = reshape(shape = var_394, x = value_states_7)[name = string("op_395")]; + tensor var_396 = const()[name = string("op_396"), val = tensor([0, 1, 3, 2])]; + tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 32, 256, 64])]; + tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_29 = transpose(perm = var_388, x = var_387)[name = string("transpose_19")]; + tensor x1_5 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = x_29)[name = string("x1_5")]; + tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 32, 256, 128])]; + tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_5 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = x_29)[name = string("x2_5")]; + tensor var_414 = mul(x = x1_5, y = cos_7)[name = string("op_414")]; + tensor var_415 = mul(x = x2_5, y = sin_7)[name = string("op_415")]; + tensor var_416 = sub(x = var_414, y = var_415)[name = string("op_416")]; + tensor var_417 = mul(x = x2_5, y = cos_7)[name = string("op_417")]; + tensor var_418 = mul(x = x1_5, y = sin_7)[name = string("op_418")]; + tensor var_419 = add(x = var_417, y = var_418)[name = string("op_419")]; + bool rotated_5_interleave_0 = const()[name = string("rotated_5_interleave_0"), val = bool(false)]; + tensor rotated_5 = concat(axis = var_30, interleave = rotated_5_interleave_0, values = (var_416, var_419))[name = string("rotated_5")]; + tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 8, 256, 64])]; + tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_33 = transpose(perm = var_392, x = var_391)[name = string("transpose_18")]; + tensor x1_7 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = x_33)[name = string("x1_7")]; + tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 8, 256, 128])]; + tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_7 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = x_33)[name = string("x2_7")]; + tensor var_435 = mul(x = x1_7, y = cos_7)[name = string("op_435")]; + tensor var_436 = mul(x = x2_7, y = sin_7)[name = string("op_436")]; + tensor var_437 = sub(x = var_435, y = var_436)[name = string("op_437")]; + tensor var_438 = mul(x = x2_7, y = cos_7)[name = string("op_438")]; + tensor var_439 = mul(x = x1_7, y = sin_7)[name = string("op_439")]; + tensor var_440 = add(x = var_438, y = var_439)[name = string("op_440")]; + bool rotated_7_interleave_0 = const()[name = string("rotated_7_interleave_0"), val = bool(false)]; + tensor rotated_7 = concat(axis = var_30, interleave = rotated_7_interleave_0, values = (var_437, var_440))[name = string("rotated_7")]; + tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([1])]; + tensor expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor([0])]; + tensor expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor([0])]; + tensor expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor([2])]; + int32 concat_20_axis_0 = const()[name = string("concat_20_axis_0"), val = int32(0)]; + bool concat_20_interleave_0 = const()[name = string("concat_20_interleave_0"), val = bool(false)]; + tensor concat_20 = concat(axis = concat_20_axis_0, interleave = concat_20_interleave_0, values = (expand_dims_12, expand_dims_13, current_pos, expand_dims_15))[name = string("concat_20")]; + tensor concat_21_values1_0 = const()[name = string("concat_21_values1_0"), val = tensor([0])]; + tensor concat_21_values3_0 = const()[name = string("concat_21_values3_0"), val = tensor([0])]; + int32 concat_21_axis_0 = const()[name = string("concat_21_axis_0"), val = int32(0)]; + bool concat_21_interleave_0 = const()[name = string("concat_21_interleave_0"), val = bool(false)]; + tensor concat_21 = concat(axis = concat_21_axis_0, interleave = concat_21_interleave_0, values = (expand_dims_16, concat_21_values1_0, var_252, concat_21_values3_0))[name = string("concat_21")]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_20, begin_mask = model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0, end = concat_21, end_mask = model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_3_stride_0, update = rotated_7, x = coreml_update_state_9)[name = string("model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_10_write_state")]; + tensor coreml_update_state_10 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_10")]; + tensor expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor([33])]; + tensor expand_dims_19 = const()[name = string("expand_dims_19"), val = tensor([0])]; + tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([0])]; + tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([34])]; + int32 concat_24_axis_0 = const()[name = string("concat_24_axis_0"), val = int32(0)]; + bool concat_24_interleave_0 = const()[name = string("concat_24_interleave_0"), val = bool(false)]; + tensor concat_24 = concat(axis = concat_24_axis_0, interleave = concat_24_interleave_0, values = (expand_dims_18, expand_dims_19, current_pos, expand_dims_21))[name = string("concat_24")]; + tensor concat_25_values1_0 = const()[name = string("concat_25_values1_0"), val = tensor([0])]; + tensor concat_25_values3_0 = const()[name = string("concat_25_values3_0"), val = tensor([0])]; + int32 concat_25_axis_0 = const()[name = string("concat_25_axis_0"), val = int32(0)]; + bool concat_25_interleave_0 = const()[name = string("concat_25_interleave_0"), val = bool(false)]; + tensor concat_25 = concat(axis = concat_25_axis_0, interleave = concat_25_interleave_0, values = (expand_dims_22, concat_25_values1_0, var_252, concat_25_values3_0))[name = string("concat_25")]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_9 = transpose(perm = var_396, x = var_395)[name = string("transpose_17")]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_24, begin_mask = model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0, end = concat_25, end_mask = model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_4_stride_0, update = value_states_9, x = coreml_update_state_10)[name = string("model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_11_write_state")]; + tensor coreml_update_state_11 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_11")]; + tensor var_463_begin_0 = const()[name = string("op_463_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_463_end_0 = const()[name = string("op_463_end_0"), val = tensor([2, 8, 1024, 128])]; + tensor var_463_end_mask_0 = const()[name = string("op_463_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_463_cast_fp16 = slice_by_index(begin = var_463_begin_0, end = var_463_end_0, end_mask = var_463_end_mask_0, x = coreml_update_state_11)[name = string("op_463_cast_fp16")]; + tensor K_layer_cache_3_axes_0 = const()[name = string("K_layer_cache_3_axes_0"), val = tensor([0])]; + tensor K_layer_cache_3_cast_fp16 = squeeze(axes = K_layer_cache_3_axes_0, x = var_463_cast_fp16)[name = string("K_layer_cache_3_cast_fp16")]; + tensor var_465_begin_0 = const()[name = string("op_465_begin_0"), val = tensor([33, 0, 0, 0])]; + tensor var_465_end_0 = const()[name = string("op_465_end_0"), val = tensor([34, 8, 1024, 128])]; + tensor var_465_end_mask_0 = const()[name = string("op_465_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_465_cast_fp16 = slice_by_index(begin = var_465_begin_0, end = var_465_end_0, end_mask = var_465_end_mask_0, x = coreml_update_state_11)[name = string("op_465_cast_fp16")]; + tensor V_layer_cache_3_axes_0 = const()[name = string("V_layer_cache_3_axes_0"), val = tensor([0])]; + tensor V_layer_cache_3_cast_fp16 = squeeze(axes = V_layer_cache_3_axes_0, x = var_465_cast_fp16)[name = string("V_layer_cache_3_cast_fp16")]; + tensor x_39_axes_0 = const()[name = string("x_39_axes_0"), val = tensor([1])]; + tensor x_39_cast_fp16 = expand_dims(axes = x_39_axes_0, x = K_layer_cache_3_cast_fp16)[name = string("x_39_cast_fp16")]; + tensor var_474 = const()[name = string("op_474"), val = tensor([1, 4, 1, 1])]; + tensor x_41_cast_fp16 = tile(reps = var_474, x = x_39_cast_fp16)[name = string("x_41_cast_fp16")]; + tensor var_478 = const()[name = string("op_478"), val = tensor([1, -1, 1024, 128])]; + tensor var_479_cast_fp16 = reshape(shape = var_478, x = x_41_cast_fp16)[name = string("op_479_cast_fp16")]; + tensor x_45_axes_0 = const()[name = string("x_45_axes_0"), val = tensor([1])]; + tensor x_45_cast_fp16 = expand_dims(axes = x_45_axes_0, x = V_layer_cache_3_cast_fp16)[name = string("x_45_cast_fp16")]; + tensor var_481 = const()[name = string("op_481"), val = tensor([1, 4, 1, 1])]; + tensor x_47_cast_fp16 = tile(reps = var_481, x = x_45_cast_fp16)[name = string("x_47_cast_fp16")]; + bool var_488_transpose_x_0 = const()[name = string("op_488_transpose_x_0"), val = bool(false)]; + bool var_488_transpose_y_0 = const()[name = string("op_488_transpose_y_0"), val = bool(true)]; + tensor var_488_cast_fp16 = matmul(transpose_x = var_488_transpose_x_0, transpose_y = var_488_transpose_y_0, x = rotated_5, y = var_479_cast_fp16)[name = string("op_488_cast_fp16")]; + fp16 var_489_to_fp16 = const()[name = string("op_489_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_3_cast_fp16 = mul(x = var_488_cast_fp16, y = var_489_to_fp16)[name = string("attn_weights_3_cast_fp16")]; + tensor x_49_cast_fp16 = add(x = attn_weights_3_cast_fp16, y = causal_mask)[name = string("x_49_cast_fp16")]; + tensor reduce_max_1_axes_0 = const()[name = string("reduce_max_1_axes_0"), val = tensor([-1])]; + bool reduce_max_1_keep_dims_0 = const()[name = string("reduce_max_1_keep_dims_0"), val = bool(true)]; + tensor reduce_max_1_cast_fp16 = reduce_max(axes = reduce_max_1_axes_0, keep_dims = reduce_max_1_keep_dims_0, x = x_49_cast_fp16)[name = string("reduce_max_1_cast_fp16")]; + tensor x_51_cast_fp16 = sub(x = x_49_cast_fp16, y = reduce_max_1_cast_fp16)[name = string("x_51_cast_fp16")]; + tensor exp_x_3_cast_fp16 = exp(x = x_51_cast_fp16)[name = string("exp_x_3_cast_fp16")]; + tensor var_500_axes_0 = const()[name = string("op_500_axes_0"), val = tensor([-1])]; + bool var_500_keep_dims_0 = const()[name = string("op_500_keep_dims_0"), val = bool(true)]; + tensor var_500_cast_fp16 = reduce_sum(axes = var_500_axes_0, keep_dims = var_500_keep_dims_0, x = exp_x_3_cast_fp16)[name = string("op_500_cast_fp16")]; + tensor var_501_cast_fp16 = real_div(x = exp_x_3_cast_fp16, y = var_500_cast_fp16)[name = string("op_501_cast_fp16")]; + tensor concat_30 = const()[name = string("concat_30"), val = tensor([32, 256, 1024])]; + tensor reshape_3_cast_fp16 = reshape(shape = concat_30, x = var_501_cast_fp16)[name = string("reshape_3_cast_fp16")]; + tensor concat_31 = const()[name = string("concat_31"), val = tensor([32, 1024, 128])]; + tensor reshape_4_cast_fp16 = reshape(shape = concat_31, x = x_47_cast_fp16)[name = string("reshape_4_cast_fp16")]; + bool matmul_1_transpose_x_0 = const()[name = string("matmul_1_transpose_x_0"), val = bool(false)]; + bool matmul_1_transpose_y_0 = const()[name = string("matmul_1_transpose_y_0"), val = bool(false)]; + tensor matmul_1_cast_fp16 = matmul(transpose_x = matmul_1_transpose_x_0, transpose_y = matmul_1_transpose_y_0, x = reshape_3_cast_fp16, y = reshape_4_cast_fp16)[name = string("matmul_1_cast_fp16")]; + tensor concat_35 = const()[name = string("concat_35"), val = tensor([1, 32, 256, 128])]; + tensor reshape_5_cast_fp16 = reshape(shape = concat_35, x = matmul_1_cast_fp16)[name = string("reshape_5_cast_fp16")]; + tensor var_504_perm_0 = const()[name = string("op_504_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_506 = const()[name = string("op_506"), val = tensor([1, 256, 4096])]; + tensor var_504_cast_fp16 = transpose(perm = var_504_perm_0, x = reshape_5_cast_fp16)[name = string("transpose_16")]; + tensor input_19_cast_fp16 = reshape(shape = var_506, x = var_504_cast_fp16)[name = string("input_19_cast_fp16")]; + tensor model_model_layers_1_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686263872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698846848))))[name = string("model_model_layers_1_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_1_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_1_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_19_cast_fp16)[name = string("linear_1_cast_fp16")]; + tensor hidden_states_13_cast_fp16 = add(x = hidden_states_9_cast_fp16, y = linear_1_cast_fp16)[name = string("hidden_states_13_cast_fp16")]; + tensor mean_7_axes_0 = const()[name = string("mean_7_axes_0"), val = tensor([-1])]; + bool mean_7_keep_dims_0 = const()[name = string("mean_7_keep_dims_0"), val = bool(true)]; + tensor mean_7_cast_fp16 = reduce_mean(axes = mean_7_axes_0, keep_dims = mean_7_keep_dims_0, x = hidden_states_13_cast_fp16)[name = string("mean_7_cast_fp16")]; + tensor input_21_cast_fp16 = sub(x = hidden_states_13_cast_fp16, y = mean_7_cast_fp16)[name = string("input_21_cast_fp16")]; + tensor var_517_axes_0 = const()[name = string("op_517_axes_0"), val = tensor([-1])]; + tensor model_model_layers_1_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698912448)))]; + tensor var_517_cast_fp16 = layer_norm(axes = var_517_axes_0, epsilon = var_32_to_fp16, gamma = model_model_layers_1_post_attention_layernorm_weight_to_fp16, x = input_21_cast_fp16)[name = string("op_517_cast_fp16")]; + tensor var_524 = const()[name = string("op_524"), val = tensor([0, 2, 1])]; + tensor input_23_axes_0 = const()[name = string("input_23_axes_0"), val = tensor([2])]; + tensor var_525 = transpose(perm = var_524, x = var_517_cast_fp16)[name = string("transpose_15")]; + tensor input_23 = expand_dims(axes = input_23_axes_0, x = var_525)[name = string("input_23")]; + string input_25_pad_type_0 = const()[name = string("input_25_pad_type_0"), val = string("valid")]; + tensor input_25_strides_0 = const()[name = string("input_25_strides_0"), val = tensor([1, 1])]; + tensor input_25_pad_0 = const()[name = string("input_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_25_dilations_0 = const()[name = string("input_25_dilations_0"), val = tensor([1, 1])]; + int32 input_25_groups_0 = const()[name = string("input_25_groups_0"), val = int32(1)]; + tensor input_25 = conv(dilations = input_25_dilations_0, groups = input_25_groups_0, pad = input_25_pad_0, pad_type = input_25_pad_type_0, strides = input_25_strides_0, weight = model_model_layers_1_mlp_gate_proj_weight_palettized, x = input_23)[name = string("input_25")]; + string up_states_3_pad_type_0 = const()[name = string("up_states_3_pad_type_0"), val = string("valid")]; + tensor up_states_3_strides_0 = const()[name = string("up_states_3_strides_0"), val = tensor([1, 1])]; + tensor up_states_3_pad_0 = const()[name = string("up_states_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_3_dilations_0 = const()[name = string("up_states_3_dilations_0"), val = tensor([1, 1])]; + int32 up_states_3_groups_0 = const()[name = string("up_states_3_groups_0"), val = int32(1)]; + tensor up_states_3 = conv(dilations = up_states_3_dilations_0, groups = up_states_3_groups_0, pad = up_states_3_pad_0, pad_type = up_states_3_pad_type_0, strides = up_states_3_strides_0, weight = model_model_layers_1_mlp_up_proj_weight_palettized, x = input_23)[name = string("up_states_3")]; + tensor gate_states_3 = silu(x = input_25)[name = string("gate_states_3")]; + tensor input_27 = mul(x = gate_states_3, y = up_states_3)[name = string("input_27")]; + string hidden_states_15_pad_type_0 = const()[name = string("hidden_states_15_pad_type_0"), val = string("valid")]; + tensor hidden_states_15_strides_0 = const()[name = string("hidden_states_15_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_15_pad_0 = const()[name = string("hidden_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_15_dilations_0 = const()[name = string("hidden_states_15_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_15_groups_0 = const()[name = string("hidden_states_15_groups_0"), val = int32(1)]; + tensor hidden_states_15 = conv(dilations = hidden_states_15_dilations_0, groups = hidden_states_15_groups_0, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = hidden_states_15_strides_0, weight = model_model_layers_1_mlp_down_proj_weight_palettized, x = input_27)[name = string("hidden_states_15")]; + tensor var_547_axes_0 = const()[name = string("op_547_axes_0"), val = tensor([2])]; + tensor var_547 = squeeze(axes = var_547_axes_0, x = hidden_states_15)[name = string("op_547")]; + tensor var_548 = const()[name = string("op_548"), val = tensor([0, 2, 1])]; + tensor var_549 = transpose(perm = var_548, x = var_547)[name = string("transpose_14")]; + tensor hidden_states_17_cast_fp16 = add(x = hidden_states_13_cast_fp16, y = var_549)[name = string("hidden_states_17_cast_fp16")]; + tensor mean_9_axes_0 = const()[name = string("mean_9_axes_0"), val = tensor([-1])]; + bool mean_9_keep_dims_0 = const()[name = string("mean_9_keep_dims_0"), val = bool(true)]; + tensor mean_9_cast_fp16 = reduce_mean(axes = mean_9_axes_0, keep_dims = mean_9_keep_dims_0, x = hidden_states_17_cast_fp16)[name = string("mean_9_cast_fp16")]; + tensor input_29_cast_fp16 = sub(x = hidden_states_17_cast_fp16, y = mean_9_cast_fp16)[name = string("input_29_cast_fp16")]; + tensor var_557_axes_0 = const()[name = string("op_557_axes_0"), val = tensor([-1])]; + tensor model_model_layers_2_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698920704)))]; + tensor var_557_cast_fp16 = layer_norm(axes = var_557_axes_0, epsilon = var_32_to_fp16, gamma = model_model_layers_2_input_layernorm_weight_to_fp16, x = input_29_cast_fp16)[name = string("op_557_cast_fp16")]; + tensor var_561 = const()[name = string("op_561"), val = tensor([0, 2, 1])]; + tensor var_563_axes_0 = const()[name = string("op_563_axes_0"), val = tensor([2])]; + tensor var_562 = transpose(perm = var_561, x = var_557_cast_fp16)[name = string("transpose_13")]; + tensor var_563 = expand_dims(axes = var_563_axes_0, x = var_562)[name = string("op_563")]; + string query_states_9_pad_type_0 = const()[name = string("query_states_9_pad_type_0"), val = string("valid")]; + tensor query_states_9_strides_0 = const()[name = string("query_states_9_strides_0"), val = tensor([1, 1])]; + tensor query_states_9_pad_0 = const()[name = string("query_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_9_dilations_0 = const()[name = string("query_states_9_dilations_0"), val = tensor([1, 1])]; + int32 query_states_9_groups_0 = const()[name = string("query_states_9_groups_0"), val = int32(1)]; + tensor query_states_9 = conv(dilations = query_states_9_dilations_0, groups = query_states_9_groups_0, pad = query_states_9_pad_0, pad_type = query_states_9_pad_type_0, strides = query_states_9_strides_0, weight = model_model_layers_2_self_attn_q_proj_weight_palettized, x = var_563)[name = string("query_states_9")]; + string key_states_13_pad_type_0 = const()[name = string("key_states_13_pad_type_0"), val = string("valid")]; + tensor key_states_13_strides_0 = const()[name = string("key_states_13_strides_0"), val = tensor([1, 1])]; + tensor key_states_13_pad_0 = const()[name = string("key_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_13_dilations_0 = const()[name = string("key_states_13_dilations_0"), val = tensor([1, 1])]; + int32 key_states_13_groups_0 = const()[name = string("key_states_13_groups_0"), val = int32(1)]; + tensor key_states_13 = conv(dilations = key_states_13_dilations_0, groups = key_states_13_groups_0, pad = key_states_13_pad_0, pad_type = key_states_13_pad_type_0, strides = key_states_13_strides_0, weight = model_model_layers_2_self_attn_k_proj_weight_palettized, x = var_563)[name = string("key_states_13")]; + string value_states_13_pad_type_0 = const()[name = string("value_states_13_pad_type_0"), val = string("valid")]; + tensor value_states_13_strides_0 = const()[name = string("value_states_13_strides_0"), val = tensor([1, 1])]; + tensor value_states_13_pad_0 = const()[name = string("value_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_13_dilations_0 = const()[name = string("value_states_13_dilations_0"), val = tensor([1, 1])]; + int32 value_states_13_groups_0 = const()[name = string("value_states_13_groups_0"), val = int32(1)]; + tensor value_states_13 = conv(dilations = value_states_13_dilations_0, groups = value_states_13_groups_0, pad = value_states_13_pad_0, pad_type = value_states_13_pad_type_0, strides = value_states_13_strides_0, weight = model_model_layers_2_self_attn_v_proj_weight_palettized, x = var_563)[name = string("value_states_13")]; + tensor var_583 = const()[name = string("op_583"), val = tensor([1, 32, 128, 256])]; + tensor var_584 = reshape(shape = var_583, x = query_states_9)[name = string("op_584")]; + tensor var_585 = const()[name = string("op_585"), val = tensor([0, 1, 3, 2])]; + tensor var_587 = const()[name = string("op_587"), val = tensor([1, 8, 128, 256])]; + tensor var_588 = reshape(shape = var_587, x = key_states_13)[name = string("op_588")]; + tensor var_589 = const()[name = string("op_589"), val = tensor([0, 1, 3, 2])]; + tensor var_591 = const()[name = string("op_591"), val = tensor([1, 8, 128, 256])]; + tensor var_592 = reshape(shape = var_591, x = value_states_13)[name = string("op_592")]; + tensor var_593 = const()[name = string("op_593"), val = tensor([0, 1, 3, 2])]; + tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 32, 256, 64])]; + tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_57 = transpose(perm = var_585, x = var_584)[name = string("transpose_12")]; + tensor x1_9 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = x_57)[name = string("x1_9")]; + tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 32, 256, 128])]; + tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_9 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = x_57)[name = string("x2_9")]; + tensor var_611 = mul(x = x1_9, y = cos_7)[name = string("op_611")]; + tensor var_612 = mul(x = x2_9, y = sin_7)[name = string("op_612")]; + tensor var_613 = sub(x = var_611, y = var_612)[name = string("op_613")]; + tensor var_614 = mul(x = x2_9, y = cos_7)[name = string("op_614")]; + tensor var_615 = mul(x = x1_9, y = sin_7)[name = string("op_615")]; + tensor var_616 = add(x = var_614, y = var_615)[name = string("op_616")]; + bool rotated_9_interleave_0 = const()[name = string("rotated_9_interleave_0"), val = bool(false)]; + tensor rotated_9 = concat(axis = var_30, interleave = rotated_9_interleave_0, values = (var_613, var_616))[name = string("rotated_9")]; + tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 8, 256, 64])]; + tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_61 = transpose(perm = var_589, x = var_588)[name = string("transpose_11")]; + tensor x1_11 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = x_61)[name = string("x1_11")]; + tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 8, 256, 128])]; + tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_11 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = x_61)[name = string("x2_11")]; + tensor var_632 = mul(x = x1_11, y = cos_7)[name = string("op_632")]; + tensor var_633 = mul(x = x2_11, y = sin_7)[name = string("op_633")]; + tensor var_634 = sub(x = var_632, y = var_633)[name = string("op_634")]; + tensor var_635 = mul(x = x2_11, y = cos_7)[name = string("op_635")]; + tensor var_636 = mul(x = x1_11, y = sin_7)[name = string("op_636")]; + tensor var_637 = add(x = var_635, y = var_636)[name = string("op_637")]; + bool rotated_11_interleave_0 = const()[name = string("rotated_11_interleave_0"), val = bool(false)]; + tensor rotated_11 = concat(axis = var_30, interleave = rotated_11_interleave_0, values = (var_634, var_637))[name = string("rotated_11")]; + tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([2])]; + tensor expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor([0])]; + tensor expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor([0])]; + tensor expand_dims_28 = const()[name = string("expand_dims_28"), val = tensor([3])]; + int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)]; + bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)]; + tensor concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (expand_dims_24, expand_dims_25, current_pos, expand_dims_27))[name = string("concat_38")]; + tensor concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor([0])]; + tensor concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor([0])]; + int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)]; + bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)]; + tensor concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (expand_dims_28, concat_39_values1_0, var_252, concat_39_values3_0))[name = string("concat_39")]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_38, begin_mask = model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0, end = concat_39, end_mask = model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_5_stride_0, update = rotated_11, x = coreml_update_state_11)[name = string("model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_12_write_state")]; + tensor coreml_update_state_12 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_12")]; + tensor expand_dims_30 = const()[name = string("expand_dims_30"), val = tensor([34])]; + tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([0])]; + tensor expand_dims_33 = const()[name = string("expand_dims_33"), val = tensor([0])]; + tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([35])]; + int32 concat_42_axis_0 = const()[name = string("concat_42_axis_0"), val = int32(0)]; + bool concat_42_interleave_0 = const()[name = string("concat_42_interleave_0"), val = bool(false)]; + tensor concat_42 = concat(axis = concat_42_axis_0, interleave = concat_42_interleave_0, values = (expand_dims_30, expand_dims_31, current_pos, expand_dims_33))[name = string("concat_42")]; + tensor concat_43_values1_0 = const()[name = string("concat_43_values1_0"), val = tensor([0])]; + tensor concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor([0])]; + int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; + bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; + tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_34, concat_43_values1_0, var_252, concat_43_values3_0))[name = string("concat_43")]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_15 = transpose(perm = var_593, x = var_592)[name = string("transpose_10")]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_42, begin_mask = model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0, end = concat_43, end_mask = model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_6_stride_0, update = value_states_15, x = coreml_update_state_12)[name = string("model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_13_write_state")]; + tensor coreml_update_state_13 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_13")]; + tensor var_660_begin_0 = const()[name = string("op_660_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor var_660_end_0 = const()[name = string("op_660_end_0"), val = tensor([3, 8, 1024, 128])]; + tensor var_660_end_mask_0 = const()[name = string("op_660_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_660_cast_fp16 = slice_by_index(begin = var_660_begin_0, end = var_660_end_0, end_mask = var_660_end_mask_0, x = coreml_update_state_13)[name = string("op_660_cast_fp16")]; + tensor K_layer_cache_5_axes_0 = const()[name = string("K_layer_cache_5_axes_0"), val = tensor([0])]; + tensor K_layer_cache_5_cast_fp16 = squeeze(axes = K_layer_cache_5_axes_0, x = var_660_cast_fp16)[name = string("K_layer_cache_5_cast_fp16")]; + tensor var_662_begin_0 = const()[name = string("op_662_begin_0"), val = tensor([34, 0, 0, 0])]; + tensor var_662_end_0 = const()[name = string("op_662_end_0"), val = tensor([35, 8, 1024, 128])]; + tensor var_662_end_mask_0 = const()[name = string("op_662_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_662_cast_fp16 = slice_by_index(begin = var_662_begin_0, end = var_662_end_0, end_mask = var_662_end_mask_0, x = coreml_update_state_13)[name = string("op_662_cast_fp16")]; + tensor V_layer_cache_5_axes_0 = const()[name = string("V_layer_cache_5_axes_0"), val = tensor([0])]; + tensor V_layer_cache_5_cast_fp16 = squeeze(axes = V_layer_cache_5_axes_0, x = var_662_cast_fp16)[name = string("V_layer_cache_5_cast_fp16")]; + tensor x_67_axes_0 = const()[name = string("x_67_axes_0"), val = tensor([1])]; + tensor x_67_cast_fp16 = expand_dims(axes = x_67_axes_0, x = K_layer_cache_5_cast_fp16)[name = string("x_67_cast_fp16")]; + tensor var_671 = const()[name = string("op_671"), val = tensor([1, 4, 1, 1])]; + tensor x_69_cast_fp16 = tile(reps = var_671, x = x_67_cast_fp16)[name = string("x_69_cast_fp16")]; + tensor var_675 = const()[name = string("op_675"), val = tensor([1, -1, 1024, 128])]; + tensor var_676_cast_fp16 = reshape(shape = var_675, x = x_69_cast_fp16)[name = string("op_676_cast_fp16")]; + tensor x_73_axes_0 = const()[name = string("x_73_axes_0"), val = tensor([1])]; + tensor x_73_cast_fp16 = expand_dims(axes = x_73_axes_0, x = V_layer_cache_5_cast_fp16)[name = string("x_73_cast_fp16")]; + tensor var_678 = const()[name = string("op_678"), val = tensor([1, 4, 1, 1])]; + tensor x_75_cast_fp16 = tile(reps = var_678, x = x_73_cast_fp16)[name = string("x_75_cast_fp16")]; + bool var_685_transpose_x_0 = const()[name = string("op_685_transpose_x_0"), val = bool(false)]; + bool var_685_transpose_y_0 = const()[name = string("op_685_transpose_y_0"), val = bool(true)]; + tensor var_685_cast_fp16 = matmul(transpose_x = var_685_transpose_x_0, transpose_y = var_685_transpose_y_0, x = rotated_9, y = var_676_cast_fp16)[name = string("op_685_cast_fp16")]; + fp16 var_686_to_fp16 = const()[name = string("op_686_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_5_cast_fp16 = mul(x = var_685_cast_fp16, y = var_686_to_fp16)[name = string("attn_weights_5_cast_fp16")]; + tensor x_77_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = causal_mask)[name = string("x_77_cast_fp16")]; + tensor reduce_max_2_axes_0 = const()[name = string("reduce_max_2_axes_0"), val = tensor([-1])]; + bool reduce_max_2_keep_dims_0 = const()[name = string("reduce_max_2_keep_dims_0"), val = bool(true)]; + tensor reduce_max_2_cast_fp16 = reduce_max(axes = reduce_max_2_axes_0, keep_dims = reduce_max_2_keep_dims_0, x = x_77_cast_fp16)[name = string("reduce_max_2_cast_fp16")]; + tensor x_79_cast_fp16 = sub(x = x_77_cast_fp16, y = reduce_max_2_cast_fp16)[name = string("x_79_cast_fp16")]; + tensor exp_x_5_cast_fp16 = exp(x = x_79_cast_fp16)[name = string("exp_x_5_cast_fp16")]; + tensor var_697_axes_0 = const()[name = string("op_697_axes_0"), val = tensor([-1])]; + bool var_697_keep_dims_0 = const()[name = string("op_697_keep_dims_0"), val = bool(true)]; + tensor var_697_cast_fp16 = reduce_sum(axes = var_697_axes_0, keep_dims = var_697_keep_dims_0, x = exp_x_5_cast_fp16)[name = string("op_697_cast_fp16")]; + tensor var_698_cast_fp16 = real_div(x = exp_x_5_cast_fp16, y = var_697_cast_fp16)[name = string("op_698_cast_fp16")]; + tensor concat_48 = const()[name = string("concat_48"), val = tensor([32, 256, 1024])]; + tensor reshape_6_cast_fp16 = reshape(shape = concat_48, x = var_698_cast_fp16)[name = string("reshape_6_cast_fp16")]; + tensor concat_49 = const()[name = string("concat_49"), val = tensor([32, 1024, 128])]; + tensor reshape_7_cast_fp16 = reshape(shape = concat_49, x = x_75_cast_fp16)[name = string("reshape_7_cast_fp16")]; + bool matmul_2_transpose_x_0 = const()[name = string("matmul_2_transpose_x_0"), val = bool(false)]; + bool matmul_2_transpose_y_0 = const()[name = string("matmul_2_transpose_y_0"), val = bool(false)]; + tensor matmul_2_cast_fp16 = matmul(transpose_x = matmul_2_transpose_x_0, transpose_y = matmul_2_transpose_y_0, x = reshape_6_cast_fp16, y = reshape_7_cast_fp16)[name = string("matmul_2_cast_fp16")]; + tensor concat_53 = const()[name = string("concat_53"), val = tensor([1, 32, 256, 128])]; + tensor reshape_8_cast_fp16 = reshape(shape = concat_53, x = matmul_2_cast_fp16)[name = string("reshape_8_cast_fp16")]; + tensor var_701_perm_0 = const()[name = string("op_701_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_703 = const()[name = string("op_703"), val = tensor([1, 256, 4096])]; + tensor var_701_cast_fp16 = transpose(perm = var_701_perm_0, x = reshape_8_cast_fp16)[name = string("transpose_9")]; + tensor input_33_cast_fp16 = reshape(shape = var_703, x = var_701_cast_fp16)[name = string("input_33_cast_fp16")]; + tensor model_model_layers_2_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698928960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(711511936))))[name = string("model_model_layers_2_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_2_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_2_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_33_cast_fp16)[name = string("linear_2_cast_fp16")]; + tensor hidden_states_21_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = linear_2_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; + tensor mean_11_axes_0 = const()[name = string("mean_11_axes_0"), val = tensor([-1])]; + bool mean_11_keep_dims_0 = const()[name = string("mean_11_keep_dims_0"), val = bool(true)]; + tensor mean_11_cast_fp16 = reduce_mean(axes = mean_11_axes_0, keep_dims = mean_11_keep_dims_0, x = hidden_states_21_cast_fp16)[name = string("mean_11_cast_fp16")]; + tensor input_35_cast_fp16 = sub(x = hidden_states_21_cast_fp16, y = mean_11_cast_fp16)[name = string("input_35_cast_fp16")]; + tensor var_714_axes_0 = const()[name = string("op_714_axes_0"), val = tensor([-1])]; + tensor model_model_layers_2_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(711577536)))]; + tensor var_714_cast_fp16 = layer_norm(axes = var_714_axes_0, epsilon = var_32_to_fp16, gamma = model_model_layers_2_post_attention_layernorm_weight_to_fp16, x = input_35_cast_fp16)[name = string("op_714_cast_fp16")]; + tensor var_721 = const()[name = string("op_721"), val = tensor([0, 2, 1])]; + tensor input_37_axes_0 = const()[name = string("input_37_axes_0"), val = tensor([2])]; + tensor var_722 = transpose(perm = var_721, x = var_714_cast_fp16)[name = string("transpose_8")]; + tensor input_37 = expand_dims(axes = input_37_axes_0, x = var_722)[name = string("input_37")]; + string input_39_pad_type_0 = const()[name = string("input_39_pad_type_0"), val = string("valid")]; + tensor input_39_strides_0 = const()[name = string("input_39_strides_0"), val = tensor([1, 1])]; + tensor input_39_pad_0 = const()[name = string("input_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_39_dilations_0 = const()[name = string("input_39_dilations_0"), val = tensor([1, 1])]; + int32 input_39_groups_0 = const()[name = string("input_39_groups_0"), val = int32(1)]; + tensor input_39 = conv(dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = model_model_layers_2_mlp_gate_proj_weight_palettized, x = input_37)[name = string("input_39")]; + string up_states_5_pad_type_0 = const()[name = string("up_states_5_pad_type_0"), val = string("valid")]; + tensor up_states_5_strides_0 = const()[name = string("up_states_5_strides_0"), val = tensor([1, 1])]; + tensor up_states_5_pad_0 = const()[name = string("up_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_5_dilations_0 = const()[name = string("up_states_5_dilations_0"), val = tensor([1, 1])]; + int32 up_states_5_groups_0 = const()[name = string("up_states_5_groups_0"), val = int32(1)]; + tensor up_states_5 = conv(dilations = up_states_5_dilations_0, groups = up_states_5_groups_0, pad = up_states_5_pad_0, pad_type = up_states_5_pad_type_0, strides = up_states_5_strides_0, weight = model_model_layers_2_mlp_up_proj_weight_palettized, x = input_37)[name = string("up_states_5")]; + tensor gate_states_5 = silu(x = input_39)[name = string("gate_states_5")]; + tensor input_41 = mul(x = gate_states_5, y = up_states_5)[name = string("input_41")]; + string hidden_states_23_pad_type_0 = const()[name = string("hidden_states_23_pad_type_0"), val = string("valid")]; + tensor hidden_states_23_strides_0 = const()[name = string("hidden_states_23_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_23_pad_0 = const()[name = string("hidden_states_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_23_dilations_0 = const()[name = string("hidden_states_23_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_23_groups_0 = const()[name = string("hidden_states_23_groups_0"), val = int32(1)]; + tensor hidden_states_23 = conv(dilations = hidden_states_23_dilations_0, groups = hidden_states_23_groups_0, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = hidden_states_23_strides_0, weight = model_model_layers_2_mlp_down_proj_weight_palettized, x = input_41)[name = string("hidden_states_23")]; + tensor var_744_axes_0 = const()[name = string("op_744_axes_0"), val = tensor([2])]; + tensor var_744 = squeeze(axes = var_744_axes_0, x = hidden_states_23)[name = string("op_744")]; + tensor var_745 = const()[name = string("op_745"), val = tensor([0, 2, 1])]; + tensor var_746 = transpose(perm = var_745, x = var_744)[name = string("transpose_7")]; + tensor hidden_states_25_cast_fp16 = add(x = hidden_states_21_cast_fp16, y = var_746)[name = string("hidden_states_25_cast_fp16")]; + tensor mean_13_axes_0 = const()[name = string("mean_13_axes_0"), val = tensor([-1])]; + bool mean_13_keep_dims_0 = const()[name = string("mean_13_keep_dims_0"), val = bool(true)]; + tensor mean_13_cast_fp16 = reduce_mean(axes = mean_13_axes_0, keep_dims = mean_13_keep_dims_0, x = hidden_states_25_cast_fp16)[name = string("mean_13_cast_fp16")]; + tensor input_43_cast_fp16 = sub(x = hidden_states_25_cast_fp16, y = mean_13_cast_fp16)[name = string("input_43_cast_fp16")]; + tensor var_754_axes_0 = const()[name = string("op_754_axes_0"), val = tensor([-1])]; + tensor model_model_layers_3_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(711585792)))]; + tensor var_754_cast_fp16 = layer_norm(axes = var_754_axes_0, epsilon = var_32_to_fp16, gamma = model_model_layers_3_input_layernorm_weight_to_fp16, x = input_43_cast_fp16)[name = string("op_754_cast_fp16")]; + tensor var_758 = const()[name = string("op_758"), val = tensor([0, 2, 1])]; + tensor var_760_axes_0 = const()[name = string("op_760_axes_0"), val = tensor([2])]; + tensor var_759 = transpose(perm = var_758, x = var_754_cast_fp16)[name = string("transpose_6")]; + tensor var_760 = expand_dims(axes = var_760_axes_0, x = var_759)[name = string("op_760")]; + string query_states_13_pad_type_0 = const()[name = string("query_states_13_pad_type_0"), val = string("valid")]; + tensor query_states_13_strides_0 = const()[name = string("query_states_13_strides_0"), val = tensor([1, 1])]; + tensor query_states_13_pad_0 = const()[name = string("query_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_13_dilations_0 = const()[name = string("query_states_13_dilations_0"), val = tensor([1, 1])]; + int32 query_states_13_groups_0 = const()[name = string("query_states_13_groups_0"), val = int32(1)]; + tensor query_states_13 = conv(dilations = query_states_13_dilations_0, groups = query_states_13_groups_0, pad = query_states_13_pad_0, pad_type = query_states_13_pad_type_0, strides = query_states_13_strides_0, weight = model_model_layers_3_self_attn_q_proj_weight_palettized, x = var_760)[name = string("query_states_13")]; + string key_states_19_pad_type_0 = const()[name = string("key_states_19_pad_type_0"), val = string("valid")]; + tensor key_states_19_strides_0 = const()[name = string("key_states_19_strides_0"), val = tensor([1, 1])]; + tensor key_states_19_pad_0 = const()[name = string("key_states_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_19_dilations_0 = const()[name = string("key_states_19_dilations_0"), val = tensor([1, 1])]; + int32 key_states_19_groups_0 = const()[name = string("key_states_19_groups_0"), val = int32(1)]; + tensor key_states_19 = conv(dilations = key_states_19_dilations_0, groups = key_states_19_groups_0, pad = key_states_19_pad_0, pad_type = key_states_19_pad_type_0, strides = key_states_19_strides_0, weight = model_model_layers_3_self_attn_k_proj_weight_palettized, x = var_760)[name = string("key_states_19")]; + string value_states_19_pad_type_0 = const()[name = string("value_states_19_pad_type_0"), val = string("valid")]; + tensor value_states_19_strides_0 = const()[name = string("value_states_19_strides_0"), val = tensor([1, 1])]; + tensor value_states_19_pad_0 = const()[name = string("value_states_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_19_dilations_0 = const()[name = string("value_states_19_dilations_0"), val = tensor([1, 1])]; + int32 value_states_19_groups_0 = const()[name = string("value_states_19_groups_0"), val = int32(1)]; + tensor value_states_19 = conv(dilations = value_states_19_dilations_0, groups = value_states_19_groups_0, pad = value_states_19_pad_0, pad_type = value_states_19_pad_type_0, strides = value_states_19_strides_0, weight = model_model_layers_3_self_attn_v_proj_weight_palettized, x = var_760)[name = string("value_states_19")]; + tensor var_780 = const()[name = string("op_780"), val = tensor([1, 32, 128, 256])]; + tensor var_781 = reshape(shape = var_780, x = query_states_13)[name = string("op_781")]; + tensor var_782 = const()[name = string("op_782"), val = tensor([0, 1, 3, 2])]; + tensor var_784 = const()[name = string("op_784"), val = tensor([1, 8, 128, 256])]; + tensor var_785 = reshape(shape = var_784, x = key_states_19)[name = string("op_785")]; + tensor var_786 = const()[name = string("op_786"), val = tensor([0, 1, 3, 2])]; + tensor var_788 = const()[name = string("op_788"), val = tensor([1, 8, 128, 256])]; + tensor var_789 = reshape(shape = var_788, x = value_states_19)[name = string("op_789")]; + tensor var_790 = const()[name = string("op_790"), val = tensor([0, 1, 3, 2])]; + tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 32, 256, 64])]; + tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_85 = transpose(perm = var_782, x = var_781)[name = string("transpose_5")]; + tensor x1_13 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = x_85)[name = string("x1_13")]; + tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 32, 256, 128])]; + tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_13 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = x_85)[name = string("x2_13")]; + tensor var_808 = mul(x = x1_13, y = cos_7)[name = string("op_808")]; + tensor var_809 = mul(x = x2_13, y = sin_7)[name = string("op_809")]; + tensor var_810 = sub(x = var_808, y = var_809)[name = string("op_810")]; + tensor var_811 = mul(x = x2_13, y = cos_7)[name = string("op_811")]; + tensor var_812 = mul(x = x1_13, y = sin_7)[name = string("op_812")]; + tensor var_813 = add(x = var_811, y = var_812)[name = string("op_813")]; + bool rotated_13_interleave_0 = const()[name = string("rotated_13_interleave_0"), val = bool(false)]; + tensor rotated_13 = concat(axis = var_30, interleave = rotated_13_interleave_0, values = (var_810, var_813))[name = string("rotated_13")]; + tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 8, 256, 64])]; + tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_89 = transpose(perm = var_786, x = var_785)[name = string("transpose_4")]; + tensor x1 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = x_89)[name = string("x1")]; + tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 8, 256, 128])]; + tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = x_89)[name = string("x2")]; + tensor var_829 = mul(x = x1, y = cos_7)[name = string("op_829")]; + tensor var_830 = mul(x = x2, y = sin_7)[name = string("op_830")]; + tensor var_831 = sub(x = var_829, y = var_830)[name = string("op_831")]; + tensor var_832 = mul(x = x2, y = cos_7)[name = string("op_832")]; + tensor var_833 = mul(x = x1, y = sin_7)[name = string("op_833")]; + tensor var_834 = add(x = var_832, y = var_833)[name = string("op_834")]; + bool rotated_interleave_0 = const()[name = string("rotated_interleave_0"), val = bool(false)]; + tensor rotated = concat(axis = var_30, interleave = rotated_interleave_0, values = (var_831, var_834))[name = string("rotated")]; + tensor expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor([3])]; + tensor expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor([0])]; + tensor expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor([0])]; + tensor expand_dims_40 = const()[name = string("expand_dims_40"), val = tensor([4])]; + int32 concat_56_axis_0 = const()[name = string("concat_56_axis_0"), val = int32(0)]; + bool concat_56_interleave_0 = const()[name = string("concat_56_interleave_0"), val = bool(false)]; + tensor concat_56 = concat(axis = concat_56_axis_0, interleave = concat_56_interleave_0, values = (expand_dims_36, expand_dims_37, current_pos, expand_dims_39))[name = string("concat_56")]; + tensor concat_57_values1_0 = const()[name = string("concat_57_values1_0"), val = tensor([0])]; + tensor concat_57_values3_0 = const()[name = string("concat_57_values3_0"), val = tensor([0])]; + int32 concat_57_axis_0 = const()[name = string("concat_57_axis_0"), val = int32(0)]; + bool concat_57_interleave_0 = const()[name = string("concat_57_interleave_0"), val = bool(false)]; + tensor concat_57 = concat(axis = concat_57_axis_0, interleave = concat_57_interleave_0, values = (expand_dims_40, concat_57_values1_0, var_252, concat_57_values3_0))[name = string("concat_57")]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_56, begin_mask = model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0, end = concat_57, end_mask = model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_7_stride_0, update = rotated, x = coreml_update_state_13)[name = string("model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_14_write_state")]; + tensor coreml_update_state_14 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_14")]; + tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([35])]; + tensor expand_dims_43 = const()[name = string("expand_dims_43"), val = tensor([0])]; + tensor expand_dims_45 = const()[name = string("expand_dims_45"), val = tensor([0])]; + tensor expand_dims_46 = const()[name = string("expand_dims_46"), val = tensor([36])]; + int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)]; + bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)]; + tensor concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (expand_dims_42, expand_dims_43, current_pos, expand_dims_45))[name = string("concat_60")]; + tensor concat_61_values1_0 = const()[name = string("concat_61_values1_0"), val = tensor([0])]; + tensor concat_61_values3_0 = const()[name = string("concat_61_values3_0"), val = tensor([0])]; + int32 concat_61_axis_0 = const()[name = string("concat_61_axis_0"), val = int32(0)]; + bool concat_61_interleave_0 = const()[name = string("concat_61_interleave_0"), val = bool(false)]; + tensor concat_61 = concat(axis = concat_61_axis_0, interleave = concat_61_interleave_0, values = (expand_dims_46, concat_61_values1_0, var_252, concat_61_values3_0))[name = string("concat_61")]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_21 = transpose(perm = var_790, x = var_789)[name = string("transpose_3")]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_60, begin_mask = model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0, end = concat_61, end_mask = model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_8_stride_0, update = value_states_21, x = coreml_update_state_14)[name = string("model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_15_write_state")]; + tensor coreml_update_state_15 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_15")]; + tensor var_857_begin_0 = const()[name = string("op_857_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor var_857_end_0 = const()[name = string("op_857_end_0"), val = tensor([4, 8, 1024, 128])]; + tensor var_857_end_mask_0 = const()[name = string("op_857_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_857_cast_fp16 = slice_by_index(begin = var_857_begin_0, end = var_857_end_0, end_mask = var_857_end_mask_0, x = coreml_update_state_15)[name = string("op_857_cast_fp16")]; + tensor K_layer_cache_axes_0 = const()[name = string("K_layer_cache_axes_0"), val = tensor([0])]; + tensor K_layer_cache_cast_fp16 = squeeze(axes = K_layer_cache_axes_0, x = var_857_cast_fp16)[name = string("K_layer_cache_cast_fp16")]; + tensor var_859_begin_0 = const()[name = string("op_859_begin_0"), val = tensor([35, 0, 0, 0])]; + tensor var_859_end_0 = const()[name = string("op_859_end_0"), val = tensor([36, 8, 1024, 128])]; + tensor var_859_end_mask_0 = const()[name = string("op_859_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_859_cast_fp16 = slice_by_index(begin = var_859_begin_0, end = var_859_end_0, end_mask = var_859_end_mask_0, x = coreml_update_state_15)[name = string("op_859_cast_fp16")]; + tensor V_layer_cache_axes_0 = const()[name = string("V_layer_cache_axes_0"), val = tensor([0])]; + tensor V_layer_cache_cast_fp16 = squeeze(axes = V_layer_cache_axes_0, x = var_859_cast_fp16)[name = string("V_layer_cache_cast_fp16")]; + tensor x_95_axes_0 = const()[name = string("x_95_axes_0"), val = tensor([1])]; + tensor x_95_cast_fp16 = expand_dims(axes = x_95_axes_0, x = K_layer_cache_cast_fp16)[name = string("x_95_cast_fp16")]; + tensor var_868 = const()[name = string("op_868"), val = tensor([1, 4, 1, 1])]; + tensor x_97_cast_fp16 = tile(reps = var_868, x = x_95_cast_fp16)[name = string("x_97_cast_fp16")]; + tensor var_872 = const()[name = string("op_872"), val = tensor([1, -1, 1024, 128])]; + tensor var_873_cast_fp16 = reshape(shape = var_872, x = x_97_cast_fp16)[name = string("op_873_cast_fp16")]; + tensor x_101_axes_0 = const()[name = string("x_101_axes_0"), val = tensor([1])]; + tensor x_101_cast_fp16 = expand_dims(axes = x_101_axes_0, x = V_layer_cache_cast_fp16)[name = string("x_101_cast_fp16")]; + tensor var_875 = const()[name = string("op_875"), val = tensor([1, 4, 1, 1])]; + tensor x_103_cast_fp16 = tile(reps = var_875, x = x_101_cast_fp16)[name = string("x_103_cast_fp16")]; + bool var_882_transpose_x_0 = const()[name = string("op_882_transpose_x_0"), val = bool(false)]; + bool var_882_transpose_y_0 = const()[name = string("op_882_transpose_y_0"), val = bool(true)]; + tensor var_882_cast_fp16 = matmul(transpose_x = var_882_transpose_x_0, transpose_y = var_882_transpose_y_0, x = rotated_13, y = var_873_cast_fp16)[name = string("op_882_cast_fp16")]; + fp16 var_883_to_fp16 = const()[name = string("op_883_to_fp16"), val = fp16(0x1.6ap-4)]; + tensor attn_weights_cast_fp16 = mul(x = var_882_cast_fp16, y = var_883_to_fp16)[name = string("attn_weights_cast_fp16")]; + tensor x_105_cast_fp16 = add(x = attn_weights_cast_fp16, y = causal_mask)[name = string("x_105_cast_fp16")]; + tensor reduce_max_3_axes_0 = const()[name = string("reduce_max_3_axes_0"), val = tensor([-1])]; + bool reduce_max_3_keep_dims_0 = const()[name = string("reduce_max_3_keep_dims_0"), val = bool(true)]; + tensor reduce_max_3_cast_fp16 = reduce_max(axes = reduce_max_3_axes_0, keep_dims = reduce_max_3_keep_dims_0, x = x_105_cast_fp16)[name = string("reduce_max_3_cast_fp16")]; + tensor x_107_cast_fp16 = sub(x = x_105_cast_fp16, y = reduce_max_3_cast_fp16)[name = string("x_107_cast_fp16")]; + tensor exp_x_cast_fp16 = exp(x = x_107_cast_fp16)[name = string("exp_x_cast_fp16")]; + tensor var_894_axes_0 = const()[name = string("op_894_axes_0"), val = tensor([-1])]; + bool var_894_keep_dims_0 = const()[name = string("op_894_keep_dims_0"), val = bool(true)]; + tensor var_894_cast_fp16 = reduce_sum(axes = var_894_axes_0, keep_dims = var_894_keep_dims_0, x = exp_x_cast_fp16)[name = string("op_894_cast_fp16")]; + tensor var_895_cast_fp16 = real_div(x = exp_x_cast_fp16, y = var_894_cast_fp16)[name = string("op_895_cast_fp16")]; + tensor concat_66 = const()[name = string("concat_66"), val = tensor([32, 256, 1024])]; + tensor reshape_9_cast_fp16 = reshape(shape = concat_66, x = var_895_cast_fp16)[name = string("reshape_9_cast_fp16")]; + tensor concat_67 = const()[name = string("concat_67"), val = tensor([32, 1024, 128])]; + tensor reshape_10_cast_fp16 = reshape(shape = concat_67, x = x_103_cast_fp16)[name = string("reshape_10_cast_fp16")]; + bool matmul_3_transpose_x_0 = const()[name = string("matmul_3_transpose_x_0"), val = bool(false)]; + bool matmul_3_transpose_y_0 = const()[name = string("matmul_3_transpose_y_0"), val = bool(false)]; + tensor matmul_3_cast_fp16 = matmul(transpose_x = matmul_3_transpose_x_0, transpose_y = matmul_3_transpose_y_0, x = reshape_9_cast_fp16, y = reshape_10_cast_fp16)[name = string("matmul_3_cast_fp16")]; + tensor concat_71 = const()[name = string("concat_71"), val = tensor([1, 32, 256, 128])]; + tensor reshape_11_cast_fp16 = reshape(shape = concat_71, x = matmul_3_cast_fp16)[name = string("reshape_11_cast_fp16")]; + tensor var_898_perm_0 = const()[name = string("op_898_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_900 = const()[name = string("op_900"), val = tensor([1, 256, 4096])]; + tensor var_898_cast_fp16 = transpose(perm = var_898_perm_0, x = reshape_11_cast_fp16)[name = string("transpose_2")]; + tensor input_47_cast_fp16 = reshape(shape = var_900, x = var_898_cast_fp16)[name = string("input_47_cast_fp16")]; + tensor model_model_layers_3_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(711594048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(724177024))))[name = string("model_model_layers_3_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_3_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_3_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_47_cast_fp16)[name = string("linear_3_cast_fp16")]; + tensor hidden_states_29_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = linear_3_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; + tensor mean_axes_0 = const()[name = string("mean_axes_0"), val = tensor([-1])]; + bool mean_keep_dims_0 = const()[name = string("mean_keep_dims_0"), val = bool(true)]; + tensor mean_cast_fp16 = reduce_mean(axes = mean_axes_0, keep_dims = mean_keep_dims_0, x = hidden_states_29_cast_fp16)[name = string("mean_cast_fp16")]; + tensor input_49_cast_fp16 = sub(x = hidden_states_29_cast_fp16, y = mean_cast_fp16)[name = string("input_49_cast_fp16")]; + tensor var_911_axes_0 = const()[name = string("op_911_axes_0"), val = tensor([-1])]; + tensor model_model_layers_3_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(724242624)))]; + tensor var_911_cast_fp16 = layer_norm(axes = var_911_axes_0, epsilon = var_32_to_fp16, gamma = model_model_layers_3_post_attention_layernorm_weight_to_fp16, x = input_49_cast_fp16)[name = string("op_911_cast_fp16")]; + tensor var_918 = const()[name = string("op_918"), val = tensor([0, 2, 1])]; + tensor input_51_axes_0 = const()[name = string("input_51_axes_0"), val = tensor([2])]; + tensor var_919 = transpose(perm = var_918, x = var_911_cast_fp16)[name = string("transpose_1")]; + tensor input_51 = expand_dims(axes = input_51_axes_0, x = var_919)[name = string("input_51")]; + string input_53_pad_type_0 = const()[name = string("input_53_pad_type_0"), val = string("valid")]; + tensor input_53_strides_0 = const()[name = string("input_53_strides_0"), val = tensor([1, 1])]; + tensor input_53_pad_0 = const()[name = string("input_53_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_53_dilations_0 = const()[name = string("input_53_dilations_0"), val = tensor([1, 1])]; + int32 input_53_groups_0 = const()[name = string("input_53_groups_0"), val = int32(1)]; + tensor input_53 = conv(dilations = input_53_dilations_0, groups = input_53_groups_0, pad = input_53_pad_0, pad_type = input_53_pad_type_0, strides = input_53_strides_0, weight = model_model_layers_3_mlp_gate_proj_weight_palettized, x = input_51)[name = string("input_53")]; + string up_states_pad_type_0 = const()[name = string("up_states_pad_type_0"), val = string("valid")]; + tensor up_states_strides_0 = const()[name = string("up_states_strides_0"), val = tensor([1, 1])]; + tensor up_states_pad_0 = const()[name = string("up_states_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_dilations_0 = const()[name = string("up_states_dilations_0"), val = tensor([1, 1])]; + int32 up_states_groups_0 = const()[name = string("up_states_groups_0"), val = int32(1)]; + tensor up_states = conv(dilations = up_states_dilations_0, groups = up_states_groups_0, pad = up_states_pad_0, pad_type = up_states_pad_type_0, strides = up_states_strides_0, weight = model_model_layers_3_mlp_up_proj_weight_palettized, x = input_51)[name = string("up_states")]; + tensor gate_states = silu(x = input_53)[name = string("gate_states")]; + tensor input = mul(x = gate_states, y = up_states)[name = string("input")]; + string hidden_states_pad_type_0 = const()[name = string("hidden_states_pad_type_0"), val = string("valid")]; + tensor hidden_states_strides_0 = const()[name = string("hidden_states_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_pad_0 = const()[name = string("hidden_states_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_dilations_0 = const()[name = string("hidden_states_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_groups_0 = const()[name = string("hidden_states_groups_0"), val = int32(1)]; + tensor hidden_states_1 = conv(dilations = hidden_states_dilations_0, groups = hidden_states_groups_0, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = hidden_states_strides_0, weight = model_model_layers_3_mlp_down_proj_weight_palettized, x = input)[name = string("hidden_states")]; + tensor var_941_axes_0 = const()[name = string("op_941_axes_0"), val = tensor([2])]; + tensor var_941 = squeeze(axes = var_941_axes_0, x = hidden_states_1)[name = string("op_941")]; + tensor var_942 = const()[name = string("op_942"), val = tensor([0, 2, 1])]; + tensor var_943 = transpose(perm = var_942, x = var_941)[name = string("transpose_0")]; + tensor output_hidden_states = add(x = hidden_states_29_cast_fp16, y = var_943)[name = string("op_944_cast_fp16")]; + } -> (output_hidden_states); +} \ No newline at end of file