program(1.3) [buildInfo = dict({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}})] { func main(tensor cache_length, tensor decoder_key_padding_mask, state> encoder_attn_key_cache, state> encoder_attn_key_padding_mask, state> encoder_attn_value_cache, tensor input_ids, tensor kv_cache_update_mask, state> self_attn_key_cache, state> self_attn_value_cache) { int32 var_42_axis_0 = const()[name = string("op_42_axis_0"), val = int32(0)]; int32 var_42_batch_dims_0 = const()[name = string("op_42_batch_dims_0"), val = int32(0)]; bool var_42_validate_indices_0 = const()[name = string("op_42_validate_indices_0"), val = bool(false)]; tensor embed_tokens_weight_to_fp16 = const()[name = string("embed_tokens_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; tensor var_42_cast_fp16 = gather(axis = var_42_axis_0, batch_dims = var_42_batch_dims_0, indices = input_ids, validate_indices = var_42_validate_indices_0, x = embed_tokens_weight_to_fp16)[name = string("op_42_cast_fp16")]; int32 var_49_axis_0 = const()[name = string("op_49_axis_0"), val = int32(0)]; int32 var_49_batch_dims_0 = const()[name = string("op_49_batch_dims_0"), val = int32(0)]; bool var_49_validate_indices_0 = const()[name = string("op_49_validate_indices_0"), val = bool(false)]; tensor embed_positions_inlier_module_weight_to_fp16 = const()[name = string("embed_positions_inlier_module_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79663232)))]; string cache_length_to_uint16_dtype_0 = const()[name = string("cache_length_to_uint16_dtype_0"), val = string("uint16")]; tensor cache_length_to_uint16 = cast(dtype = cache_length_to_uint16_dtype_0, x = cache_length)[name = string("cast_0")]; tensor var_49_cast_fp16_cast_uint16 = gather(axis = var_49_axis_0, batch_dims = var_49_batch_dims_0, indices = cache_length_to_uint16, validate_indices = var_49_validate_indices_0, x = embed_positions_inlier_module_weight_to_fp16)[name = string("op_49_cast_fp16_cast_uint16")]; int32 var_51_axis_0 = const()[name = string("op_51_axis_0"), val = int32(0)]; int32 var_51_batch_dims_0 = const()[name = string("op_51_batch_dims_0"), val = int32(0)]; bool var_51_validate_indices_0 = const()[name = string("op_51_validate_indices_0"), val = bool(false)]; tensor embed_positions_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80356800))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80351424))))[name = string("embed_positions_outlier_module_weight_to_fp16_sparsified")]; tensor var_51_cast_fp16_cast_uint16 = gather(axis = var_51_axis_0, batch_dims = var_51_batch_dims_0, indices = cache_length_to_uint16, validate_indices = var_51_validate_indices_0, x = embed_positions_outlier_module_weight_to_fp16_sparsified)[name = string("op_51_cast_fp16_cast_uint16")]; tensor var_52_cast_fp16 = add(x = var_49_cast_fp16_cast_uint16, y = var_51_cast_fp16_cast_uint16)[name = string("op_52_cast_fp16")]; tensor hidden_states_1_cast_fp16 = add(x = var_42_cast_fp16, y = var_52_cast_fp16)[name = string("hidden_states_1_cast_fp16")]; tensor var_66_axes_0 = const()[name = string("op_66_axes_0"), val = tensor([2])]; tensor var_66_cast_fp16 = expand_dims(axes = var_66_axes_0, x = hidden_states_1_cast_fp16)[name = string("op_66_cast_fp16")]; tensor inputs_1_axes_0 = const()[name = string("inputs_1_axes_0"), val = tensor([3])]; tensor inputs_1_cast_fp16 = expand_dims(axes = inputs_1_axes_0, x = var_66_cast_fp16)[name = string("inputs_1_cast_fp16")]; tensor read_state_0 = read_state(input = self_attn_key_cache)[name = string("read_state_0")]; tensor tile_0 = const()[name = string("tile_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80399872)))]; int32 var_71_axis_0 = const()[name = string("op_71_axis_0"), val = int32(0)]; tensor var_71_cast_fp16_0, tensor var_71_cast_fp16_1, tensor var_71_cast_fp16_2, tensor var_71_cast_fp16_3, tensor var_71_cast_fp16_4, tensor var_71_cast_fp16_5, tensor var_71_cast_fp16_6, tensor var_71_cast_fp16_7, tensor var_71_cast_fp16_8, tensor var_71_cast_fp16_9, tensor var_71_cast_fp16_10, tensor var_71_cast_fp16_11 = split(axis = var_71_axis_0, split_sizes = tile_0, x = read_state_0)[name = string("op_71_cast_fp16")]; tensor read_state_1 = read_state(input = self_attn_value_cache)[name = string("read_state_1")]; tensor tile_1 = const()[name = string("tile_1"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80400000)))]; int32 var_86_axis_0 = const()[name = string("op_86_axis_0"), val = int32(0)]; tensor var_86_cast_fp16_0, tensor var_86_cast_fp16_1, tensor var_86_cast_fp16_2, tensor var_86_cast_fp16_3, tensor var_86_cast_fp16_4, tensor var_86_cast_fp16_5, tensor var_86_cast_fp16_6, tensor var_86_cast_fp16_7, tensor var_86_cast_fp16_8, tensor var_86_cast_fp16_9, tensor var_86_cast_fp16_10, tensor var_86_cast_fp16_11 = split(axis = var_86_axis_0, split_sizes = tile_1, x = read_state_1)[name = string("op_86_cast_fp16")]; tensor read_state_2 = read_state(input = encoder_attn_key_cache)[name = string("read_state_2")]; tensor obj_17_begin_0 = const()[name = string("obj_17_begin_0"), val = tensor([0, 0, 0, 0])]; tensor obj_17_end_0 = const()[name = string("obj_17_end_0"), val = tensor([1, 768, 1, 1536])]; tensor obj_17_end_mask_0 = const()[name = string("obj_17_end_mask_0"), val = tensor([false, true, true, true])]; tensor obj_17_cast_fp16 = slice_by_index(begin = obj_17_begin_0, end = obj_17_end_0, end_mask = obj_17_end_mask_0, x = read_state_2)[name = string("obj_17_cast_fp16")]; tensor read_state_3 = read_state(input = encoder_attn_value_cache)[name = string("read_state_3")]; tensor obj_19_begin_0 = const()[name = string("obj_19_begin_0"), val = tensor([0, 0, 0, 0])]; tensor obj_19_end_0 = const()[name = string("obj_19_end_0"), val = tensor([1, 768, 1, 1536])]; tensor obj_19_end_mask_0 = const()[name = string("obj_19_end_mask_0"), val = tensor([false, true, true, true])]; tensor obj_19_cast_fp16 = slice_by_index(begin = obj_19_begin_0, end = obj_19_end_0, end_mask = obj_19_end_mask_0, x = read_state_3)[name = string("obj_19_cast_fp16")]; int32 var_114 = const()[name = string("op_114"), val = int32(3)]; tensor out_1_axes_0 = const()[name = string("out_1_axes_0"), val = tensor([1])]; fp16 var_139_to_fp16 = const()[name = string("op_139_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_1_cast_fp16 = layer_norm(axes = out_1_axes_0, epsilon = var_139_to_fp16, x = inputs_1_cast_fp16)[name = string("out_1_cast_fp16")]; tensor obj_5_mean_0_to_fp16 = const()[name = string("obj_5_mean_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80400128)))]; tensor obj_5_variance_0_to_fp16 = const()[name = string("obj_5_variance_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80401728)))]; tensor obj_5_gamma_0_to_fp16 = const()[name = string("obj_5_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80403328)))]; tensor obj_5_beta_0_to_fp16 = const()[name = string("obj_5_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80404928)))]; fp16 obj_5_epsilon_0_to_fp16 = const()[name = string("obj_5_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; tensor obj_5_cast_fp16 = batch_norm(beta = obj_5_beta_0_to_fp16, epsilon = obj_5_epsilon_0_to_fp16, gamma = obj_5_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_1_cast_fp16)[name = string("obj_5_cast_fp16")]; string var_161_pad_type_0 = const()[name = string("op_161_pad_type_0"), val = string("valid")]; tensor var_161_strides_0 = const()[name = string("op_161_strides_0"), val = tensor([1, 1])]; tensor var_161_pad_0 = const()[name = string("op_161_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_161_dilations_0 = const()[name = string("op_161_dilations_0"), val = tensor([1, 1])]; int32 var_161_groups_0 = const()[name = string("op_161_groups_0"), val = int32(1)]; tensor layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80406528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80701504))))[name = string("layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_0_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80701632)))]; tensor var_161_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_161_dilations_0, groups = var_161_groups_0, pad = var_161_pad_0, pad_type = var_161_pad_type_0, strides = var_161_strides_0, weight = layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = string("op_161_cast_fp16")]; string var_167_pad_type_0 = const()[name = string("op_167_pad_type_0"), val = string("valid")]; tensor var_167_strides_0 = const()[name = string("op_167_strides_0"), val = tensor([1, 1])]; tensor var_167_pad_0 = const()[name = string("op_167_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_167_dilations_0 = const()[name = string("op_167_dilations_0"), val = tensor([1, 1])]; int32 var_167_groups_0 = const()[name = string("op_167_groups_0"), val = int32(1)]; tensor layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80717440))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80703232))))[name = string("layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_167_cast_fp16 = conv(dilations = var_167_dilations_0, groups = var_167_groups_0, pad = var_167_pad_0, pad_type = var_167_pad_type_0, strides = var_167_strides_0, weight = layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = string("op_167_cast_fp16")]; tensor query_1_cast_fp16 = add(x = var_161_cast_fp16, y = var_167_cast_fp16)[name = string("query_1_cast_fp16")]; string var_176_pad_type_0 = const()[name = string("op_176_pad_type_0"), val = string("valid")]; tensor var_176_strides_0 = const()[name = string("op_176_strides_0"), val = tensor([1, 1])]; tensor var_176_pad_0 = const()[name = string("op_176_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_176_dilations_0 = const()[name = string("op_176_dilations_0"), val = tensor([1, 1])]; int32 var_176_groups_0 = const()[name = string("op_176_groups_0"), val = int32(1)]; tensor layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80791232))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81086208))))[name = string("layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; tensor var_176_cast_fp16 = conv(dilations = var_176_dilations_0, groups = var_176_groups_0, pad = var_176_pad_0, pad_type = var_176_pad_type_0, strides = var_176_strides_0, weight = layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = string("op_176_cast_fp16")]; string var_182_pad_type_0 = const()[name = string("op_182_pad_type_0"), val = string("valid")]; tensor var_182_strides_0 = const()[name = string("op_182_strides_0"), val = tensor([1, 1])]; tensor var_182_pad_0 = const()[name = string("op_182_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_182_dilations_0 = const()[name = string("op_182_dilations_0"), val = tensor([1, 1])]; int32 var_182_groups_0 = const()[name = string("op_182_groups_0"), val = int32(1)]; tensor layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81101248))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81086336))))[name = string("layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_182_cast_fp16 = conv(dilations = var_182_dilations_0, groups = var_182_groups_0, pad = var_182_pad_0, pad_type = var_182_pad_type_0, strides = var_182_strides_0, weight = layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = string("op_182_cast_fp16")]; tensor current_key_1_cast_fp16 = add(x = var_176_cast_fp16, y = var_182_cast_fp16)[name = string("current_key_1_cast_fp16")]; string var_192_pad_type_0 = const()[name = string("op_192_pad_type_0"), val = string("valid")]; tensor var_192_strides_0 = const()[name = string("op_192_strides_0"), val = tensor([1, 1])]; tensor var_192_pad_0 = const()[name = string("op_192_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_192_dilations_0 = const()[name = string("op_192_dilations_0"), val = tensor([1, 1])]; int32 var_192_groups_0 = const()[name = string("op_192_groups_0"), val = int32(1)]; tensor layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81175040))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81470016))))[name = string("layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_0_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81470144)))]; tensor var_192_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_192_dilations_0, groups = var_192_groups_0, pad = var_192_pad_0, pad_type = var_192_pad_type_0, strides = var_192_strides_0, weight = layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = string("op_192_cast_fp16")]; string var_198_pad_type_0 = const()[name = string("op_198_pad_type_0"), val = string("valid")]; tensor var_198_strides_0 = const()[name = string("op_198_strides_0"), val = tensor([1, 1])]; tensor var_198_pad_0 = const()[name = string("op_198_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_198_dilations_0 = const()[name = string("op_198_dilations_0"), val = tensor([1, 1])]; int32 var_198_groups_0 = const()[name = string("op_198_groups_0"), val = int32(1)]; tensor layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81480256))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81471744))))[name = string("layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_198_cast_fp16 = conv(dilations = var_198_dilations_0, groups = var_198_groups_0, pad = var_198_pad_0, pad_type = var_198_pad_type_0, strides = var_198_strides_0, weight = layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = string("op_198_cast_fp16")]; tensor current_value_1_cast_fp16 = add(x = var_192_cast_fp16, y = var_198_cast_fp16)[name = string("current_value_1_cast_fp16")]; tensor var_201_axes_0 = const()[name = string("op_201_axes_0"), val = tensor([1])]; tensor var_201_cast_fp16 = expand_dims(axes = var_201_axes_0, x = kv_cache_update_mask)[name = string("op_201_cast_fp16")]; tensor var_202_axes_0 = const()[name = string("op_202_axes_0"), val = tensor([2])]; tensor var_202_cast_fp16 = expand_dims(axes = var_202_axes_0, x = var_201_cast_fp16)[name = string("op_202_cast_fp16")]; tensor var_204_cast_fp16 = mul(x = current_key_1_cast_fp16, y = var_202_cast_fp16)[name = string("op_204_cast_fp16")]; tensor key_1_cast_fp16 = add(x = var_71_cast_fp16_0, y = var_204_cast_fp16)[name = string("key_1_cast_fp16")]; tensor var_206_cast_fp16 = mul(x = current_value_1_cast_fp16, y = var_202_cast_fp16)[name = string("op_206_cast_fp16")]; tensor value_1_cast_fp16 = add(x = var_86_cast_fp16_0, y = var_206_cast_fp16)[name = string("value_1_cast_fp16")]; tensor var_209 = const()[name = string("op_209"), val = tensor([1, 12, 64, -1])]; tensor mh_q_1_cast_fp16 = reshape(shape = var_209, x = query_1_cast_fp16)[name = string("mh_q_1_cast_fp16")]; fp16 var_211_to_fp16 = const()[name = string("op_211_to_fp16"), val = fp16(0x1p-3)]; tensor var_212_cast_fp16 = mul(x = mh_q_1_cast_fp16, y = var_211_to_fp16)[name = string("op_212_cast_fp16")]; tensor var_213 = const()[name = string("op_213"), val = tensor([1, 12, 64, -1])]; tensor var_214_cast_fp16 = reshape(shape = var_213, x = key_1_cast_fp16)[name = string("op_214_cast_fp16")]; bool mh_w_1_transpose_x_0 = const()[name = string("mh_w_1_transpose_x_0"), val = bool(true)]; bool mh_w_1_transpose_y_0 = const()[name = string("mh_w_1_transpose_y_0"), val = bool(false)]; tensor mh_w_1_cast_fp16 = matmul(transpose_x = mh_w_1_transpose_x_0, transpose_y = mh_w_1_transpose_y_0, x = var_212_cast_fp16, y = var_214_cast_fp16)[name = string("mh_w_1_cast_fp16")]; tensor var_218_axes_0 = const()[name = string("op_218_axes_0"), val = tensor([1])]; tensor var_218_cast_fp16 = expand_dims(axes = var_218_axes_0, x = decoder_key_padding_mask)[name = string("op_218_cast_fp16")]; tensor var_219_axes_0 = const()[name = string("op_219_axes_0"), val = tensor([2])]; tensor var_219_cast_fp16 = expand_dims(axes = var_219_axes_0, x = var_218_cast_fp16)[name = string("op_219_cast_fp16")]; tensor mh_w_3_cast_fp16 = add(x = mh_w_1_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_3_cast_fp16")]; tensor var_222_cast_fp16 = softmax(axis = var_114, x = mh_w_3_cast_fp16)[name = string("op_222_cast_fp16")]; tensor var_223 = const()[name = string("op_223"), val = tensor([1, 12, 64, -1])]; tensor var_224_cast_fp16 = reshape(shape = var_223, x = value_1_cast_fp16)[name = string("op_224_cast_fp16")]; bool attn_1_transpose_x_0 = const()[name = string("attn_1_transpose_x_0"), val = bool(false)]; bool attn_1_transpose_y_0 = const()[name = string("attn_1_transpose_y_0"), val = bool(true)]; tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = var_224_cast_fp16, y = var_222_cast_fp16)[name = string("attn_1_cast_fp16")]; tensor var_227 = const()[name = string("op_227"), val = tensor([1, 768, 1, -1])]; tensor input_1_cast_fp16 = reshape(shape = var_227, x = attn_1_cast_fp16)[name = string("input_1_cast_fp16")]; string var_237_pad_type_0 = const()[name = string("op_237_pad_type_0"), val = string("valid")]; tensor var_237_strides_0 = const()[name = string("op_237_strides_0"), val = tensor([1, 1])]; tensor var_237_pad_0 = const()[name = string("op_237_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_237_dilations_0 = const()[name = string("op_237_dilations_0"), val = tensor([1, 1])]; int32 var_237_groups_0 = const()[name = string("op_237_groups_0"), val = int32(1)]; tensor layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81554048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81849024))))[name = string("layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_0_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81849152)))]; tensor var_237_cast_fp16 = conv(bias = layers_0_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_237_dilations_0, groups = var_237_groups_0, pad = var_237_pad_0, pad_type = var_237_pad_type_0, strides = var_237_strides_0, weight = layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_1_cast_fp16)[name = string("op_237_cast_fp16")]; string var_243_pad_type_0 = const()[name = string("op_243_pad_type_0"), val = string("valid")]; tensor var_243_strides_0 = const()[name = string("op_243_strides_0"), val = tensor([1, 1])]; tensor var_243_pad_0 = const()[name = string("op_243_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_243_dilations_0 = const()[name = string("op_243_dilations_0"), val = tensor([1, 1])]; int32 var_243_groups_0 = const()[name = string("op_243_groups_0"), val = int32(1)]; tensor layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81860992))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81850752))))[name = string("layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_243_cast_fp16 = conv(dilations = var_243_dilations_0, groups = var_243_groups_0, pad = var_243_pad_0, pad_type = var_243_pad_type_0, strides = var_243_strides_0, weight = layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_1_cast_fp16)[name = string("op_243_cast_fp16")]; tensor obj_11_cast_fp16 = add(x = var_237_cast_fp16, y = var_243_cast_fp16)[name = string("obj_11_cast_fp16")]; tensor inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = obj_11_cast_fp16)[name = string("inputs_3_cast_fp16")]; tensor out_3_axes_0 = const()[name = string("out_3_axes_0"), val = tensor([1])]; fp16 var_258_to_fp16 = const()[name = string("op_258_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_3_cast_fp16 = layer_norm(axes = out_3_axes_0, epsilon = var_258_to_fp16, x = inputs_3_cast_fp16)[name = string("out_3_cast_fp16")]; tensor obj_13_gamma_0_to_fp16 = const()[name = string("obj_13_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81934784)))]; tensor obj_13_beta_0_to_fp16 = const()[name = string("obj_13_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81936384)))]; fp16 obj_13_epsilon_0_to_fp16 = const()[name = string("obj_13_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; tensor obj_13_cast_fp16 = batch_norm(beta = obj_13_beta_0_to_fp16, epsilon = obj_13_epsilon_0_to_fp16, gamma = obj_13_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_3_cast_fp16)[name = string("obj_13_cast_fp16")]; string var_278_pad_type_0 = const()[name = string("op_278_pad_type_0"), val = string("valid")]; tensor var_278_strides_0 = const()[name = string("op_278_strides_0"), val = tensor([1, 1])]; tensor var_278_pad_0 = const()[name = string("op_278_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_278_dilations_0 = const()[name = string("op_278_dilations_0"), val = tensor([1, 1])]; int32 var_278_groups_0 = const()[name = string("op_278_groups_0"), val = int32(1)]; tensor layers_0_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81937984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82232960))))[name = string("layers_0_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_0_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82233088)))]; tensor var_278_cast_fp16 = conv(bias = layers_0_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_278_dilations_0, groups = var_278_groups_0, pad = var_278_pad_0, pad_type = var_278_pad_type_0, strides = var_278_strides_0, weight = layers_0_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_13_cast_fp16)[name = string("op_278_cast_fp16")]; string var_284_pad_type_0 = const()[name = string("op_284_pad_type_0"), val = string("valid")]; tensor var_284_strides_0 = const()[name = string("op_284_strides_0"), val = tensor([1, 1])]; tensor var_284_pad_0 = const()[name = string("op_284_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_284_dilations_0 = const()[name = string("op_284_dilations_0"), val = tensor([1, 1])]; int32 var_284_groups_0 = const()[name = string("op_284_groups_0"), val = int32(1)]; tensor layers_0_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82258496))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82234688))))[name = string("layers_0_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_284_cast_fp16 = conv(dilations = var_284_dilations_0, groups = var_284_groups_0, pad = var_284_pad_0, pad_type = var_284_pad_type_0, strides = var_284_strides_0, weight = layers_0_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_13_cast_fp16)[name = string("op_284_cast_fp16")]; tensor query_3_cast_fp16 = add(x = var_278_cast_fp16, y = var_284_cast_fp16)[name = string("query_3_cast_fp16")]; tensor var_287 = const()[name = string("op_287"), val = tensor([1, 12, 64, -1])]; tensor mh_q_3_cast_fp16 = reshape(shape = var_287, x = query_3_cast_fp16)[name = string("mh_q_3_cast_fp16")]; fp16 var_289_to_fp16 = const()[name = string("op_289_to_fp16"), val = fp16(0x1p-3)]; tensor var_290_cast_fp16 = mul(x = mh_q_3_cast_fp16, y = var_289_to_fp16)[name = string("op_290_cast_fp16")]; tensor var_291 = const()[name = string("op_291"), val = tensor([1, 12, 64, -1])]; tensor var_292_cast_fp16 = reshape(shape = var_291, x = obj_17_cast_fp16)[name = string("op_292_cast_fp16")]; bool mh_w_5_transpose_x_0 = const()[name = string("mh_w_5_transpose_x_0"), val = bool(true)]; bool mh_w_5_transpose_y_0 = const()[name = string("mh_w_5_transpose_y_0"), val = bool(false)]; tensor mh_w_5_cast_fp16 = matmul(transpose_x = mh_w_5_transpose_x_0, transpose_y = mh_w_5_transpose_y_0, x = var_290_cast_fp16, y = var_292_cast_fp16)[name = string("mh_w_5_cast_fp16")]; tensor read_state_4 = read_state(input = encoder_attn_key_padding_mask)[name = string("read_state_4")]; tensor var_296_axes_0 = const()[name = string("op_296_axes_0"), val = tensor([1])]; tensor var_296_cast_fp16 = expand_dims(axes = var_296_axes_0, x = read_state_4)[name = string("op_296_cast_fp16")]; tensor var_297_axes_0 = const()[name = string("op_297_axes_0"), val = tensor([2])]; tensor var_297_cast_fp16 = expand_dims(axes = var_297_axes_0, x = var_296_cast_fp16)[name = string("op_297_cast_fp16")]; tensor mh_w_7_cast_fp16 = add(x = mh_w_5_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_7_cast_fp16")]; tensor obj_23_cast_fp16 = softmax(axis = var_114, x = mh_w_7_cast_fp16)[name = string("obj_23_cast_fp16")]; tensor var_301 = const()[name = string("op_301"), val = tensor([1, 12, 64, -1])]; tensor var_302_cast_fp16 = reshape(shape = var_301, x = obj_19_cast_fp16)[name = string("op_302_cast_fp16")]; bool attn_3_transpose_x_0 = const()[name = string("attn_3_transpose_x_0"), val = bool(false)]; bool attn_3_transpose_y_0 = const()[name = string("attn_3_transpose_y_0"), val = bool(true)]; tensor attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = var_302_cast_fp16, y = obj_23_cast_fp16)[name = string("attn_3_cast_fp16")]; tensor var_305 = const()[name = string("op_305"), val = tensor([1, 768, 1, -1])]; tensor input_3_cast_fp16 = reshape(shape = var_305, x = attn_3_cast_fp16)[name = string("input_3_cast_fp16")]; string var_315_pad_type_0 = const()[name = string("op_315_pad_type_0"), val = string("valid")]; tensor var_315_strides_0 = const()[name = string("op_315_strides_0"), val = tensor([1, 1])]; tensor var_315_pad_0 = const()[name = string("op_315_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_315_dilations_0 = const()[name = string("op_315_dilations_0"), val = tensor([1, 1])]; int32 var_315_groups_0 = const()[name = string("op_315_groups_0"), val = int32(1)]; tensor layers_0_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82332288))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82627264))))[name = string("layers_0_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_0_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82627392)))]; tensor var_315_cast_fp16 = conv(bias = layers_0_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_315_dilations_0, groups = var_315_groups_0, pad = var_315_pad_0, pad_type = var_315_pad_type_0, strides = var_315_strides_0, weight = layers_0_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_3_cast_fp16)[name = string("op_315_cast_fp16")]; string var_321_pad_type_0 = const()[name = string("op_321_pad_type_0"), val = string("valid")]; tensor var_321_strides_0 = const()[name = string("op_321_strides_0"), val = tensor([1, 1])]; tensor var_321_pad_0 = const()[name = string("op_321_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_321_dilations_0 = const()[name = string("op_321_dilations_0"), val = tensor([1, 1])]; int32 var_321_groups_0 = const()[name = string("op_321_groups_0"), val = int32(1)]; tensor layers_0_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82636736))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82628992))))[name = string("layers_0_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_321_cast_fp16 = conv(dilations = var_321_dilations_0, groups = var_321_groups_0, pad = var_321_pad_0, pad_type = var_321_pad_type_0, strides = var_321_strides_0, weight = layers_0_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_3_cast_fp16)[name = string("op_321_cast_fp16")]; tensor obj_21_cast_fp16 = add(x = var_315_cast_fp16, y = var_321_cast_fp16)[name = string("obj_21_cast_fp16")]; tensor inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = obj_21_cast_fp16)[name = string("inputs_5_cast_fp16")]; tensor out_5_axes_0 = const()[name = string("out_5_axes_0"), val = tensor([1])]; fp16 var_332_to_fp16 = const()[name = string("op_332_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_5_cast_fp16 = layer_norm(axes = out_5_axes_0, epsilon = var_332_to_fp16, x = inputs_5_cast_fp16)[name = string("out_5_cast_fp16")]; tensor input_5_gamma_0_to_fp16 = const()[name = string("input_5_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82710528)))]; tensor input_5_beta_0_to_fp16 = const()[name = string("input_5_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82712128)))]; fp16 input_5_epsilon_0_to_fp16 = const()[name = string("input_5_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; tensor input_5_cast_fp16 = batch_norm(beta = input_5_beta_0_to_fp16, epsilon = input_5_epsilon_0_to_fp16, gamma = input_5_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_5_cast_fp16)[name = string("input_5_cast_fp16")]; string var_350_pad_type_0 = const()[name = string("op_350_pad_type_0"), val = string("valid")]; tensor var_350_strides_0 = const()[name = string("op_350_strides_0"), val = tensor([1, 1])]; tensor var_350_pad_0 = const()[name = string("op_350_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_350_dilations_0 = const()[name = string("op_350_dilations_0"), val = tensor([1, 1])]; int32 var_350_groups_0 = const()[name = string("op_350_groups_0"), val = int32(1)]; tensor layers_0_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82713728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83893440))))[name = string("layers_0_fc1_inlier_module_weight_to_fp16_palettized")]; tensor layers_0_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_0_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83893568)))]; tensor var_350_cast_fp16 = conv(bias = layers_0_fc1_inlier_module_bias_to_fp16, dilations = var_350_dilations_0, groups = var_350_groups_0, pad = var_350_pad_0, pad_type = var_350_pad_type_0, strides = var_350_strides_0, weight = layers_0_fc1_inlier_module_weight_to_fp16_palettized, x = input_5_cast_fp16)[name = string("op_350_cast_fp16")]; string var_356_pad_type_0 = const()[name = string("op_356_pad_type_0"), val = string("valid")]; tensor var_356_strides_0 = const()[name = string("op_356_strides_0"), val = tensor([1, 1])]; tensor var_356_pad_0 = const()[name = string("op_356_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_356_dilations_0 = const()[name = string("op_356_dilations_0"), val = tensor([1, 1])]; int32 var_356_groups_0 = const()[name = string("op_356_groups_0"), val = int32(1)]; tensor layers_0_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83956160))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83899776))))[name = string("layers_0_fc1_outlier_module_weight_to_fp16_sparsified")]; tensor var_356_cast_fp16 = conv(dilations = var_356_dilations_0, groups = var_356_groups_0, pad = var_356_pad_0, pad_type = var_356_pad_type_0, strides = var_356_strides_0, weight = layers_0_fc1_outlier_module_weight_to_fp16_sparsified, x = input_5_cast_fp16)[name = string("op_356_cast_fp16")]; tensor input_7_cast_fp16 = add(x = var_350_cast_fp16, y = var_356_cast_fp16)[name = string("input_7_cast_fp16")]; string input_9_mode_0 = const()[name = string("input_9_mode_0"), val = string("EXACT")]; tensor input_9_cast_fp16 = gelu(mode = input_9_mode_0, x = input_7_cast_fp16)[name = string("input_9_cast_fp16")]; string var_367_pad_type_0 = const()[name = string("op_367_pad_type_0"), val = string("valid")]; tensor var_367_strides_0 = const()[name = string("op_367_strides_0"), val = tensor([1, 1])]; tensor var_367_pad_0 = const()[name = string("op_367_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_367_dilations_0 = const()[name = string("op_367_dilations_0"), val = tensor([1, 1])]; int32 var_367_groups_0 = const()[name = string("op_367_groups_0"), val = int32(1)]; tensor layers_0_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84251136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85430848))))[name = string("layers_0_fc2_inlier_module_weight_to_fp16_palettized")]; tensor layers_0_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_0_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85430976)))]; tensor var_367_cast_fp16 = conv(bias = layers_0_fc2_inlier_module_bias_to_fp16, dilations = var_367_dilations_0, groups = var_367_groups_0, pad = var_367_pad_0, pad_type = var_367_pad_type_0, strides = var_367_strides_0, weight = layers_0_fc2_inlier_module_weight_to_fp16_palettized, x = input_9_cast_fp16)[name = string("op_367_cast_fp16")]; string var_373_pad_type_0 = const()[name = string("op_373_pad_type_0"), val = string("valid")]; tensor var_373_strides_0 = const()[name = string("op_373_strides_0"), val = tensor([1, 1])]; tensor var_373_pad_0 = const()[name = string("op_373_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_373_dilations_0 = const()[name = string("op_373_dilations_0"), val = tensor([1, 1])]; int32 var_373_groups_0 = const()[name = string("op_373_groups_0"), val = int32(1)]; tensor layers_0_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85482112))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85432576))))[name = string("layers_0_fc2_outlier_module_weight_to_fp16_sparsified")]; tensor var_373_cast_fp16 = conv(dilations = var_373_dilations_0, groups = var_373_groups_0, pad = var_373_pad_0, pad_type = var_373_pad_type_0, strides = var_373_strides_0, weight = layers_0_fc2_outlier_module_weight_to_fp16_sparsified, x = input_9_cast_fp16)[name = string("op_373_cast_fp16")]; tensor hidden_states_3_cast_fp16 = add(x = var_367_cast_fp16, y = var_373_cast_fp16)[name = string("hidden_states_3_cast_fp16")]; tensor inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = hidden_states_3_cast_fp16)[name = string("inputs_7_cast_fp16")]; tensor obj_35_begin_0 = const()[name = string("obj_35_begin_0"), val = tensor([1, 0, 0, 0])]; tensor obj_35_end_0 = const()[name = string("obj_35_end_0"), val = tensor([2, 768, 1, 1536])]; tensor obj_35_end_mask_0 = const()[name = string("obj_35_end_mask_0"), val = tensor([false, true, true, true])]; tensor obj_35_cast_fp16 = slice_by_index(begin = obj_35_begin_0, end = obj_35_end_0, end_mask = obj_35_end_mask_0, x = read_state_2)[name = string("obj_35_cast_fp16")]; tensor obj_37_begin_0 = const()[name = string("obj_37_begin_0"), val = tensor([1, 0, 0, 0])]; tensor obj_37_end_0 = const()[name = string("obj_37_end_0"), val = tensor([2, 768, 1, 1536])]; tensor obj_37_end_mask_0 = const()[name = string("obj_37_end_mask_0"), val = tensor([false, true, true, true])]; tensor obj_37_cast_fp16 = slice_by_index(begin = obj_37_begin_0, end = obj_37_end_0, end_mask = obj_37_end_mask_0, x = read_state_3)[name = string("obj_37_cast_fp16")]; int32 var_395 = const()[name = string("op_395"), val = int32(3)]; tensor out_7_axes_0 = const()[name = string("out_7_axes_0"), val = tensor([1])]; fp16 var_420_to_fp16 = const()[name = string("op_420_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_7_cast_fp16 = layer_norm(axes = out_7_axes_0, epsilon = var_420_to_fp16, x = inputs_7_cast_fp16)[name = string("out_7_cast_fp16")]; tensor obj_25_gamma_0_to_fp16 = const()[name = string("obj_25_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85777088)))]; tensor obj_25_beta_0_to_fp16 = const()[name = string("obj_25_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85778688)))]; fp16 obj_25_epsilon_0_to_fp16 = const()[name = string("obj_25_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; tensor obj_25_cast_fp16 = batch_norm(beta = obj_25_beta_0_to_fp16, epsilon = obj_25_epsilon_0_to_fp16, gamma = obj_25_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_7_cast_fp16)[name = string("obj_25_cast_fp16")]; string var_442_pad_type_0 = const()[name = string("op_442_pad_type_0"), val = string("valid")]; tensor var_442_strides_0 = const()[name = string("op_442_strides_0"), val = tensor([1, 1])]; tensor var_442_pad_0 = const()[name = string("op_442_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_442_dilations_0 = const()[name = string("op_442_dilations_0"), val = tensor([1, 1])]; int32 var_442_groups_0 = const()[name = string("op_442_groups_0"), val = int32(1)]; tensor layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85780288))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86075264))))[name = string("layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_1_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86075392)))]; tensor var_442_cast_fp16 = conv(bias = layers_1_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_442_dilations_0, groups = var_442_groups_0, pad = var_442_pad_0, pad_type = var_442_pad_type_0, strides = var_442_strides_0, weight = layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = string("op_442_cast_fp16")]; string var_448_pad_type_0 = const()[name = string("op_448_pad_type_0"), val = string("valid")]; tensor var_448_strides_0 = const()[name = string("op_448_strides_0"), val = tensor([1, 1])]; tensor var_448_pad_0 = const()[name = string("op_448_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_448_dilations_0 = const()[name = string("op_448_dilations_0"), val = tensor([1, 1])]; int32 var_448_groups_0 = const()[name = string("op_448_groups_0"), val = int32(1)]; tensor layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86097920))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86076992))))[name = string("layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_448_cast_fp16 = conv(dilations = var_448_dilations_0, groups = var_448_groups_0, pad = var_448_pad_0, pad_type = var_448_pad_type_0, strides = var_448_strides_0, weight = layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = string("op_448_cast_fp16")]; tensor query_5_cast_fp16 = add(x = var_442_cast_fp16, y = var_448_cast_fp16)[name = string("query_5_cast_fp16")]; string var_457_pad_type_0 = const()[name = string("op_457_pad_type_0"), val = string("valid")]; tensor var_457_strides_0 = const()[name = string("op_457_strides_0"), val = tensor([1, 1])]; tensor var_457_pad_0 = const()[name = string("op_457_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_457_dilations_0 = const()[name = string("op_457_dilations_0"), val = tensor([1, 1])]; int32 var_457_groups_0 = const()[name = string("op_457_groups_0"), val = int32(1)]; tensor layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86171712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86466688))))[name = string("layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; tensor var_457_cast_fp16 = conv(dilations = var_457_dilations_0, groups = var_457_groups_0, pad = var_457_pad_0, pad_type = var_457_pad_type_0, strides = var_457_strides_0, weight = layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = string("op_457_cast_fp16")]; string var_463_pad_type_0 = const()[name = string("op_463_pad_type_0"), val = string("valid")]; tensor var_463_strides_0 = const()[name = string("op_463_strides_0"), val = tensor([1, 1])]; tensor var_463_pad_0 = const()[name = string("op_463_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_463_dilations_0 = const()[name = string("op_463_dilations_0"), val = tensor([1, 1])]; int32 var_463_groups_0 = const()[name = string("op_463_groups_0"), val = int32(1)]; tensor layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86483968))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86466816))))[name = string("layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_463_cast_fp16 = conv(dilations = var_463_dilations_0, groups = var_463_groups_0, pad = var_463_pad_0, pad_type = var_463_pad_type_0, strides = var_463_strides_0, weight = layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = string("op_463_cast_fp16")]; tensor current_key_3_cast_fp16 = add(x = var_457_cast_fp16, y = var_463_cast_fp16)[name = string("current_key_3_cast_fp16")]; string var_473_pad_type_0 = const()[name = string("op_473_pad_type_0"), val = string("valid")]; tensor var_473_strides_0 = const()[name = string("op_473_strides_0"), val = tensor([1, 1])]; tensor var_473_pad_0 = const()[name = string("op_473_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_473_dilations_0 = const()[name = string("op_473_dilations_0"), val = tensor([1, 1])]; int32 var_473_groups_0 = const()[name = string("op_473_groups_0"), val = int32(1)]; tensor layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86557760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86852736))))[name = string("layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_1_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86852864)))]; tensor var_473_cast_fp16 = conv(bias = layers_1_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_473_dilations_0, groups = var_473_groups_0, pad = var_473_pad_0, pad_type = var_473_pad_type_0, strides = var_473_strides_0, weight = layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = string("op_473_cast_fp16")]; string var_479_pad_type_0 = const()[name = string("op_479_pad_type_0"), val = string("valid")]; tensor var_479_strides_0 = const()[name = string("op_479_strides_0"), val = tensor([1, 1])]; tensor var_479_pad_0 = const()[name = string("op_479_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_479_dilations_0 = const()[name = string("op_479_dilations_0"), val = tensor([1, 1])]; int32 var_479_groups_0 = const()[name = string("op_479_groups_0"), val = int32(1)]; tensor layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86878080))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86854464))))[name = string("layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_479_cast_fp16 = conv(dilations = var_479_dilations_0, groups = var_479_groups_0, pad = var_479_pad_0, pad_type = var_479_pad_type_0, strides = var_479_strides_0, weight = layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = string("op_479_cast_fp16")]; tensor current_value_3_cast_fp16 = add(x = var_473_cast_fp16, y = var_479_cast_fp16)[name = string("current_value_3_cast_fp16")]; tensor var_485_cast_fp16 = mul(x = current_key_3_cast_fp16, y = var_202_cast_fp16)[name = string("op_485_cast_fp16")]; tensor key_3_cast_fp16 = add(x = var_71_cast_fp16_1, y = var_485_cast_fp16)[name = string("key_3_cast_fp16")]; tensor var_487_cast_fp16 = mul(x = current_value_3_cast_fp16, y = var_202_cast_fp16)[name = string("op_487_cast_fp16")]; tensor value_3_cast_fp16 = add(x = var_86_cast_fp16_1, y = var_487_cast_fp16)[name = string("value_3_cast_fp16")]; tensor var_490 = const()[name = string("op_490"), val = tensor([1, 12, 64, -1])]; tensor mh_q_5_cast_fp16 = reshape(shape = var_490, x = query_5_cast_fp16)[name = string("mh_q_5_cast_fp16")]; fp16 var_492_to_fp16 = const()[name = string("op_492_to_fp16"), val = fp16(0x1p-3)]; tensor var_493_cast_fp16 = mul(x = mh_q_5_cast_fp16, y = var_492_to_fp16)[name = string("op_493_cast_fp16")]; tensor var_494 = const()[name = string("op_494"), val = tensor([1, 12, 64, -1])]; tensor var_495_cast_fp16 = reshape(shape = var_494, x = key_3_cast_fp16)[name = string("op_495_cast_fp16")]; bool mh_w_9_transpose_x_0 = const()[name = string("mh_w_9_transpose_x_0"), val = bool(true)]; bool mh_w_9_transpose_y_0 = const()[name = string("mh_w_9_transpose_y_0"), val = bool(false)]; tensor mh_w_9_cast_fp16 = matmul(transpose_x = mh_w_9_transpose_x_0, transpose_y = mh_w_9_transpose_y_0, x = var_493_cast_fp16, y = var_495_cast_fp16)[name = string("mh_w_9_cast_fp16")]; tensor mh_w_11_cast_fp16 = add(x = mh_w_9_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_11_cast_fp16")]; tensor var_503_cast_fp16 = softmax(axis = var_395, x = mh_w_11_cast_fp16)[name = string("op_503_cast_fp16")]; tensor var_504 = const()[name = string("op_504"), val = tensor([1, 12, 64, -1])]; tensor var_505_cast_fp16 = reshape(shape = var_504, x = value_3_cast_fp16)[name = string("op_505_cast_fp16")]; bool attn_5_transpose_x_0 = const()[name = string("attn_5_transpose_x_0"), val = bool(false)]; bool attn_5_transpose_y_0 = const()[name = string("attn_5_transpose_y_0"), val = bool(true)]; tensor attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = var_505_cast_fp16, y = var_503_cast_fp16)[name = string("attn_5_cast_fp16")]; tensor var_508 = const()[name = string("op_508"), val = tensor([1, 768, 1, -1])]; tensor input_11_cast_fp16 = reshape(shape = var_508, x = attn_5_cast_fp16)[name = string("input_11_cast_fp16")]; string var_518_pad_type_0 = const()[name = string("op_518_pad_type_0"), val = string("valid")]; tensor var_518_strides_0 = const()[name = string("op_518_strides_0"), val = tensor([1, 1])]; tensor var_518_pad_0 = const()[name = string("op_518_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_518_dilations_0 = const()[name = string("op_518_dilations_0"), val = tensor([1, 1])]; int32 var_518_groups_0 = const()[name = string("op_518_groups_0"), val = int32(1)]; tensor layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86951872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87246848))))[name = string("layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_1_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87246976)))]; tensor var_518_cast_fp16 = conv(bias = layers_1_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_518_dilations_0, groups = var_518_groups_0, pad = var_518_pad_0, pad_type = var_518_pad_type_0, strides = var_518_strides_0, weight = layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_11_cast_fp16)[name = string("op_518_cast_fp16")]; string var_524_pad_type_0 = const()[name = string("op_524_pad_type_0"), val = string("valid")]; tensor var_524_strides_0 = const()[name = string("op_524_strides_0"), val = tensor([1, 1])]; tensor var_524_pad_0 = const()[name = string("op_524_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_524_dilations_0 = const()[name = string("op_524_dilations_0"), val = tensor([1, 1])]; int32 var_524_groups_0 = const()[name = string("op_524_groups_0"), val = int32(1)]; tensor layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87270528))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87248576))))[name = string("layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_524_cast_fp16 = conv(dilations = var_524_dilations_0, groups = var_524_groups_0, pad = var_524_pad_0, pad_type = var_524_pad_type_0, strides = var_524_strides_0, weight = layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_11_cast_fp16)[name = string("op_524_cast_fp16")]; tensor obj_31_cast_fp16 = add(x = var_518_cast_fp16, y = var_524_cast_fp16)[name = string("obj_31_cast_fp16")]; tensor inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = obj_31_cast_fp16)[name = string("inputs_9_cast_fp16")]; tensor out_9_axes_0 = const()[name = string("out_9_axes_0"), val = tensor([1])]; fp16 var_539_to_fp16 = const()[name = string("op_539_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_9_cast_fp16 = layer_norm(axes = out_9_axes_0, epsilon = var_539_to_fp16, x = inputs_9_cast_fp16)[name = string("out_9_cast_fp16")]; tensor obj_33_gamma_0_to_fp16 = const()[name = string("obj_33_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87344320)))]; tensor obj_33_beta_0_to_fp16 = const()[name = string("obj_33_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87345920)))]; fp16 obj_33_epsilon_0_to_fp16 = const()[name = string("obj_33_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; tensor obj_33_cast_fp16 = batch_norm(beta = obj_33_beta_0_to_fp16, epsilon = obj_33_epsilon_0_to_fp16, gamma = obj_33_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_9_cast_fp16)[name = string("obj_33_cast_fp16")]; string var_559_pad_type_0 = const()[name = string("op_559_pad_type_0"), val = string("valid")]; tensor var_559_strides_0 = const()[name = string("op_559_strides_0"), val = tensor([1, 1])]; tensor var_559_pad_0 = const()[name = string("op_559_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_559_dilations_0 = const()[name = string("op_559_dilations_0"), val = tensor([1, 1])]; int32 var_559_groups_0 = const()[name = string("op_559_groups_0"), val = int32(1)]; tensor layers_1_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87347520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87642496))))[name = string("layers_1_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_1_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87642624)))]; tensor var_559_cast_fp16 = conv(bias = layers_1_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_559_dilations_0, groups = var_559_groups_0, pad = var_559_pad_0, pad_type = var_559_pad_type_0, strides = var_559_strides_0, weight = layers_1_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_33_cast_fp16)[name = string("op_559_cast_fp16")]; string var_565_pad_type_0 = const()[name = string("op_565_pad_type_0"), val = string("valid")]; tensor var_565_strides_0 = const()[name = string("op_565_strides_0"), val = tensor([1, 1])]; tensor var_565_pad_0 = const()[name = string("op_565_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_565_dilations_0 = const()[name = string("op_565_dilations_0"), val = tensor([1, 1])]; int32 var_565_groups_0 = const()[name = string("op_565_groups_0"), val = int32(1)]; tensor layers_1_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87655552))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87644224))))[name = string("layers_1_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_565_cast_fp16 = conv(dilations = var_565_dilations_0, groups = var_565_groups_0, pad = var_565_pad_0, pad_type = var_565_pad_type_0, strides = var_565_strides_0, weight = layers_1_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_33_cast_fp16)[name = string("op_565_cast_fp16")]; tensor query_7_cast_fp16 = add(x = var_559_cast_fp16, y = var_565_cast_fp16)[name = string("query_7_cast_fp16")]; tensor var_568 = const()[name = string("op_568"), val = tensor([1, 12, 64, -1])]; tensor mh_q_7_cast_fp16 = reshape(shape = var_568, x = query_7_cast_fp16)[name = string("mh_q_7_cast_fp16")]; fp16 var_570_to_fp16 = const()[name = string("op_570_to_fp16"), val = fp16(0x1p-3)]; tensor var_571_cast_fp16 = mul(x = mh_q_7_cast_fp16, y = var_570_to_fp16)[name = string("op_571_cast_fp16")]; tensor var_572 = const()[name = string("op_572"), val = tensor([1, 12, 64, -1])]; tensor var_573_cast_fp16 = reshape(shape = var_572, x = obj_35_cast_fp16)[name = string("op_573_cast_fp16")]; bool mh_w_13_transpose_x_0 = const()[name = string("mh_w_13_transpose_x_0"), val = bool(true)]; bool mh_w_13_transpose_y_0 = const()[name = string("mh_w_13_transpose_y_0"), val = bool(false)]; tensor mh_w_13_cast_fp16 = matmul(transpose_x = mh_w_13_transpose_x_0, transpose_y = mh_w_13_transpose_y_0, x = var_571_cast_fp16, y = var_573_cast_fp16)[name = string("mh_w_13_cast_fp16")]; tensor mh_w_15_cast_fp16 = add(x = mh_w_13_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_15_cast_fp16")]; tensor obj_41_cast_fp16 = softmax(axis = var_395, x = mh_w_15_cast_fp16)[name = string("obj_41_cast_fp16")]; tensor var_582 = const()[name = string("op_582"), val = tensor([1, 12, 64, -1])]; tensor var_583_cast_fp16 = reshape(shape = var_582, x = obj_37_cast_fp16)[name = string("op_583_cast_fp16")]; bool attn_7_transpose_x_0 = const()[name = string("attn_7_transpose_x_0"), val = bool(false)]; bool attn_7_transpose_y_0 = const()[name = string("attn_7_transpose_y_0"), val = bool(true)]; tensor attn_7_cast_fp16 = matmul(transpose_x = attn_7_transpose_x_0, transpose_y = attn_7_transpose_y_0, x = var_583_cast_fp16, y = obj_41_cast_fp16)[name = string("attn_7_cast_fp16")]; tensor var_586 = const()[name = string("op_586"), val = tensor([1, 768, 1, -1])]; tensor input_13_cast_fp16 = reshape(shape = var_586, x = attn_7_cast_fp16)[name = string("input_13_cast_fp16")]; string var_596_pad_type_0 = const()[name = string("op_596_pad_type_0"), val = string("valid")]; tensor var_596_strides_0 = const()[name = string("op_596_strides_0"), val = tensor([1, 1])]; tensor var_596_pad_0 = const()[name = string("op_596_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_596_dilations_0 = const()[name = string("op_596_dilations_0"), val = tensor([1, 1])]; int32 var_596_groups_0 = const()[name = string("op_596_groups_0"), val = int32(1)]; tensor layers_1_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87729344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88024320))))[name = string("layers_1_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_1_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88024448)))]; tensor var_596_cast_fp16 = conv(bias = layers_1_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_596_dilations_0, groups = var_596_groups_0, pad = var_596_pad_0, pad_type = var_596_pad_type_0, strides = var_596_strides_0, weight = layers_1_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_13_cast_fp16)[name = string("op_596_cast_fp16")]; string var_602_pad_type_0 = const()[name = string("op_602_pad_type_0"), val = string("valid")]; tensor var_602_strides_0 = const()[name = string("op_602_strides_0"), val = tensor([1, 1])]; tensor var_602_pad_0 = const()[name = string("op_602_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_602_dilations_0 = const()[name = string("op_602_dilations_0"), val = tensor([1, 1])]; int32 var_602_groups_0 = const()[name = string("op_602_groups_0"), val = int32(1)]; tensor layers_1_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88032448))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88026048))))[name = string("layers_1_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_602_cast_fp16 = conv(dilations = var_602_dilations_0, groups = var_602_groups_0, pad = var_602_pad_0, pad_type = var_602_pad_type_0, strides = var_602_strides_0, weight = layers_1_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_13_cast_fp16)[name = string("op_602_cast_fp16")]; tensor obj_39_cast_fp16 = add(x = var_596_cast_fp16, y = var_602_cast_fp16)[name = string("obj_39_cast_fp16")]; tensor inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = obj_39_cast_fp16)[name = string("inputs_11_cast_fp16")]; tensor out_11_axes_0 = const()[name = string("out_11_axes_0"), val = tensor([1])]; fp16 var_613_to_fp16 = const()[name = string("op_613_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_11_cast_fp16 = layer_norm(axes = out_11_axes_0, epsilon = var_613_to_fp16, x = inputs_11_cast_fp16)[name = string("out_11_cast_fp16")]; tensor input_15_gamma_0_to_fp16 = const()[name = string("input_15_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88106240)))]; tensor input_15_beta_0_to_fp16 = const()[name = string("input_15_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88107840)))]; fp16 input_15_epsilon_0_to_fp16 = const()[name = string("input_15_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; tensor input_15_cast_fp16 = batch_norm(beta = input_15_beta_0_to_fp16, epsilon = input_15_epsilon_0_to_fp16, gamma = input_15_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_11_cast_fp16)[name = string("input_15_cast_fp16")]; string var_631_pad_type_0 = const()[name = string("op_631_pad_type_0"), val = string("valid")]; tensor var_631_strides_0 = const()[name = string("op_631_strides_0"), val = tensor([1, 1])]; tensor var_631_pad_0 = const()[name = string("op_631_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_631_dilations_0 = const()[name = string("op_631_dilations_0"), val = tensor([1, 1])]; int32 var_631_groups_0 = const()[name = string("op_631_groups_0"), val = int32(1)]; tensor layers_1_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88109440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89289152))))[name = string("layers_1_fc1_inlier_module_weight_to_fp16_palettized")]; tensor layers_1_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_1_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89289280)))]; tensor var_631_cast_fp16 = conv(bias = layers_1_fc1_inlier_module_bias_to_fp16, dilations = var_631_dilations_0, groups = var_631_groups_0, pad = var_631_pad_0, pad_type = var_631_pad_type_0, strides = var_631_strides_0, weight = layers_1_fc1_inlier_module_weight_to_fp16_palettized, x = input_15_cast_fp16)[name = string("op_631_cast_fp16")]; string var_637_pad_type_0 = const()[name = string("op_637_pad_type_0"), val = string("valid")]; tensor var_637_strides_0 = const()[name = string("op_637_strides_0"), val = tensor([1, 1])]; tensor var_637_pad_0 = const()[name = string("op_637_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_637_dilations_0 = const()[name = string("op_637_dilations_0"), val = tensor([1, 1])]; int32 var_637_groups_0 = const()[name = string("op_637_groups_0"), val = int32(1)]; tensor layers_1_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89384960))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89295488))))[name = string("layers_1_fc1_outlier_module_weight_to_fp16_sparsified")]; tensor var_637_cast_fp16 = conv(dilations = var_637_dilations_0, groups = var_637_groups_0, pad = var_637_pad_0, pad_type = var_637_pad_type_0, strides = var_637_strides_0, weight = layers_1_fc1_outlier_module_weight_to_fp16_sparsified, x = input_15_cast_fp16)[name = string("op_637_cast_fp16")]; tensor input_17_cast_fp16 = add(x = var_631_cast_fp16, y = var_637_cast_fp16)[name = string("input_17_cast_fp16")]; string input_19_mode_0 = const()[name = string("input_19_mode_0"), val = string("EXACT")]; tensor input_19_cast_fp16 = gelu(mode = input_19_mode_0, x = input_17_cast_fp16)[name = string("input_19_cast_fp16")]; string var_648_pad_type_0 = const()[name = string("op_648_pad_type_0"), val = string("valid")]; tensor var_648_strides_0 = const()[name = string("op_648_strides_0"), val = tensor([1, 1])]; tensor var_648_pad_0 = const()[name = string("op_648_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_648_dilations_0 = const()[name = string("op_648_dilations_0"), val = tensor([1, 1])]; int32 var_648_groups_0 = const()[name = string("op_648_groups_0"), val = int32(1)]; tensor layers_1_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89679936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90859648))))[name = string("layers_1_fc2_inlier_module_weight_to_fp16_palettized")]; tensor layers_1_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_1_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90859776)))]; tensor var_648_cast_fp16 = conv(bias = layers_1_fc2_inlier_module_bias_to_fp16, dilations = var_648_dilations_0, groups = var_648_groups_0, pad = var_648_pad_0, pad_type = var_648_pad_type_0, strides = var_648_strides_0, weight = layers_1_fc2_inlier_module_weight_to_fp16_palettized, x = input_19_cast_fp16)[name = string("op_648_cast_fp16")]; string var_654_pad_type_0 = const()[name = string("op_654_pad_type_0"), val = string("valid")]; tensor var_654_strides_0 = const()[name = string("op_654_strides_0"), val = tensor([1, 1])]; tensor var_654_pad_0 = const()[name = string("op_654_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_654_dilations_0 = const()[name = string("op_654_dilations_0"), val = tensor([1, 1])]; int32 var_654_groups_0 = const()[name = string("op_654_groups_0"), val = int32(1)]; tensor layers_1_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90936128))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90861376))))[name = string("layers_1_fc2_outlier_module_weight_to_fp16_sparsified")]; tensor var_654_cast_fp16 = conv(dilations = var_654_dilations_0, groups = var_654_groups_0, pad = var_654_pad_0, pad_type = var_654_pad_type_0, strides = var_654_strides_0, weight = layers_1_fc2_outlier_module_weight_to_fp16_sparsified, x = input_19_cast_fp16)[name = string("op_654_cast_fp16")]; tensor hidden_states_5_cast_fp16 = add(x = var_648_cast_fp16, y = var_654_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; tensor inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = hidden_states_5_cast_fp16)[name = string("inputs_13_cast_fp16")]; tensor obj_53_begin_0 = const()[name = string("obj_53_begin_0"), val = tensor([2, 0, 0, 0])]; tensor obj_53_end_0 = const()[name = string("obj_53_end_0"), val = tensor([3, 768, 1, 1536])]; tensor obj_53_end_mask_0 = const()[name = string("obj_53_end_mask_0"), val = tensor([false, true, true, true])]; tensor obj_53_cast_fp16 = slice_by_index(begin = obj_53_begin_0, end = obj_53_end_0, end_mask = obj_53_end_mask_0, x = read_state_2)[name = string("obj_53_cast_fp16")]; tensor obj_55_begin_0 = const()[name = string("obj_55_begin_0"), val = tensor([2, 0, 0, 0])]; tensor obj_55_end_0 = const()[name = string("obj_55_end_0"), val = tensor([3, 768, 1, 1536])]; tensor obj_55_end_mask_0 = const()[name = string("obj_55_end_mask_0"), val = tensor([false, true, true, true])]; tensor obj_55_cast_fp16 = slice_by_index(begin = obj_55_begin_0, end = obj_55_end_0, end_mask = obj_55_end_mask_0, x = read_state_3)[name = string("obj_55_cast_fp16")]; int32 var_676 = const()[name = string("op_676"), val = int32(3)]; tensor out_13_axes_0 = const()[name = string("out_13_axes_0"), val = tensor([1])]; fp16 var_701_to_fp16 = const()[name = string("op_701_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_13_cast_fp16 = layer_norm(axes = out_13_axes_0, epsilon = var_701_to_fp16, x = inputs_13_cast_fp16)[name = string("out_13_cast_fp16")]; tensor obj_43_gamma_0_to_fp16 = const()[name = string("obj_43_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91231104)))]; tensor obj_43_beta_0_to_fp16 = const()[name = string("obj_43_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91232704)))]; fp16 obj_43_epsilon_0_to_fp16 = const()[name = string("obj_43_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; tensor obj_43_cast_fp16 = batch_norm(beta = obj_43_beta_0_to_fp16, epsilon = obj_43_epsilon_0_to_fp16, gamma = obj_43_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_13_cast_fp16)[name = string("obj_43_cast_fp16")]; string var_723_pad_type_0 = const()[name = string("op_723_pad_type_0"), val = string("valid")]; tensor var_723_strides_0 = const()[name = string("op_723_strides_0"), val = tensor([1, 1])]; tensor var_723_pad_0 = const()[name = string("op_723_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_723_dilations_0 = const()[name = string("op_723_dilations_0"), val = tensor([1, 1])]; int32 var_723_groups_0 = const()[name = string("op_723_groups_0"), val = int32(1)]; tensor layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91234304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91529280))))[name = string("layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_2_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91529408)))]; tensor var_723_cast_fp16 = conv(bias = layers_2_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_723_dilations_0, groups = var_723_groups_0, pad = var_723_pad_0, pad_type = var_723_pad_type_0, strides = var_723_strides_0, weight = layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_43_cast_fp16)[name = string("op_723_cast_fp16")]; string var_729_pad_type_0 = const()[name = string("op_729_pad_type_0"), val = string("valid")]; tensor var_729_strides_0 = const()[name = string("op_729_strides_0"), val = tensor([1, 1])]; tensor var_729_pad_0 = const()[name = string("op_729_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_729_dilations_0 = const()[name = string("op_729_dilations_0"), val = tensor([1, 1])]; int32 var_729_groups_0 = const()[name = string("op_729_groups_0"), val = int32(1)]; tensor layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91561536))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91531008))))[name = string("layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_729_cast_fp16 = conv(dilations = var_729_dilations_0, groups = var_729_groups_0, pad = var_729_pad_0, pad_type = var_729_pad_type_0, strides = var_729_strides_0, weight = layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_43_cast_fp16)[name = string("op_729_cast_fp16")]; tensor query_9_cast_fp16 = add(x = var_723_cast_fp16, y = var_729_cast_fp16)[name = string("query_9_cast_fp16")]; string var_738_pad_type_0 = const()[name = string("op_738_pad_type_0"), val = string("valid")]; tensor var_738_strides_0 = const()[name = string("op_738_strides_0"), val = tensor([1, 1])]; tensor var_738_pad_0 = const()[name = string("op_738_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_738_dilations_0 = const()[name = string("op_738_dilations_0"), val = tensor([1, 1])]; int32 var_738_groups_0 = const()[name = string("op_738_groups_0"), val = int32(1)]; tensor layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91635328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91930304))))[name = string("layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; tensor var_738_cast_fp16 = conv(dilations = var_738_dilations_0, groups = var_738_groups_0, pad = var_738_pad_0, pad_type = var_738_pad_type_0, strides = var_738_strides_0, weight = layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_43_cast_fp16)[name = string("op_738_cast_fp16")]; string var_744_pad_type_0 = const()[name = string("op_744_pad_type_0"), val = string("valid")]; tensor var_744_strides_0 = const()[name = string("op_744_strides_0"), val = tensor([1, 1])]; tensor var_744_pad_0 = const()[name = string("op_744_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_744_dilations_0 = const()[name = string("op_744_dilations_0"), val = tensor([1, 1])]; int32 var_744_groups_0 = const()[name = string("op_744_groups_0"), val = int32(1)]; tensor layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91961984))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91930432))))[name = string("layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_744_cast_fp16 = conv(dilations = var_744_dilations_0, groups = var_744_groups_0, pad = var_744_pad_0, pad_type = var_744_pad_type_0, strides = var_744_strides_0, weight = layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_43_cast_fp16)[name = string("op_744_cast_fp16")]; tensor current_key_5_cast_fp16 = add(x = var_738_cast_fp16, y = var_744_cast_fp16)[name = string("current_key_5_cast_fp16")]; string var_754_pad_type_0 = const()[name = string("op_754_pad_type_0"), val = string("valid")]; tensor var_754_strides_0 = const()[name = string("op_754_strides_0"), val = tensor([1, 1])]; tensor var_754_pad_0 = const()[name = string("op_754_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_754_dilations_0 = const()[name = string("op_754_dilations_0"), val = tensor([1, 1])]; int32 var_754_groups_0 = const()[name = string("op_754_groups_0"), val = int32(1)]; tensor layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92035776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92330752))))[name = string("layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_2_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92330880)))]; tensor var_754_cast_fp16 = conv(bias = layers_2_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_754_dilations_0, groups = var_754_groups_0, pad = var_754_pad_0, pad_type = var_754_pad_type_0, strides = var_754_strides_0, weight = layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_43_cast_fp16)[name = string("op_754_cast_fp16")]; string var_760_pad_type_0 = const()[name = string("op_760_pad_type_0"), val = string("valid")]; tensor var_760_strides_0 = const()[name = string("op_760_strides_0"), val = tensor([1, 1])]; tensor var_760_pad_0 = const()[name = string("op_760_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_760_dilations_0 = const()[name = string("op_760_dilations_0"), val = tensor([1, 1])]; int32 var_760_groups_0 = const()[name = string("op_760_groups_0"), val = int32(1)]; tensor layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92365504))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92332480))))[name = string("layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_760_cast_fp16 = conv(dilations = var_760_dilations_0, groups = var_760_groups_0, pad = var_760_pad_0, pad_type = var_760_pad_type_0, strides = var_760_strides_0, weight = layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_43_cast_fp16)[name = string("op_760_cast_fp16")]; tensor current_value_5_cast_fp16 = add(x = var_754_cast_fp16, y = var_760_cast_fp16)[name = string("current_value_5_cast_fp16")]; tensor var_766_cast_fp16 = mul(x = current_key_5_cast_fp16, y = var_202_cast_fp16)[name = string("op_766_cast_fp16")]; tensor key_5_cast_fp16 = add(x = var_71_cast_fp16_2, y = var_766_cast_fp16)[name = string("key_5_cast_fp16")]; tensor var_768_cast_fp16 = mul(x = current_value_5_cast_fp16, y = var_202_cast_fp16)[name = string("op_768_cast_fp16")]; tensor value_5_cast_fp16 = add(x = var_86_cast_fp16_2, y = var_768_cast_fp16)[name = string("value_5_cast_fp16")]; tensor var_771 = const()[name = string("op_771"), val = tensor([1, 12, 64, -1])]; tensor mh_q_9_cast_fp16 = reshape(shape = var_771, x = query_9_cast_fp16)[name = string("mh_q_9_cast_fp16")]; fp16 var_773_to_fp16 = const()[name = string("op_773_to_fp16"), val = fp16(0x1p-3)]; tensor var_774_cast_fp16 = mul(x = mh_q_9_cast_fp16, y = var_773_to_fp16)[name = string("op_774_cast_fp16")]; tensor var_775 = const()[name = string("op_775"), val = tensor([1, 12, 64, -1])]; tensor var_776_cast_fp16 = reshape(shape = var_775, x = key_5_cast_fp16)[name = string("op_776_cast_fp16")]; bool mh_w_17_transpose_x_0 = const()[name = string("mh_w_17_transpose_x_0"), val = bool(true)]; bool mh_w_17_transpose_y_0 = const()[name = string("mh_w_17_transpose_y_0"), val = bool(false)]; tensor mh_w_17_cast_fp16 = matmul(transpose_x = mh_w_17_transpose_x_0, transpose_y = mh_w_17_transpose_y_0, x = var_774_cast_fp16, y = var_776_cast_fp16)[name = string("mh_w_17_cast_fp16")]; tensor mh_w_19_cast_fp16 = add(x = mh_w_17_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_19_cast_fp16")]; tensor var_784_cast_fp16 = softmax(axis = var_676, x = mh_w_19_cast_fp16)[name = string("op_784_cast_fp16")]; tensor var_785 = const()[name = string("op_785"), val = tensor([1, 12, 64, -1])]; tensor var_786_cast_fp16 = reshape(shape = var_785, x = value_5_cast_fp16)[name = string("op_786_cast_fp16")]; bool attn_9_transpose_x_0 = const()[name = string("attn_9_transpose_x_0"), val = bool(false)]; bool attn_9_transpose_y_0 = const()[name = string("attn_9_transpose_y_0"), val = bool(true)]; tensor attn_9_cast_fp16 = matmul(transpose_x = attn_9_transpose_x_0, transpose_y = attn_9_transpose_y_0, x = var_786_cast_fp16, y = var_784_cast_fp16)[name = string("attn_9_cast_fp16")]; tensor var_789 = const()[name = string("op_789"), val = tensor([1, 768, 1, -1])]; tensor input_21_cast_fp16 = reshape(shape = var_789, x = attn_9_cast_fp16)[name = string("input_21_cast_fp16")]; string var_799_pad_type_0 = const()[name = string("op_799_pad_type_0"), val = string("valid")]; tensor var_799_strides_0 = const()[name = string("op_799_strides_0"), val = tensor([1, 1])]; tensor var_799_pad_0 = const()[name = string("op_799_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_799_dilations_0 = const()[name = string("op_799_dilations_0"), val = tensor([1, 1])]; int32 var_799_groups_0 = const()[name = string("op_799_groups_0"), val = int32(1)]; tensor layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92439296))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92734272))))[name = string("layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_2_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92734400)))]; tensor var_799_cast_fp16 = conv(bias = layers_2_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_799_dilations_0, groups = var_799_groups_0, pad = var_799_pad_0, pad_type = var_799_pad_type_0, strides = var_799_strides_0, weight = layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_21_cast_fp16)[name = string("op_799_cast_fp16")]; string var_805_pad_type_0 = const()[name = string("op_805_pad_type_0"), val = string("valid")]; tensor var_805_strides_0 = const()[name = string("op_805_strides_0"), val = tensor([1, 1])]; tensor var_805_pad_0 = const()[name = string("op_805_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_805_dilations_0 = const()[name = string("op_805_dilations_0"), val = tensor([1, 1])]; int32 var_805_groups_0 = const()[name = string("op_805_groups_0"), val = int32(1)]; tensor layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92761920))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92736000))))[name = string("layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_805_cast_fp16 = conv(dilations = var_805_dilations_0, groups = var_805_groups_0, pad = var_805_pad_0, pad_type = var_805_pad_type_0, strides = var_805_strides_0, weight = layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_21_cast_fp16)[name = string("op_805_cast_fp16")]; tensor obj_49_cast_fp16 = add(x = var_799_cast_fp16, y = var_805_cast_fp16)[name = string("obj_49_cast_fp16")]; tensor inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = obj_49_cast_fp16)[name = string("inputs_15_cast_fp16")]; tensor out_15_axes_0 = const()[name = string("out_15_axes_0"), val = tensor([1])]; fp16 var_820_to_fp16 = const()[name = string("op_820_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_15_cast_fp16 = layer_norm(axes = out_15_axes_0, epsilon = var_820_to_fp16, x = inputs_15_cast_fp16)[name = string("out_15_cast_fp16")]; tensor obj_51_gamma_0_to_fp16 = const()[name = string("obj_51_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92835712)))]; tensor obj_51_beta_0_to_fp16 = const()[name = string("obj_51_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92837312)))]; fp16 obj_51_epsilon_0_to_fp16 = const()[name = string("obj_51_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; tensor obj_51_cast_fp16 = batch_norm(beta = obj_51_beta_0_to_fp16, epsilon = obj_51_epsilon_0_to_fp16, gamma = obj_51_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_15_cast_fp16)[name = string("obj_51_cast_fp16")]; string var_840_pad_type_0 = const()[name = string("op_840_pad_type_0"), val = string("valid")]; tensor var_840_strides_0 = const()[name = string("op_840_strides_0"), val = tensor([1, 1])]; tensor var_840_pad_0 = const()[name = string("op_840_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_840_dilations_0 = const()[name = string("op_840_dilations_0"), val = tensor([1, 1])]; int32 var_840_groups_0 = const()[name = string("op_840_groups_0"), val = int32(1)]; tensor layers_2_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92838912))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93133888))))[name = string("layers_2_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_2_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93134016)))]; tensor var_840_cast_fp16 = conv(bias = layers_2_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_840_dilations_0, groups = var_840_groups_0, pad = var_840_pad_0, pad_type = var_840_pad_type_0, strides = var_840_strides_0, weight = layers_2_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_51_cast_fp16)[name = string("op_840_cast_fp16")]; string var_846_pad_type_0 = const()[name = string("op_846_pad_type_0"), val = string("valid")]; tensor var_846_strides_0 = const()[name = string("op_846_strides_0"), val = tensor([1, 1])]; tensor var_846_pad_0 = const()[name = string("op_846_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_846_dilations_0 = const()[name = string("op_846_dilations_0"), val = tensor([1, 1])]; int32 var_846_groups_0 = const()[name = string("op_846_groups_0"), val = int32(1)]; tensor layers_2_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93146368))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93135616))))[name = string("layers_2_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_846_cast_fp16 = conv(dilations = var_846_dilations_0, groups = var_846_groups_0, pad = var_846_pad_0, pad_type = var_846_pad_type_0, strides = var_846_strides_0, weight = layers_2_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_51_cast_fp16)[name = string("op_846_cast_fp16")]; tensor query_11_cast_fp16 = add(x = var_840_cast_fp16, y = var_846_cast_fp16)[name = string("query_11_cast_fp16")]; tensor var_849 = const()[name = string("op_849"), val = tensor([1, 12, 64, -1])]; tensor mh_q_11_cast_fp16 = reshape(shape = var_849, x = query_11_cast_fp16)[name = string("mh_q_11_cast_fp16")]; fp16 var_851_to_fp16 = const()[name = string("op_851_to_fp16"), val = fp16(0x1p-3)]; tensor var_852_cast_fp16 = mul(x = mh_q_11_cast_fp16, y = var_851_to_fp16)[name = string("op_852_cast_fp16")]; tensor var_853 = const()[name = string("op_853"), val = tensor([1, 12, 64, -1])]; tensor var_854_cast_fp16 = reshape(shape = var_853, x = obj_53_cast_fp16)[name = string("op_854_cast_fp16")]; bool mh_w_21_transpose_x_0 = const()[name = string("mh_w_21_transpose_x_0"), val = bool(true)]; bool mh_w_21_transpose_y_0 = const()[name = string("mh_w_21_transpose_y_0"), val = bool(false)]; tensor mh_w_21_cast_fp16 = matmul(transpose_x = mh_w_21_transpose_x_0, transpose_y = mh_w_21_transpose_y_0, x = var_852_cast_fp16, y = var_854_cast_fp16)[name = string("mh_w_21_cast_fp16")]; tensor mh_w_23_cast_fp16 = add(x = mh_w_21_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_23_cast_fp16")]; tensor obj_59_cast_fp16 = softmax(axis = var_676, x = mh_w_23_cast_fp16)[name = string("obj_59_cast_fp16")]; tensor var_863 = const()[name = string("op_863"), val = tensor([1, 12, 64, -1])]; tensor var_864_cast_fp16 = reshape(shape = var_863, x = obj_55_cast_fp16)[name = string("op_864_cast_fp16")]; bool attn_11_transpose_x_0 = const()[name = string("attn_11_transpose_x_0"), val = bool(false)]; bool attn_11_transpose_y_0 = const()[name = string("attn_11_transpose_y_0"), val = bool(true)]; tensor attn_11_cast_fp16 = matmul(transpose_x = attn_11_transpose_x_0, transpose_y = attn_11_transpose_y_0, x = var_864_cast_fp16, y = obj_59_cast_fp16)[name = string("attn_11_cast_fp16")]; tensor var_867 = const()[name = string("op_867"), val = tensor([1, 768, 1, -1])]; tensor input_23_cast_fp16 = reshape(shape = var_867, x = attn_11_cast_fp16)[name = string("input_23_cast_fp16")]; string var_877_pad_type_0 = const()[name = string("op_877_pad_type_0"), val = string("valid")]; tensor var_877_strides_0 = const()[name = string("op_877_strides_0"), val = tensor([1, 1])]; tensor var_877_pad_0 = const()[name = string("op_877_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_877_dilations_0 = const()[name = string("op_877_dilations_0"), val = tensor([1, 1])]; int32 var_877_groups_0 = const()[name = string("op_877_groups_0"), val = int32(1)]; tensor layers_2_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93220160))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93515136))))[name = string("layers_2_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_2_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93515264)))]; tensor var_877_cast_fp16 = conv(bias = layers_2_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_877_dilations_0, groups = var_877_groups_0, pad = var_877_pad_0, pad_type = var_877_pad_type_0, strides = var_877_strides_0, weight = layers_2_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_23_cast_fp16)[name = string("op_877_cast_fp16")]; string var_883_pad_type_0 = const()[name = string("op_883_pad_type_0"), val = string("valid")]; tensor var_883_strides_0 = const()[name = string("op_883_strides_0"), val = tensor([1, 1])]; tensor var_883_pad_0 = const()[name = string("op_883_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_883_dilations_0 = const()[name = string("op_883_dilations_0"), val = tensor([1, 1])]; int32 var_883_groups_0 = const()[name = string("op_883_groups_0"), val = int32(1)]; tensor layers_2_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93526336))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93516864))))[name = string("layers_2_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_883_cast_fp16 = conv(dilations = var_883_dilations_0, groups = var_883_groups_0, pad = var_883_pad_0, pad_type = var_883_pad_type_0, strides = var_883_strides_0, weight = layers_2_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_23_cast_fp16)[name = string("op_883_cast_fp16")]; tensor obj_57_cast_fp16 = add(x = var_877_cast_fp16, y = var_883_cast_fp16)[name = string("obj_57_cast_fp16")]; tensor inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = obj_57_cast_fp16)[name = string("inputs_17_cast_fp16")]; tensor out_17_axes_0 = const()[name = string("out_17_axes_0"), val = tensor([1])]; fp16 var_894_to_fp16 = const()[name = string("op_894_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_17_cast_fp16 = layer_norm(axes = out_17_axes_0, epsilon = var_894_to_fp16, x = inputs_17_cast_fp16)[name = string("out_17_cast_fp16")]; tensor input_25_gamma_0_to_fp16 = const()[name = string("input_25_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93600128)))]; tensor input_25_beta_0_to_fp16 = const()[name = string("input_25_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93601728)))]; fp16 input_25_epsilon_0_to_fp16 = const()[name = string("input_25_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; tensor input_25_cast_fp16 = batch_norm(beta = input_25_beta_0_to_fp16, epsilon = input_25_epsilon_0_to_fp16, gamma = input_25_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_17_cast_fp16)[name = string("input_25_cast_fp16")]; string var_912_pad_type_0 = const()[name = string("op_912_pad_type_0"), val = string("valid")]; tensor var_912_strides_0 = const()[name = string("op_912_strides_0"), val = tensor([1, 1])]; tensor var_912_pad_0 = const()[name = string("op_912_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_912_dilations_0 = const()[name = string("op_912_dilations_0"), val = tensor([1, 1])]; int32 var_912_groups_0 = const()[name = string("op_912_groups_0"), val = int32(1)]; tensor layers_2_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93603328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94783040))))[name = string("layers_2_fc1_inlier_module_weight_to_fp16_palettized")]; tensor layers_2_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_2_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94783168)))]; tensor var_912_cast_fp16 = conv(bias = layers_2_fc1_inlier_module_bias_to_fp16, dilations = var_912_dilations_0, groups = var_912_groups_0, pad = var_912_pad_0, pad_type = var_912_pad_type_0, strides = var_912_strides_0, weight = layers_2_fc1_inlier_module_weight_to_fp16_palettized, x = input_25_cast_fp16)[name = string("op_912_cast_fp16")]; string var_918_pad_type_0 = const()[name = string("op_918_pad_type_0"), val = string("valid")]; tensor var_918_strides_0 = const()[name = string("op_918_strides_0"), val = tensor([1, 1])]; tensor var_918_pad_0 = const()[name = string("op_918_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_918_dilations_0 = const()[name = string("op_918_dilations_0"), val = tensor([1, 1])]; int32 var_918_groups_0 = const()[name = string("op_918_groups_0"), val = int32(1)]; tensor layers_2_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94895552))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94789376))))[name = string("layers_2_fc1_outlier_module_weight_to_fp16_sparsified")]; tensor var_918_cast_fp16 = conv(dilations = var_918_dilations_0, groups = var_918_groups_0, pad = var_918_pad_0, pad_type = var_918_pad_type_0, strides = var_918_strides_0, weight = layers_2_fc1_outlier_module_weight_to_fp16_sparsified, x = input_25_cast_fp16)[name = string("op_918_cast_fp16")]; tensor input_27_cast_fp16 = add(x = var_912_cast_fp16, y = var_918_cast_fp16)[name = string("input_27_cast_fp16")]; string input_29_mode_0 = const()[name = string("input_29_mode_0"), val = string("EXACT")]; tensor input_29_cast_fp16 = gelu(mode = input_29_mode_0, x = input_27_cast_fp16)[name = string("input_29_cast_fp16")]; string var_929_pad_type_0 = const()[name = string("op_929_pad_type_0"), val = string("valid")]; tensor var_929_strides_0 = const()[name = string("op_929_strides_0"), val = tensor([1, 1])]; tensor var_929_pad_0 = const()[name = string("op_929_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_929_dilations_0 = const()[name = string("op_929_dilations_0"), val = tensor([1, 1])]; int32 var_929_groups_0 = const()[name = string("op_929_groups_0"), val = int32(1)]; tensor layers_2_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95190528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96370240))))[name = string("layers_2_fc2_inlier_module_weight_to_fp16_palettized")]; tensor layers_2_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_2_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96370368)))]; tensor var_929_cast_fp16 = conv(bias = layers_2_fc2_inlier_module_bias_to_fp16, dilations = var_929_dilations_0, groups = var_929_groups_0, pad = var_929_pad_0, pad_type = var_929_pad_type_0, strides = var_929_strides_0, weight = layers_2_fc2_inlier_module_weight_to_fp16_palettized, x = input_29_cast_fp16)[name = string("op_929_cast_fp16")]; string var_935_pad_type_0 = const()[name = string("op_935_pad_type_0"), val = string("valid")]; tensor var_935_strides_0 = const()[name = string("op_935_strides_0"), val = tensor([1, 1])]; tensor var_935_pad_0 = const()[name = string("op_935_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_935_dilations_0 = const()[name = string("op_935_dilations_0"), val = tensor([1, 1])]; int32 var_935_groups_0 = const()[name = string("op_935_groups_0"), val = int32(1)]; tensor layers_2_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96483392))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96371968))))[name = string("layers_2_fc2_outlier_module_weight_to_fp16_sparsified")]; tensor var_935_cast_fp16 = conv(dilations = var_935_dilations_0, groups = var_935_groups_0, pad = var_935_pad_0, pad_type = var_935_pad_type_0, strides = var_935_strides_0, weight = layers_2_fc2_outlier_module_weight_to_fp16_sparsified, x = input_29_cast_fp16)[name = string("op_935_cast_fp16")]; tensor hidden_states_7_cast_fp16 = add(x = var_929_cast_fp16, y = var_935_cast_fp16)[name = string("hidden_states_7_cast_fp16")]; tensor inputs_19_cast_fp16 = add(x = inputs_17_cast_fp16, y = hidden_states_7_cast_fp16)[name = string("inputs_19_cast_fp16")]; tensor obj_71_begin_0 = const()[name = string("obj_71_begin_0"), val = tensor([3, 0, 0, 0])]; tensor obj_71_end_0 = const()[name = string("obj_71_end_0"), val = tensor([4, 768, 1, 1536])]; tensor obj_71_end_mask_0 = const()[name = string("obj_71_end_mask_0"), val = tensor([false, true, true, true])]; tensor obj_71_cast_fp16 = slice_by_index(begin = obj_71_begin_0, end = obj_71_end_0, end_mask = obj_71_end_mask_0, x = read_state_2)[name = string("obj_71_cast_fp16")]; tensor obj_73_begin_0 = const()[name = string("obj_73_begin_0"), val = tensor([3, 0, 0, 0])]; tensor obj_73_end_0 = const()[name = string("obj_73_end_0"), val = tensor([4, 768, 1, 1536])]; tensor obj_73_end_mask_0 = const()[name = string("obj_73_end_mask_0"), val = tensor([false, true, true, true])]; tensor obj_73_cast_fp16 = slice_by_index(begin = obj_73_begin_0, end = obj_73_end_0, end_mask = obj_73_end_mask_0, x = read_state_3)[name = string("obj_73_cast_fp16")]; int32 var_957 = const()[name = string("op_957"), val = int32(3)]; tensor out_19_axes_0 = const()[name = string("out_19_axes_0"), val = tensor([1])]; fp16 var_982_to_fp16 = const()[name = string("op_982_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_19_cast_fp16 = layer_norm(axes = out_19_axes_0, epsilon = var_982_to_fp16, x = inputs_19_cast_fp16)[name = string("out_19_cast_fp16")]; tensor obj_61_gamma_0_to_fp16 = const()[name = string("obj_61_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96778368)))]; tensor obj_61_beta_0_to_fp16 = const()[name = string("obj_61_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96779968)))]; fp16 obj_61_epsilon_0_to_fp16 = const()[name = string("obj_61_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; tensor obj_61_cast_fp16 = batch_norm(beta = obj_61_beta_0_to_fp16, epsilon = obj_61_epsilon_0_to_fp16, gamma = obj_61_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_19_cast_fp16)[name = string("obj_61_cast_fp16")]; string var_1004_pad_type_0 = const()[name = string("op_1004_pad_type_0"), val = string("valid")]; tensor var_1004_strides_0 = const()[name = string("op_1004_strides_0"), val = tensor([1, 1])]; tensor var_1004_pad_0 = const()[name = string("op_1004_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1004_dilations_0 = const()[name = string("op_1004_dilations_0"), val = tensor([1, 1])]; int32 var_1004_groups_0 = const()[name = string("op_1004_groups_0"), val = int32(1)]; tensor layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96781568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97076544))))[name = string("layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_3_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97076672)))]; tensor var_1004_cast_fp16 = conv(bias = layers_3_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1004_dilations_0, groups = var_1004_groups_0, pad = var_1004_pad_0, pad_type = var_1004_pad_type_0, strides = var_1004_strides_0, weight = layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_61_cast_fp16)[name = string("op_1004_cast_fp16")]; string var_1010_pad_type_0 = const()[name = string("op_1010_pad_type_0"), val = string("valid")]; tensor var_1010_strides_0 = const()[name = string("op_1010_strides_0"), val = tensor([1, 1])]; tensor var_1010_pad_0 = const()[name = string("op_1010_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1010_dilations_0 = const()[name = string("op_1010_dilations_0"), val = tensor([1, 1])]; int32 var_1010_groups_0 = const()[name = string("op_1010_groups_0"), val = int32(1)]; tensor layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97097024))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97078272))))[name = string("layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_1010_cast_fp16 = conv(dilations = var_1010_dilations_0, groups = var_1010_groups_0, pad = var_1010_pad_0, pad_type = var_1010_pad_type_0, strides = var_1010_strides_0, weight = layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_61_cast_fp16)[name = string("op_1010_cast_fp16")]; tensor query_13_cast_fp16 = add(x = var_1004_cast_fp16, y = var_1010_cast_fp16)[name = string("query_13_cast_fp16")]; string var_1019_pad_type_0 = const()[name = string("op_1019_pad_type_0"), val = string("valid")]; tensor var_1019_strides_0 = const()[name = string("op_1019_strides_0"), val = tensor([1, 1])]; tensor var_1019_pad_0 = const()[name = string("op_1019_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1019_dilations_0 = const()[name = string("op_1019_dilations_0"), val = tensor([1, 1])]; int32 var_1019_groups_0 = const()[name = string("op_1019_groups_0"), val = int32(1)]; tensor layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97170816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97465792))))[name = string("layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; tensor var_1019_cast_fp16 = conv(dilations = var_1019_dilations_0, groups = var_1019_groups_0, pad = var_1019_pad_0, pad_type = var_1019_pad_type_0, strides = var_1019_strides_0, weight = layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_61_cast_fp16)[name = string("op_1019_cast_fp16")]; string var_1025_pad_type_0 = const()[name = string("op_1025_pad_type_0"), val = string("valid")]; tensor var_1025_strides_0 = const()[name = string("op_1025_strides_0"), val = tensor([1, 1])]; tensor var_1025_pad_0 = const()[name = string("op_1025_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1025_dilations_0 = const()[name = string("op_1025_dilations_0"), val = tensor([1, 1])]; int32 var_1025_groups_0 = const()[name = string("op_1025_groups_0"), val = int32(1)]; tensor layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97482880))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97465920))))[name = string("layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_1025_cast_fp16 = conv(dilations = var_1025_dilations_0, groups = var_1025_groups_0, pad = var_1025_pad_0, pad_type = var_1025_pad_type_0, strides = var_1025_strides_0, weight = layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_61_cast_fp16)[name = string("op_1025_cast_fp16")]; tensor current_key_7_cast_fp16 = add(x = var_1019_cast_fp16, y = var_1025_cast_fp16)[name = string("current_key_7_cast_fp16")]; string var_1035_pad_type_0 = const()[name = string("op_1035_pad_type_0"), val = string("valid")]; tensor var_1035_strides_0 = const()[name = string("op_1035_strides_0"), val = tensor([1, 1])]; tensor var_1035_pad_0 = const()[name = string("op_1035_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1035_dilations_0 = const()[name = string("op_1035_dilations_0"), val = tensor([1, 1])]; int32 var_1035_groups_0 = const()[name = string("op_1035_groups_0"), val = int32(1)]; tensor layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97556672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97851648))))[name = string("layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_3_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97851776)))]; tensor var_1035_cast_fp16 = conv(bias = layers_3_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1035_dilations_0, groups = var_1035_groups_0, pad = var_1035_pad_0, pad_type = var_1035_pad_type_0, strides = var_1035_strides_0, weight = layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_61_cast_fp16)[name = string("op_1035_cast_fp16")]; string var_1041_pad_type_0 = const()[name = string("op_1041_pad_type_0"), val = string("valid")]; tensor var_1041_strides_0 = const()[name = string("op_1041_strides_0"), val = tensor([1, 1])]; tensor var_1041_pad_0 = const()[name = string("op_1041_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1041_dilations_0 = const()[name = string("op_1041_dilations_0"), val = tensor([1, 1])]; int32 var_1041_groups_0 = const()[name = string("op_1041_groups_0"), val = int32(1)]; tensor layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97869504))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97853376))))[name = string("layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_1041_cast_fp16 = conv(dilations = var_1041_dilations_0, groups = var_1041_groups_0, pad = var_1041_pad_0, pad_type = var_1041_pad_type_0, strides = var_1041_strides_0, weight = layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_61_cast_fp16)[name = string("op_1041_cast_fp16")]; tensor current_value_7_cast_fp16 = add(x = var_1035_cast_fp16, y = var_1041_cast_fp16)[name = string("current_value_7_cast_fp16")]; tensor var_1047_cast_fp16 = mul(x = current_key_7_cast_fp16, y = var_202_cast_fp16)[name = string("op_1047_cast_fp16")]; tensor key_7_cast_fp16 = add(x = var_71_cast_fp16_3, y = var_1047_cast_fp16)[name = string("key_7_cast_fp16")]; tensor var_1049_cast_fp16 = mul(x = current_value_7_cast_fp16, y = var_202_cast_fp16)[name = string("op_1049_cast_fp16")]; tensor value_7_cast_fp16 = add(x = var_86_cast_fp16_3, y = var_1049_cast_fp16)[name = string("value_7_cast_fp16")]; tensor var_1052 = const()[name = string("op_1052"), val = tensor([1, 12, 64, -1])]; tensor mh_q_13_cast_fp16 = reshape(shape = var_1052, x = query_13_cast_fp16)[name = string("mh_q_13_cast_fp16")]; fp16 var_1054_to_fp16 = const()[name = string("op_1054_to_fp16"), val = fp16(0x1p-3)]; tensor var_1055_cast_fp16 = mul(x = mh_q_13_cast_fp16, y = var_1054_to_fp16)[name = string("op_1055_cast_fp16")]; tensor var_1056 = const()[name = string("op_1056"), val = tensor([1, 12, 64, -1])]; tensor var_1057_cast_fp16 = reshape(shape = var_1056, x = key_7_cast_fp16)[name = string("op_1057_cast_fp16")]; bool mh_w_25_transpose_x_0 = const()[name = string("mh_w_25_transpose_x_0"), val = bool(true)]; bool mh_w_25_transpose_y_0 = const()[name = string("mh_w_25_transpose_y_0"), val = bool(false)]; tensor mh_w_25_cast_fp16 = matmul(transpose_x = mh_w_25_transpose_x_0, transpose_y = mh_w_25_transpose_y_0, x = var_1055_cast_fp16, y = var_1057_cast_fp16)[name = string("mh_w_25_cast_fp16")]; tensor mh_w_27_cast_fp16 = add(x = mh_w_25_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_27_cast_fp16")]; tensor var_1065_cast_fp16 = softmax(axis = var_957, x = mh_w_27_cast_fp16)[name = string("op_1065_cast_fp16")]; tensor var_1066 = const()[name = string("op_1066"), val = tensor([1, 12, 64, -1])]; tensor var_1067_cast_fp16 = reshape(shape = var_1066, x = value_7_cast_fp16)[name = string("op_1067_cast_fp16")]; bool attn_13_transpose_x_0 = const()[name = string("attn_13_transpose_x_0"), val = bool(false)]; bool attn_13_transpose_y_0 = const()[name = string("attn_13_transpose_y_0"), val = bool(true)]; tensor attn_13_cast_fp16 = matmul(transpose_x = attn_13_transpose_x_0, transpose_y = attn_13_transpose_y_0, x = var_1067_cast_fp16, y = var_1065_cast_fp16)[name = string("attn_13_cast_fp16")]; tensor var_1070 = const()[name = string("op_1070"), val = tensor([1, 768, 1, -1])]; tensor input_31_cast_fp16 = reshape(shape = var_1070, x = attn_13_cast_fp16)[name = string("input_31_cast_fp16")]; string var_1080_pad_type_0 = const()[name = string("op_1080_pad_type_0"), val = string("valid")]; tensor var_1080_strides_0 = const()[name = string("op_1080_strides_0"), val = tensor([1, 1])]; tensor var_1080_pad_0 = const()[name = string("op_1080_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1080_dilations_0 = const()[name = string("op_1080_dilations_0"), val = tensor([1, 1])]; int32 var_1080_groups_0 = const()[name = string("op_1080_groups_0"), val = int32(1)]; tensor layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97943296))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98238272))))[name = string("layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_3_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98238400)))]; tensor var_1080_cast_fp16 = conv(bias = layers_3_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1080_dilations_0, groups = var_1080_groups_0, pad = var_1080_pad_0, pad_type = var_1080_pad_type_0, strides = var_1080_strides_0, weight = layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_31_cast_fp16)[name = string("op_1080_cast_fp16")]; string var_1086_pad_type_0 = const()[name = string("op_1086_pad_type_0"), val = string("valid")]; tensor var_1086_strides_0 = const()[name = string("op_1086_strides_0"), val = tensor([1, 1])]; tensor var_1086_pad_0 = const()[name = string("op_1086_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1086_dilations_0 = const()[name = string("op_1086_dilations_0"), val = tensor([1, 1])]; int32 var_1086_groups_0 = const()[name = string("op_1086_groups_0"), val = int32(1)]; tensor layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98260224))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98240000))))[name = string("layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_1086_cast_fp16 = conv(dilations = var_1086_dilations_0, groups = var_1086_groups_0, pad = var_1086_pad_0, pad_type = var_1086_pad_type_0, strides = var_1086_strides_0, weight = layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_31_cast_fp16)[name = string("op_1086_cast_fp16")]; tensor obj_67_cast_fp16 = add(x = var_1080_cast_fp16, y = var_1086_cast_fp16)[name = string("obj_67_cast_fp16")]; tensor inputs_21_cast_fp16 = add(x = inputs_19_cast_fp16, y = obj_67_cast_fp16)[name = string("inputs_21_cast_fp16")]; tensor out_21_axes_0 = const()[name = string("out_21_axes_0"), val = tensor([1])]; fp16 var_1101_to_fp16 = const()[name = string("op_1101_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_21_cast_fp16 = layer_norm(axes = out_21_axes_0, epsilon = var_1101_to_fp16, x = inputs_21_cast_fp16)[name = string("out_21_cast_fp16")]; tensor obj_69_gamma_0_to_fp16 = const()[name = string("obj_69_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98334016)))]; tensor obj_69_beta_0_to_fp16 = const()[name = string("obj_69_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98335616)))]; fp16 obj_69_epsilon_0_to_fp16 = const()[name = string("obj_69_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; tensor obj_69_cast_fp16 = batch_norm(beta = obj_69_beta_0_to_fp16, epsilon = obj_69_epsilon_0_to_fp16, gamma = obj_69_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_21_cast_fp16)[name = string("obj_69_cast_fp16")]; string var_1121_pad_type_0 = const()[name = string("op_1121_pad_type_0"), val = string("valid")]; tensor var_1121_strides_0 = const()[name = string("op_1121_strides_0"), val = tensor([1, 1])]; tensor var_1121_pad_0 = const()[name = string("op_1121_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1121_dilations_0 = const()[name = string("op_1121_dilations_0"), val = tensor([1, 1])]; int32 var_1121_groups_0 = const()[name = string("op_1121_groups_0"), val = int32(1)]; tensor layers_3_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98337216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98632192))))[name = string("layers_3_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_3_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98632320)))]; tensor var_1121_cast_fp16 = conv(bias = layers_3_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1121_dilations_0, groups = var_1121_groups_0, pad = var_1121_pad_0, pad_type = var_1121_pad_type_0, strides = var_1121_strides_0, weight = layers_3_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_69_cast_fp16)[name = string("op_1121_cast_fp16")]; string var_1127_pad_type_0 = const()[name = string("op_1127_pad_type_0"), val = string("valid")]; tensor var_1127_strides_0 = const()[name = string("op_1127_strides_0"), val = tensor([1, 1])]; tensor var_1127_pad_0 = const()[name = string("op_1127_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1127_dilations_0 = const()[name = string("op_1127_dilations_0"), val = tensor([1, 1])]; int32 var_1127_groups_0 = const()[name = string("op_1127_groups_0"), val = int32(1)]; tensor layers_3_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98650816))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98633920))))[name = string("layers_3_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_1127_cast_fp16 = conv(dilations = var_1127_dilations_0, groups = var_1127_groups_0, pad = var_1127_pad_0, pad_type = var_1127_pad_type_0, strides = var_1127_strides_0, weight = layers_3_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_69_cast_fp16)[name = string("op_1127_cast_fp16")]; tensor query_15_cast_fp16 = add(x = var_1121_cast_fp16, y = var_1127_cast_fp16)[name = string("query_15_cast_fp16")]; tensor var_1130 = const()[name = string("op_1130"), val = tensor([1, 12, 64, -1])]; tensor mh_q_15_cast_fp16 = reshape(shape = var_1130, x = query_15_cast_fp16)[name = string("mh_q_15_cast_fp16")]; fp16 var_1132_to_fp16 = const()[name = string("op_1132_to_fp16"), val = fp16(0x1p-3)]; tensor var_1133_cast_fp16 = mul(x = mh_q_15_cast_fp16, y = var_1132_to_fp16)[name = string("op_1133_cast_fp16")]; tensor var_1134 = const()[name = string("op_1134"), val = tensor([1, 12, 64, -1])]; tensor var_1135_cast_fp16 = reshape(shape = var_1134, x = obj_71_cast_fp16)[name = string("op_1135_cast_fp16")]; bool mh_w_29_transpose_x_0 = const()[name = string("mh_w_29_transpose_x_0"), val = bool(true)]; bool mh_w_29_transpose_y_0 = const()[name = string("mh_w_29_transpose_y_0"), val = bool(false)]; tensor mh_w_29_cast_fp16 = matmul(transpose_x = mh_w_29_transpose_x_0, transpose_y = mh_w_29_transpose_y_0, x = var_1133_cast_fp16, y = var_1135_cast_fp16)[name = string("mh_w_29_cast_fp16")]; tensor mh_w_31_cast_fp16 = add(x = mh_w_29_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_31_cast_fp16")]; tensor obj_77_cast_fp16 = softmax(axis = var_957, x = mh_w_31_cast_fp16)[name = string("obj_77_cast_fp16")]; tensor var_1144 = const()[name = string("op_1144"), val = tensor([1, 12, 64, -1])]; tensor var_1145_cast_fp16 = reshape(shape = var_1144, x = obj_73_cast_fp16)[name = string("op_1145_cast_fp16")]; bool attn_15_transpose_x_0 = const()[name = string("attn_15_transpose_x_0"), val = bool(false)]; bool attn_15_transpose_y_0 = const()[name = string("attn_15_transpose_y_0"), val = bool(true)]; tensor attn_15_cast_fp16 = matmul(transpose_x = attn_15_transpose_x_0, transpose_y = attn_15_transpose_y_0, x = var_1145_cast_fp16, y = obj_77_cast_fp16)[name = string("attn_15_cast_fp16")]; tensor var_1148 = const()[name = string("op_1148"), val = tensor([1, 768, 1, -1])]; tensor input_33_cast_fp16 = reshape(shape = var_1148, x = attn_15_cast_fp16)[name = string("input_33_cast_fp16")]; string var_1158_pad_type_0 = const()[name = string("op_1158_pad_type_0"), val = string("valid")]; tensor var_1158_strides_0 = const()[name = string("op_1158_strides_0"), val = tensor([1, 1])]; tensor var_1158_pad_0 = const()[name = string("op_1158_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1158_dilations_0 = const()[name = string("op_1158_dilations_0"), val = tensor([1, 1])]; int32 var_1158_groups_0 = const()[name = string("op_1158_groups_0"), val = int32(1)]; tensor layers_3_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98724608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99019584))))[name = string("layers_3_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_3_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99019712)))]; tensor var_1158_cast_fp16 = conv(bias = layers_3_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1158_dilations_0, groups = var_1158_groups_0, pad = var_1158_pad_0, pad_type = var_1158_pad_type_0, strides = var_1158_strides_0, weight = layers_3_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_33_cast_fp16)[name = string("op_1158_cast_fp16")]; string var_1164_pad_type_0 = const()[name = string("op_1164_pad_type_0"), val = string("valid")]; tensor var_1164_strides_0 = const()[name = string("op_1164_strides_0"), val = tensor([1, 1])]; tensor var_1164_pad_0 = const()[name = string("op_1164_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1164_dilations_0 = const()[name = string("op_1164_dilations_0"), val = tensor([1, 1])]; int32 var_1164_groups_0 = const()[name = string("op_1164_groups_0"), val = int32(1)]; tensor layers_3_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99034624))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99021312))))[name = string("layers_3_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_1164_cast_fp16 = conv(dilations = var_1164_dilations_0, groups = var_1164_groups_0, pad = var_1164_pad_0, pad_type = var_1164_pad_type_0, strides = var_1164_strides_0, weight = layers_3_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_33_cast_fp16)[name = string("op_1164_cast_fp16")]; tensor obj_75_cast_fp16 = add(x = var_1158_cast_fp16, y = var_1164_cast_fp16)[name = string("obj_75_cast_fp16")]; tensor inputs_23_cast_fp16 = add(x = inputs_21_cast_fp16, y = obj_75_cast_fp16)[name = string("inputs_23_cast_fp16")]; tensor out_23_axes_0 = const()[name = string("out_23_axes_0"), val = tensor([1])]; fp16 var_1175_to_fp16 = const()[name = string("op_1175_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_23_cast_fp16 = layer_norm(axes = out_23_axes_0, epsilon = var_1175_to_fp16, x = inputs_23_cast_fp16)[name = string("out_23_cast_fp16")]; tensor input_35_gamma_0_to_fp16 = const()[name = string("input_35_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99108416)))]; tensor input_35_beta_0_to_fp16 = const()[name = string("input_35_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99110016)))]; fp16 input_35_epsilon_0_to_fp16 = const()[name = string("input_35_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; tensor input_35_cast_fp16 = batch_norm(beta = input_35_beta_0_to_fp16, epsilon = input_35_epsilon_0_to_fp16, gamma = input_35_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_23_cast_fp16)[name = string("input_35_cast_fp16")]; string var_1193_pad_type_0 = const()[name = string("op_1193_pad_type_0"), val = string("valid")]; tensor var_1193_strides_0 = const()[name = string("op_1193_strides_0"), val = tensor([1, 1])]; tensor var_1193_pad_0 = const()[name = string("op_1193_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1193_dilations_0 = const()[name = string("op_1193_dilations_0"), val = tensor([1, 1])]; int32 var_1193_groups_0 = const()[name = string("op_1193_groups_0"), val = int32(1)]; tensor layers_3_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99111616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100291328))))[name = string("layers_3_fc1_inlier_module_weight_to_fp16_palettized")]; tensor layers_3_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_3_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100291456)))]; tensor var_1193_cast_fp16 = conv(bias = layers_3_fc1_inlier_module_bias_to_fp16, dilations = var_1193_dilations_0, groups = var_1193_groups_0, pad = var_1193_pad_0, pad_type = var_1193_pad_type_0, strides = var_1193_strides_0, weight = layers_3_fc1_inlier_module_weight_to_fp16_palettized, x = input_35_cast_fp16)[name = string("op_1193_cast_fp16")]; string var_1199_pad_type_0 = const()[name = string("op_1199_pad_type_0"), val = string("valid")]; tensor var_1199_strides_0 = const()[name = string("op_1199_strides_0"), val = tensor([1, 1])]; tensor var_1199_pad_0 = const()[name = string("op_1199_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1199_dilations_0 = const()[name = string("op_1199_dilations_0"), val = tensor([1, 1])]; int32 var_1199_groups_0 = const()[name = string("op_1199_groups_0"), val = int32(1)]; tensor layers_3_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100362496))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100297664))))[name = string("layers_3_fc1_outlier_module_weight_to_fp16_sparsified")]; tensor var_1199_cast_fp16 = conv(dilations = var_1199_dilations_0, groups = var_1199_groups_0, pad = var_1199_pad_0, pad_type = var_1199_pad_type_0, strides = var_1199_strides_0, weight = layers_3_fc1_outlier_module_weight_to_fp16_sparsified, x = input_35_cast_fp16)[name = string("op_1199_cast_fp16")]; tensor input_37_cast_fp16 = add(x = var_1193_cast_fp16, y = var_1199_cast_fp16)[name = string("input_37_cast_fp16")]; string input_39_mode_0 = const()[name = string("input_39_mode_0"), val = string("EXACT")]; tensor input_39_cast_fp16 = gelu(mode = input_39_mode_0, x = input_37_cast_fp16)[name = string("input_39_cast_fp16")]; string var_1210_pad_type_0 = const()[name = string("op_1210_pad_type_0"), val = string("valid")]; tensor var_1210_strides_0 = const()[name = string("op_1210_strides_0"), val = tensor([1, 1])]; tensor var_1210_pad_0 = const()[name = string("op_1210_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1210_dilations_0 = const()[name = string("op_1210_dilations_0"), val = tensor([1, 1])]; int32 var_1210_groups_0 = const()[name = string("op_1210_groups_0"), val = int32(1)]; tensor layers_3_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100657472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101837184))))[name = string("layers_3_fc2_inlier_module_weight_to_fp16_palettized")]; tensor layers_3_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_3_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101837312)))]; tensor var_1210_cast_fp16 = conv(bias = layers_3_fc2_inlier_module_bias_to_fp16, dilations = var_1210_dilations_0, groups = var_1210_groups_0, pad = var_1210_pad_0, pad_type = var_1210_pad_type_0, strides = var_1210_strides_0, weight = layers_3_fc2_inlier_module_weight_to_fp16_palettized, x = input_39_cast_fp16)[name = string("op_1210_cast_fp16")]; string var_1216_pad_type_0 = const()[name = string("op_1216_pad_type_0"), val = string("valid")]; tensor var_1216_strides_0 = const()[name = string("op_1216_strides_0"), val = tensor([1, 1])]; tensor var_1216_pad_0 = const()[name = string("op_1216_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1216_dilations_0 = const()[name = string("op_1216_dilations_0"), val = tensor([1, 1])]; int32 var_1216_groups_0 = const()[name = string("op_1216_groups_0"), val = int32(1)]; tensor layers_3_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101896448))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101838912))))[name = string("layers_3_fc2_outlier_module_weight_to_fp16_sparsified")]; tensor var_1216_cast_fp16 = conv(dilations = var_1216_dilations_0, groups = var_1216_groups_0, pad = var_1216_pad_0, pad_type = var_1216_pad_type_0, strides = var_1216_strides_0, weight = layers_3_fc2_outlier_module_weight_to_fp16_sparsified, x = input_39_cast_fp16)[name = string("op_1216_cast_fp16")]; tensor hidden_states_9_cast_fp16 = add(x = var_1210_cast_fp16, y = var_1216_cast_fp16)[name = string("hidden_states_9_cast_fp16")]; tensor inputs_25_cast_fp16 = add(x = inputs_23_cast_fp16, y = hidden_states_9_cast_fp16)[name = string("inputs_25_cast_fp16")]; tensor obj_89_begin_0 = const()[name = string("obj_89_begin_0"), val = tensor([4, 0, 0, 0])]; tensor obj_89_end_0 = const()[name = string("obj_89_end_0"), val = tensor([5, 768, 1, 1536])]; tensor obj_89_end_mask_0 = const()[name = string("obj_89_end_mask_0"), val = tensor([false, true, true, true])]; tensor obj_89_cast_fp16 = slice_by_index(begin = obj_89_begin_0, end = obj_89_end_0, end_mask = obj_89_end_mask_0, x = read_state_2)[name = string("obj_89_cast_fp16")]; tensor obj_91_begin_0 = const()[name = string("obj_91_begin_0"), val = tensor([4, 0, 0, 0])]; tensor obj_91_end_0 = const()[name = string("obj_91_end_0"), val = tensor([5, 768, 1, 1536])]; tensor obj_91_end_mask_0 = const()[name = string("obj_91_end_mask_0"), val = tensor([false, true, true, true])]; tensor obj_91_cast_fp16 = slice_by_index(begin = obj_91_begin_0, end = obj_91_end_0, end_mask = obj_91_end_mask_0, x = read_state_3)[name = string("obj_91_cast_fp16")]; int32 var_1238 = const()[name = string("op_1238"), val = int32(3)]; tensor out_25_axes_0 = const()[name = string("out_25_axes_0"), val = tensor([1])]; fp16 var_1263_to_fp16 = const()[name = string("op_1263_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_25_cast_fp16 = layer_norm(axes = out_25_axes_0, epsilon = var_1263_to_fp16, x = inputs_25_cast_fp16)[name = string("out_25_cast_fp16")]; tensor obj_79_gamma_0_to_fp16 = const()[name = string("obj_79_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102191424)))]; tensor obj_79_beta_0_to_fp16 = const()[name = string("obj_79_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102193024)))]; fp16 obj_79_epsilon_0_to_fp16 = const()[name = string("obj_79_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; tensor obj_79_cast_fp16 = batch_norm(beta = obj_79_beta_0_to_fp16, epsilon = obj_79_epsilon_0_to_fp16, gamma = obj_79_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_25_cast_fp16)[name = string("obj_79_cast_fp16")]; string var_1285_pad_type_0 = const()[name = string("op_1285_pad_type_0"), val = string("valid")]; tensor var_1285_strides_0 = const()[name = string("op_1285_strides_0"), val = tensor([1, 1])]; tensor var_1285_pad_0 = const()[name = string("op_1285_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1285_dilations_0 = const()[name = string("op_1285_dilations_0"), val = tensor([1, 1])]; int32 var_1285_groups_0 = const()[name = string("op_1285_groups_0"), val = int32(1)]; tensor layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102194624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102489600))))[name = string("layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_4_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_4_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102489728)))]; tensor var_1285_cast_fp16 = conv(bias = layers_4_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1285_dilations_0, groups = var_1285_groups_0, pad = var_1285_pad_0, pad_type = var_1285_pad_type_0, strides = var_1285_strides_0, weight = layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_79_cast_fp16)[name = string("op_1285_cast_fp16")]; string var_1291_pad_type_0 = const()[name = string("op_1291_pad_type_0"), val = string("valid")]; tensor var_1291_strides_0 = const()[name = string("op_1291_strides_0"), val = tensor([1, 1])]; tensor var_1291_pad_0 = const()[name = string("op_1291_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1291_dilations_0 = const()[name = string("op_1291_dilations_0"), val = tensor([1, 1])]; int32 var_1291_groups_0 = const()[name = string("op_1291_groups_0"), val = int32(1)]; tensor layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102501568))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102491328))))[name = string("layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_1291_cast_fp16 = conv(dilations = var_1291_dilations_0, groups = var_1291_groups_0, pad = var_1291_pad_0, pad_type = var_1291_pad_type_0, strides = var_1291_strides_0, weight = layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_79_cast_fp16)[name = string("op_1291_cast_fp16")]; tensor query_17_cast_fp16 = add(x = var_1285_cast_fp16, y = var_1291_cast_fp16)[name = string("query_17_cast_fp16")]; string var_1300_pad_type_0 = const()[name = string("op_1300_pad_type_0"), val = string("valid")]; tensor var_1300_strides_0 = const()[name = string("op_1300_strides_0"), val = tensor([1, 1])]; tensor var_1300_pad_0 = const()[name = string("op_1300_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1300_dilations_0 = const()[name = string("op_1300_dilations_0"), val = tensor([1, 1])]; int32 var_1300_groups_0 = const()[name = string("op_1300_groups_0"), val = int32(1)]; tensor layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102575360))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102870336))))[name = string("layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; tensor var_1300_cast_fp16 = conv(dilations = var_1300_dilations_0, groups = var_1300_groups_0, pad = var_1300_pad_0, pad_type = var_1300_pad_type_0, strides = var_1300_strides_0, weight = layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_79_cast_fp16)[name = string("op_1300_cast_fp16")]; string var_1306_pad_type_0 = const()[name = string("op_1306_pad_type_0"), val = string("valid")]; tensor var_1306_strides_0 = const()[name = string("op_1306_strides_0"), val = tensor([1, 1])]; tensor var_1306_pad_0 = const()[name = string("op_1306_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1306_dilations_0 = const()[name = string("op_1306_dilations_0"), val = tensor([1, 1])]; int32 var_1306_groups_0 = const()[name = string("op_1306_groups_0"), val = int32(1)]; tensor layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102881536))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102870464))))[name = string("layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_1306_cast_fp16 = conv(dilations = var_1306_dilations_0, groups = var_1306_groups_0, pad = var_1306_pad_0, pad_type = var_1306_pad_type_0, strides = var_1306_strides_0, weight = layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_79_cast_fp16)[name = string("op_1306_cast_fp16")]; tensor current_key_9_cast_fp16 = add(x = var_1300_cast_fp16, y = var_1306_cast_fp16)[name = string("current_key_9_cast_fp16")]; string var_1316_pad_type_0 = const()[name = string("op_1316_pad_type_0"), val = string("valid")]; tensor var_1316_strides_0 = const()[name = string("op_1316_strides_0"), val = tensor([1, 1])]; tensor var_1316_pad_0 = const()[name = string("op_1316_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1316_dilations_0 = const()[name = string("op_1316_dilations_0"), val = tensor([1, 1])]; int32 var_1316_groups_0 = const()[name = string("op_1316_groups_0"), val = int32(1)]; tensor layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102955328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103250304))))[name = string("layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_4_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_4_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103250432)))]; tensor var_1316_cast_fp16 = conv(bias = layers_4_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1316_dilations_0, groups = var_1316_groups_0, pad = var_1316_pad_0, pad_type = var_1316_pad_type_0, strides = var_1316_strides_0, weight = layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_79_cast_fp16)[name = string("op_1316_cast_fp16")]; string var_1322_pad_type_0 = const()[name = string("op_1322_pad_type_0"), val = string("valid")]; tensor var_1322_strides_0 = const()[name = string("op_1322_strides_0"), val = tensor([1, 1])]; tensor var_1322_pad_0 = const()[name = string("op_1322_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1322_dilations_0 = const()[name = string("op_1322_dilations_0"), val = tensor([1, 1])]; int32 var_1322_groups_0 = const()[name = string("op_1322_groups_0"), val = int32(1)]; tensor layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103261824))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103252032))))[name = string("layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_1322_cast_fp16 = conv(dilations = var_1322_dilations_0, groups = var_1322_groups_0, pad = var_1322_pad_0, pad_type = var_1322_pad_type_0, strides = var_1322_strides_0, weight = layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_79_cast_fp16)[name = string("op_1322_cast_fp16")]; tensor current_value_9_cast_fp16 = add(x = var_1316_cast_fp16, y = var_1322_cast_fp16)[name = string("current_value_9_cast_fp16")]; tensor var_1328_cast_fp16 = mul(x = current_key_9_cast_fp16, y = var_202_cast_fp16)[name = string("op_1328_cast_fp16")]; tensor key_9_cast_fp16 = add(x = var_71_cast_fp16_4, y = var_1328_cast_fp16)[name = string("key_9_cast_fp16")]; tensor var_1330_cast_fp16 = mul(x = current_value_9_cast_fp16, y = var_202_cast_fp16)[name = string("op_1330_cast_fp16")]; tensor value_9_cast_fp16 = add(x = var_86_cast_fp16_4, y = var_1330_cast_fp16)[name = string("value_9_cast_fp16")]; tensor var_1333 = const()[name = string("op_1333"), val = tensor([1, 12, 64, -1])]; tensor mh_q_17_cast_fp16 = reshape(shape = var_1333, x = query_17_cast_fp16)[name = string("mh_q_17_cast_fp16")]; fp16 var_1335_to_fp16 = const()[name = string("op_1335_to_fp16"), val = fp16(0x1p-3)]; tensor var_1336_cast_fp16 = mul(x = mh_q_17_cast_fp16, y = var_1335_to_fp16)[name = string("op_1336_cast_fp16")]; tensor var_1337 = const()[name = string("op_1337"), val = tensor([1, 12, 64, -1])]; tensor var_1338_cast_fp16 = reshape(shape = var_1337, x = key_9_cast_fp16)[name = string("op_1338_cast_fp16")]; bool mh_w_33_transpose_x_0 = const()[name = string("mh_w_33_transpose_x_0"), val = bool(true)]; bool mh_w_33_transpose_y_0 = const()[name = string("mh_w_33_transpose_y_0"), val = bool(false)]; tensor mh_w_33_cast_fp16 = matmul(transpose_x = mh_w_33_transpose_x_0, transpose_y = mh_w_33_transpose_y_0, x = var_1336_cast_fp16, y = var_1338_cast_fp16)[name = string("mh_w_33_cast_fp16")]; tensor mh_w_35_cast_fp16 = add(x = mh_w_33_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_35_cast_fp16")]; tensor var_1346_cast_fp16 = softmax(axis = var_1238, x = mh_w_35_cast_fp16)[name = string("op_1346_cast_fp16")]; tensor var_1347 = const()[name = string("op_1347"), val = tensor([1, 12, 64, -1])]; tensor var_1348_cast_fp16 = reshape(shape = var_1347, x = value_9_cast_fp16)[name = string("op_1348_cast_fp16")]; bool attn_17_transpose_x_0 = const()[name = string("attn_17_transpose_x_0"), val = bool(false)]; bool attn_17_transpose_y_0 = const()[name = string("attn_17_transpose_y_0"), val = bool(true)]; tensor attn_17_cast_fp16 = matmul(transpose_x = attn_17_transpose_x_0, transpose_y = attn_17_transpose_y_0, x = var_1348_cast_fp16, y = var_1346_cast_fp16)[name = string("attn_17_cast_fp16")]; tensor var_1351 = const()[name = string("op_1351"), val = tensor([1, 768, 1, -1])]; tensor input_41_cast_fp16 = reshape(shape = var_1351, x = attn_17_cast_fp16)[name = string("input_41_cast_fp16")]; string var_1361_pad_type_0 = const()[name = string("op_1361_pad_type_0"), val = string("valid")]; tensor var_1361_strides_0 = const()[name = string("op_1361_strides_0"), val = tensor([1, 1])]; tensor var_1361_pad_0 = const()[name = string("op_1361_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1361_dilations_0 = const()[name = string("op_1361_dilations_0"), val = tensor([1, 1])]; int32 var_1361_groups_0 = const()[name = string("op_1361_groups_0"), val = int32(1)]; tensor layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103335616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103630592))))[name = string("layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_4_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_4_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103630720)))]; tensor var_1361_cast_fp16 = conv(bias = layers_4_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1361_dilations_0, groups = var_1361_groups_0, pad = var_1361_pad_0, pad_type = var_1361_pad_type_0, strides = var_1361_strides_0, weight = layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_41_cast_fp16)[name = string("op_1361_cast_fp16")]; string var_1367_pad_type_0 = const()[name = string("op_1367_pad_type_0"), val = string("valid")]; tensor var_1367_strides_0 = const()[name = string("op_1367_strides_0"), val = tensor([1, 1])]; tensor var_1367_pad_0 = const()[name = string("op_1367_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1367_dilations_0 = const()[name = string("op_1367_dilations_0"), val = tensor([1, 1])]; int32 var_1367_groups_0 = const()[name = string("op_1367_groups_0"), val = int32(1)]; tensor layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103644992))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103632320))))[name = string("layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_1367_cast_fp16 = conv(dilations = var_1367_dilations_0, groups = var_1367_groups_0, pad = var_1367_pad_0, pad_type = var_1367_pad_type_0, strides = var_1367_strides_0, weight = layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_41_cast_fp16)[name = string("op_1367_cast_fp16")]; tensor obj_85_cast_fp16 = add(x = var_1361_cast_fp16, y = var_1367_cast_fp16)[name = string("obj_85_cast_fp16")]; tensor inputs_27_cast_fp16 = add(x = inputs_25_cast_fp16, y = obj_85_cast_fp16)[name = string("inputs_27_cast_fp16")]; tensor out_27_axes_0 = const()[name = string("out_27_axes_0"), val = tensor([1])]; fp16 var_1382_to_fp16 = const()[name = string("op_1382_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_27_cast_fp16 = layer_norm(axes = out_27_axes_0, epsilon = var_1382_to_fp16, x = inputs_27_cast_fp16)[name = string("out_27_cast_fp16")]; tensor obj_87_gamma_0_to_fp16 = const()[name = string("obj_87_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103718784)))]; tensor obj_87_beta_0_to_fp16 = const()[name = string("obj_87_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103720384)))]; fp16 obj_87_epsilon_0_to_fp16 = const()[name = string("obj_87_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; tensor obj_87_cast_fp16 = batch_norm(beta = obj_87_beta_0_to_fp16, epsilon = obj_87_epsilon_0_to_fp16, gamma = obj_87_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_27_cast_fp16)[name = string("obj_87_cast_fp16")]; string var_1402_pad_type_0 = const()[name = string("op_1402_pad_type_0"), val = string("valid")]; tensor var_1402_strides_0 = const()[name = string("op_1402_strides_0"), val = tensor([1, 1])]; tensor var_1402_pad_0 = const()[name = string("op_1402_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1402_dilations_0 = const()[name = string("op_1402_dilations_0"), val = tensor([1, 1])]; int32 var_1402_groups_0 = const()[name = string("op_1402_groups_0"), val = int32(1)]; tensor layers_4_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103721984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104016960))))[name = string("layers_4_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_4_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_4_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104017088)))]; tensor var_1402_cast_fp16 = conv(bias = layers_4_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1402_dilations_0, groups = var_1402_groups_0, pad = var_1402_pad_0, pad_type = var_1402_pad_type_0, strides = var_1402_strides_0, weight = layers_4_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_87_cast_fp16)[name = string("op_1402_cast_fp16")]; string var_1408_pad_type_0 = const()[name = string("op_1408_pad_type_0"), val = string("valid")]; tensor var_1408_strides_0 = const()[name = string("op_1408_strides_0"), val = tensor([1, 1])]; tensor var_1408_pad_0 = const()[name = string("op_1408_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1408_dilations_0 = const()[name = string("op_1408_dilations_0"), val = tensor([1, 1])]; int32 var_1408_groups_0 = const()[name = string("op_1408_groups_0"), val = int32(1)]; tensor layers_4_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104032192))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104018688))))[name = string("layers_4_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_1408_cast_fp16 = conv(dilations = var_1408_dilations_0, groups = var_1408_groups_0, pad = var_1408_pad_0, pad_type = var_1408_pad_type_0, strides = var_1408_strides_0, weight = layers_4_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_87_cast_fp16)[name = string("op_1408_cast_fp16")]; tensor query_19_cast_fp16 = add(x = var_1402_cast_fp16, y = var_1408_cast_fp16)[name = string("query_19_cast_fp16")]; tensor var_1411 = const()[name = string("op_1411"), val = tensor([1, 12, 64, -1])]; tensor mh_q_19_cast_fp16 = reshape(shape = var_1411, x = query_19_cast_fp16)[name = string("mh_q_19_cast_fp16")]; fp16 var_1413_to_fp16 = const()[name = string("op_1413_to_fp16"), val = fp16(0x1p-3)]; tensor var_1414_cast_fp16 = mul(x = mh_q_19_cast_fp16, y = var_1413_to_fp16)[name = string("op_1414_cast_fp16")]; tensor var_1415 = const()[name = string("op_1415"), val = tensor([1, 12, 64, -1])]; tensor var_1416_cast_fp16 = reshape(shape = var_1415, x = obj_89_cast_fp16)[name = string("op_1416_cast_fp16")]; bool mh_w_37_transpose_x_0 = const()[name = string("mh_w_37_transpose_x_0"), val = bool(true)]; bool mh_w_37_transpose_y_0 = const()[name = string("mh_w_37_transpose_y_0"), val = bool(false)]; tensor mh_w_37_cast_fp16 = matmul(transpose_x = mh_w_37_transpose_x_0, transpose_y = mh_w_37_transpose_y_0, x = var_1414_cast_fp16, y = var_1416_cast_fp16)[name = string("mh_w_37_cast_fp16")]; tensor mh_w_39_cast_fp16 = add(x = mh_w_37_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_39_cast_fp16")]; tensor obj_95_cast_fp16 = softmax(axis = var_1238, x = mh_w_39_cast_fp16)[name = string("obj_95_cast_fp16")]; tensor var_1425 = const()[name = string("op_1425"), val = tensor([1, 12, 64, -1])]; tensor var_1426_cast_fp16 = reshape(shape = var_1425, x = obj_91_cast_fp16)[name = string("op_1426_cast_fp16")]; bool attn_19_transpose_x_0 = const()[name = string("attn_19_transpose_x_0"), val = bool(false)]; bool attn_19_transpose_y_0 = const()[name = string("attn_19_transpose_y_0"), val = bool(true)]; tensor attn_19_cast_fp16 = matmul(transpose_x = attn_19_transpose_x_0, transpose_y = attn_19_transpose_y_0, x = var_1426_cast_fp16, y = obj_95_cast_fp16)[name = string("attn_19_cast_fp16")]; tensor var_1429 = const()[name = string("op_1429"), val = tensor([1, 768, 1, -1])]; tensor input_43_cast_fp16 = reshape(shape = var_1429, x = attn_19_cast_fp16)[name = string("input_43_cast_fp16")]; string var_1439_pad_type_0 = const()[name = string("op_1439_pad_type_0"), val = string("valid")]; tensor var_1439_strides_0 = const()[name = string("op_1439_strides_0"), val = tensor([1, 1])]; tensor var_1439_pad_0 = const()[name = string("op_1439_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1439_dilations_0 = const()[name = string("op_1439_dilations_0"), val = tensor([1, 1])]; int32 var_1439_groups_0 = const()[name = string("op_1439_groups_0"), val = int32(1)]; tensor layers_4_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104105984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104400960))))[name = string("layers_4_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_4_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_4_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104401088)))]; tensor var_1439_cast_fp16 = conv(bias = layers_4_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1439_dilations_0, groups = var_1439_groups_0, pad = var_1439_pad_0, pad_type = var_1439_pad_type_0, strides = var_1439_strides_0, weight = layers_4_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_43_cast_fp16)[name = string("op_1439_cast_fp16")]; string var_1445_pad_type_0 = const()[name = string("op_1445_pad_type_0"), val = string("valid")]; tensor var_1445_strides_0 = const()[name = string("op_1445_strides_0"), val = tensor([1, 1])]; tensor var_1445_pad_0 = const()[name = string("op_1445_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1445_dilations_0 = const()[name = string("op_1445_dilations_0"), val = tensor([1, 1])]; int32 var_1445_groups_0 = const()[name = string("op_1445_groups_0"), val = int32(1)]; tensor layers_4_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104412544))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104402688))))[name = string("layers_4_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_1445_cast_fp16 = conv(dilations = var_1445_dilations_0, groups = var_1445_groups_0, pad = var_1445_pad_0, pad_type = var_1445_pad_type_0, strides = var_1445_strides_0, weight = layers_4_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_43_cast_fp16)[name = string("op_1445_cast_fp16")]; tensor obj_93_cast_fp16 = add(x = var_1439_cast_fp16, y = var_1445_cast_fp16)[name = string("obj_93_cast_fp16")]; tensor inputs_29_cast_fp16 = add(x = inputs_27_cast_fp16, y = obj_93_cast_fp16)[name = string("inputs_29_cast_fp16")]; tensor out_29_axes_0 = const()[name = string("out_29_axes_0"), val = tensor([1])]; fp16 var_1456_to_fp16 = const()[name = string("op_1456_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_29_cast_fp16 = layer_norm(axes = out_29_axes_0, epsilon = var_1456_to_fp16, x = inputs_29_cast_fp16)[name = string("out_29_cast_fp16")]; tensor input_45_gamma_0_to_fp16 = const()[name = string("input_45_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104486336)))]; tensor input_45_beta_0_to_fp16 = const()[name = string("input_45_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104487936)))]; fp16 input_45_epsilon_0_to_fp16 = const()[name = string("input_45_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; tensor input_45_cast_fp16 = batch_norm(beta = input_45_beta_0_to_fp16, epsilon = input_45_epsilon_0_to_fp16, gamma = input_45_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_29_cast_fp16)[name = string("input_45_cast_fp16")]; string var_1474_pad_type_0 = const()[name = string("op_1474_pad_type_0"), val = string("valid")]; tensor var_1474_strides_0 = const()[name = string("op_1474_strides_0"), val = tensor([1, 1])]; tensor var_1474_pad_0 = const()[name = string("op_1474_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1474_dilations_0 = const()[name = string("op_1474_dilations_0"), val = tensor([1, 1])]; int32 var_1474_groups_0 = const()[name = string("op_1474_groups_0"), val = int32(1)]; tensor layers_4_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104489536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105669248))))[name = string("layers_4_fc1_inlier_module_weight_to_fp16_palettized")]; tensor layers_4_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_4_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105669376)))]; tensor var_1474_cast_fp16 = conv(bias = layers_4_fc1_inlier_module_bias_to_fp16, dilations = var_1474_dilations_0, groups = var_1474_groups_0, pad = var_1474_pad_0, pad_type = var_1474_pad_type_0, strides = var_1474_strides_0, weight = layers_4_fc1_inlier_module_weight_to_fp16_palettized, x = input_45_cast_fp16)[name = string("op_1474_cast_fp16")]; string var_1480_pad_type_0 = const()[name = string("op_1480_pad_type_0"), val = string("valid")]; tensor var_1480_strides_0 = const()[name = string("op_1480_strides_0"), val = tensor([1, 1])]; tensor var_1480_pad_0 = const()[name = string("op_1480_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1480_dilations_0 = const()[name = string("op_1480_dilations_0"), val = tensor([1, 1])]; int32 var_1480_groups_0 = const()[name = string("op_1480_groups_0"), val = int32(1)]; tensor layers_4_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105710592))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105675584))))[name = string("layers_4_fc1_outlier_module_weight_to_fp16_sparsified")]; tensor var_1480_cast_fp16 = conv(dilations = var_1480_dilations_0, groups = var_1480_groups_0, pad = var_1480_pad_0, pad_type = var_1480_pad_type_0, strides = var_1480_strides_0, weight = layers_4_fc1_outlier_module_weight_to_fp16_sparsified, x = input_45_cast_fp16)[name = string("op_1480_cast_fp16")]; tensor input_47_cast_fp16 = add(x = var_1474_cast_fp16, y = var_1480_cast_fp16)[name = string("input_47_cast_fp16")]; string input_49_mode_0 = const()[name = string("input_49_mode_0"), val = string("EXACT")]; tensor input_49_cast_fp16 = gelu(mode = input_49_mode_0, x = input_47_cast_fp16)[name = string("input_49_cast_fp16")]; string var_1491_pad_type_0 = const()[name = string("op_1491_pad_type_0"), val = string("valid")]; tensor var_1491_strides_0 = const()[name = string("op_1491_strides_0"), val = tensor([1, 1])]; tensor var_1491_pad_0 = const()[name = string("op_1491_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1491_dilations_0 = const()[name = string("op_1491_dilations_0"), val = tensor([1, 1])]; int32 var_1491_groups_0 = const()[name = string("op_1491_groups_0"), val = int32(1)]; tensor layers_4_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(106005568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107185280))))[name = string("layers_4_fc2_inlier_module_weight_to_fp16_palettized")]; tensor layers_4_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_4_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107185408)))]; tensor var_1491_cast_fp16 = conv(bias = layers_4_fc2_inlier_module_bias_to_fp16, dilations = var_1491_dilations_0, groups = var_1491_groups_0, pad = var_1491_pad_0, pad_type = var_1491_pad_type_0, strides = var_1491_strides_0, weight = layers_4_fc2_inlier_module_weight_to_fp16_palettized, x = input_49_cast_fp16)[name = string("op_1491_cast_fp16")]; string var_1497_pad_type_0 = const()[name = string("op_1497_pad_type_0"), val = string("valid")]; tensor var_1497_strides_0 = const()[name = string("op_1497_strides_0"), val = tensor([1, 1])]; tensor var_1497_pad_0 = const()[name = string("op_1497_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1497_dilations_0 = const()[name = string("op_1497_dilations_0"), val = tensor([1, 1])]; int32 var_1497_groups_0 = const()[name = string("op_1497_groups_0"), val = int32(1)]; tensor layers_4_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107218304))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107187008))))[name = string("layers_4_fc2_outlier_module_weight_to_fp16_sparsified")]; tensor var_1497_cast_fp16 = conv(dilations = var_1497_dilations_0, groups = var_1497_groups_0, pad = var_1497_pad_0, pad_type = var_1497_pad_type_0, strides = var_1497_strides_0, weight = layers_4_fc2_outlier_module_weight_to_fp16_sparsified, x = input_49_cast_fp16)[name = string("op_1497_cast_fp16")]; tensor hidden_states_11_cast_fp16 = add(x = var_1491_cast_fp16, y = var_1497_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; tensor inputs_31_cast_fp16 = add(x = inputs_29_cast_fp16, y = hidden_states_11_cast_fp16)[name = string("inputs_31_cast_fp16")]; tensor obj_107_begin_0 = const()[name = string("obj_107_begin_0"), val = tensor([5, 0, 0, 0])]; tensor obj_107_end_0 = const()[name = string("obj_107_end_0"), val = tensor([6, 768, 1, 1536])]; tensor obj_107_end_mask_0 = const()[name = string("obj_107_end_mask_0"), val = tensor([false, true, true, true])]; tensor obj_107_cast_fp16 = slice_by_index(begin = obj_107_begin_0, end = obj_107_end_0, end_mask = obj_107_end_mask_0, x = read_state_2)[name = string("obj_107_cast_fp16")]; tensor obj_109_begin_0 = const()[name = string("obj_109_begin_0"), val = tensor([5, 0, 0, 0])]; tensor obj_109_end_0 = const()[name = string("obj_109_end_0"), val = tensor([6, 768, 1, 1536])]; tensor obj_109_end_mask_0 = const()[name = string("obj_109_end_mask_0"), val = tensor([false, true, true, true])]; tensor obj_109_cast_fp16 = slice_by_index(begin = obj_109_begin_0, end = obj_109_end_0, end_mask = obj_109_end_mask_0, x = read_state_3)[name = string("obj_109_cast_fp16")]; int32 var_1519 = const()[name = string("op_1519"), val = int32(3)]; tensor out_31_axes_0 = const()[name = string("out_31_axes_0"), val = tensor([1])]; fp16 var_1544_to_fp16 = const()[name = string("op_1544_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_31_cast_fp16 = layer_norm(axes = out_31_axes_0, epsilon = var_1544_to_fp16, x = inputs_31_cast_fp16)[name = string("out_31_cast_fp16")]; tensor obj_97_gamma_0_to_fp16 = const()[name = string("obj_97_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107513280)))]; tensor obj_97_beta_0_to_fp16 = const()[name = string("obj_97_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107514880)))]; fp16 obj_97_epsilon_0_to_fp16 = const()[name = string("obj_97_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; tensor obj_97_cast_fp16 = batch_norm(beta = obj_97_beta_0_to_fp16, epsilon = obj_97_epsilon_0_to_fp16, gamma = obj_97_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_31_cast_fp16)[name = string("obj_97_cast_fp16")]; string var_1566_pad_type_0 = const()[name = string("op_1566_pad_type_0"), val = string("valid")]; tensor var_1566_strides_0 = const()[name = string("op_1566_strides_0"), val = tensor([1, 1])]; tensor var_1566_pad_0 = const()[name = string("op_1566_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1566_dilations_0 = const()[name = string("op_1566_dilations_0"), val = tensor([1, 1])]; int32 var_1566_groups_0 = const()[name = string("op_1566_groups_0"), val = int32(1)]; tensor layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107516480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107811456))))[name = string("layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_5_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_5_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107811584)))]; tensor var_1566_cast_fp16 = conv(bias = layers_5_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1566_dilations_0, groups = var_1566_groups_0, pad = var_1566_pad_0, pad_type = var_1566_pad_type_0, strides = var_1566_strides_0, weight = layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_97_cast_fp16)[name = string("op_1566_cast_fp16")]; string var_1572_pad_type_0 = const()[name = string("op_1572_pad_type_0"), val = string("valid")]; tensor var_1572_strides_0 = const()[name = string("op_1572_strides_0"), val = tensor([1, 1])]; tensor var_1572_pad_0 = const()[name = string("op_1572_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1572_dilations_0 = const()[name = string("op_1572_dilations_0"), val = tensor([1, 1])]; int32 var_1572_groups_0 = const()[name = string("op_1572_groups_0"), val = int32(1)]; tensor layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107822336))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107813184))))[name = string("layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_1572_cast_fp16 = conv(dilations = var_1572_dilations_0, groups = var_1572_groups_0, pad = var_1572_pad_0, pad_type = var_1572_pad_type_0, strides = var_1572_strides_0, weight = layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_97_cast_fp16)[name = string("op_1572_cast_fp16")]; tensor query_21_cast_fp16 = add(x = var_1566_cast_fp16, y = var_1572_cast_fp16)[name = string("query_21_cast_fp16")]; string var_1581_pad_type_0 = const()[name = string("op_1581_pad_type_0"), val = string("valid")]; tensor var_1581_strides_0 = const()[name = string("op_1581_strides_0"), val = tensor([1, 1])]; tensor var_1581_pad_0 = const()[name = string("op_1581_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1581_dilations_0 = const()[name = string("op_1581_dilations_0"), val = tensor([1, 1])]; int32 var_1581_groups_0 = const()[name = string("op_1581_groups_0"), val = int32(1)]; tensor layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107896128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108191104))))[name = string("layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; tensor var_1581_cast_fp16 = conv(dilations = var_1581_dilations_0, groups = var_1581_groups_0, pad = var_1581_pad_0, pad_type = var_1581_pad_type_0, strides = var_1581_strides_0, weight = layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_97_cast_fp16)[name = string("op_1581_cast_fp16")]; string var_1587_pad_type_0 = const()[name = string("op_1587_pad_type_0"), val = string("valid")]; tensor var_1587_strides_0 = const()[name = string("op_1587_strides_0"), val = tensor([1, 1])]; tensor var_1587_pad_0 = const()[name = string("op_1587_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1587_dilations_0 = const()[name = string("op_1587_dilations_0"), val = tensor([1, 1])]; int32 var_1587_groups_0 = const()[name = string("op_1587_groups_0"), val = int32(1)]; tensor layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108200576))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108191232))))[name = string("layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_1587_cast_fp16 = conv(dilations = var_1587_dilations_0, groups = var_1587_groups_0, pad = var_1587_pad_0, pad_type = var_1587_pad_type_0, strides = var_1587_strides_0, weight = layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_97_cast_fp16)[name = string("op_1587_cast_fp16")]; tensor current_key_11_cast_fp16 = add(x = var_1581_cast_fp16, y = var_1587_cast_fp16)[name = string("current_key_11_cast_fp16")]; string var_1597_pad_type_0 = const()[name = string("op_1597_pad_type_0"), val = string("valid")]; tensor var_1597_strides_0 = const()[name = string("op_1597_strides_0"), val = tensor([1, 1])]; tensor var_1597_pad_0 = const()[name = string("op_1597_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1597_dilations_0 = const()[name = string("op_1597_dilations_0"), val = tensor([1, 1])]; int32 var_1597_groups_0 = const()[name = string("op_1597_groups_0"), val = int32(1)]; tensor layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108274368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108569344))))[name = string("layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_5_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_5_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108569472)))]; tensor var_1597_cast_fp16 = conv(bias = layers_5_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1597_dilations_0, groups = var_1597_groups_0, pad = var_1597_pad_0, pad_type = var_1597_pad_type_0, strides = var_1597_strides_0, weight = layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_97_cast_fp16)[name = string("op_1597_cast_fp16")]; string var_1603_pad_type_0 = const()[name = string("op_1603_pad_type_0"), val = string("valid")]; tensor var_1603_strides_0 = const()[name = string("op_1603_strides_0"), val = tensor([1, 1])]; tensor var_1603_pad_0 = const()[name = string("op_1603_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1603_dilations_0 = const()[name = string("op_1603_dilations_0"), val = tensor([1, 1])]; int32 var_1603_groups_0 = const()[name = string("op_1603_groups_0"), val = int32(1)]; tensor layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108577024))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108571072))))[name = string("layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_1603_cast_fp16 = conv(dilations = var_1603_dilations_0, groups = var_1603_groups_0, pad = var_1603_pad_0, pad_type = var_1603_pad_type_0, strides = var_1603_strides_0, weight = layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_97_cast_fp16)[name = string("op_1603_cast_fp16")]; tensor current_value_11_cast_fp16 = add(x = var_1597_cast_fp16, y = var_1603_cast_fp16)[name = string("current_value_11_cast_fp16")]; tensor var_1609_cast_fp16 = mul(x = current_key_11_cast_fp16, y = var_202_cast_fp16)[name = string("op_1609_cast_fp16")]; tensor key_11_cast_fp16 = add(x = var_71_cast_fp16_5, y = var_1609_cast_fp16)[name = string("key_11_cast_fp16")]; tensor var_1611_cast_fp16 = mul(x = current_value_11_cast_fp16, y = var_202_cast_fp16)[name = string("op_1611_cast_fp16")]; tensor value_11_cast_fp16 = add(x = var_86_cast_fp16_5, y = var_1611_cast_fp16)[name = string("value_11_cast_fp16")]; tensor var_1614 = const()[name = string("op_1614"), val = tensor([1, 12, 64, -1])]; tensor mh_q_21_cast_fp16 = reshape(shape = var_1614, x = query_21_cast_fp16)[name = string("mh_q_21_cast_fp16")]; fp16 var_1616_to_fp16 = const()[name = string("op_1616_to_fp16"), val = fp16(0x1p-3)]; tensor var_1617_cast_fp16 = mul(x = mh_q_21_cast_fp16, y = var_1616_to_fp16)[name = string("op_1617_cast_fp16")]; tensor var_1618 = const()[name = string("op_1618"), val = tensor([1, 12, 64, -1])]; tensor var_1619_cast_fp16 = reshape(shape = var_1618, x = key_11_cast_fp16)[name = string("op_1619_cast_fp16")]; bool mh_w_41_transpose_x_0 = const()[name = string("mh_w_41_transpose_x_0"), val = bool(true)]; bool mh_w_41_transpose_y_0 = const()[name = string("mh_w_41_transpose_y_0"), val = bool(false)]; tensor mh_w_41_cast_fp16 = matmul(transpose_x = mh_w_41_transpose_x_0, transpose_y = mh_w_41_transpose_y_0, x = var_1617_cast_fp16, y = var_1619_cast_fp16)[name = string("mh_w_41_cast_fp16")]; tensor mh_w_43_cast_fp16 = add(x = mh_w_41_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_43_cast_fp16")]; tensor var_1627_cast_fp16 = softmax(axis = var_1519, x = mh_w_43_cast_fp16)[name = string("op_1627_cast_fp16")]; tensor var_1628 = const()[name = string("op_1628"), val = tensor([1, 12, 64, -1])]; tensor var_1629_cast_fp16 = reshape(shape = var_1628, x = value_11_cast_fp16)[name = string("op_1629_cast_fp16")]; bool attn_21_transpose_x_0 = const()[name = string("attn_21_transpose_x_0"), val = bool(false)]; bool attn_21_transpose_y_0 = const()[name = string("attn_21_transpose_y_0"), val = bool(true)]; tensor attn_21_cast_fp16 = matmul(transpose_x = attn_21_transpose_x_0, transpose_y = attn_21_transpose_y_0, x = var_1629_cast_fp16, y = var_1627_cast_fp16)[name = string("attn_21_cast_fp16")]; tensor var_1632 = const()[name = string("op_1632"), val = tensor([1, 768, 1, -1])]; tensor input_51_cast_fp16 = reshape(shape = var_1632, x = attn_21_cast_fp16)[name = string("input_51_cast_fp16")]; string var_1642_pad_type_0 = const()[name = string("op_1642_pad_type_0"), val = string("valid")]; tensor var_1642_strides_0 = const()[name = string("op_1642_strides_0"), val = tensor([1, 1])]; tensor var_1642_pad_0 = const()[name = string("op_1642_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1642_dilations_0 = const()[name = string("op_1642_dilations_0"), val = tensor([1, 1])]; int32 var_1642_groups_0 = const()[name = string("op_1642_groups_0"), val = int32(1)]; tensor layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108650816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108945792))))[name = string("layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_5_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_5_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108945920)))]; tensor var_1642_cast_fp16 = conv(bias = layers_5_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1642_dilations_0, groups = var_1642_groups_0, pad = var_1642_pad_0, pad_type = var_1642_pad_type_0, strides = var_1642_strides_0, weight = layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_51_cast_fp16)[name = string("op_1642_cast_fp16")]; string var_1648_pad_type_0 = const()[name = string("op_1648_pad_type_0"), val = string("valid")]; tensor var_1648_strides_0 = const()[name = string("op_1648_strides_0"), val = tensor([1, 1])]; tensor var_1648_pad_0 = const()[name = string("op_1648_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1648_dilations_0 = const()[name = string("op_1648_dilations_0"), val = tensor([1, 1])]; int32 var_1648_groups_0 = const()[name = string("op_1648_groups_0"), val = int32(1)]; tensor layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108953920))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108947520))))[name = string("layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_1648_cast_fp16 = conv(dilations = var_1648_dilations_0, groups = var_1648_groups_0, pad = var_1648_pad_0, pad_type = var_1648_pad_type_0, strides = var_1648_strides_0, weight = layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_51_cast_fp16)[name = string("op_1648_cast_fp16")]; tensor obj_103_cast_fp16 = add(x = var_1642_cast_fp16, y = var_1648_cast_fp16)[name = string("obj_103_cast_fp16")]; tensor inputs_33_cast_fp16 = add(x = inputs_31_cast_fp16, y = obj_103_cast_fp16)[name = string("inputs_33_cast_fp16")]; tensor out_33_axes_0 = const()[name = string("out_33_axes_0"), val = tensor([1])]; fp16 var_1663_to_fp16 = const()[name = string("op_1663_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_33_cast_fp16 = layer_norm(axes = out_33_axes_0, epsilon = var_1663_to_fp16, x = inputs_33_cast_fp16)[name = string("out_33_cast_fp16")]; tensor obj_105_gamma_0_to_fp16 = const()[name = string("obj_105_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109027712)))]; tensor obj_105_beta_0_to_fp16 = const()[name = string("obj_105_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109029312)))]; fp16 obj_105_epsilon_0_to_fp16 = const()[name = string("obj_105_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; tensor obj_105_cast_fp16 = batch_norm(beta = obj_105_beta_0_to_fp16, epsilon = obj_105_epsilon_0_to_fp16, gamma = obj_105_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_33_cast_fp16)[name = string("obj_105_cast_fp16")]; string var_1683_pad_type_0 = const()[name = string("op_1683_pad_type_0"), val = string("valid")]; tensor var_1683_strides_0 = const()[name = string("op_1683_strides_0"), val = tensor([1, 1])]; tensor var_1683_pad_0 = const()[name = string("op_1683_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1683_dilations_0 = const()[name = string("op_1683_dilations_0"), val = tensor([1, 1])]; int32 var_1683_groups_0 = const()[name = string("op_1683_groups_0"), val = int32(1)]; tensor layers_5_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109030912))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109325888))))[name = string("layers_5_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_5_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_5_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109326016)))]; tensor var_1683_cast_fp16 = conv(bias = layers_5_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1683_dilations_0, groups = var_1683_groups_0, pad = var_1683_pad_0, pad_type = var_1683_pad_type_0, strides = var_1683_strides_0, weight = layers_5_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_105_cast_fp16)[name = string("op_1683_cast_fp16")]; string var_1689_pad_type_0 = const()[name = string("op_1689_pad_type_0"), val = string("valid")]; tensor var_1689_strides_0 = const()[name = string("op_1689_strides_0"), val = tensor([1, 1])]; tensor var_1689_pad_0 = const()[name = string("op_1689_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1689_dilations_0 = const()[name = string("op_1689_dilations_0"), val = tensor([1, 1])]; int32 var_1689_groups_0 = const()[name = string("op_1689_groups_0"), val = int32(1)]; tensor layers_5_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109336960))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109327616))))[name = string("layers_5_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_1689_cast_fp16 = conv(dilations = var_1689_dilations_0, groups = var_1689_groups_0, pad = var_1689_pad_0, pad_type = var_1689_pad_type_0, strides = var_1689_strides_0, weight = layers_5_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_105_cast_fp16)[name = string("op_1689_cast_fp16")]; tensor query_23_cast_fp16 = add(x = var_1683_cast_fp16, y = var_1689_cast_fp16)[name = string("query_23_cast_fp16")]; tensor var_1692 = const()[name = string("op_1692"), val = tensor([1, 12, 64, -1])]; tensor mh_q_23_cast_fp16 = reshape(shape = var_1692, x = query_23_cast_fp16)[name = string("mh_q_23_cast_fp16")]; fp16 var_1694_to_fp16 = const()[name = string("op_1694_to_fp16"), val = fp16(0x1p-3)]; tensor var_1695_cast_fp16 = mul(x = mh_q_23_cast_fp16, y = var_1694_to_fp16)[name = string("op_1695_cast_fp16")]; tensor var_1696 = const()[name = string("op_1696"), val = tensor([1, 12, 64, -1])]; tensor var_1697_cast_fp16 = reshape(shape = var_1696, x = obj_107_cast_fp16)[name = string("op_1697_cast_fp16")]; bool mh_w_45_transpose_x_0 = const()[name = string("mh_w_45_transpose_x_0"), val = bool(true)]; bool mh_w_45_transpose_y_0 = const()[name = string("mh_w_45_transpose_y_0"), val = bool(false)]; tensor mh_w_45_cast_fp16 = matmul(transpose_x = mh_w_45_transpose_x_0, transpose_y = mh_w_45_transpose_y_0, x = var_1695_cast_fp16, y = var_1697_cast_fp16)[name = string("mh_w_45_cast_fp16")]; tensor mh_w_47_cast_fp16 = add(x = mh_w_45_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_47_cast_fp16")]; tensor obj_113_cast_fp16 = softmax(axis = var_1519, x = mh_w_47_cast_fp16)[name = string("obj_113_cast_fp16")]; tensor var_1706 = const()[name = string("op_1706"), val = tensor([1, 12, 64, -1])]; tensor var_1707_cast_fp16 = reshape(shape = var_1706, x = obj_109_cast_fp16)[name = string("op_1707_cast_fp16")]; bool attn_23_transpose_x_0 = const()[name = string("attn_23_transpose_x_0"), val = bool(false)]; bool attn_23_transpose_y_0 = const()[name = string("attn_23_transpose_y_0"), val = bool(true)]; tensor attn_23_cast_fp16 = matmul(transpose_x = attn_23_transpose_x_0, transpose_y = attn_23_transpose_y_0, x = var_1707_cast_fp16, y = obj_113_cast_fp16)[name = string("attn_23_cast_fp16")]; tensor var_1710 = const()[name = string("op_1710"), val = tensor([1, 768, 1, -1])]; tensor input_53_cast_fp16 = reshape(shape = var_1710, x = attn_23_cast_fp16)[name = string("input_53_cast_fp16")]; string var_1720_pad_type_0 = const()[name = string("op_1720_pad_type_0"), val = string("valid")]; tensor var_1720_strides_0 = const()[name = string("op_1720_strides_0"), val = tensor([1, 1])]; tensor var_1720_pad_0 = const()[name = string("op_1720_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1720_dilations_0 = const()[name = string("op_1720_dilations_0"), val = tensor([1, 1])]; int32 var_1720_groups_0 = const()[name = string("op_1720_groups_0"), val = int32(1)]; tensor layers_5_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109410752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109705728))))[name = string("layers_5_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_5_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_5_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109705856)))]; tensor var_1720_cast_fp16 = conv(bias = layers_5_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1720_dilations_0, groups = var_1720_groups_0, pad = var_1720_pad_0, pad_type = var_1720_pad_type_0, strides = var_1720_strides_0, weight = layers_5_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_53_cast_fp16)[name = string("op_1720_cast_fp16")]; string var_1726_pad_type_0 = const()[name = string("op_1726_pad_type_0"), val = string("valid")]; tensor var_1726_strides_0 = const()[name = string("op_1726_strides_0"), val = tensor([1, 1])]; tensor var_1726_pad_0 = const()[name = string("op_1726_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1726_dilations_0 = const()[name = string("op_1726_dilations_0"), val = tensor([1, 1])]; int32 var_1726_groups_0 = const()[name = string("op_1726_groups_0"), val = int32(1)]; tensor layers_5_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109714432))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109707456))))[name = string("layers_5_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_1726_cast_fp16 = conv(dilations = var_1726_dilations_0, groups = var_1726_groups_0, pad = var_1726_pad_0, pad_type = var_1726_pad_type_0, strides = var_1726_strides_0, weight = layers_5_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_53_cast_fp16)[name = string("op_1726_cast_fp16")]; tensor obj_111_cast_fp16 = add(x = var_1720_cast_fp16, y = var_1726_cast_fp16)[name = string("obj_111_cast_fp16")]; tensor inputs_35_cast_fp16 = add(x = inputs_33_cast_fp16, y = obj_111_cast_fp16)[name = string("inputs_35_cast_fp16")]; tensor out_35_axes_0 = const()[name = string("out_35_axes_0"), val = tensor([1])]; fp16 var_1737_to_fp16 = const()[name = string("op_1737_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_35_cast_fp16 = layer_norm(axes = out_35_axes_0, epsilon = var_1737_to_fp16, x = inputs_35_cast_fp16)[name = string("out_35_cast_fp16")]; tensor input_55_gamma_0_to_fp16 = const()[name = string("input_55_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109788224)))]; tensor input_55_beta_0_to_fp16 = const()[name = string("input_55_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109789824)))]; fp16 input_55_epsilon_0_to_fp16 = const()[name = string("input_55_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; tensor input_55_cast_fp16 = batch_norm(beta = input_55_beta_0_to_fp16, epsilon = input_55_epsilon_0_to_fp16, gamma = input_55_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_35_cast_fp16)[name = string("input_55_cast_fp16")]; string var_1755_pad_type_0 = const()[name = string("op_1755_pad_type_0"), val = string("valid")]; tensor var_1755_strides_0 = const()[name = string("op_1755_strides_0"), val = tensor([1, 1])]; tensor var_1755_pad_0 = const()[name = string("op_1755_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1755_dilations_0 = const()[name = string("op_1755_dilations_0"), val = tensor([1, 1])]; int32 var_1755_groups_0 = const()[name = string("op_1755_groups_0"), val = int32(1)]; tensor layers_5_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109791424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110971136))))[name = string("layers_5_fc1_inlier_module_weight_to_fp16_palettized")]; tensor layers_5_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_5_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110971264)))]; tensor var_1755_cast_fp16 = conv(bias = layers_5_fc1_inlier_module_bias_to_fp16, dilations = var_1755_dilations_0, groups = var_1755_groups_0, pad = var_1755_pad_0, pad_type = var_1755_pad_type_0, strides = var_1755_strides_0, weight = layers_5_fc1_inlier_module_weight_to_fp16_palettized, x = input_55_cast_fp16)[name = string("op_1755_cast_fp16")]; string var_1761_pad_type_0 = const()[name = string("op_1761_pad_type_0"), val = string("valid")]; tensor var_1761_strides_0 = const()[name = string("op_1761_strides_0"), val = tensor([1, 1])]; tensor var_1761_pad_0 = const()[name = string("op_1761_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1761_dilations_0 = const()[name = string("op_1761_dilations_0"), val = tensor([1, 1])]; int32 var_1761_groups_0 = const()[name = string("op_1761_groups_0"), val = int32(1)]; tensor layers_5_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110998848))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110977472))))[name = string("layers_5_fc1_outlier_module_weight_to_fp16_sparsified")]; tensor var_1761_cast_fp16 = conv(dilations = var_1761_dilations_0, groups = var_1761_groups_0, pad = var_1761_pad_0, pad_type = var_1761_pad_type_0, strides = var_1761_strides_0, weight = layers_5_fc1_outlier_module_weight_to_fp16_sparsified, x = input_55_cast_fp16)[name = string("op_1761_cast_fp16")]; tensor input_57_cast_fp16 = add(x = var_1755_cast_fp16, y = var_1761_cast_fp16)[name = string("input_57_cast_fp16")]; string input_59_mode_0 = const()[name = string("input_59_mode_0"), val = string("EXACT")]; tensor input_59_cast_fp16 = gelu(mode = input_59_mode_0, x = input_57_cast_fp16)[name = string("input_59_cast_fp16")]; string var_1772_pad_type_0 = const()[name = string("op_1772_pad_type_0"), val = string("valid")]; tensor var_1772_strides_0 = const()[name = string("op_1772_strides_0"), val = tensor([1, 1])]; tensor var_1772_pad_0 = const()[name = string("op_1772_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1772_dilations_0 = const()[name = string("op_1772_dilations_0"), val = tensor([1, 1])]; int32 var_1772_groups_0 = const()[name = string("op_1772_groups_0"), val = int32(1)]; tensor layers_5_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111293824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112473536))))[name = string("layers_5_fc2_inlier_module_weight_to_fp16_palettized")]; tensor layers_5_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_5_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112473664)))]; tensor var_1772_cast_fp16 = conv(bias = layers_5_fc2_inlier_module_bias_to_fp16, dilations = var_1772_dilations_0, groups = var_1772_groups_0, pad = var_1772_pad_0, pad_type = var_1772_pad_type_0, strides = var_1772_strides_0, weight = layers_5_fc2_inlier_module_weight_to_fp16_palettized, x = input_59_cast_fp16)[name = string("op_1772_cast_fp16")]; string var_1778_pad_type_0 = const()[name = string("op_1778_pad_type_0"), val = string("valid")]; tensor var_1778_strides_0 = const()[name = string("op_1778_strides_0"), val = tensor([1, 1])]; tensor var_1778_pad_0 = const()[name = string("op_1778_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1778_dilations_0 = const()[name = string("op_1778_dilations_0"), val = tensor([1, 1])]; int32 var_1778_groups_0 = const()[name = string("op_1778_groups_0"), val = int32(1)]; tensor layers_5_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112497472))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112475264))))[name = string("layers_5_fc2_outlier_module_weight_to_fp16_sparsified")]; tensor var_1778_cast_fp16 = conv(dilations = var_1778_dilations_0, groups = var_1778_groups_0, pad = var_1778_pad_0, pad_type = var_1778_pad_type_0, strides = var_1778_strides_0, weight = layers_5_fc2_outlier_module_weight_to_fp16_sparsified, x = input_59_cast_fp16)[name = string("op_1778_cast_fp16")]; tensor hidden_states_13_cast_fp16 = add(x = var_1772_cast_fp16, y = var_1778_cast_fp16)[name = string("hidden_states_13_cast_fp16")]; tensor inputs_37_cast_fp16 = add(x = inputs_35_cast_fp16, y = hidden_states_13_cast_fp16)[name = string("inputs_37_cast_fp16")]; tensor obj_125_begin_0 = const()[name = string("obj_125_begin_0"), val = tensor([6, 0, 0, 0])]; tensor obj_125_end_0 = const()[name = string("obj_125_end_0"), val = tensor([7, 768, 1, 1536])]; tensor obj_125_end_mask_0 = const()[name = string("obj_125_end_mask_0"), val = tensor([false, true, true, true])]; tensor obj_125_cast_fp16 = slice_by_index(begin = obj_125_begin_0, end = obj_125_end_0, end_mask = obj_125_end_mask_0, x = read_state_2)[name = string("obj_125_cast_fp16")]; tensor obj_127_begin_0 = const()[name = string("obj_127_begin_0"), val = tensor([6, 0, 0, 0])]; tensor obj_127_end_0 = const()[name = string("obj_127_end_0"), val = tensor([7, 768, 1, 1536])]; tensor obj_127_end_mask_0 = const()[name = string("obj_127_end_mask_0"), val = tensor([false, true, true, true])]; tensor obj_127_cast_fp16 = slice_by_index(begin = obj_127_begin_0, end = obj_127_end_0, end_mask = obj_127_end_mask_0, x = read_state_3)[name = string("obj_127_cast_fp16")]; int32 var_1800 = const()[name = string("op_1800"), val = int32(3)]; tensor out_37_axes_0 = const()[name = string("out_37_axes_0"), val = tensor([1])]; fp16 var_1825_to_fp16 = const()[name = string("op_1825_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_37_cast_fp16 = layer_norm(axes = out_37_axes_0, epsilon = var_1825_to_fp16, x = inputs_37_cast_fp16)[name = string("out_37_cast_fp16")]; tensor obj_115_gamma_0_to_fp16 = const()[name = string("obj_115_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112792448)))]; tensor obj_115_beta_0_to_fp16 = const()[name = string("obj_115_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112794048)))]; fp16 obj_115_epsilon_0_to_fp16 = const()[name = string("obj_115_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; tensor obj_115_cast_fp16 = batch_norm(beta = obj_115_beta_0_to_fp16, epsilon = obj_115_epsilon_0_to_fp16, gamma = obj_115_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_37_cast_fp16)[name = string("obj_115_cast_fp16")]; string var_1847_pad_type_0 = const()[name = string("op_1847_pad_type_0"), val = string("valid")]; tensor var_1847_strides_0 = const()[name = string("op_1847_strides_0"), val = tensor([1, 1])]; tensor var_1847_pad_0 = const()[name = string("op_1847_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1847_dilations_0 = const()[name = string("op_1847_dilations_0"), val = tensor([1, 1])]; int32 var_1847_groups_0 = const()[name = string("op_1847_groups_0"), val = int32(1)]; tensor layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112795648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113090624))))[name = string("layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_6_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_6_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113090752)))]; tensor var_1847_cast_fp16 = conv(bias = layers_6_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1847_dilations_0, groups = var_1847_groups_0, pad = var_1847_pad_0, pad_type = var_1847_pad_type_0, strides = var_1847_strides_0, weight = layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_115_cast_fp16)[name = string("op_1847_cast_fp16")]; string var_1853_pad_type_0 = const()[name = string("op_1853_pad_type_0"), val = string("valid")]; tensor var_1853_strides_0 = const()[name = string("op_1853_strides_0"), val = tensor([1, 1])]; tensor var_1853_pad_0 = const()[name = string("op_1853_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1853_dilations_0 = const()[name = string("op_1853_dilations_0"), val = tensor([1, 1])]; int32 var_1853_groups_0 = const()[name = string("op_1853_groups_0"), val = int32(1)]; tensor layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113100672))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113092352))))[name = string("layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_1853_cast_fp16 = conv(dilations = var_1853_dilations_0, groups = var_1853_groups_0, pad = var_1853_pad_0, pad_type = var_1853_pad_type_0, strides = var_1853_strides_0, weight = layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_115_cast_fp16)[name = string("op_1853_cast_fp16")]; tensor query_25_cast_fp16 = add(x = var_1847_cast_fp16, y = var_1853_cast_fp16)[name = string("query_25_cast_fp16")]; string var_1862_pad_type_0 = const()[name = string("op_1862_pad_type_0"), val = string("valid")]; tensor var_1862_strides_0 = const()[name = string("op_1862_strides_0"), val = tensor([1, 1])]; tensor var_1862_pad_0 = const()[name = string("op_1862_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1862_dilations_0 = const()[name = string("op_1862_dilations_0"), val = tensor([1, 1])]; int32 var_1862_groups_0 = const()[name = string("op_1862_groups_0"), val = int32(1)]; tensor layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113174464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113469440))))[name = string("layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; tensor var_1862_cast_fp16 = conv(dilations = var_1862_dilations_0, groups = var_1862_groups_0, pad = var_1862_pad_0, pad_type = var_1862_pad_type_0, strides = var_1862_strides_0, weight = layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_115_cast_fp16)[name = string("op_1862_cast_fp16")]; string var_1868_pad_type_0 = const()[name = string("op_1868_pad_type_0"), val = string("valid")]; tensor var_1868_strides_0 = const()[name = string("op_1868_strides_0"), val = tensor([1, 1])]; tensor var_1868_pad_0 = const()[name = string("op_1868_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1868_dilations_0 = const()[name = string("op_1868_dilations_0"), val = tensor([1, 1])]; int32 var_1868_groups_0 = const()[name = string("op_1868_groups_0"), val = int32(1)]; tensor layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113478080))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113469568))))[name = string("layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_1868_cast_fp16 = conv(dilations = var_1868_dilations_0, groups = var_1868_groups_0, pad = var_1868_pad_0, pad_type = var_1868_pad_type_0, strides = var_1868_strides_0, weight = layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_115_cast_fp16)[name = string("op_1868_cast_fp16")]; tensor current_key_13_cast_fp16 = add(x = var_1862_cast_fp16, y = var_1868_cast_fp16)[name = string("current_key_13_cast_fp16")]; string var_1878_pad_type_0 = const()[name = string("op_1878_pad_type_0"), val = string("valid")]; tensor var_1878_strides_0 = const()[name = string("op_1878_strides_0"), val = tensor([1, 1])]; tensor var_1878_pad_0 = const()[name = string("op_1878_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1878_dilations_0 = const()[name = string("op_1878_dilations_0"), val = tensor([1, 1])]; int32 var_1878_groups_0 = const()[name = string("op_1878_groups_0"), val = int32(1)]; tensor layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113551872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113846848))))[name = string("layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_6_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_6_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113846976)))]; tensor var_1878_cast_fp16 = conv(bias = layers_6_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1878_dilations_0, groups = var_1878_groups_0, pad = var_1878_pad_0, pad_type = var_1878_pad_type_0, strides = var_1878_strides_0, weight = layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_115_cast_fp16)[name = string("op_1878_cast_fp16")]; string var_1884_pad_type_0 = const()[name = string("op_1884_pad_type_0"), val = string("valid")]; tensor var_1884_strides_0 = const()[name = string("op_1884_strides_0"), val = tensor([1, 1])]; tensor var_1884_pad_0 = const()[name = string("op_1884_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1884_dilations_0 = const()[name = string("op_1884_dilations_0"), val = tensor([1, 1])]; int32 var_1884_groups_0 = const()[name = string("op_1884_groups_0"), val = int32(1)]; tensor layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113853824))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113848576))))[name = string("layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_1884_cast_fp16 = conv(dilations = var_1884_dilations_0, groups = var_1884_groups_0, pad = var_1884_pad_0, pad_type = var_1884_pad_type_0, strides = var_1884_strides_0, weight = layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_115_cast_fp16)[name = string("op_1884_cast_fp16")]; tensor current_value_13_cast_fp16 = add(x = var_1878_cast_fp16, y = var_1884_cast_fp16)[name = string("current_value_13_cast_fp16")]; tensor var_1890_cast_fp16 = mul(x = current_key_13_cast_fp16, y = var_202_cast_fp16)[name = string("op_1890_cast_fp16")]; tensor key_13_cast_fp16 = add(x = var_71_cast_fp16_6, y = var_1890_cast_fp16)[name = string("key_13_cast_fp16")]; tensor var_1892_cast_fp16 = mul(x = current_value_13_cast_fp16, y = var_202_cast_fp16)[name = string("op_1892_cast_fp16")]; tensor value_13_cast_fp16 = add(x = var_86_cast_fp16_6, y = var_1892_cast_fp16)[name = string("value_13_cast_fp16")]; tensor var_1895 = const()[name = string("op_1895"), val = tensor([1, 12, 64, -1])]; tensor mh_q_25_cast_fp16 = reshape(shape = var_1895, x = query_25_cast_fp16)[name = string("mh_q_25_cast_fp16")]; fp16 var_1897_to_fp16 = const()[name = string("op_1897_to_fp16"), val = fp16(0x1p-3)]; tensor var_1898_cast_fp16 = mul(x = mh_q_25_cast_fp16, y = var_1897_to_fp16)[name = string("op_1898_cast_fp16")]; tensor var_1899 = const()[name = string("op_1899"), val = tensor([1, 12, 64, -1])]; tensor var_1900_cast_fp16 = reshape(shape = var_1899, x = key_13_cast_fp16)[name = string("op_1900_cast_fp16")]; bool mh_w_49_transpose_x_0 = const()[name = string("mh_w_49_transpose_x_0"), val = bool(true)]; bool mh_w_49_transpose_y_0 = const()[name = string("mh_w_49_transpose_y_0"), val = bool(false)]; tensor mh_w_49_cast_fp16 = matmul(transpose_x = mh_w_49_transpose_x_0, transpose_y = mh_w_49_transpose_y_0, x = var_1898_cast_fp16, y = var_1900_cast_fp16)[name = string("mh_w_49_cast_fp16")]; tensor mh_w_51_cast_fp16 = add(x = mh_w_49_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_51_cast_fp16")]; tensor var_1908_cast_fp16 = softmax(axis = var_1800, x = mh_w_51_cast_fp16)[name = string("op_1908_cast_fp16")]; tensor var_1909 = const()[name = string("op_1909"), val = tensor([1, 12, 64, -1])]; tensor var_1910_cast_fp16 = reshape(shape = var_1909, x = value_13_cast_fp16)[name = string("op_1910_cast_fp16")]; bool attn_25_transpose_x_0 = const()[name = string("attn_25_transpose_x_0"), val = bool(false)]; bool attn_25_transpose_y_0 = const()[name = string("attn_25_transpose_y_0"), val = bool(true)]; tensor attn_25_cast_fp16 = matmul(transpose_x = attn_25_transpose_x_0, transpose_y = attn_25_transpose_y_0, x = var_1910_cast_fp16, y = var_1908_cast_fp16)[name = string("attn_25_cast_fp16")]; tensor var_1913 = const()[name = string("op_1913"), val = tensor([1, 768, 1, -1])]; tensor input_61_cast_fp16 = reshape(shape = var_1913, x = attn_25_cast_fp16)[name = string("input_61_cast_fp16")]; string var_1923_pad_type_0 = const()[name = string("op_1923_pad_type_0"), val = string("valid")]; tensor var_1923_strides_0 = const()[name = string("op_1923_strides_0"), val = tensor([1, 1])]; tensor var_1923_pad_0 = const()[name = string("op_1923_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1923_dilations_0 = const()[name = string("op_1923_dilations_0"), val = tensor([1, 1])]; int32 var_1923_groups_0 = const()[name = string("op_1923_groups_0"), val = int32(1)]; tensor layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113927616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114222592))))[name = string("layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_6_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_6_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114222720)))]; tensor var_1923_cast_fp16 = conv(bias = layers_6_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1923_dilations_0, groups = var_1923_groups_0, pad = var_1923_pad_0, pad_type = var_1923_pad_type_0, strides = var_1923_strides_0, weight = layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_61_cast_fp16)[name = string("op_1923_cast_fp16")]; string var_1929_pad_type_0 = const()[name = string("op_1929_pad_type_0"), val = string("valid")]; tensor var_1929_strides_0 = const()[name = string("op_1929_strides_0"), val = tensor([1, 1])]; tensor var_1929_pad_0 = const()[name = string("op_1929_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1929_dilations_0 = const()[name = string("op_1929_dilations_0"), val = tensor([1, 1])]; int32 var_1929_groups_0 = const()[name = string("op_1929_groups_0"), val = int32(1)]; tensor layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114230208))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114224320))))[name = string("layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_1929_cast_fp16 = conv(dilations = var_1929_dilations_0, groups = var_1929_groups_0, pad = var_1929_pad_0, pad_type = var_1929_pad_type_0, strides = var_1929_strides_0, weight = layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_61_cast_fp16)[name = string("op_1929_cast_fp16")]; tensor obj_121_cast_fp16 = add(x = var_1923_cast_fp16, y = var_1929_cast_fp16)[name = string("obj_121_cast_fp16")]; tensor inputs_39_cast_fp16 = add(x = inputs_37_cast_fp16, y = obj_121_cast_fp16)[name = string("inputs_39_cast_fp16")]; tensor out_39_axes_0 = const()[name = string("out_39_axes_0"), val = tensor([1])]; fp16 var_1944_to_fp16 = const()[name = string("op_1944_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_39_cast_fp16 = layer_norm(axes = out_39_axes_0, epsilon = var_1944_to_fp16, x = inputs_39_cast_fp16)[name = string("out_39_cast_fp16")]; tensor obj_123_gamma_0_to_fp16 = const()[name = string("obj_123_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114304000)))]; tensor obj_123_beta_0_to_fp16 = const()[name = string("obj_123_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114305600)))]; fp16 obj_123_epsilon_0_to_fp16 = const()[name = string("obj_123_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; tensor obj_123_cast_fp16 = batch_norm(beta = obj_123_beta_0_to_fp16, epsilon = obj_123_epsilon_0_to_fp16, gamma = obj_123_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_39_cast_fp16)[name = string("obj_123_cast_fp16")]; string var_1964_pad_type_0 = const()[name = string("op_1964_pad_type_0"), val = string("valid")]; tensor var_1964_strides_0 = const()[name = string("op_1964_strides_0"), val = tensor([1, 1])]; tensor var_1964_pad_0 = const()[name = string("op_1964_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1964_dilations_0 = const()[name = string("op_1964_dilations_0"), val = tensor([1, 1])]; int32 var_1964_groups_0 = const()[name = string("op_1964_groups_0"), val = int32(1)]; tensor layers_6_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114307200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114602176))))[name = string("layers_6_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_6_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_6_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114602304)))]; tensor var_1964_cast_fp16 = conv(bias = layers_6_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1964_dilations_0, groups = var_1964_groups_0, pad = var_1964_pad_0, pad_type = var_1964_pad_type_0, strides = var_1964_strides_0, weight = layers_6_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_123_cast_fp16)[name = string("op_1964_cast_fp16")]; string var_1970_pad_type_0 = const()[name = string("op_1970_pad_type_0"), val = string("valid")]; tensor var_1970_strides_0 = const()[name = string("op_1970_strides_0"), val = tensor([1, 1])]; tensor var_1970_pad_0 = const()[name = string("op_1970_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1970_dilations_0 = const()[name = string("op_1970_dilations_0"), val = tensor([1, 1])]; int32 var_1970_groups_0 = const()[name = string("op_1970_groups_0"), val = int32(1)]; tensor layers_6_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114610752))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114603904))))[name = string("layers_6_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_1970_cast_fp16 = conv(dilations = var_1970_dilations_0, groups = var_1970_groups_0, pad = var_1970_pad_0, pad_type = var_1970_pad_type_0, strides = var_1970_strides_0, weight = layers_6_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_123_cast_fp16)[name = string("op_1970_cast_fp16")]; tensor query_27_cast_fp16 = add(x = var_1964_cast_fp16, y = var_1970_cast_fp16)[name = string("query_27_cast_fp16")]; tensor var_1973 = const()[name = string("op_1973"), val = tensor([1, 12, 64, -1])]; tensor mh_q_27_cast_fp16 = reshape(shape = var_1973, x = query_27_cast_fp16)[name = string("mh_q_27_cast_fp16")]; fp16 var_1975_to_fp16 = const()[name = string("op_1975_to_fp16"), val = fp16(0x1p-3)]; tensor var_1976_cast_fp16 = mul(x = mh_q_27_cast_fp16, y = var_1975_to_fp16)[name = string("op_1976_cast_fp16")]; tensor var_1977 = const()[name = string("op_1977"), val = tensor([1, 12, 64, -1])]; tensor var_1978_cast_fp16 = reshape(shape = var_1977, x = obj_125_cast_fp16)[name = string("op_1978_cast_fp16")]; bool mh_w_53_transpose_x_0 = const()[name = string("mh_w_53_transpose_x_0"), val = bool(true)]; bool mh_w_53_transpose_y_0 = const()[name = string("mh_w_53_transpose_y_0"), val = bool(false)]; tensor mh_w_53_cast_fp16 = matmul(transpose_x = mh_w_53_transpose_x_0, transpose_y = mh_w_53_transpose_y_0, x = var_1976_cast_fp16, y = var_1978_cast_fp16)[name = string("mh_w_53_cast_fp16")]; tensor mh_w_55_cast_fp16 = add(x = mh_w_53_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_55_cast_fp16")]; tensor obj_131_cast_fp16 = softmax(axis = var_1800, x = mh_w_55_cast_fp16)[name = string("obj_131_cast_fp16")]; tensor var_1987 = const()[name = string("op_1987"), val = tensor([1, 12, 64, -1])]; tensor var_1988_cast_fp16 = reshape(shape = var_1987, x = obj_127_cast_fp16)[name = string("op_1988_cast_fp16")]; bool attn_27_transpose_x_0 = const()[name = string("attn_27_transpose_x_0"), val = bool(false)]; bool attn_27_transpose_y_0 = const()[name = string("attn_27_transpose_y_0"), val = bool(true)]; tensor attn_27_cast_fp16 = matmul(transpose_x = attn_27_transpose_x_0, transpose_y = attn_27_transpose_y_0, x = var_1988_cast_fp16, y = obj_131_cast_fp16)[name = string("attn_27_cast_fp16")]; tensor var_1991 = const()[name = string("op_1991"), val = tensor([1, 768, 1, -1])]; tensor input_63_cast_fp16 = reshape(shape = var_1991, x = attn_27_cast_fp16)[name = string("input_63_cast_fp16")]; string var_2001_pad_type_0 = const()[name = string("op_2001_pad_type_0"), val = string("valid")]; tensor var_2001_strides_0 = const()[name = string("op_2001_strides_0"), val = tensor([1, 1])]; tensor var_2001_pad_0 = const()[name = string("op_2001_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2001_dilations_0 = const()[name = string("op_2001_dilations_0"), val = tensor([1, 1])]; int32 var_2001_groups_0 = const()[name = string("op_2001_groups_0"), val = int32(1)]; tensor layers_6_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114684544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114979520))))[name = string("layers_6_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_6_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_6_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114979648)))]; tensor var_2001_cast_fp16 = conv(bias = layers_6_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2001_dilations_0, groups = var_2001_groups_0, pad = var_2001_pad_0, pad_type = var_2001_pad_type_0, strides = var_2001_strides_0, weight = layers_6_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_63_cast_fp16)[name = string("op_2001_cast_fp16")]; string var_2007_pad_type_0 = const()[name = string("op_2007_pad_type_0"), val = string("valid")]; tensor var_2007_strides_0 = const()[name = string("op_2007_strides_0"), val = tensor([1, 1])]; tensor var_2007_pad_0 = const()[name = string("op_2007_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2007_dilations_0 = const()[name = string("op_2007_dilations_0"), val = tensor([1, 1])]; int32 var_2007_groups_0 = const()[name = string("op_2007_groups_0"), val = int32(1)]; tensor layers_6_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114987584))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114981248))))[name = string("layers_6_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_2007_cast_fp16 = conv(dilations = var_2007_dilations_0, groups = var_2007_groups_0, pad = var_2007_pad_0, pad_type = var_2007_pad_type_0, strides = var_2007_strides_0, weight = layers_6_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_63_cast_fp16)[name = string("op_2007_cast_fp16")]; tensor obj_129_cast_fp16 = add(x = var_2001_cast_fp16, y = var_2007_cast_fp16)[name = string("obj_129_cast_fp16")]; tensor inputs_41_cast_fp16 = add(x = inputs_39_cast_fp16, y = obj_129_cast_fp16)[name = string("inputs_41_cast_fp16")]; tensor out_41_axes_0 = const()[name = string("out_41_axes_0"), val = tensor([1])]; fp16 var_2021_to_fp16 = const()[name = string("op_2021_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_41_cast_fp16 = layer_norm(axes = out_41_axes_0, epsilon = var_2021_to_fp16, x = inputs_41_cast_fp16)[name = string("out_41_cast_fp16")]; tensor input_65_gamma_0_to_fp16 = const()[name = string("input_65_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115061376)))]; tensor input_65_beta_0_to_fp16 = const()[name = string("input_65_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115062976)))]; fp16 input_65_epsilon_0_to_fp16 = const()[name = string("input_65_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; tensor input_65_cast_fp16 = batch_norm(beta = input_65_beta_0_to_fp16, epsilon = input_65_epsilon_0_to_fp16, gamma = input_65_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_41_cast_fp16)[name = string("input_65_cast_fp16")]; string var_2039_pad_type_0 = const()[name = string("op_2039_pad_type_0"), val = string("valid")]; tensor var_2039_strides_0 = const()[name = string("op_2039_strides_0"), val = tensor([1, 1])]; tensor var_2039_pad_0 = const()[name = string("op_2039_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2039_dilations_0 = const()[name = string("op_2039_dilations_0"), val = tensor([1, 1])]; int32 var_2039_groups_0 = const()[name = string("op_2039_groups_0"), val = int32(1)]; tensor layers_6_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115064576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116244288))))[name = string("layers_6_fc1_inlier_module_weight_to_fp16_palettized")]; tensor layers_6_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_6_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116244416)))]; tensor var_2039_cast_fp16 = conv(bias = layers_6_fc1_inlier_module_bias_to_fp16, dilations = var_2039_dilations_0, groups = var_2039_groups_0, pad = var_2039_pad_0, pad_type = var_2039_pad_type_0, strides = var_2039_strides_0, weight = layers_6_fc1_inlier_module_weight_to_fp16_palettized, x = input_65_cast_fp16)[name = string("op_2039_cast_fp16")]; string var_2045_pad_type_0 = const()[name = string("op_2045_pad_type_0"), val = string("valid")]; tensor var_2045_strides_0 = const()[name = string("op_2045_strides_0"), val = tensor([1, 1])]; tensor var_2045_pad_0 = const()[name = string("op_2045_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2045_dilations_0 = const()[name = string("op_2045_dilations_0"), val = tensor([1, 1])]; int32 var_2045_groups_0 = const()[name = string("op_2045_groups_0"), val = int32(1)]; tensor layers_6_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116270336))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116250624))))[name = string("layers_6_fc1_outlier_module_weight_to_fp16_sparsified")]; tensor var_2045_cast_fp16 = conv(dilations = var_2045_dilations_0, groups = var_2045_groups_0, pad = var_2045_pad_0, pad_type = var_2045_pad_type_0, strides = var_2045_strides_0, weight = layers_6_fc1_outlier_module_weight_to_fp16_sparsified, x = input_65_cast_fp16)[name = string("op_2045_cast_fp16")]; tensor input_67_cast_fp16 = add(x = var_2039_cast_fp16, y = var_2045_cast_fp16)[name = string("input_67_cast_fp16")]; string input_69_mode_0 = const()[name = string("input_69_mode_0"), val = string("EXACT")]; tensor input_69_cast_fp16 = gelu(mode = input_69_mode_0, x = input_67_cast_fp16)[name = string("input_69_cast_fp16")]; string var_2056_pad_type_0 = const()[name = string("op_2056_pad_type_0"), val = string("valid")]; tensor var_2056_strides_0 = const()[name = string("op_2056_strides_0"), val = tensor([1, 1])]; tensor var_2056_pad_0 = const()[name = string("op_2056_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2056_dilations_0 = const()[name = string("op_2056_dilations_0"), val = tensor([1, 1])]; int32 var_2056_groups_0 = const()[name = string("op_2056_groups_0"), val = int32(1)]; tensor layers_6_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116565312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117745024))))[name = string("layers_6_fc2_inlier_module_weight_to_fp16_palettized")]; tensor layers_6_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_6_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117745152)))]; tensor var_2056_cast_fp16 = conv(bias = layers_6_fc2_inlier_module_bias_to_fp16, dilations = var_2056_dilations_0, groups = var_2056_groups_0, pad = var_2056_pad_0, pad_type = var_2056_pad_type_0, strides = var_2056_strides_0, weight = layers_6_fc2_inlier_module_weight_to_fp16_palettized, x = input_69_cast_fp16)[name = string("op_2056_cast_fp16")]; string var_2062_pad_type_0 = const()[name = string("op_2062_pad_type_0"), val = string("valid")]; tensor var_2062_strides_0 = const()[name = string("op_2062_strides_0"), val = tensor([1, 1])]; tensor var_2062_pad_0 = const()[name = string("op_2062_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2062_dilations_0 = const()[name = string("op_2062_dilations_0"), val = tensor([1, 1])]; int32 var_2062_groups_0 = const()[name = string("op_2062_groups_0"), val = int32(1)]; tensor layers_6_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117773312))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117746752))))[name = string("layers_6_fc2_outlier_module_weight_to_fp16_sparsified")]; tensor var_2062_cast_fp16 = conv(dilations = var_2062_dilations_0, groups = var_2062_groups_0, pad = var_2062_pad_0, pad_type = var_2062_pad_type_0, strides = var_2062_strides_0, weight = layers_6_fc2_outlier_module_weight_to_fp16_sparsified, x = input_69_cast_fp16)[name = string("op_2062_cast_fp16")]; tensor hidden_states_15_cast_fp16 = add(x = var_2056_cast_fp16, y = var_2062_cast_fp16)[name = string("hidden_states_15_cast_fp16")]; tensor inputs_43_cast_fp16 = add(x = inputs_41_cast_fp16, y = hidden_states_15_cast_fp16)[name = string("inputs_43_cast_fp16")]; tensor obj_143_begin_0 = const()[name = string("obj_143_begin_0"), val = tensor([7, 0, 0, 0])]; tensor obj_143_end_0 = const()[name = string("obj_143_end_0"), val = tensor([8, 768, 1, 1536])]; tensor obj_143_end_mask_0 = const()[name = string("obj_143_end_mask_0"), val = tensor([false, true, true, true])]; tensor obj_143_cast_fp16 = slice_by_index(begin = obj_143_begin_0, end = obj_143_end_0, end_mask = obj_143_end_mask_0, x = read_state_2)[name = string("obj_143_cast_fp16")]; tensor obj_145_begin_0 = const()[name = string("obj_145_begin_0"), val = tensor([7, 0, 0, 0])]; tensor obj_145_end_0 = const()[name = string("obj_145_end_0"), val = tensor([8, 768, 1, 1536])]; tensor obj_145_end_mask_0 = const()[name = string("obj_145_end_mask_0"), val = tensor([false, true, true, true])]; tensor obj_145_cast_fp16 = slice_by_index(begin = obj_145_begin_0, end = obj_145_end_0, end_mask = obj_145_end_mask_0, x = read_state_3)[name = string("obj_145_cast_fp16")]; int32 var_2085 = const()[name = string("op_2085"), val = int32(3)]; tensor out_43_axes_0 = const()[name = string("out_43_axes_0"), val = tensor([1])]; fp16 var_2110_to_fp16 = const()[name = string("op_2110_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_43_cast_fp16 = layer_norm(axes = out_43_axes_0, epsilon = var_2110_to_fp16, x = inputs_43_cast_fp16)[name = string("out_43_cast_fp16")]; tensor obj_133_gamma_0_to_fp16 = const()[name = string("obj_133_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118068288)))]; tensor obj_133_beta_0_to_fp16 = const()[name = string("obj_133_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118069888)))]; fp16 obj_133_epsilon_0_to_fp16 = const()[name = string("obj_133_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; tensor obj_133_cast_fp16 = batch_norm(beta = obj_133_beta_0_to_fp16, epsilon = obj_133_epsilon_0_to_fp16, gamma = obj_133_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_43_cast_fp16)[name = string("obj_133_cast_fp16")]; string var_2132_pad_type_0 = const()[name = string("op_2132_pad_type_0"), val = string("valid")]; tensor var_2132_strides_0 = const()[name = string("op_2132_strides_0"), val = tensor([1, 1])]; tensor var_2132_pad_0 = const()[name = string("op_2132_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2132_dilations_0 = const()[name = string("op_2132_dilations_0"), val = tensor([1, 1])]; int32 var_2132_groups_0 = const()[name = string("op_2132_groups_0"), val = int32(1)]; tensor layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118071488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118366464))))[name = string("layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_7_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_7_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118366592)))]; tensor var_2132_cast_fp16 = conv(bias = layers_7_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2132_dilations_0, groups = var_2132_groups_0, pad = var_2132_pad_0, pad_type = var_2132_pad_type_0, strides = var_2132_strides_0, weight = layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_133_cast_fp16)[name = string("op_2132_cast_fp16")]; string var_2138_pad_type_0 = const()[name = string("op_2138_pad_type_0"), val = string("valid")]; tensor var_2138_strides_0 = const()[name = string("op_2138_strides_0"), val = tensor([1, 1])]; tensor var_2138_pad_0 = const()[name = string("op_2138_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2138_dilations_0 = const()[name = string("op_2138_dilations_0"), val = tensor([1, 1])]; int32 var_2138_groups_0 = const()[name = string("op_2138_groups_0"), val = int32(1)]; tensor layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118374336))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118368192))))[name = string("layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_2138_cast_fp16 = conv(dilations = var_2138_dilations_0, groups = var_2138_groups_0, pad = var_2138_pad_0, pad_type = var_2138_pad_type_0, strides = var_2138_strides_0, weight = layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_133_cast_fp16)[name = string("op_2138_cast_fp16")]; tensor query_29_cast_fp16 = add(x = var_2132_cast_fp16, y = var_2138_cast_fp16)[name = string("query_29_cast_fp16")]; string var_2147_pad_type_0 = const()[name = string("op_2147_pad_type_0"), val = string("valid")]; tensor var_2147_strides_0 = const()[name = string("op_2147_strides_0"), val = tensor([1, 1])]; tensor var_2147_pad_0 = const()[name = string("op_2147_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2147_dilations_0 = const()[name = string("op_2147_dilations_0"), val = tensor([1, 1])]; int32 var_2147_groups_0 = const()[name = string("op_2147_groups_0"), val = int32(1)]; tensor layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118448128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118743104))))[name = string("layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; tensor var_2147_cast_fp16 = conv(dilations = var_2147_dilations_0, groups = var_2147_groups_0, pad = var_2147_pad_0, pad_type = var_2147_pad_type_0, strides = var_2147_strides_0, weight = layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_133_cast_fp16)[name = string("op_2147_cast_fp16")]; string var_2153_pad_type_0 = const()[name = string("op_2153_pad_type_0"), val = string("valid")]; tensor var_2153_strides_0 = const()[name = string("op_2153_strides_0"), val = tensor([1, 1])]; tensor var_2153_pad_0 = const()[name = string("op_2153_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2153_dilations_0 = const()[name = string("op_2153_dilations_0"), val = tensor([1, 1])]; int32 var_2153_groups_0 = const()[name = string("op_2153_groups_0"), val = int32(1)]; tensor layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118749696))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118743232))))[name = string("layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_2153_cast_fp16 = conv(dilations = var_2153_dilations_0, groups = var_2153_groups_0, pad = var_2153_pad_0, pad_type = var_2153_pad_type_0, strides = var_2153_strides_0, weight = layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_133_cast_fp16)[name = string("op_2153_cast_fp16")]; tensor current_key_15_cast_fp16 = add(x = var_2147_cast_fp16, y = var_2153_cast_fp16)[name = string("current_key_15_cast_fp16")]; string var_2163_pad_type_0 = const()[name = string("op_2163_pad_type_0"), val = string("valid")]; tensor var_2163_strides_0 = const()[name = string("op_2163_strides_0"), val = tensor([1, 1])]; tensor var_2163_pad_0 = const()[name = string("op_2163_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2163_dilations_0 = const()[name = string("op_2163_dilations_0"), val = tensor([1, 1])]; int32 var_2163_groups_0 = const()[name = string("op_2163_groups_0"), val = int32(1)]; tensor layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118823488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119118464))))[name = string("layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_7_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_7_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119118592)))]; tensor var_2163_cast_fp16 = conv(bias = layers_7_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2163_dilations_0, groups = var_2163_groups_0, pad = var_2163_pad_0, pad_type = var_2163_pad_type_0, strides = var_2163_strides_0, weight = layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_133_cast_fp16)[name = string("op_2163_cast_fp16")]; string var_2169_pad_type_0 = const()[name = string("op_2169_pad_type_0"), val = string("valid")]; tensor var_2169_strides_0 = const()[name = string("op_2169_strides_0"), val = tensor([1, 1])]; tensor var_2169_pad_0 = const()[name = string("op_2169_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2169_dilations_0 = const()[name = string("op_2169_dilations_0"), val = tensor([1, 1])]; int32 var_2169_groups_0 = const()[name = string("op_2169_groups_0"), val = int32(1)]; tensor layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119125184))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119120192))))[name = string("layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_2169_cast_fp16 = conv(dilations = var_2169_dilations_0, groups = var_2169_groups_0, pad = var_2169_pad_0, pad_type = var_2169_pad_type_0, strides = var_2169_strides_0, weight = layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_133_cast_fp16)[name = string("op_2169_cast_fp16")]; tensor current_value_15_cast_fp16 = add(x = var_2163_cast_fp16, y = var_2169_cast_fp16)[name = string("current_value_15_cast_fp16")]; tensor var_2175_cast_fp16 = mul(x = current_key_15_cast_fp16, y = var_202_cast_fp16)[name = string("op_2175_cast_fp16")]; tensor key_15_cast_fp16 = add(x = var_71_cast_fp16_7, y = var_2175_cast_fp16)[name = string("key_15_cast_fp16")]; tensor var_2177_cast_fp16 = mul(x = current_value_15_cast_fp16, y = var_202_cast_fp16)[name = string("op_2177_cast_fp16")]; tensor value_15_cast_fp16 = add(x = var_86_cast_fp16_7, y = var_2177_cast_fp16)[name = string("value_15_cast_fp16")]; tensor var_2180 = const()[name = string("op_2180"), val = tensor([1, 12, 64, -1])]; tensor mh_q_29_cast_fp16 = reshape(shape = var_2180, x = query_29_cast_fp16)[name = string("mh_q_29_cast_fp16")]; fp16 var_2182_to_fp16 = const()[name = string("op_2182_to_fp16"), val = fp16(0x1p-3)]; tensor var_2183_cast_fp16 = mul(x = mh_q_29_cast_fp16, y = var_2182_to_fp16)[name = string("op_2183_cast_fp16")]; tensor var_2184 = const()[name = string("op_2184"), val = tensor([1, 12, 64, -1])]; tensor var_2185_cast_fp16 = reshape(shape = var_2184, x = key_15_cast_fp16)[name = string("op_2185_cast_fp16")]; bool mh_w_57_transpose_x_0 = const()[name = string("mh_w_57_transpose_x_0"), val = bool(true)]; bool mh_w_57_transpose_y_0 = const()[name = string("mh_w_57_transpose_y_0"), val = bool(false)]; tensor mh_w_57_cast_fp16 = matmul(transpose_x = mh_w_57_transpose_x_0, transpose_y = mh_w_57_transpose_y_0, x = var_2183_cast_fp16, y = var_2185_cast_fp16)[name = string("mh_w_57_cast_fp16")]; tensor mh_w_59_cast_fp16 = add(x = mh_w_57_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_59_cast_fp16")]; tensor var_2193_cast_fp16 = softmax(axis = var_2085, x = mh_w_59_cast_fp16)[name = string("op_2193_cast_fp16")]; tensor var_2194 = const()[name = string("op_2194"), val = tensor([1, 12, 64, -1])]; tensor var_2195_cast_fp16 = reshape(shape = var_2194, x = value_15_cast_fp16)[name = string("op_2195_cast_fp16")]; bool attn_29_transpose_x_0 = const()[name = string("attn_29_transpose_x_0"), val = bool(false)]; bool attn_29_transpose_y_0 = const()[name = string("attn_29_transpose_y_0"), val = bool(true)]; tensor attn_29_cast_fp16 = matmul(transpose_x = attn_29_transpose_x_0, transpose_y = attn_29_transpose_y_0, x = var_2195_cast_fp16, y = var_2193_cast_fp16)[name = string("attn_29_cast_fp16")]; tensor var_2198 = const()[name = string("op_2198"), val = tensor([1, 768, 1, -1])]; tensor input_71_cast_fp16 = reshape(shape = var_2198, x = attn_29_cast_fp16)[name = string("input_71_cast_fp16")]; string var_2208_pad_type_0 = const()[name = string("op_2208_pad_type_0"), val = string("valid")]; tensor var_2208_strides_0 = const()[name = string("op_2208_strides_0"), val = tensor([1, 1])]; tensor var_2208_pad_0 = const()[name = string("op_2208_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2208_dilations_0 = const()[name = string("op_2208_dilations_0"), val = tensor([1, 1])]; int32 var_2208_groups_0 = const()[name = string("op_2208_groups_0"), val = int32(1)]; tensor layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119198976))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119493952))))[name = string("layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_7_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_7_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119494080)))]; tensor var_2208_cast_fp16 = conv(bias = layers_7_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2208_dilations_0, groups = var_2208_groups_0, pad = var_2208_pad_0, pad_type = var_2208_pad_type_0, strides = var_2208_strides_0, weight = layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_71_cast_fp16)[name = string("op_2208_cast_fp16")]; string var_2214_pad_type_0 = const()[name = string("op_2214_pad_type_0"), val = string("valid")]; tensor var_2214_strides_0 = const()[name = string("op_2214_strides_0"), val = tensor([1, 1])]; tensor var_2214_pad_0 = const()[name = string("op_2214_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2214_dilations_0 = const()[name = string("op_2214_dilations_0"), val = tensor([1, 1])]; int32 var_2214_groups_0 = const()[name = string("op_2214_groups_0"), val = int32(1)]; tensor layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119501120))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119495680))))[name = string("layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_2214_cast_fp16 = conv(dilations = var_2214_dilations_0, groups = var_2214_groups_0, pad = var_2214_pad_0, pad_type = var_2214_pad_type_0, strides = var_2214_strides_0, weight = layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_71_cast_fp16)[name = string("op_2214_cast_fp16")]; tensor obj_139_cast_fp16 = add(x = var_2208_cast_fp16, y = var_2214_cast_fp16)[name = string("obj_139_cast_fp16")]; tensor inputs_45_cast_fp16 = add(x = inputs_43_cast_fp16, y = obj_139_cast_fp16)[name = string("inputs_45_cast_fp16")]; tensor out_45_axes_0 = const()[name = string("out_45_axes_0"), val = tensor([1])]; fp16 var_2229_to_fp16 = const()[name = string("op_2229_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_45_cast_fp16 = layer_norm(axes = out_45_axes_0, epsilon = var_2229_to_fp16, x = inputs_45_cast_fp16)[name = string("out_45_cast_fp16")]; tensor obj_141_gamma_0_to_fp16 = const()[name = string("obj_141_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119574912)))]; tensor obj_141_beta_0_to_fp16 = const()[name = string("obj_141_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119576512)))]; fp16 obj_141_epsilon_0_to_fp16 = const()[name = string("obj_141_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; tensor obj_141_cast_fp16 = batch_norm(beta = obj_141_beta_0_to_fp16, epsilon = obj_141_epsilon_0_to_fp16, gamma = obj_141_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_45_cast_fp16)[name = string("obj_141_cast_fp16")]; string var_2249_pad_type_0 = const()[name = string("op_2249_pad_type_0"), val = string("valid")]; tensor var_2249_strides_0 = const()[name = string("op_2249_strides_0"), val = tensor([1, 1])]; tensor var_2249_pad_0 = const()[name = string("op_2249_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2249_dilations_0 = const()[name = string("op_2249_dilations_0"), val = tensor([1, 1])]; int32 var_2249_groups_0 = const()[name = string("op_2249_groups_0"), val = int32(1)]; tensor layers_7_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119578112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119873088))))[name = string("layers_7_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_7_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_7_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119873216)))]; tensor var_2249_cast_fp16 = conv(bias = layers_7_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2249_dilations_0, groups = var_2249_groups_0, pad = var_2249_pad_0, pad_type = var_2249_pad_type_0, strides = var_2249_strides_0, weight = layers_7_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_141_cast_fp16)[name = string("op_2249_cast_fp16")]; string var_2255_pad_type_0 = const()[name = string("op_2255_pad_type_0"), val = string("valid")]; tensor var_2255_strides_0 = const()[name = string("op_2255_strides_0"), val = tensor([1, 1])]; tensor var_2255_pad_0 = const()[name = string("op_2255_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2255_dilations_0 = const()[name = string("op_2255_dilations_0"), val = tensor([1, 1])]; int32 var_2255_groups_0 = const()[name = string("op_2255_groups_0"), val = int32(1)]; tensor layers_7_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119881664))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119874816))))[name = string("layers_7_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_2255_cast_fp16 = conv(dilations = var_2255_dilations_0, groups = var_2255_groups_0, pad = var_2255_pad_0, pad_type = var_2255_pad_type_0, strides = var_2255_strides_0, weight = layers_7_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_141_cast_fp16)[name = string("op_2255_cast_fp16")]; tensor query_31_cast_fp16 = add(x = var_2249_cast_fp16, y = var_2255_cast_fp16)[name = string("query_31_cast_fp16")]; tensor var_2258 = const()[name = string("op_2258"), val = tensor([1, 12, 64, -1])]; tensor mh_q_31_cast_fp16 = reshape(shape = var_2258, x = query_31_cast_fp16)[name = string("mh_q_31_cast_fp16")]; fp16 var_2260_to_fp16 = const()[name = string("op_2260_to_fp16"), val = fp16(0x1p-3)]; tensor var_2261_cast_fp16 = mul(x = mh_q_31_cast_fp16, y = var_2260_to_fp16)[name = string("op_2261_cast_fp16")]; tensor var_2262 = const()[name = string("op_2262"), val = tensor([1, 12, 64, -1])]; tensor var_2263_cast_fp16 = reshape(shape = var_2262, x = obj_143_cast_fp16)[name = string("op_2263_cast_fp16")]; bool mh_w_61_transpose_x_0 = const()[name = string("mh_w_61_transpose_x_0"), val = bool(true)]; bool mh_w_61_transpose_y_0 = const()[name = string("mh_w_61_transpose_y_0"), val = bool(false)]; tensor mh_w_61_cast_fp16 = matmul(transpose_x = mh_w_61_transpose_x_0, transpose_y = mh_w_61_transpose_y_0, x = var_2261_cast_fp16, y = var_2263_cast_fp16)[name = string("mh_w_61_cast_fp16")]; tensor mh_w_63_cast_fp16 = add(x = mh_w_61_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_63_cast_fp16")]; tensor obj_149_cast_fp16 = softmax(axis = var_2085, x = mh_w_63_cast_fp16)[name = string("obj_149_cast_fp16")]; tensor var_2272 = const()[name = string("op_2272"), val = tensor([1, 12, 64, -1])]; tensor var_2273_cast_fp16 = reshape(shape = var_2272, x = obj_145_cast_fp16)[name = string("op_2273_cast_fp16")]; bool attn_31_transpose_x_0 = const()[name = string("attn_31_transpose_x_0"), val = bool(false)]; bool attn_31_transpose_y_0 = const()[name = string("attn_31_transpose_y_0"), val = bool(true)]; tensor attn_31_cast_fp16 = matmul(transpose_x = attn_31_transpose_x_0, transpose_y = attn_31_transpose_y_0, x = var_2273_cast_fp16, y = obj_149_cast_fp16)[name = string("attn_31_cast_fp16")]; tensor var_2276 = const()[name = string("op_2276"), val = tensor([1, 768, 1, -1])]; tensor input_73_cast_fp16 = reshape(shape = var_2276, x = attn_31_cast_fp16)[name = string("input_73_cast_fp16")]; string var_2286_pad_type_0 = const()[name = string("op_2286_pad_type_0"), val = string("valid")]; tensor var_2286_strides_0 = const()[name = string("op_2286_strides_0"), val = tensor([1, 1])]; tensor var_2286_pad_0 = const()[name = string("op_2286_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2286_dilations_0 = const()[name = string("op_2286_dilations_0"), val = tensor([1, 1])]; int32 var_2286_groups_0 = const()[name = string("op_2286_groups_0"), val = int32(1)]; tensor layers_7_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119955456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120250432))))[name = string("layers_7_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_7_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_7_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120250560)))]; tensor var_2286_cast_fp16 = conv(bias = layers_7_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2286_dilations_0, groups = var_2286_groups_0, pad = var_2286_pad_0, pad_type = var_2286_pad_type_0, strides = var_2286_strides_0, weight = layers_7_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_73_cast_fp16)[name = string("op_2286_cast_fp16")]; string var_2292_pad_type_0 = const()[name = string("op_2292_pad_type_0"), val = string("valid")]; tensor var_2292_strides_0 = const()[name = string("op_2292_strides_0"), val = tensor([1, 1])]; tensor var_2292_pad_0 = const()[name = string("op_2292_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2292_dilations_0 = const()[name = string("op_2292_dilations_0"), val = tensor([1, 1])]; int32 var_2292_groups_0 = const()[name = string("op_2292_groups_0"), val = int32(1)]; tensor layers_7_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120257920))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120252160))))[name = string("layers_7_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_2292_cast_fp16 = conv(dilations = var_2292_dilations_0, groups = var_2292_groups_0, pad = var_2292_pad_0, pad_type = var_2292_pad_type_0, strides = var_2292_strides_0, weight = layers_7_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_73_cast_fp16)[name = string("op_2292_cast_fp16")]; tensor obj_147_cast_fp16 = add(x = var_2286_cast_fp16, y = var_2292_cast_fp16)[name = string("obj_147_cast_fp16")]; tensor inputs_47_cast_fp16 = add(x = inputs_45_cast_fp16, y = obj_147_cast_fp16)[name = string("inputs_47_cast_fp16")]; tensor out_47_axes_0 = const()[name = string("out_47_axes_0"), val = tensor([1])]; fp16 var_2306_to_fp16 = const()[name = string("op_2306_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_47_cast_fp16 = layer_norm(axes = out_47_axes_0, epsilon = var_2306_to_fp16, x = inputs_47_cast_fp16)[name = string("out_47_cast_fp16")]; tensor input_75_gamma_0_to_fp16 = const()[name = string("input_75_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120331712)))]; tensor input_75_beta_0_to_fp16 = const()[name = string("input_75_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120333312)))]; fp16 input_75_epsilon_0_to_fp16 = const()[name = string("input_75_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; tensor input_75_cast_fp16 = batch_norm(beta = input_75_beta_0_to_fp16, epsilon = input_75_epsilon_0_to_fp16, gamma = input_75_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_47_cast_fp16)[name = string("input_75_cast_fp16")]; string var_2324_pad_type_0 = const()[name = string("op_2324_pad_type_0"), val = string("valid")]; tensor var_2324_strides_0 = const()[name = string("op_2324_strides_0"), val = tensor([1, 1])]; tensor var_2324_pad_0 = const()[name = string("op_2324_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2324_dilations_0 = const()[name = string("op_2324_dilations_0"), val = tensor([1, 1])]; int32 var_2324_groups_0 = const()[name = string("op_2324_groups_0"), val = int32(1)]; tensor layers_7_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120334912))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121514624))))[name = string("layers_7_fc1_inlier_module_weight_to_fp16_palettized")]; tensor layers_7_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_7_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121514752)))]; tensor var_2324_cast_fp16 = conv(bias = layers_7_fc1_inlier_module_bias_to_fp16, dilations = var_2324_dilations_0, groups = var_2324_groups_0, pad = var_2324_pad_0, pad_type = var_2324_pad_type_0, strides = var_2324_strides_0, weight = layers_7_fc1_inlier_module_weight_to_fp16_palettized, x = input_75_cast_fp16)[name = string("op_2324_cast_fp16")]; string var_2330_pad_type_0 = const()[name = string("op_2330_pad_type_0"), val = string("valid")]; tensor var_2330_strides_0 = const()[name = string("op_2330_strides_0"), val = tensor([1, 1])]; tensor var_2330_pad_0 = const()[name = string("op_2330_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2330_dilations_0 = const()[name = string("op_2330_dilations_0"), val = tensor([1, 1])]; int32 var_2330_groups_0 = const()[name = string("op_2330_groups_0"), val = int32(1)]; tensor layers_7_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121540160))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121520960))))[name = string("layers_7_fc1_outlier_module_weight_to_fp16_sparsified")]; tensor var_2330_cast_fp16 = conv(dilations = var_2330_dilations_0, groups = var_2330_groups_0, pad = var_2330_pad_0, pad_type = var_2330_pad_type_0, strides = var_2330_strides_0, weight = layers_7_fc1_outlier_module_weight_to_fp16_sparsified, x = input_75_cast_fp16)[name = string("op_2330_cast_fp16")]; tensor input_77_cast_fp16 = add(x = var_2324_cast_fp16, y = var_2330_cast_fp16)[name = string("input_77_cast_fp16")]; string input_79_mode_0 = const()[name = string("input_79_mode_0"), val = string("EXACT")]; tensor input_79_cast_fp16 = gelu(mode = input_79_mode_0, x = input_77_cast_fp16)[name = string("input_79_cast_fp16")]; string var_2341_pad_type_0 = const()[name = string("op_2341_pad_type_0"), val = string("valid")]; tensor var_2341_strides_0 = const()[name = string("op_2341_strides_0"), val = tensor([1, 1])]; tensor var_2341_pad_0 = const()[name = string("op_2341_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2341_dilations_0 = const()[name = string("op_2341_dilations_0"), val = tensor([1, 1])]; int32 var_2341_groups_0 = const()[name = string("op_2341_groups_0"), val = int32(1)]; tensor layers_7_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121835136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123014848))))[name = string("layers_7_fc2_inlier_module_weight_to_fp16_palettized")]; tensor layers_7_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_7_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123014976)))]; tensor var_2341_cast_fp16 = conv(bias = layers_7_fc2_inlier_module_bias_to_fp16, dilations = var_2341_dilations_0, groups = var_2341_groups_0, pad = var_2341_pad_0, pad_type = var_2341_pad_type_0, strides = var_2341_strides_0, weight = layers_7_fc2_inlier_module_weight_to_fp16_palettized, x = input_79_cast_fp16)[name = string("op_2341_cast_fp16")]; string var_2347_pad_type_0 = const()[name = string("op_2347_pad_type_0"), val = string("valid")]; tensor var_2347_strides_0 = const()[name = string("op_2347_strides_0"), val = tensor([1, 1])]; tensor var_2347_pad_0 = const()[name = string("op_2347_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2347_dilations_0 = const()[name = string("op_2347_dilations_0"), val = tensor([1, 1])]; int32 var_2347_groups_0 = const()[name = string("op_2347_groups_0"), val = int32(1)]; tensor layers_7_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123040960))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123016576))))[name = string("layers_7_fc2_outlier_module_weight_to_fp16_sparsified")]; tensor var_2347_cast_fp16 = conv(dilations = var_2347_dilations_0, groups = var_2347_groups_0, pad = var_2347_pad_0, pad_type = var_2347_pad_type_0, strides = var_2347_strides_0, weight = layers_7_fc2_outlier_module_weight_to_fp16_sparsified, x = input_79_cast_fp16)[name = string("op_2347_cast_fp16")]; tensor hidden_states_17_cast_fp16 = add(x = var_2341_cast_fp16, y = var_2347_cast_fp16)[name = string("hidden_states_17_cast_fp16")]; tensor inputs_49_cast_fp16 = add(x = inputs_47_cast_fp16, y = hidden_states_17_cast_fp16)[name = string("inputs_49_cast_fp16")]; tensor obj_161_begin_0 = const()[name = string("obj_161_begin_0"), val = tensor([8, 0, 0, 0])]; tensor obj_161_end_0 = const()[name = string("obj_161_end_0"), val = tensor([9, 768, 1, 1536])]; tensor obj_161_end_mask_0 = const()[name = string("obj_161_end_mask_0"), val = tensor([false, true, true, true])]; tensor obj_161_cast_fp16 = slice_by_index(begin = obj_161_begin_0, end = obj_161_end_0, end_mask = obj_161_end_mask_0, x = read_state_2)[name = string("obj_161_cast_fp16")]; tensor obj_163_begin_0 = const()[name = string("obj_163_begin_0"), val = tensor([8, 0, 0, 0])]; tensor obj_163_end_0 = const()[name = string("obj_163_end_0"), val = tensor([9, 768, 1, 1536])]; tensor obj_163_end_mask_0 = const()[name = string("obj_163_end_mask_0"), val = tensor([false, true, true, true])]; tensor obj_163_cast_fp16 = slice_by_index(begin = obj_163_begin_0, end = obj_163_end_0, end_mask = obj_163_end_mask_0, x = read_state_3)[name = string("obj_163_cast_fp16")]; int32 var_2370 = const()[name = string("op_2370"), val = int32(3)]; tensor out_49_axes_0 = const()[name = string("out_49_axes_0"), val = tensor([1])]; fp16 var_2395_to_fp16 = const()[name = string("op_2395_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_49_cast_fp16 = layer_norm(axes = out_49_axes_0, epsilon = var_2395_to_fp16, x = inputs_49_cast_fp16)[name = string("out_49_cast_fp16")]; tensor obj_151_gamma_0_to_fp16 = const()[name = string("obj_151_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123335936)))]; tensor obj_151_beta_0_to_fp16 = const()[name = string("obj_151_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123337536)))]; fp16 obj_151_epsilon_0_to_fp16 = const()[name = string("obj_151_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; tensor obj_151_cast_fp16 = batch_norm(beta = obj_151_beta_0_to_fp16, epsilon = obj_151_epsilon_0_to_fp16, gamma = obj_151_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_49_cast_fp16)[name = string("obj_151_cast_fp16")]; string var_2417_pad_type_0 = const()[name = string("op_2417_pad_type_0"), val = string("valid")]; tensor var_2417_strides_0 = const()[name = string("op_2417_strides_0"), val = tensor([1, 1])]; tensor var_2417_pad_0 = const()[name = string("op_2417_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2417_dilations_0 = const()[name = string("op_2417_dilations_0"), val = tensor([1, 1])]; int32 var_2417_groups_0 = const()[name = string("op_2417_groups_0"), val = int32(1)]; tensor layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123339136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123634112))))[name = string("layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_8_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_8_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123634240)))]; tensor var_2417_cast_fp16 = conv(bias = layers_8_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2417_dilations_0, groups = var_2417_groups_0, pad = var_2417_pad_0, pad_type = var_2417_pad_type_0, strides = var_2417_strides_0, weight = layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_151_cast_fp16)[name = string("op_2417_cast_fp16")]; string var_2423_pad_type_0 = const()[name = string("op_2423_pad_type_0"), val = string("valid")]; tensor var_2423_strides_0 = const()[name = string("op_2423_strides_0"), val = tensor([1, 1])]; tensor var_2423_pad_0 = const()[name = string("op_2423_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2423_dilations_0 = const()[name = string("op_2423_dilations_0"), val = tensor([1, 1])]; int32 var_2423_groups_0 = const()[name = string("op_2423_groups_0"), val = int32(1)]; tensor layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123641792))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123635840))))[name = string("layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_2423_cast_fp16 = conv(dilations = var_2423_dilations_0, groups = var_2423_groups_0, pad = var_2423_pad_0, pad_type = var_2423_pad_type_0, strides = var_2423_strides_0, weight = layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_151_cast_fp16)[name = string("op_2423_cast_fp16")]; tensor query_33_cast_fp16 = add(x = var_2417_cast_fp16, y = var_2423_cast_fp16)[name = string("query_33_cast_fp16")]; string var_2432_pad_type_0 = const()[name = string("op_2432_pad_type_0"), val = string("valid")]; tensor var_2432_strides_0 = const()[name = string("op_2432_strides_0"), val = tensor([1, 1])]; tensor var_2432_pad_0 = const()[name = string("op_2432_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2432_dilations_0 = const()[name = string("op_2432_dilations_0"), val = tensor([1, 1])]; int32 var_2432_groups_0 = const()[name = string("op_2432_groups_0"), val = int32(1)]; tensor layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123715584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124010560))))[name = string("layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; tensor var_2432_cast_fp16 = conv(dilations = var_2432_dilations_0, groups = var_2432_groups_0, pad = var_2432_pad_0, pad_type = var_2432_pad_type_0, strides = var_2432_strides_0, weight = layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_151_cast_fp16)[name = string("op_2432_cast_fp16")]; string var_2438_pad_type_0 = const()[name = string("op_2438_pad_type_0"), val = string("valid")]; tensor var_2438_strides_0 = const()[name = string("op_2438_strides_0"), val = tensor([1, 1])]; tensor var_2438_pad_0 = const()[name = string("op_2438_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2438_dilations_0 = const()[name = string("op_2438_dilations_0"), val = tensor([1, 1])]; int32 var_2438_groups_0 = const()[name = string("op_2438_groups_0"), val = int32(1)]; tensor layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124017600))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124010688))))[name = string("layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_2438_cast_fp16 = conv(dilations = var_2438_dilations_0, groups = var_2438_groups_0, pad = var_2438_pad_0, pad_type = var_2438_pad_type_0, strides = var_2438_strides_0, weight = layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_151_cast_fp16)[name = string("op_2438_cast_fp16")]; tensor current_key_17_cast_fp16 = add(x = var_2432_cast_fp16, y = var_2438_cast_fp16)[name = string("current_key_17_cast_fp16")]; string var_2448_pad_type_0 = const()[name = string("op_2448_pad_type_0"), val = string("valid")]; tensor var_2448_strides_0 = const()[name = string("op_2448_strides_0"), val = tensor([1, 1])]; tensor var_2448_pad_0 = const()[name = string("op_2448_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2448_dilations_0 = const()[name = string("op_2448_dilations_0"), val = tensor([1, 1])]; int32 var_2448_groups_0 = const()[name = string("op_2448_groups_0"), val = int32(1)]; tensor layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124091392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124386368))))[name = string("layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_8_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_8_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124386496)))]; tensor var_2448_cast_fp16 = conv(bias = layers_8_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2448_dilations_0, groups = var_2448_groups_0, pad = var_2448_pad_0, pad_type = var_2448_pad_type_0, strides = var_2448_strides_0, weight = layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_151_cast_fp16)[name = string("op_2448_cast_fp16")]; string var_2454_pad_type_0 = const()[name = string("op_2454_pad_type_0"), val = string("valid")]; tensor var_2454_strides_0 = const()[name = string("op_2454_strides_0"), val = tensor([1, 1])]; tensor var_2454_pad_0 = const()[name = string("op_2454_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2454_dilations_0 = const()[name = string("op_2454_dilations_0"), val = tensor([1, 1])]; int32 var_2454_groups_0 = const()[name = string("op_2454_groups_0"), val = int32(1)]; tensor layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124394432))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124388096))))[name = string("layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_2454_cast_fp16 = conv(dilations = var_2454_dilations_0, groups = var_2454_groups_0, pad = var_2454_pad_0, pad_type = var_2454_pad_type_0, strides = var_2454_strides_0, weight = layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_151_cast_fp16)[name = string("op_2454_cast_fp16")]; tensor current_value_17_cast_fp16 = add(x = var_2448_cast_fp16, y = var_2454_cast_fp16)[name = string("current_value_17_cast_fp16")]; tensor var_2460_cast_fp16 = mul(x = current_key_17_cast_fp16, y = var_202_cast_fp16)[name = string("op_2460_cast_fp16")]; tensor key_17_cast_fp16 = add(x = var_71_cast_fp16_8, y = var_2460_cast_fp16)[name = string("key_17_cast_fp16")]; tensor var_2462_cast_fp16 = mul(x = current_value_17_cast_fp16, y = var_202_cast_fp16)[name = string("op_2462_cast_fp16")]; tensor value_17_cast_fp16 = add(x = var_86_cast_fp16_8, y = var_2462_cast_fp16)[name = string("value_17_cast_fp16")]; tensor var_2465 = const()[name = string("op_2465"), val = tensor([1, 12, 64, -1])]; tensor mh_q_33_cast_fp16 = reshape(shape = var_2465, x = query_33_cast_fp16)[name = string("mh_q_33_cast_fp16")]; fp16 var_2467_to_fp16 = const()[name = string("op_2467_to_fp16"), val = fp16(0x1p-3)]; tensor var_2468_cast_fp16 = mul(x = mh_q_33_cast_fp16, y = var_2467_to_fp16)[name = string("op_2468_cast_fp16")]; tensor var_2469 = const()[name = string("op_2469"), val = tensor([1, 12, 64, -1])]; tensor var_2470_cast_fp16 = reshape(shape = var_2469, x = key_17_cast_fp16)[name = string("op_2470_cast_fp16")]; bool mh_w_65_transpose_x_0 = const()[name = string("mh_w_65_transpose_x_0"), val = bool(true)]; bool mh_w_65_transpose_y_0 = const()[name = string("mh_w_65_transpose_y_0"), val = bool(false)]; tensor mh_w_65_cast_fp16 = matmul(transpose_x = mh_w_65_transpose_x_0, transpose_y = mh_w_65_transpose_y_0, x = var_2468_cast_fp16, y = var_2470_cast_fp16)[name = string("mh_w_65_cast_fp16")]; tensor mh_w_67_cast_fp16 = add(x = mh_w_65_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_67_cast_fp16")]; tensor var_2478_cast_fp16 = softmax(axis = var_2370, x = mh_w_67_cast_fp16)[name = string("op_2478_cast_fp16")]; tensor var_2479 = const()[name = string("op_2479"), val = tensor([1, 12, 64, -1])]; tensor var_2480_cast_fp16 = reshape(shape = var_2479, x = value_17_cast_fp16)[name = string("op_2480_cast_fp16")]; bool attn_33_transpose_x_0 = const()[name = string("attn_33_transpose_x_0"), val = bool(false)]; bool attn_33_transpose_y_0 = const()[name = string("attn_33_transpose_y_0"), val = bool(true)]; tensor attn_33_cast_fp16 = matmul(transpose_x = attn_33_transpose_x_0, transpose_y = attn_33_transpose_y_0, x = var_2480_cast_fp16, y = var_2478_cast_fp16)[name = string("attn_33_cast_fp16")]; tensor var_2483 = const()[name = string("op_2483"), val = tensor([1, 768, 1, -1])]; tensor input_81_cast_fp16 = reshape(shape = var_2483, x = attn_33_cast_fp16)[name = string("input_81_cast_fp16")]; string var_2493_pad_type_0 = const()[name = string("op_2493_pad_type_0"), val = string("valid")]; tensor var_2493_strides_0 = const()[name = string("op_2493_strides_0"), val = tensor([1, 1])]; tensor var_2493_pad_0 = const()[name = string("op_2493_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2493_dilations_0 = const()[name = string("op_2493_dilations_0"), val = tensor([1, 1])]; int32 var_2493_groups_0 = const()[name = string("op_2493_groups_0"), val = int32(1)]; tensor layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124468224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124763200))))[name = string("layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_8_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_8_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124763328)))]; tensor var_2493_cast_fp16 = conv(bias = layers_8_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2493_dilations_0, groups = var_2493_groups_0, pad = var_2493_pad_0, pad_type = var_2493_pad_type_0, strides = var_2493_strides_0, weight = layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_81_cast_fp16)[name = string("op_2493_cast_fp16")]; string var_2499_pad_type_0 = const()[name = string("op_2499_pad_type_0"), val = string("valid")]; tensor var_2499_strides_0 = const()[name = string("op_2499_strides_0"), val = tensor([1, 1])]; tensor var_2499_pad_0 = const()[name = string("op_2499_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2499_dilations_0 = const()[name = string("op_2499_dilations_0"), val = tensor([1, 1])]; int32 var_2499_groups_0 = const()[name = string("op_2499_groups_0"), val = int32(1)]; tensor layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124771328))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124764928))))[name = string("layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_2499_cast_fp16 = conv(dilations = var_2499_dilations_0, groups = var_2499_groups_0, pad = var_2499_pad_0, pad_type = var_2499_pad_type_0, strides = var_2499_strides_0, weight = layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_81_cast_fp16)[name = string("op_2499_cast_fp16")]; tensor obj_157_cast_fp16 = add(x = var_2493_cast_fp16, y = var_2499_cast_fp16)[name = string("obj_157_cast_fp16")]; tensor inputs_51_cast_fp16 = add(x = inputs_49_cast_fp16, y = obj_157_cast_fp16)[name = string("inputs_51_cast_fp16")]; tensor out_51_axes_0 = const()[name = string("out_51_axes_0"), val = tensor([1])]; fp16 var_2514_to_fp16 = const()[name = string("op_2514_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_51_cast_fp16 = layer_norm(axes = out_51_axes_0, epsilon = var_2514_to_fp16, x = inputs_51_cast_fp16)[name = string("out_51_cast_fp16")]; tensor obj_159_gamma_0_to_fp16 = const()[name = string("obj_159_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124845120)))]; tensor obj_159_beta_0_to_fp16 = const()[name = string("obj_159_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124846720)))]; fp16 obj_159_epsilon_0_to_fp16 = const()[name = string("obj_159_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; tensor obj_159_cast_fp16 = batch_norm(beta = obj_159_beta_0_to_fp16, epsilon = obj_159_epsilon_0_to_fp16, gamma = obj_159_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_51_cast_fp16)[name = string("obj_159_cast_fp16")]; string var_2534_pad_type_0 = const()[name = string("op_2534_pad_type_0"), val = string("valid")]; tensor var_2534_strides_0 = const()[name = string("op_2534_strides_0"), val = tensor([1, 1])]; tensor var_2534_pad_0 = const()[name = string("op_2534_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2534_dilations_0 = const()[name = string("op_2534_dilations_0"), val = tensor([1, 1])]; int32 var_2534_groups_0 = const()[name = string("op_2534_groups_0"), val = int32(1)]; tensor layers_8_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124848320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125143296))))[name = string("layers_8_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_8_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_8_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125143424)))]; tensor var_2534_cast_fp16 = conv(bias = layers_8_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2534_dilations_0, groups = var_2534_groups_0, pad = var_2534_pad_0, pad_type = var_2534_pad_type_0, strides = var_2534_strides_0, weight = layers_8_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_159_cast_fp16)[name = string("op_2534_cast_fp16")]; string var_2540_pad_type_0 = const()[name = string("op_2540_pad_type_0"), val = string("valid")]; tensor var_2540_strides_0 = const()[name = string("op_2540_strides_0"), val = tensor([1, 1])]; tensor var_2540_pad_0 = const()[name = string("op_2540_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2540_dilations_0 = const()[name = string("op_2540_dilations_0"), val = tensor([1, 1])]; int32 var_2540_groups_0 = const()[name = string("op_2540_groups_0"), val = int32(1)]; tensor layers_8_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125150656))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125145024))))[name = string("layers_8_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_2540_cast_fp16 = conv(dilations = var_2540_dilations_0, groups = var_2540_groups_0, pad = var_2540_pad_0, pad_type = var_2540_pad_type_0, strides = var_2540_strides_0, weight = layers_8_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_159_cast_fp16)[name = string("op_2540_cast_fp16")]; tensor query_35_cast_fp16 = add(x = var_2534_cast_fp16, y = var_2540_cast_fp16)[name = string("query_35_cast_fp16")]; tensor var_2543 = const()[name = string("op_2543"), val = tensor([1, 12, 64, -1])]; tensor mh_q_35_cast_fp16 = reshape(shape = var_2543, x = query_35_cast_fp16)[name = string("mh_q_35_cast_fp16")]; fp16 var_2545_to_fp16 = const()[name = string("op_2545_to_fp16"), val = fp16(0x1p-3)]; tensor var_2546_cast_fp16 = mul(x = mh_q_35_cast_fp16, y = var_2545_to_fp16)[name = string("op_2546_cast_fp16")]; tensor var_2547 = const()[name = string("op_2547"), val = tensor([1, 12, 64, -1])]; tensor var_2548_cast_fp16 = reshape(shape = var_2547, x = obj_161_cast_fp16)[name = string("op_2548_cast_fp16")]; bool mh_w_69_transpose_x_0 = const()[name = string("mh_w_69_transpose_x_0"), val = bool(true)]; bool mh_w_69_transpose_y_0 = const()[name = string("mh_w_69_transpose_y_0"), val = bool(false)]; tensor mh_w_69_cast_fp16 = matmul(transpose_x = mh_w_69_transpose_x_0, transpose_y = mh_w_69_transpose_y_0, x = var_2546_cast_fp16, y = var_2548_cast_fp16)[name = string("mh_w_69_cast_fp16")]; tensor mh_w_71_cast_fp16 = add(x = mh_w_69_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_71_cast_fp16")]; tensor obj_167_cast_fp16 = softmax(axis = var_2370, x = mh_w_71_cast_fp16)[name = string("obj_167_cast_fp16")]; tensor var_2557 = const()[name = string("op_2557"), val = tensor([1, 12, 64, -1])]; tensor var_2558_cast_fp16 = reshape(shape = var_2557, x = obj_163_cast_fp16)[name = string("op_2558_cast_fp16")]; bool attn_35_transpose_x_0 = const()[name = string("attn_35_transpose_x_0"), val = bool(false)]; bool attn_35_transpose_y_0 = const()[name = string("attn_35_transpose_y_0"), val = bool(true)]; tensor attn_35_cast_fp16 = matmul(transpose_x = attn_35_transpose_x_0, transpose_y = attn_35_transpose_y_0, x = var_2558_cast_fp16, y = obj_167_cast_fp16)[name = string("attn_35_cast_fp16")]; tensor var_2561 = const()[name = string("op_2561"), val = tensor([1, 768, 1, -1])]; tensor input_83_cast_fp16 = reshape(shape = var_2561, x = attn_35_cast_fp16)[name = string("input_83_cast_fp16")]; string var_2571_pad_type_0 = const()[name = string("op_2571_pad_type_0"), val = string("valid")]; tensor var_2571_strides_0 = const()[name = string("op_2571_strides_0"), val = tensor([1, 1])]; tensor var_2571_pad_0 = const()[name = string("op_2571_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2571_dilations_0 = const()[name = string("op_2571_dilations_0"), val = tensor([1, 1])]; int32 var_2571_groups_0 = const()[name = string("op_2571_groups_0"), val = int32(1)]; tensor layers_8_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125224448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125519424))))[name = string("layers_8_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_8_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_8_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125519552)))]; tensor var_2571_cast_fp16 = conv(bias = layers_8_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2571_dilations_0, groups = var_2571_groups_0, pad = var_2571_pad_0, pad_type = var_2571_pad_type_0, strides = var_2571_strides_0, weight = layers_8_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_83_cast_fp16)[name = string("op_2571_cast_fp16")]; string var_2577_pad_type_0 = const()[name = string("op_2577_pad_type_0"), val = string("valid")]; tensor var_2577_strides_0 = const()[name = string("op_2577_strides_0"), val = tensor([1, 1])]; tensor var_2577_pad_0 = const()[name = string("op_2577_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2577_dilations_0 = const()[name = string("op_2577_dilations_0"), val = tensor([1, 1])]; int32 var_2577_groups_0 = const()[name = string("op_2577_groups_0"), val = int32(1)]; tensor layers_8_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125526592))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125521152))))[name = string("layers_8_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_2577_cast_fp16 = conv(dilations = var_2577_dilations_0, groups = var_2577_groups_0, pad = var_2577_pad_0, pad_type = var_2577_pad_type_0, strides = var_2577_strides_0, weight = layers_8_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_83_cast_fp16)[name = string("op_2577_cast_fp16")]; tensor obj_165_cast_fp16 = add(x = var_2571_cast_fp16, y = var_2577_cast_fp16)[name = string("obj_165_cast_fp16")]; tensor inputs_53_cast_fp16 = add(x = inputs_51_cast_fp16, y = obj_165_cast_fp16)[name = string("inputs_53_cast_fp16")]; tensor out_53_axes_0 = const()[name = string("out_53_axes_0"), val = tensor([1])]; fp16 var_2591_to_fp16 = const()[name = string("op_2591_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_53_cast_fp16 = layer_norm(axes = out_53_axes_0, epsilon = var_2591_to_fp16, x = inputs_53_cast_fp16)[name = string("out_53_cast_fp16")]; tensor input_85_gamma_0_to_fp16 = const()[name = string("input_85_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125600384)))]; tensor input_85_beta_0_to_fp16 = const()[name = string("input_85_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125601984)))]; fp16 input_85_epsilon_0_to_fp16 = const()[name = string("input_85_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; tensor input_85_cast_fp16 = batch_norm(beta = input_85_beta_0_to_fp16, epsilon = input_85_epsilon_0_to_fp16, gamma = input_85_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_53_cast_fp16)[name = string("input_85_cast_fp16")]; string var_2609_pad_type_0 = const()[name = string("op_2609_pad_type_0"), val = string("valid")]; tensor var_2609_strides_0 = const()[name = string("op_2609_strides_0"), val = tensor([1, 1])]; tensor var_2609_pad_0 = const()[name = string("op_2609_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2609_dilations_0 = const()[name = string("op_2609_dilations_0"), val = tensor([1, 1])]; int32 var_2609_groups_0 = const()[name = string("op_2609_groups_0"), val = int32(1)]; tensor layers_8_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125603584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126783296))))[name = string("layers_8_fc1_inlier_module_weight_to_fp16_palettized")]; tensor layers_8_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_8_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126783424)))]; tensor var_2609_cast_fp16 = conv(bias = layers_8_fc1_inlier_module_bias_to_fp16, dilations = var_2609_dilations_0, groups = var_2609_groups_0, pad = var_2609_pad_0, pad_type = var_2609_pad_type_0, strides = var_2609_strides_0, weight = layers_8_fc1_inlier_module_weight_to_fp16_palettized, x = input_85_cast_fp16)[name = string("op_2609_cast_fp16")]; string var_2615_pad_type_0 = const()[name = string("op_2615_pad_type_0"), val = string("valid")]; tensor var_2615_strides_0 = const()[name = string("op_2615_strides_0"), val = tensor([1, 1])]; tensor var_2615_pad_0 = const()[name = string("op_2615_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2615_dilations_0 = const()[name = string("op_2615_dilations_0"), val = tensor([1, 1])]; int32 var_2615_groups_0 = const()[name = string("op_2615_groups_0"), val = int32(1)]; tensor layers_8_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126807552))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126789632))))[name = string("layers_8_fc1_outlier_module_weight_to_fp16_sparsified")]; tensor var_2615_cast_fp16 = conv(dilations = var_2615_dilations_0, groups = var_2615_groups_0, pad = var_2615_pad_0, pad_type = var_2615_pad_type_0, strides = var_2615_strides_0, weight = layers_8_fc1_outlier_module_weight_to_fp16_sparsified, x = input_85_cast_fp16)[name = string("op_2615_cast_fp16")]; tensor input_87_cast_fp16 = add(x = var_2609_cast_fp16, y = var_2615_cast_fp16)[name = string("input_87_cast_fp16")]; string input_89_mode_0 = const()[name = string("input_89_mode_0"), val = string("EXACT")]; tensor input_89_cast_fp16 = gelu(mode = input_89_mode_0, x = input_87_cast_fp16)[name = string("input_89_cast_fp16")]; string var_2626_pad_type_0 = const()[name = string("op_2626_pad_type_0"), val = string("valid")]; tensor var_2626_strides_0 = const()[name = string("op_2626_strides_0"), val = tensor([1, 1])]; tensor var_2626_pad_0 = const()[name = string("op_2626_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2626_dilations_0 = const()[name = string("op_2626_dilations_0"), val = tensor([1, 1])]; int32 var_2626_groups_0 = const()[name = string("op_2626_groups_0"), val = int32(1)]; tensor layers_8_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127102528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128282240))))[name = string("layers_8_fc2_inlier_module_weight_to_fp16_palettized")]; tensor layers_8_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_8_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128282368)))]; tensor var_2626_cast_fp16 = conv(bias = layers_8_fc2_inlier_module_bias_to_fp16, dilations = var_2626_dilations_0, groups = var_2626_groups_0, pad = var_2626_pad_0, pad_type = var_2626_pad_type_0, strides = var_2626_strides_0, weight = layers_8_fc2_inlier_module_weight_to_fp16_palettized, x = input_89_cast_fp16)[name = string("op_2626_cast_fp16")]; string var_2632_pad_type_0 = const()[name = string("op_2632_pad_type_0"), val = string("valid")]; tensor var_2632_strides_0 = const()[name = string("op_2632_strides_0"), val = tensor([1, 1])]; tensor var_2632_pad_0 = const()[name = string("op_2632_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2632_dilations_0 = const()[name = string("op_2632_dilations_0"), val = tensor([1, 1])]; int32 var_2632_groups_0 = const()[name = string("op_2632_groups_0"), val = int32(1)]; tensor layers_8_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128308096))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128283968))))[name = string("layers_8_fc2_outlier_module_weight_to_fp16_sparsified")]; tensor var_2632_cast_fp16 = conv(dilations = var_2632_dilations_0, groups = var_2632_groups_0, pad = var_2632_pad_0, pad_type = var_2632_pad_type_0, strides = var_2632_strides_0, weight = layers_8_fc2_outlier_module_weight_to_fp16_sparsified, x = input_89_cast_fp16)[name = string("op_2632_cast_fp16")]; tensor hidden_states_19_cast_fp16 = add(x = var_2626_cast_fp16, y = var_2632_cast_fp16)[name = string("hidden_states_19_cast_fp16")]; tensor inputs_55_cast_fp16 = add(x = inputs_53_cast_fp16, y = hidden_states_19_cast_fp16)[name = string("inputs_55_cast_fp16")]; tensor obj_179_begin_0 = const()[name = string("obj_179_begin_0"), val = tensor([9, 0, 0, 0])]; tensor obj_179_end_0 = const()[name = string("obj_179_end_0"), val = tensor([10, 768, 1, 1536])]; tensor obj_179_end_mask_0 = const()[name = string("obj_179_end_mask_0"), val = tensor([false, true, true, true])]; tensor obj_179_cast_fp16 = slice_by_index(begin = obj_179_begin_0, end = obj_179_end_0, end_mask = obj_179_end_mask_0, x = read_state_2)[name = string("obj_179_cast_fp16")]; tensor obj_181_begin_0 = const()[name = string("obj_181_begin_0"), val = tensor([9, 0, 0, 0])]; tensor obj_181_end_0 = const()[name = string("obj_181_end_0"), val = tensor([10, 768, 1, 1536])]; tensor obj_181_end_mask_0 = const()[name = string("obj_181_end_mask_0"), val = tensor([false, true, true, true])]; tensor obj_181_cast_fp16 = slice_by_index(begin = obj_181_begin_0, end = obj_181_end_0, end_mask = obj_181_end_mask_0, x = read_state_3)[name = string("obj_181_cast_fp16")]; int32 var_2655 = const()[name = string("op_2655"), val = int32(3)]; tensor out_55_axes_0 = const()[name = string("out_55_axes_0"), val = tensor([1])]; fp16 var_2680_to_fp16 = const()[name = string("op_2680_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_55_cast_fp16 = layer_norm(axes = out_55_axes_0, epsilon = var_2680_to_fp16, x = inputs_55_cast_fp16)[name = string("out_55_cast_fp16")]; tensor obj_169_gamma_0_to_fp16 = const()[name = string("obj_169_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128603072)))]; tensor obj_169_beta_0_to_fp16 = const()[name = string("obj_169_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128604672)))]; fp16 obj_169_epsilon_0_to_fp16 = const()[name = string("obj_169_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; tensor obj_169_cast_fp16 = batch_norm(beta = obj_169_beta_0_to_fp16, epsilon = obj_169_epsilon_0_to_fp16, gamma = obj_169_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_55_cast_fp16)[name = string("obj_169_cast_fp16")]; string var_2702_pad_type_0 = const()[name = string("op_2702_pad_type_0"), val = string("valid")]; tensor var_2702_strides_0 = const()[name = string("op_2702_strides_0"), val = tensor([1, 1])]; tensor var_2702_pad_0 = const()[name = string("op_2702_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2702_dilations_0 = const()[name = string("op_2702_dilations_0"), val = tensor([1, 1])]; int32 var_2702_groups_0 = const()[name = string("op_2702_groups_0"), val = int32(1)]; tensor layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128606272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128901248))))[name = string("layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_9_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_9_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128901376)))]; tensor var_2702_cast_fp16 = conv(bias = layers_9_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2702_dilations_0, groups = var_2702_groups_0, pad = var_2702_pad_0, pad_type = var_2702_pad_type_0, strides = var_2702_strides_0, weight = layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_169_cast_fp16)[name = string("op_2702_cast_fp16")]; string var_2708_pad_type_0 = const()[name = string("op_2708_pad_type_0"), val = string("valid")]; tensor var_2708_strides_0 = const()[name = string("op_2708_strides_0"), val = tensor([1, 1])]; tensor var_2708_pad_0 = const()[name = string("op_2708_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2708_dilations_0 = const()[name = string("op_2708_dilations_0"), val = tensor([1, 1])]; int32 var_2708_groups_0 = const()[name = string("op_2708_groups_0"), val = int32(1)]; tensor layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128908288))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128902976))))[name = string("layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_2708_cast_fp16 = conv(dilations = var_2708_dilations_0, groups = var_2708_groups_0, pad = var_2708_pad_0, pad_type = var_2708_pad_type_0, strides = var_2708_strides_0, weight = layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_169_cast_fp16)[name = string("op_2708_cast_fp16")]; tensor query_37_cast_fp16 = add(x = var_2702_cast_fp16, y = var_2708_cast_fp16)[name = string("query_37_cast_fp16")]; string var_2717_pad_type_0 = const()[name = string("op_2717_pad_type_0"), val = string("valid")]; tensor var_2717_strides_0 = const()[name = string("op_2717_strides_0"), val = tensor([1, 1])]; tensor var_2717_pad_0 = const()[name = string("op_2717_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2717_dilations_0 = const()[name = string("op_2717_dilations_0"), val = tensor([1, 1])]; int32 var_2717_groups_0 = const()[name = string("op_2717_groups_0"), val = int32(1)]; tensor layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128982080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129277056))))[name = string("layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; tensor var_2717_cast_fp16 = conv(dilations = var_2717_dilations_0, groups = var_2717_groups_0, pad = var_2717_pad_0, pad_type = var_2717_pad_type_0, strides = var_2717_strides_0, weight = layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_169_cast_fp16)[name = string("op_2717_cast_fp16")]; string var_2723_pad_type_0 = const()[name = string("op_2723_pad_type_0"), val = string("valid")]; tensor var_2723_strides_0 = const()[name = string("op_2723_strides_0"), val = tensor([1, 1])]; tensor var_2723_pad_0 = const()[name = string("op_2723_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2723_dilations_0 = const()[name = string("op_2723_dilations_0"), val = tensor([1, 1])]; int32 var_2723_groups_0 = const()[name = string("op_2723_groups_0"), val = int32(1)]; tensor layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129282624))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129277184))))[name = string("layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_2723_cast_fp16 = conv(dilations = var_2723_dilations_0, groups = var_2723_groups_0, pad = var_2723_pad_0, pad_type = var_2723_pad_type_0, strides = var_2723_strides_0, weight = layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_169_cast_fp16)[name = string("op_2723_cast_fp16")]; tensor current_key_19_cast_fp16 = add(x = var_2717_cast_fp16, y = var_2723_cast_fp16)[name = string("current_key_19_cast_fp16")]; string var_2733_pad_type_0 = const()[name = string("op_2733_pad_type_0"), val = string("valid")]; tensor var_2733_strides_0 = const()[name = string("op_2733_strides_0"), val = tensor([1, 1])]; tensor var_2733_pad_0 = const()[name = string("op_2733_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2733_dilations_0 = const()[name = string("op_2733_dilations_0"), val = tensor([1, 1])]; int32 var_2733_groups_0 = const()[name = string("op_2733_groups_0"), val = int32(1)]; tensor layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129356416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129651392))))[name = string("layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_9_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_9_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129651520)))]; tensor var_2733_cast_fp16 = conv(bias = layers_9_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2733_dilations_0, groups = var_2733_groups_0, pad = var_2733_pad_0, pad_type = var_2733_pad_type_0, strides = var_2733_strides_0, weight = layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_169_cast_fp16)[name = string("op_2733_cast_fp16")]; string var_2739_pad_type_0 = const()[name = string("op_2739_pad_type_0"), val = string("valid")]; tensor var_2739_strides_0 = const()[name = string("op_2739_strides_0"), val = tensor([1, 1])]; tensor var_2739_pad_0 = const()[name = string("op_2739_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2739_dilations_0 = const()[name = string("op_2739_dilations_0"), val = tensor([1, 1])]; int32 var_2739_groups_0 = const()[name = string("op_2739_groups_0"), val = int32(1)]; tensor layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129659840))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129653120))))[name = string("layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_2739_cast_fp16 = conv(dilations = var_2739_dilations_0, groups = var_2739_groups_0, pad = var_2739_pad_0, pad_type = var_2739_pad_type_0, strides = var_2739_strides_0, weight = layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_169_cast_fp16)[name = string("op_2739_cast_fp16")]; tensor current_value_19_cast_fp16 = add(x = var_2733_cast_fp16, y = var_2739_cast_fp16)[name = string("current_value_19_cast_fp16")]; tensor var_2745_cast_fp16 = mul(x = current_key_19_cast_fp16, y = var_202_cast_fp16)[name = string("op_2745_cast_fp16")]; tensor key_19_cast_fp16 = add(x = var_71_cast_fp16_9, y = var_2745_cast_fp16)[name = string("key_19_cast_fp16")]; tensor var_2747_cast_fp16 = mul(x = current_value_19_cast_fp16, y = var_202_cast_fp16)[name = string("op_2747_cast_fp16")]; tensor value_19_cast_fp16 = add(x = var_86_cast_fp16_9, y = var_2747_cast_fp16)[name = string("value_19_cast_fp16")]; tensor var_2750 = const()[name = string("op_2750"), val = tensor([1, 12, 64, -1])]; tensor mh_q_37_cast_fp16 = reshape(shape = var_2750, x = query_37_cast_fp16)[name = string("mh_q_37_cast_fp16")]; fp16 var_2752_to_fp16 = const()[name = string("op_2752_to_fp16"), val = fp16(0x1p-3)]; tensor var_2753_cast_fp16 = mul(x = mh_q_37_cast_fp16, y = var_2752_to_fp16)[name = string("op_2753_cast_fp16")]; tensor var_2754 = const()[name = string("op_2754"), val = tensor([1, 12, 64, -1])]; tensor var_2755_cast_fp16 = reshape(shape = var_2754, x = key_19_cast_fp16)[name = string("op_2755_cast_fp16")]; bool mh_w_73_transpose_x_0 = const()[name = string("mh_w_73_transpose_x_0"), val = bool(true)]; bool mh_w_73_transpose_y_0 = const()[name = string("mh_w_73_transpose_y_0"), val = bool(false)]; tensor mh_w_73_cast_fp16 = matmul(transpose_x = mh_w_73_transpose_x_0, transpose_y = mh_w_73_transpose_y_0, x = var_2753_cast_fp16, y = var_2755_cast_fp16)[name = string("mh_w_73_cast_fp16")]; tensor mh_w_75_cast_fp16 = add(x = mh_w_73_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_75_cast_fp16")]; tensor var_2763_cast_fp16 = softmax(axis = var_2655, x = mh_w_75_cast_fp16)[name = string("op_2763_cast_fp16")]; tensor var_2764 = const()[name = string("op_2764"), val = tensor([1, 12, 64, -1])]; tensor var_2765_cast_fp16 = reshape(shape = var_2764, x = value_19_cast_fp16)[name = string("op_2765_cast_fp16")]; bool attn_37_transpose_x_0 = const()[name = string("attn_37_transpose_x_0"), val = bool(false)]; bool attn_37_transpose_y_0 = const()[name = string("attn_37_transpose_y_0"), val = bool(true)]; tensor attn_37_cast_fp16 = matmul(transpose_x = attn_37_transpose_x_0, transpose_y = attn_37_transpose_y_0, x = var_2765_cast_fp16, y = var_2763_cast_fp16)[name = string("attn_37_cast_fp16")]; tensor var_2768 = const()[name = string("op_2768"), val = tensor([1, 768, 1, -1])]; tensor input_91_cast_fp16 = reshape(shape = var_2768, x = attn_37_cast_fp16)[name = string("input_91_cast_fp16")]; string var_2778_pad_type_0 = const()[name = string("op_2778_pad_type_0"), val = string("valid")]; tensor var_2778_strides_0 = const()[name = string("op_2778_strides_0"), val = tensor([1, 1])]; tensor var_2778_pad_0 = const()[name = string("op_2778_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2778_dilations_0 = const()[name = string("op_2778_dilations_0"), val = tensor([1, 1])]; int32 var_2778_groups_0 = const()[name = string("op_2778_groups_0"), val = int32(1)]; tensor layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129733632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130028608))))[name = string("layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_9_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_9_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130028736)))]; tensor var_2778_cast_fp16 = conv(bias = layers_9_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2778_dilations_0, groups = var_2778_groups_0, pad = var_2778_pad_0, pad_type = var_2778_pad_type_0, strides = var_2778_strides_0, weight = layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_91_cast_fp16)[name = string("op_2778_cast_fp16")]; string var_2784_pad_type_0 = const()[name = string("op_2784_pad_type_0"), val = string("valid")]; tensor var_2784_strides_0 = const()[name = string("op_2784_strides_0"), val = tensor([1, 1])]; tensor var_2784_pad_0 = const()[name = string("op_2784_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2784_dilations_0 = const()[name = string("op_2784_dilations_0"), val = tensor([1, 1])]; int32 var_2784_groups_0 = const()[name = string("op_2784_groups_0"), val = int32(1)]; tensor layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130036928))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130030336))))[name = string("layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_2784_cast_fp16 = conv(dilations = var_2784_dilations_0, groups = var_2784_groups_0, pad = var_2784_pad_0, pad_type = var_2784_pad_type_0, strides = var_2784_strides_0, weight = layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_91_cast_fp16)[name = string("op_2784_cast_fp16")]; tensor obj_175_cast_fp16 = add(x = var_2778_cast_fp16, y = var_2784_cast_fp16)[name = string("obj_175_cast_fp16")]; tensor inputs_57_cast_fp16 = add(x = inputs_55_cast_fp16, y = obj_175_cast_fp16)[name = string("inputs_57_cast_fp16")]; tensor out_57_axes_0 = const()[name = string("out_57_axes_0"), val = tensor([1])]; fp16 var_2799_to_fp16 = const()[name = string("op_2799_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_57_cast_fp16 = layer_norm(axes = out_57_axes_0, epsilon = var_2799_to_fp16, x = inputs_57_cast_fp16)[name = string("out_57_cast_fp16")]; tensor obj_177_gamma_0_to_fp16 = const()[name = string("obj_177_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130110720)))]; tensor obj_177_beta_0_to_fp16 = const()[name = string("obj_177_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130112320)))]; fp16 obj_177_epsilon_0_to_fp16 = const()[name = string("obj_177_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; tensor obj_177_cast_fp16 = batch_norm(beta = obj_177_beta_0_to_fp16, epsilon = obj_177_epsilon_0_to_fp16, gamma = obj_177_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_57_cast_fp16)[name = string("obj_177_cast_fp16")]; string var_2819_pad_type_0 = const()[name = string("op_2819_pad_type_0"), val = string("valid")]; tensor var_2819_strides_0 = const()[name = string("op_2819_strides_0"), val = tensor([1, 1])]; tensor var_2819_pad_0 = const()[name = string("op_2819_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2819_dilations_0 = const()[name = string("op_2819_dilations_0"), val = tensor([1, 1])]; int32 var_2819_groups_0 = const()[name = string("op_2819_groups_0"), val = int32(1)]; tensor layers_9_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130113920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130408896))))[name = string("layers_9_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_9_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_9_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130409024)))]; tensor var_2819_cast_fp16 = conv(bias = layers_9_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2819_dilations_0, groups = var_2819_groups_0, pad = var_2819_pad_0, pad_type = var_2819_pad_type_0, strides = var_2819_strides_0, weight = layers_9_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_177_cast_fp16)[name = string("op_2819_cast_fp16")]; string var_2825_pad_type_0 = const()[name = string("op_2825_pad_type_0"), val = string("valid")]; tensor var_2825_strides_0 = const()[name = string("op_2825_strides_0"), val = tensor([1, 1])]; tensor var_2825_pad_0 = const()[name = string("op_2825_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2825_dilations_0 = const()[name = string("op_2825_dilations_0"), val = tensor([1, 1])]; int32 var_2825_groups_0 = const()[name = string("op_2825_groups_0"), val = int32(1)]; tensor layers_9_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130415744))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130410624))))[name = string("layers_9_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_2825_cast_fp16 = conv(dilations = var_2825_dilations_0, groups = var_2825_groups_0, pad = var_2825_pad_0, pad_type = var_2825_pad_type_0, strides = var_2825_strides_0, weight = layers_9_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_177_cast_fp16)[name = string("op_2825_cast_fp16")]; tensor query_39_cast_fp16 = add(x = var_2819_cast_fp16, y = var_2825_cast_fp16)[name = string("query_39_cast_fp16")]; tensor var_2828 = const()[name = string("op_2828"), val = tensor([1, 12, 64, -1])]; tensor mh_q_39_cast_fp16 = reshape(shape = var_2828, x = query_39_cast_fp16)[name = string("mh_q_39_cast_fp16")]; fp16 var_2830_to_fp16 = const()[name = string("op_2830_to_fp16"), val = fp16(0x1p-3)]; tensor var_2831_cast_fp16 = mul(x = mh_q_39_cast_fp16, y = var_2830_to_fp16)[name = string("op_2831_cast_fp16")]; tensor var_2832 = const()[name = string("op_2832"), val = tensor([1, 12, 64, -1])]; tensor var_2833_cast_fp16 = reshape(shape = var_2832, x = obj_179_cast_fp16)[name = string("op_2833_cast_fp16")]; bool mh_w_77_transpose_x_0 = const()[name = string("mh_w_77_transpose_x_0"), val = bool(true)]; bool mh_w_77_transpose_y_0 = const()[name = string("mh_w_77_transpose_y_0"), val = bool(false)]; tensor mh_w_77_cast_fp16 = matmul(transpose_x = mh_w_77_transpose_x_0, transpose_y = mh_w_77_transpose_y_0, x = var_2831_cast_fp16, y = var_2833_cast_fp16)[name = string("mh_w_77_cast_fp16")]; tensor mh_w_79_cast_fp16 = add(x = mh_w_77_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_79_cast_fp16")]; tensor obj_185_cast_fp16 = softmax(axis = var_2655, x = mh_w_79_cast_fp16)[name = string("obj_185_cast_fp16")]; tensor var_2842 = const()[name = string("op_2842"), val = tensor([1, 12, 64, -1])]; tensor var_2843_cast_fp16 = reshape(shape = var_2842, x = obj_181_cast_fp16)[name = string("op_2843_cast_fp16")]; bool attn_39_transpose_x_0 = const()[name = string("attn_39_transpose_x_0"), val = bool(false)]; bool attn_39_transpose_y_0 = const()[name = string("attn_39_transpose_y_0"), val = bool(true)]; tensor attn_39_cast_fp16 = matmul(transpose_x = attn_39_transpose_x_0, transpose_y = attn_39_transpose_y_0, x = var_2843_cast_fp16, y = obj_185_cast_fp16)[name = string("attn_39_cast_fp16")]; tensor var_2846 = const()[name = string("op_2846"), val = tensor([1, 768, 1, -1])]; tensor input_93_cast_fp16 = reshape(shape = var_2846, x = attn_39_cast_fp16)[name = string("input_93_cast_fp16")]; string var_2856_pad_type_0 = const()[name = string("op_2856_pad_type_0"), val = string("valid")]; tensor var_2856_strides_0 = const()[name = string("op_2856_strides_0"), val = tensor([1, 1])]; tensor var_2856_pad_0 = const()[name = string("op_2856_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2856_dilations_0 = const()[name = string("op_2856_dilations_0"), val = tensor([1, 1])]; int32 var_2856_groups_0 = const()[name = string("op_2856_groups_0"), val = int32(1)]; tensor layers_9_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130489536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130784512))))[name = string("layers_9_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_9_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_9_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130784640)))]; tensor var_2856_cast_fp16 = conv(bias = layers_9_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2856_dilations_0, groups = var_2856_groups_0, pad = var_2856_pad_0, pad_type = var_2856_pad_type_0, strides = var_2856_strides_0, weight = layers_9_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_93_cast_fp16)[name = string("op_2856_cast_fp16")]; string var_2862_pad_type_0 = const()[name = string("op_2862_pad_type_0"), val = string("valid")]; tensor var_2862_strides_0 = const()[name = string("op_2862_strides_0"), val = tensor([1, 1])]; tensor var_2862_pad_0 = const()[name = string("op_2862_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2862_dilations_0 = const()[name = string("op_2862_dilations_0"), val = tensor([1, 1])]; int32 var_2862_groups_0 = const()[name = string("op_2862_groups_0"), val = int32(1)]; tensor layers_9_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130791680))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130786240))))[name = string("layers_9_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_2862_cast_fp16 = conv(dilations = var_2862_dilations_0, groups = var_2862_groups_0, pad = var_2862_pad_0, pad_type = var_2862_pad_type_0, strides = var_2862_strides_0, weight = layers_9_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_93_cast_fp16)[name = string("op_2862_cast_fp16")]; tensor obj_183_cast_fp16 = add(x = var_2856_cast_fp16, y = var_2862_cast_fp16)[name = string("obj_183_cast_fp16")]; tensor inputs_59_cast_fp16 = add(x = inputs_57_cast_fp16, y = obj_183_cast_fp16)[name = string("inputs_59_cast_fp16")]; tensor out_59_axes_0 = const()[name = string("out_59_axes_0"), val = tensor([1])]; fp16 var_2876_to_fp16 = const()[name = string("op_2876_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_59_cast_fp16 = layer_norm(axes = out_59_axes_0, epsilon = var_2876_to_fp16, x = inputs_59_cast_fp16)[name = string("out_59_cast_fp16")]; tensor input_95_gamma_0_to_fp16 = const()[name = string("input_95_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130865472)))]; tensor input_95_beta_0_to_fp16 = const()[name = string("input_95_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130867072)))]; fp16 input_95_epsilon_0_to_fp16 = const()[name = string("input_95_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; tensor input_95_cast_fp16 = batch_norm(beta = input_95_beta_0_to_fp16, epsilon = input_95_epsilon_0_to_fp16, gamma = input_95_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_59_cast_fp16)[name = string("input_95_cast_fp16")]; string var_2894_pad_type_0 = const()[name = string("op_2894_pad_type_0"), val = string("valid")]; tensor var_2894_strides_0 = const()[name = string("op_2894_strides_0"), val = tensor([1, 1])]; tensor var_2894_pad_0 = const()[name = string("op_2894_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2894_dilations_0 = const()[name = string("op_2894_dilations_0"), val = tensor([1, 1])]; int32 var_2894_groups_0 = const()[name = string("op_2894_groups_0"), val = int32(1)]; tensor layers_9_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130868672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132048384))))[name = string("layers_9_fc1_inlier_module_weight_to_fp16_palettized")]; tensor layers_9_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_9_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132048512)))]; tensor var_2894_cast_fp16 = conv(bias = layers_9_fc1_inlier_module_bias_to_fp16, dilations = var_2894_dilations_0, groups = var_2894_groups_0, pad = var_2894_pad_0, pad_type = var_2894_pad_type_0, strides = var_2894_strides_0, weight = layers_9_fc1_inlier_module_weight_to_fp16_palettized, x = input_95_cast_fp16)[name = string("op_2894_cast_fp16")]; string var_2900_pad_type_0 = const()[name = string("op_2900_pad_type_0"), val = string("valid")]; tensor var_2900_strides_0 = const()[name = string("op_2900_strides_0"), val = tensor([1, 1])]; tensor var_2900_pad_0 = const()[name = string("op_2900_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2900_dilations_0 = const()[name = string("op_2900_dilations_0"), val = tensor([1, 1])]; int32 var_2900_groups_0 = const()[name = string("op_2900_groups_0"), val = int32(1)]; tensor layers_9_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132072448))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132054720))))[name = string("layers_9_fc1_outlier_module_weight_to_fp16_sparsified")]; tensor var_2900_cast_fp16 = conv(dilations = var_2900_dilations_0, groups = var_2900_groups_0, pad = var_2900_pad_0, pad_type = var_2900_pad_type_0, strides = var_2900_strides_0, weight = layers_9_fc1_outlier_module_weight_to_fp16_sparsified, x = input_95_cast_fp16)[name = string("op_2900_cast_fp16")]; tensor input_97_cast_fp16 = add(x = var_2894_cast_fp16, y = var_2900_cast_fp16)[name = string("input_97_cast_fp16")]; string input_99_mode_0 = const()[name = string("input_99_mode_0"), val = string("EXACT")]; tensor input_99_cast_fp16 = gelu(mode = input_99_mode_0, x = input_97_cast_fp16)[name = string("input_99_cast_fp16")]; string var_2911_pad_type_0 = const()[name = string("op_2911_pad_type_0"), val = string("valid")]; tensor var_2911_strides_0 = const()[name = string("op_2911_strides_0"), val = tensor([1, 1])]; tensor var_2911_pad_0 = const()[name = string("op_2911_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2911_dilations_0 = const()[name = string("op_2911_dilations_0"), val = tensor([1, 1])]; int32 var_2911_groups_0 = const()[name = string("op_2911_groups_0"), val = int32(1)]; tensor layers_9_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132367424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133547136))))[name = string("layers_9_fc2_inlier_module_weight_to_fp16_palettized")]; tensor layers_9_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_9_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133547264)))]; tensor var_2911_cast_fp16 = conv(bias = layers_9_fc2_inlier_module_bias_to_fp16, dilations = var_2911_dilations_0, groups = var_2911_groups_0, pad = var_2911_pad_0, pad_type = var_2911_pad_type_0, strides = var_2911_strides_0, weight = layers_9_fc2_inlier_module_weight_to_fp16_palettized, x = input_99_cast_fp16)[name = string("op_2911_cast_fp16")]; string var_2917_pad_type_0 = const()[name = string("op_2917_pad_type_0"), val = string("valid")]; tensor var_2917_strides_0 = const()[name = string("op_2917_strides_0"), val = tensor([1, 1])]; tensor var_2917_pad_0 = const()[name = string("op_2917_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2917_dilations_0 = const()[name = string("op_2917_dilations_0"), val = tensor([1, 1])]; int32 var_2917_groups_0 = const()[name = string("op_2917_groups_0"), val = int32(1)]; tensor layers_9_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133574720))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133548864))))[name = string("layers_9_fc2_outlier_module_weight_to_fp16_sparsified")]; tensor var_2917_cast_fp16 = conv(dilations = var_2917_dilations_0, groups = var_2917_groups_0, pad = var_2917_pad_0, pad_type = var_2917_pad_type_0, strides = var_2917_strides_0, weight = layers_9_fc2_outlier_module_weight_to_fp16_sparsified, x = input_99_cast_fp16)[name = string("op_2917_cast_fp16")]; tensor hidden_states_21_cast_fp16 = add(x = var_2911_cast_fp16, y = var_2917_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; tensor inputs_61_cast_fp16 = add(x = inputs_59_cast_fp16, y = hidden_states_21_cast_fp16)[name = string("inputs_61_cast_fp16")]; tensor obj_197_begin_0 = const()[name = string("obj_197_begin_0"), val = tensor([10, 0, 0, 0])]; tensor obj_197_end_0 = const()[name = string("obj_197_end_0"), val = tensor([11, 768, 1, 1536])]; tensor obj_197_end_mask_0 = const()[name = string("obj_197_end_mask_0"), val = tensor([false, true, true, true])]; tensor obj_197_cast_fp16 = slice_by_index(begin = obj_197_begin_0, end = obj_197_end_0, end_mask = obj_197_end_mask_0, x = read_state_2)[name = string("obj_197_cast_fp16")]; tensor obj_199_begin_0 = const()[name = string("obj_199_begin_0"), val = tensor([10, 0, 0, 0])]; tensor obj_199_end_0 = const()[name = string("obj_199_end_0"), val = tensor([11, 768, 1, 1536])]; tensor obj_199_end_mask_0 = const()[name = string("obj_199_end_mask_0"), val = tensor([false, true, true, true])]; tensor obj_199_cast_fp16 = slice_by_index(begin = obj_199_begin_0, end = obj_199_end_0, end_mask = obj_199_end_mask_0, x = read_state_3)[name = string("obj_199_cast_fp16")]; int32 var_2940 = const()[name = string("op_2940"), val = int32(3)]; tensor out_61_axes_0 = const()[name = string("out_61_axes_0"), val = tensor([1])]; fp16 var_2965_to_fp16 = const()[name = string("op_2965_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_61_cast_fp16 = layer_norm(axes = out_61_axes_0, epsilon = var_2965_to_fp16, x = inputs_61_cast_fp16)[name = string("out_61_cast_fp16")]; tensor obj_187_gamma_0_to_fp16 = const()[name = string("obj_187_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133869696)))]; tensor obj_187_beta_0_to_fp16 = const()[name = string("obj_187_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133871296)))]; fp16 obj_187_epsilon_0_to_fp16 = const()[name = string("obj_187_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; tensor obj_187_cast_fp16 = batch_norm(beta = obj_187_beta_0_to_fp16, epsilon = obj_187_epsilon_0_to_fp16, gamma = obj_187_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_61_cast_fp16)[name = string("obj_187_cast_fp16")]; string var_2987_pad_type_0 = const()[name = string("op_2987_pad_type_0"), val = string("valid")]; tensor var_2987_strides_0 = const()[name = string("op_2987_strides_0"), val = tensor([1, 1])]; tensor var_2987_pad_0 = const()[name = string("op_2987_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2987_dilations_0 = const()[name = string("op_2987_dilations_0"), val = tensor([1, 1])]; int32 var_2987_groups_0 = const()[name = string("op_2987_groups_0"), val = int32(1)]; tensor layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133872896))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134167872))))[name = string("layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_10_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_10_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134168000)))]; tensor var_2987_cast_fp16 = conv(bias = layers_10_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2987_dilations_0, groups = var_2987_groups_0, pad = var_2987_pad_0, pad_type = var_2987_pad_type_0, strides = var_2987_strides_0, weight = layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_187_cast_fp16)[name = string("op_2987_cast_fp16")]; string var_2993_pad_type_0 = const()[name = string("op_2993_pad_type_0"), val = string("valid")]; tensor var_2993_strides_0 = const()[name = string("op_2993_strides_0"), val = tensor([1, 1])]; tensor var_2993_pad_0 = const()[name = string("op_2993_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_2993_dilations_0 = const()[name = string("op_2993_dilations_0"), val = tensor([1, 1])]; int32 var_2993_groups_0 = const()[name = string("op_2993_groups_0"), val = int32(1)]; tensor layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134175168))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134169600))))[name = string("layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_2993_cast_fp16 = conv(dilations = var_2993_dilations_0, groups = var_2993_groups_0, pad = var_2993_pad_0, pad_type = var_2993_pad_type_0, strides = var_2993_strides_0, weight = layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_187_cast_fp16)[name = string("op_2993_cast_fp16")]; tensor query_41_cast_fp16 = add(x = var_2987_cast_fp16, y = var_2993_cast_fp16)[name = string("query_41_cast_fp16")]; string var_3002_pad_type_0 = const()[name = string("op_3002_pad_type_0"), val = string("valid")]; tensor var_3002_strides_0 = const()[name = string("op_3002_strides_0"), val = tensor([1, 1])]; tensor var_3002_pad_0 = const()[name = string("op_3002_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3002_dilations_0 = const()[name = string("op_3002_dilations_0"), val = tensor([1, 1])]; int32 var_3002_groups_0 = const()[name = string("op_3002_groups_0"), val = int32(1)]; tensor layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134248960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134543936))))[name = string("layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; tensor var_3002_cast_fp16 = conv(dilations = var_3002_dilations_0, groups = var_3002_groups_0, pad = var_3002_pad_0, pad_type = var_3002_pad_type_0, strides = var_3002_strides_0, weight = layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_187_cast_fp16)[name = string("op_3002_cast_fp16")]; string var_3008_pad_type_0 = const()[name = string("op_3008_pad_type_0"), val = string("valid")]; tensor var_3008_strides_0 = const()[name = string("op_3008_strides_0"), val = tensor([1, 1])]; tensor var_3008_pad_0 = const()[name = string("op_3008_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3008_dilations_0 = const()[name = string("op_3008_dilations_0"), val = tensor([1, 1])]; int32 var_3008_groups_0 = const()[name = string("op_3008_groups_0"), val = int32(1)]; tensor layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134549888))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134544064))))[name = string("layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_3008_cast_fp16 = conv(dilations = var_3008_dilations_0, groups = var_3008_groups_0, pad = var_3008_pad_0, pad_type = var_3008_pad_type_0, strides = var_3008_strides_0, weight = layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_187_cast_fp16)[name = string("op_3008_cast_fp16")]; tensor current_key_21_cast_fp16 = add(x = var_3002_cast_fp16, y = var_3008_cast_fp16)[name = string("current_key_21_cast_fp16")]; string var_3018_pad_type_0 = const()[name = string("op_3018_pad_type_0"), val = string("valid")]; tensor var_3018_strides_0 = const()[name = string("op_3018_strides_0"), val = tensor([1, 1])]; tensor var_3018_pad_0 = const()[name = string("op_3018_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3018_dilations_0 = const()[name = string("op_3018_dilations_0"), val = tensor([1, 1])]; int32 var_3018_groups_0 = const()[name = string("op_3018_groups_0"), val = int32(1)]; tensor layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134623680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134918656))))[name = string("layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_10_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_10_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134918784)))]; tensor var_3018_cast_fp16 = conv(bias = layers_10_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_3018_dilations_0, groups = var_3018_groups_0, pad = var_3018_pad_0, pad_type = var_3018_pad_type_0, strides = var_3018_strides_0, weight = layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_187_cast_fp16)[name = string("op_3018_cast_fp16")]; string var_3024_pad_type_0 = const()[name = string("op_3024_pad_type_0"), val = string("valid")]; tensor var_3024_strides_0 = const()[name = string("op_3024_strides_0"), val = tensor([1, 1])]; tensor var_3024_pad_0 = const()[name = string("op_3024_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3024_dilations_0 = const()[name = string("op_3024_dilations_0"), val = tensor([1, 1])]; int32 var_3024_groups_0 = const()[name = string("op_3024_groups_0"), val = int32(1)]; tensor layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134928448))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134920384))))[name = string("layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_3024_cast_fp16 = conv(dilations = var_3024_dilations_0, groups = var_3024_groups_0, pad = var_3024_pad_0, pad_type = var_3024_pad_type_0, strides = var_3024_strides_0, weight = layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_187_cast_fp16)[name = string("op_3024_cast_fp16")]; tensor current_value_21_cast_fp16 = add(x = var_3018_cast_fp16, y = var_3024_cast_fp16)[name = string("current_value_21_cast_fp16")]; tensor var_3030_cast_fp16 = mul(x = current_key_21_cast_fp16, y = var_202_cast_fp16)[name = string("op_3030_cast_fp16")]; tensor key_21_cast_fp16 = add(x = var_71_cast_fp16_10, y = var_3030_cast_fp16)[name = string("key_21_cast_fp16")]; tensor var_3032_cast_fp16 = mul(x = current_value_21_cast_fp16, y = var_202_cast_fp16)[name = string("op_3032_cast_fp16")]; tensor value_21_cast_fp16 = add(x = var_86_cast_fp16_10, y = var_3032_cast_fp16)[name = string("value_21_cast_fp16")]; tensor var_3035 = const()[name = string("op_3035"), val = tensor([1, 12, 64, -1])]; tensor mh_q_41_cast_fp16 = reshape(shape = var_3035, x = query_41_cast_fp16)[name = string("mh_q_41_cast_fp16")]; fp16 var_3037_to_fp16 = const()[name = string("op_3037_to_fp16"), val = fp16(0x1p-3)]; tensor var_3038_cast_fp16 = mul(x = mh_q_41_cast_fp16, y = var_3037_to_fp16)[name = string("op_3038_cast_fp16")]; tensor var_3039 = const()[name = string("op_3039"), val = tensor([1, 12, 64, -1])]; tensor var_3040_cast_fp16 = reshape(shape = var_3039, x = key_21_cast_fp16)[name = string("op_3040_cast_fp16")]; bool mh_w_81_transpose_x_0 = const()[name = string("mh_w_81_transpose_x_0"), val = bool(true)]; bool mh_w_81_transpose_y_0 = const()[name = string("mh_w_81_transpose_y_0"), val = bool(false)]; tensor mh_w_81_cast_fp16 = matmul(transpose_x = mh_w_81_transpose_x_0, transpose_y = mh_w_81_transpose_y_0, x = var_3038_cast_fp16, y = var_3040_cast_fp16)[name = string("mh_w_81_cast_fp16")]; tensor mh_w_83_cast_fp16 = add(x = mh_w_81_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_83_cast_fp16")]; tensor var_3048_cast_fp16 = softmax(axis = var_2940, x = mh_w_83_cast_fp16)[name = string("op_3048_cast_fp16")]; tensor var_3049 = const()[name = string("op_3049"), val = tensor([1, 12, 64, -1])]; tensor var_3050_cast_fp16 = reshape(shape = var_3049, x = value_21_cast_fp16)[name = string("op_3050_cast_fp16")]; bool attn_41_transpose_x_0 = const()[name = string("attn_41_transpose_x_0"), val = bool(false)]; bool attn_41_transpose_y_0 = const()[name = string("attn_41_transpose_y_0"), val = bool(true)]; tensor attn_41_cast_fp16 = matmul(transpose_x = attn_41_transpose_x_0, transpose_y = attn_41_transpose_y_0, x = var_3050_cast_fp16, y = var_3048_cast_fp16)[name = string("attn_41_cast_fp16")]; tensor var_3053 = const()[name = string("op_3053"), val = tensor([1, 768, 1, -1])]; tensor input_101_cast_fp16 = reshape(shape = var_3053, x = attn_41_cast_fp16)[name = string("input_101_cast_fp16")]; string var_3063_pad_type_0 = const()[name = string("op_3063_pad_type_0"), val = string("valid")]; tensor var_3063_strides_0 = const()[name = string("op_3063_strides_0"), val = tensor([1, 1])]; tensor var_3063_pad_0 = const()[name = string("op_3063_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3063_dilations_0 = const()[name = string("op_3063_dilations_0"), val = tensor([1, 1])]; int32 var_3063_groups_0 = const()[name = string("op_3063_groups_0"), val = int32(1)]; tensor layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135002240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135297216))))[name = string("layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_10_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_10_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135297344)))]; tensor var_3063_cast_fp16 = conv(bias = layers_10_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_3063_dilations_0, groups = var_3063_groups_0, pad = var_3063_pad_0, pad_type = var_3063_pad_type_0, strides = var_3063_strides_0, weight = layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_101_cast_fp16)[name = string("op_3063_cast_fp16")]; string var_3069_pad_type_0 = const()[name = string("op_3069_pad_type_0"), val = string("valid")]; tensor var_3069_strides_0 = const()[name = string("op_3069_strides_0"), val = tensor([1, 1])]; tensor var_3069_pad_0 = const()[name = string("op_3069_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3069_dilations_0 = const()[name = string("op_3069_dilations_0"), val = tensor([1, 1])]; int32 var_3069_groups_0 = const()[name = string("op_3069_groups_0"), val = int32(1)]; tensor layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135306944))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135298944))))[name = string("layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_3069_cast_fp16 = conv(dilations = var_3069_dilations_0, groups = var_3069_groups_0, pad = var_3069_pad_0, pad_type = var_3069_pad_type_0, strides = var_3069_strides_0, weight = layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_101_cast_fp16)[name = string("op_3069_cast_fp16")]; tensor obj_193_cast_fp16 = add(x = var_3063_cast_fp16, y = var_3069_cast_fp16)[name = string("obj_193_cast_fp16")]; tensor inputs_63_cast_fp16 = add(x = inputs_61_cast_fp16, y = obj_193_cast_fp16)[name = string("inputs_63_cast_fp16")]; tensor out_63_axes_0 = const()[name = string("out_63_axes_0"), val = tensor([1])]; fp16 var_3084_to_fp16 = const()[name = string("op_3084_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_63_cast_fp16 = layer_norm(axes = out_63_axes_0, epsilon = var_3084_to_fp16, x = inputs_63_cast_fp16)[name = string("out_63_cast_fp16")]; tensor obj_195_gamma_0_to_fp16 = const()[name = string("obj_195_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135380736)))]; tensor obj_195_beta_0_to_fp16 = const()[name = string("obj_195_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135382336)))]; fp16 obj_195_epsilon_0_to_fp16 = const()[name = string("obj_195_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; tensor obj_195_cast_fp16 = batch_norm(beta = obj_195_beta_0_to_fp16, epsilon = obj_195_epsilon_0_to_fp16, gamma = obj_195_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_63_cast_fp16)[name = string("obj_195_cast_fp16")]; string var_3104_pad_type_0 = const()[name = string("op_3104_pad_type_0"), val = string("valid")]; tensor var_3104_strides_0 = const()[name = string("op_3104_strides_0"), val = tensor([1, 1])]; tensor var_3104_pad_0 = const()[name = string("op_3104_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3104_dilations_0 = const()[name = string("op_3104_dilations_0"), val = tensor([1, 1])]; int32 var_3104_groups_0 = const()[name = string("op_3104_groups_0"), val = int32(1)]; tensor layers_10_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135383936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135678912))))[name = string("layers_10_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_10_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_10_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135679040)))]; tensor var_3104_cast_fp16 = conv(bias = layers_10_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_3104_dilations_0, groups = var_3104_groups_0, pad = var_3104_pad_0, pad_type = var_3104_pad_type_0, strides = var_3104_strides_0, weight = layers_10_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_195_cast_fp16)[name = string("op_3104_cast_fp16")]; string var_3110_pad_type_0 = const()[name = string("op_3110_pad_type_0"), val = string("valid")]; tensor var_3110_strides_0 = const()[name = string("op_3110_strides_0"), val = tensor([1, 1])]; tensor var_3110_pad_0 = const()[name = string("op_3110_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3110_dilations_0 = const()[name = string("op_3110_dilations_0"), val = tensor([1, 1])]; int32 var_3110_groups_0 = const()[name = string("op_3110_groups_0"), val = int32(1)]; tensor layers_10_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135686464))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135680640))))[name = string("layers_10_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_3110_cast_fp16 = conv(dilations = var_3110_dilations_0, groups = var_3110_groups_0, pad = var_3110_pad_0, pad_type = var_3110_pad_type_0, strides = var_3110_strides_0, weight = layers_10_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_195_cast_fp16)[name = string("op_3110_cast_fp16")]; tensor query_43_cast_fp16 = add(x = var_3104_cast_fp16, y = var_3110_cast_fp16)[name = string("query_43_cast_fp16")]; tensor var_3113 = const()[name = string("op_3113"), val = tensor([1, 12, 64, -1])]; tensor mh_q_43_cast_fp16 = reshape(shape = var_3113, x = query_43_cast_fp16)[name = string("mh_q_43_cast_fp16")]; fp16 var_3115_to_fp16 = const()[name = string("op_3115_to_fp16"), val = fp16(0x1p-3)]; tensor var_3116_cast_fp16 = mul(x = mh_q_43_cast_fp16, y = var_3115_to_fp16)[name = string("op_3116_cast_fp16")]; tensor var_3117 = const()[name = string("op_3117"), val = tensor([1, 12, 64, -1])]; tensor var_3118_cast_fp16 = reshape(shape = var_3117, x = obj_197_cast_fp16)[name = string("op_3118_cast_fp16")]; bool mh_w_85_transpose_x_0 = const()[name = string("mh_w_85_transpose_x_0"), val = bool(true)]; bool mh_w_85_transpose_y_0 = const()[name = string("mh_w_85_transpose_y_0"), val = bool(false)]; tensor mh_w_85_cast_fp16 = matmul(transpose_x = mh_w_85_transpose_x_0, transpose_y = mh_w_85_transpose_y_0, x = var_3116_cast_fp16, y = var_3118_cast_fp16)[name = string("mh_w_85_cast_fp16")]; tensor mh_w_87_cast_fp16 = add(x = mh_w_85_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_87_cast_fp16")]; tensor obj_203_cast_fp16 = softmax(axis = var_2940, x = mh_w_87_cast_fp16)[name = string("obj_203_cast_fp16")]; tensor var_3127 = const()[name = string("op_3127"), val = tensor([1, 12, 64, -1])]; tensor var_3128_cast_fp16 = reshape(shape = var_3127, x = obj_199_cast_fp16)[name = string("op_3128_cast_fp16")]; bool attn_43_transpose_x_0 = const()[name = string("attn_43_transpose_x_0"), val = bool(false)]; bool attn_43_transpose_y_0 = const()[name = string("attn_43_transpose_y_0"), val = bool(true)]; tensor attn_43_cast_fp16 = matmul(transpose_x = attn_43_transpose_x_0, transpose_y = attn_43_transpose_y_0, x = var_3128_cast_fp16, y = obj_203_cast_fp16)[name = string("attn_43_cast_fp16")]; tensor var_3131 = const()[name = string("op_3131"), val = tensor([1, 768, 1, -1])]; tensor input_103_cast_fp16 = reshape(shape = var_3131, x = attn_43_cast_fp16)[name = string("input_103_cast_fp16")]; string var_3141_pad_type_0 = const()[name = string("op_3141_pad_type_0"), val = string("valid")]; tensor var_3141_strides_0 = const()[name = string("op_3141_strides_0"), val = tensor([1, 1])]; tensor var_3141_pad_0 = const()[name = string("op_3141_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3141_dilations_0 = const()[name = string("op_3141_dilations_0"), val = tensor([1, 1])]; int32 var_3141_groups_0 = const()[name = string("op_3141_groups_0"), val = int32(1)]; tensor layers_10_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135760256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136055232))))[name = string("layers_10_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_10_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_10_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136055360)))]; tensor var_3141_cast_fp16 = conv(bias = layers_10_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_3141_dilations_0, groups = var_3141_groups_0, pad = var_3141_pad_0, pad_type = var_3141_pad_type_0, strides = var_3141_strides_0, weight = layers_10_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_103_cast_fp16)[name = string("op_3141_cast_fp16")]; string var_3147_pad_type_0 = const()[name = string("op_3147_pad_type_0"), val = string("valid")]; tensor var_3147_strides_0 = const()[name = string("op_3147_strides_0"), val = tensor([1, 1])]; tensor var_3147_pad_0 = const()[name = string("op_3147_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3147_dilations_0 = const()[name = string("op_3147_dilations_0"), val = tensor([1, 1])]; int32 var_3147_groups_0 = const()[name = string("op_3147_groups_0"), val = int32(1)]; tensor layers_10_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136066368))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136056960))))[name = string("layers_10_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_3147_cast_fp16 = conv(dilations = var_3147_dilations_0, groups = var_3147_groups_0, pad = var_3147_pad_0, pad_type = var_3147_pad_type_0, strides = var_3147_strides_0, weight = layers_10_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_103_cast_fp16)[name = string("op_3147_cast_fp16")]; tensor obj_201_cast_fp16 = add(x = var_3141_cast_fp16, y = var_3147_cast_fp16)[name = string("obj_201_cast_fp16")]; tensor inputs_65_cast_fp16 = add(x = inputs_63_cast_fp16, y = obj_201_cast_fp16)[name = string("inputs_65_cast_fp16")]; tensor out_65_axes_0 = const()[name = string("out_65_axes_0"), val = tensor([1])]; fp16 var_3161_to_fp16 = const()[name = string("op_3161_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_65_cast_fp16 = layer_norm(axes = out_65_axes_0, epsilon = var_3161_to_fp16, x = inputs_65_cast_fp16)[name = string("out_65_cast_fp16")]; tensor input_105_gamma_0_to_fp16 = const()[name = string("input_105_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136140160)))]; tensor input_105_beta_0_to_fp16 = const()[name = string("input_105_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136141760)))]; fp16 input_105_epsilon_0_to_fp16 = const()[name = string("input_105_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; tensor input_105_cast_fp16 = batch_norm(beta = input_105_beta_0_to_fp16, epsilon = input_105_epsilon_0_to_fp16, gamma = input_105_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_65_cast_fp16)[name = string("input_105_cast_fp16")]; string var_3179_pad_type_0 = const()[name = string("op_3179_pad_type_0"), val = string("valid")]; tensor var_3179_strides_0 = const()[name = string("op_3179_strides_0"), val = tensor([1, 1])]; tensor var_3179_pad_0 = const()[name = string("op_3179_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3179_dilations_0 = const()[name = string("op_3179_dilations_0"), val = tensor([1, 1])]; int32 var_3179_groups_0 = const()[name = string("op_3179_groups_0"), val = int32(1)]; tensor layers_10_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136143360))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137323072))))[name = string("layers_10_fc1_inlier_module_weight_to_fp16_palettized")]; tensor layers_10_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_10_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137323200)))]; tensor var_3179_cast_fp16 = conv(bias = layers_10_fc1_inlier_module_bias_to_fp16, dilations = var_3179_dilations_0, groups = var_3179_groups_0, pad = var_3179_pad_0, pad_type = var_3179_pad_type_0, strides = var_3179_strides_0, weight = layers_10_fc1_inlier_module_weight_to_fp16_palettized, x = input_105_cast_fp16)[name = string("op_3179_cast_fp16")]; string var_3185_pad_type_0 = const()[name = string("op_3185_pad_type_0"), val = string("valid")]; tensor var_3185_strides_0 = const()[name = string("op_3185_strides_0"), val = tensor([1, 1])]; tensor var_3185_pad_0 = const()[name = string("op_3185_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3185_dilations_0 = const()[name = string("op_3185_dilations_0"), val = tensor([1, 1])]; int32 var_3185_groups_0 = const()[name = string("op_3185_groups_0"), val = int32(1)]; tensor layers_10_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137350912))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137329408))))[name = string("layers_10_fc1_outlier_module_weight_to_fp16_sparsified")]; tensor var_3185_cast_fp16 = conv(dilations = var_3185_dilations_0, groups = var_3185_groups_0, pad = var_3185_pad_0, pad_type = var_3185_pad_type_0, strides = var_3185_strides_0, weight = layers_10_fc1_outlier_module_weight_to_fp16_sparsified, x = input_105_cast_fp16)[name = string("op_3185_cast_fp16")]; tensor input_107_cast_fp16 = add(x = var_3179_cast_fp16, y = var_3185_cast_fp16)[name = string("input_107_cast_fp16")]; string input_109_mode_0 = const()[name = string("input_109_mode_0"), val = string("EXACT")]; tensor input_109_cast_fp16 = gelu(mode = input_109_mode_0, x = input_107_cast_fp16)[name = string("input_109_cast_fp16")]; string var_3196_pad_type_0 = const()[name = string("op_3196_pad_type_0"), val = string("valid")]; tensor var_3196_strides_0 = const()[name = string("op_3196_strides_0"), val = tensor([1, 1])]; tensor var_3196_pad_0 = const()[name = string("op_3196_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3196_dilations_0 = const()[name = string("op_3196_dilations_0"), val = tensor([1, 1])]; int32 var_3196_groups_0 = const()[name = string("op_3196_groups_0"), val = int32(1)]; tensor layers_10_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137645888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138825600))))[name = string("layers_10_fc2_inlier_module_weight_to_fp16_palettized")]; tensor layers_10_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_10_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138825728)))]; tensor var_3196_cast_fp16 = conv(bias = layers_10_fc2_inlier_module_bias_to_fp16, dilations = var_3196_dilations_0, groups = var_3196_groups_0, pad = var_3196_pad_0, pad_type = var_3196_pad_type_0, strides = var_3196_strides_0, weight = layers_10_fc2_inlier_module_weight_to_fp16_palettized, x = input_109_cast_fp16)[name = string("op_3196_cast_fp16")]; string var_3202_pad_type_0 = const()[name = string("op_3202_pad_type_0"), val = string("valid")]; tensor var_3202_strides_0 = const()[name = string("op_3202_strides_0"), val = tensor([1, 1])]; tensor var_3202_pad_0 = const()[name = string("op_3202_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3202_dilations_0 = const()[name = string("op_3202_dilations_0"), val = tensor([1, 1])]; int32 var_3202_groups_0 = const()[name = string("op_3202_groups_0"), val = int32(1)]; tensor layers_10_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138857344))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138827328))))[name = string("layers_10_fc2_outlier_module_weight_to_fp16_sparsified")]; tensor var_3202_cast_fp16 = conv(dilations = var_3202_dilations_0, groups = var_3202_groups_0, pad = var_3202_pad_0, pad_type = var_3202_pad_type_0, strides = var_3202_strides_0, weight = layers_10_fc2_outlier_module_weight_to_fp16_sparsified, x = input_109_cast_fp16)[name = string("op_3202_cast_fp16")]; tensor hidden_states_23_cast_fp16 = add(x = var_3196_cast_fp16, y = var_3202_cast_fp16)[name = string("hidden_states_23_cast_fp16")]; tensor inputs_67_cast_fp16 = add(x = inputs_65_cast_fp16, y = hidden_states_23_cast_fp16)[name = string("inputs_67_cast_fp16")]; tensor obj_215_begin_0 = const()[name = string("obj_215_begin_0"), val = tensor([11, 0, 0, 0])]; tensor obj_215_end_0 = const()[name = string("obj_215_end_0"), val = tensor([12, 768, 1, 1536])]; tensor obj_215_end_mask_0 = const()[name = string("obj_215_end_mask_0"), val = tensor([false, true, true, true])]; tensor obj_215_cast_fp16 = slice_by_index(begin = obj_215_begin_0, end = obj_215_end_0, end_mask = obj_215_end_mask_0, x = read_state_2)[name = string("obj_215_cast_fp16")]; tensor obj_217_begin_0 = const()[name = string("obj_217_begin_0"), val = tensor([11, 0, 0, 0])]; tensor obj_217_end_0 = const()[name = string("obj_217_end_0"), val = tensor([12, 768, 1, 1536])]; tensor obj_217_end_mask_0 = const()[name = string("obj_217_end_mask_0"), val = tensor([false, true, true, true])]; tensor obj_217_cast_fp16 = slice_by_index(begin = obj_217_begin_0, end = obj_217_end_0, end_mask = obj_217_end_mask_0, x = read_state_3)[name = string("obj_217_cast_fp16")]; int32 var_3225 = const()[name = string("op_3225"), val = int32(3)]; tensor out_67_axes_0 = const()[name = string("out_67_axes_0"), val = tensor([1])]; fp16 var_3250_to_fp16 = const()[name = string("op_3250_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_67_cast_fp16 = layer_norm(axes = out_67_axes_0, epsilon = var_3250_to_fp16, x = inputs_67_cast_fp16)[name = string("out_67_cast_fp16")]; tensor obj_205_gamma_0_to_fp16 = const()[name = string("obj_205_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139152320)))]; tensor obj_205_beta_0_to_fp16 = const()[name = string("obj_205_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139153920)))]; fp16 obj_205_epsilon_0_to_fp16 = const()[name = string("obj_205_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; tensor obj_205_cast_fp16 = batch_norm(beta = obj_205_beta_0_to_fp16, epsilon = obj_205_epsilon_0_to_fp16, gamma = obj_205_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_67_cast_fp16)[name = string("obj_205_cast_fp16")]; string var_3272_pad_type_0 = const()[name = string("op_3272_pad_type_0"), val = string("valid")]; tensor var_3272_strides_0 = const()[name = string("op_3272_strides_0"), val = tensor([1, 1])]; tensor var_3272_pad_0 = const()[name = string("op_3272_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3272_dilations_0 = const()[name = string("op_3272_dilations_0"), val = tensor([1, 1])]; int32 var_3272_groups_0 = const()[name = string("op_3272_groups_0"), val = int32(1)]; tensor layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139155520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139450496))))[name = string("layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_11_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_11_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139450624)))]; tensor var_3272_cast_fp16 = conv(bias = layers_11_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_3272_dilations_0, groups = var_3272_groups_0, pad = var_3272_pad_0, pad_type = var_3272_pad_type_0, strides = var_3272_strides_0, weight = layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_205_cast_fp16)[name = string("op_3272_cast_fp16")]; string var_3278_pad_type_0 = const()[name = string("op_3278_pad_type_0"), val = string("valid")]; tensor var_3278_strides_0 = const()[name = string("op_3278_strides_0"), val = tensor([1, 1])]; tensor var_3278_pad_0 = const()[name = string("op_3278_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3278_dilations_0 = const()[name = string("op_3278_dilations_0"), val = tensor([1, 1])]; int32 var_3278_groups_0 = const()[name = string("op_3278_groups_0"), val = int32(1)]; tensor layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139457408))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139452224))))[name = string("layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_3278_cast_fp16 = conv(dilations = var_3278_dilations_0, groups = var_3278_groups_0, pad = var_3278_pad_0, pad_type = var_3278_pad_type_0, strides = var_3278_strides_0, weight = layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_205_cast_fp16)[name = string("op_3278_cast_fp16")]; tensor query_45_cast_fp16 = add(x = var_3272_cast_fp16, y = var_3278_cast_fp16)[name = string("query_45_cast_fp16")]; string var_3287_pad_type_0 = const()[name = string("op_3287_pad_type_0"), val = string("valid")]; tensor var_3287_strides_0 = const()[name = string("op_3287_strides_0"), val = tensor([1, 1])]; tensor var_3287_pad_0 = const()[name = string("op_3287_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3287_dilations_0 = const()[name = string("op_3287_dilations_0"), val = tensor([1, 1])]; int32 var_3287_groups_0 = const()[name = string("op_3287_groups_0"), val = int32(1)]; tensor layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139531200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139826176))))[name = string("layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")]; tensor var_3287_cast_fp16 = conv(dilations = var_3287_dilations_0, groups = var_3287_groups_0, pad = var_3287_pad_0, pad_type = var_3287_pad_type_0, strides = var_3287_strides_0, weight = layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_205_cast_fp16)[name = string("op_3287_cast_fp16")]; string var_3293_pad_type_0 = const()[name = string("op_3293_pad_type_0"), val = string("valid")]; tensor var_3293_strides_0 = const()[name = string("op_3293_strides_0"), val = tensor([1, 1])]; tensor var_3293_pad_0 = const()[name = string("op_3293_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3293_dilations_0 = const()[name = string("op_3293_dilations_0"), val = tensor([1, 1])]; int32 var_3293_groups_0 = const()[name = string("op_3293_groups_0"), val = int32(1)]; tensor layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139831488))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139826304))))[name = string("layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_3293_cast_fp16 = conv(dilations = var_3293_dilations_0, groups = var_3293_groups_0, pad = var_3293_pad_0, pad_type = var_3293_pad_type_0, strides = var_3293_strides_0, weight = layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_205_cast_fp16)[name = string("op_3293_cast_fp16")]; tensor current_key_cast_fp16 = add(x = var_3287_cast_fp16, y = var_3293_cast_fp16)[name = string("current_key_cast_fp16")]; string var_3303_pad_type_0 = const()[name = string("op_3303_pad_type_0"), val = string("valid")]; tensor var_3303_strides_0 = const()[name = string("op_3303_strides_0"), val = tensor([1, 1])]; tensor var_3303_pad_0 = const()[name = string("op_3303_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3303_dilations_0 = const()[name = string("op_3303_dilations_0"), val = tensor([1, 1])]; int32 var_3303_groups_0 = const()[name = string("op_3303_groups_0"), val = int32(1)]; tensor layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139905280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140200256))))[name = string("layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_11_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_11_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140200384)))]; tensor var_3303_cast_fp16 = conv(bias = layers_11_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_3303_dilations_0, groups = var_3303_groups_0, pad = var_3303_pad_0, pad_type = var_3303_pad_type_0, strides = var_3303_strides_0, weight = layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_205_cast_fp16)[name = string("op_3303_cast_fp16")]; string var_3309_pad_type_0 = const()[name = string("op_3309_pad_type_0"), val = string("valid")]; tensor var_3309_strides_0 = const()[name = string("op_3309_strides_0"), val = tensor([1, 1])]; tensor var_3309_pad_0 = const()[name = string("op_3309_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3309_dilations_0 = const()[name = string("op_3309_dilations_0"), val = tensor([1, 1])]; int32 var_3309_groups_0 = const()[name = string("op_3309_groups_0"), val = int32(1)]; tensor layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140207744))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140201984))))[name = string("layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_3309_cast_fp16 = conv(dilations = var_3309_dilations_0, groups = var_3309_groups_0, pad = var_3309_pad_0, pad_type = var_3309_pad_type_0, strides = var_3309_strides_0, weight = layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_205_cast_fp16)[name = string("op_3309_cast_fp16")]; tensor current_value_cast_fp16 = add(x = var_3303_cast_fp16, y = var_3309_cast_fp16)[name = string("current_value_cast_fp16")]; tensor var_3315_cast_fp16 = mul(x = current_key_cast_fp16, y = var_202_cast_fp16)[name = string("op_3315_cast_fp16")]; tensor key_cast_fp16 = add(x = var_71_cast_fp16_11, y = var_3315_cast_fp16)[name = string("key_cast_fp16")]; tensor var_3317_cast_fp16 = mul(x = current_value_cast_fp16, y = var_202_cast_fp16)[name = string("op_3317_cast_fp16")]; tensor value_cast_fp16 = add(x = var_86_cast_fp16_11, y = var_3317_cast_fp16)[name = string("value_cast_fp16")]; tensor var_3320 = const()[name = string("op_3320"), val = tensor([1, 12, 64, -1])]; tensor mh_q_45_cast_fp16 = reshape(shape = var_3320, x = query_45_cast_fp16)[name = string("mh_q_45_cast_fp16")]; fp16 var_3322_to_fp16 = const()[name = string("op_3322_to_fp16"), val = fp16(0x1p-3)]; tensor var_3323_cast_fp16 = mul(x = mh_q_45_cast_fp16, y = var_3322_to_fp16)[name = string("op_3323_cast_fp16")]; tensor var_3324 = const()[name = string("op_3324"), val = tensor([1, 12, 64, -1])]; tensor var_3325_cast_fp16 = reshape(shape = var_3324, x = key_cast_fp16)[name = string("op_3325_cast_fp16")]; bool mh_w_89_transpose_x_0 = const()[name = string("mh_w_89_transpose_x_0"), val = bool(true)]; bool mh_w_89_transpose_y_0 = const()[name = string("mh_w_89_transpose_y_0"), val = bool(false)]; tensor mh_w_89_cast_fp16 = matmul(transpose_x = mh_w_89_transpose_x_0, transpose_y = mh_w_89_transpose_y_0, x = var_3323_cast_fp16, y = var_3325_cast_fp16)[name = string("mh_w_89_cast_fp16")]; tensor mh_w_91_cast_fp16 = add(x = mh_w_89_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_91_cast_fp16")]; tensor var_3333_cast_fp16 = softmax(axis = var_3225, x = mh_w_91_cast_fp16)[name = string("op_3333_cast_fp16")]; tensor var_3334 = const()[name = string("op_3334"), val = tensor([1, 12, 64, -1])]; tensor var_3335_cast_fp16 = reshape(shape = var_3334, x = value_cast_fp16)[name = string("op_3335_cast_fp16")]; bool attn_45_transpose_x_0 = const()[name = string("attn_45_transpose_x_0"), val = bool(false)]; bool attn_45_transpose_y_0 = const()[name = string("attn_45_transpose_y_0"), val = bool(true)]; tensor attn_45_cast_fp16 = matmul(transpose_x = attn_45_transpose_x_0, transpose_y = attn_45_transpose_y_0, x = var_3335_cast_fp16, y = var_3333_cast_fp16)[name = string("attn_45_cast_fp16")]; tensor var_3338 = const()[name = string("op_3338"), val = tensor([1, 768, 1, -1])]; tensor input_111_cast_fp16 = reshape(shape = var_3338, x = attn_45_cast_fp16)[name = string("input_111_cast_fp16")]; string var_3348_pad_type_0 = const()[name = string("op_3348_pad_type_0"), val = string("valid")]; tensor var_3348_strides_0 = const()[name = string("op_3348_strides_0"), val = tensor([1, 1])]; tensor var_3348_pad_0 = const()[name = string("op_3348_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3348_dilations_0 = const()[name = string("op_3348_dilations_0"), val = tensor([1, 1])]; int32 var_3348_groups_0 = const()[name = string("op_3348_groups_0"), val = int32(1)]; tensor layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140281536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140576512))))[name = string("layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_11_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_11_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140576640)))]; tensor var_3348_cast_fp16 = conv(bias = layers_11_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_3348_dilations_0, groups = var_3348_groups_0, pad = var_3348_pad_0, pad_type = var_3348_pad_type_0, strides = var_3348_strides_0, weight = layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_111_cast_fp16)[name = string("op_3348_cast_fp16")]; string var_3354_pad_type_0 = const()[name = string("op_3354_pad_type_0"), val = string("valid")]; tensor var_3354_strides_0 = const()[name = string("op_3354_strides_0"), val = tensor([1, 1])]; tensor var_3354_pad_0 = const()[name = string("op_3354_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3354_dilations_0 = const()[name = string("op_3354_dilations_0"), val = tensor([1, 1])]; int32 var_3354_groups_0 = const()[name = string("op_3354_groups_0"), val = int32(1)]; tensor layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140587392))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140578240))))[name = string("layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_3354_cast_fp16 = conv(dilations = var_3354_dilations_0, groups = var_3354_groups_0, pad = var_3354_pad_0, pad_type = var_3354_pad_type_0, strides = var_3354_strides_0, weight = layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_111_cast_fp16)[name = string("op_3354_cast_fp16")]; tensor obj_211_cast_fp16 = add(x = var_3348_cast_fp16, y = var_3354_cast_fp16)[name = string("obj_211_cast_fp16")]; tensor inputs_69_cast_fp16 = add(x = inputs_67_cast_fp16, y = obj_211_cast_fp16)[name = string("inputs_69_cast_fp16")]; tensor out_69_axes_0 = const()[name = string("out_69_axes_0"), val = tensor([1])]; fp16 var_3369_to_fp16 = const()[name = string("op_3369_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_69_cast_fp16 = layer_norm(axes = out_69_axes_0, epsilon = var_3369_to_fp16, x = inputs_69_cast_fp16)[name = string("out_69_cast_fp16")]; tensor obj_213_gamma_0_to_fp16 = const()[name = string("obj_213_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140661184)))]; tensor obj_213_beta_0_to_fp16 = const()[name = string("obj_213_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140662784)))]; fp16 obj_213_epsilon_0_to_fp16 = const()[name = string("obj_213_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; tensor obj_213_cast_fp16 = batch_norm(beta = obj_213_beta_0_to_fp16, epsilon = obj_213_epsilon_0_to_fp16, gamma = obj_213_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_69_cast_fp16)[name = string("obj_213_cast_fp16")]; string var_3389_pad_type_0 = const()[name = string("op_3389_pad_type_0"), val = string("valid")]; tensor var_3389_strides_0 = const()[name = string("op_3389_strides_0"), val = tensor([1, 1])]; tensor var_3389_pad_0 = const()[name = string("op_3389_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3389_dilations_0 = const()[name = string("op_3389_dilations_0"), val = tensor([1, 1])]; int32 var_3389_groups_0 = const()[name = string("op_3389_groups_0"), val = int32(1)]; tensor layers_11_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140664384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140959360))))[name = string("layers_11_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_11_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_11_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140959488)))]; tensor var_3389_cast_fp16 = conv(bias = layers_11_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_3389_dilations_0, groups = var_3389_groups_0, pad = var_3389_pad_0, pad_type = var_3389_pad_type_0, strides = var_3389_strides_0, weight = layers_11_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_213_cast_fp16)[name = string("op_3389_cast_fp16")]; string var_3395_pad_type_0 = const()[name = string("op_3395_pad_type_0"), val = string("valid")]; tensor var_3395_strides_0 = const()[name = string("op_3395_strides_0"), val = tensor([1, 1])]; tensor var_3395_pad_0 = const()[name = string("op_3395_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3395_dilations_0 = const()[name = string("op_3395_dilations_0"), val = tensor([1, 1])]; int32 var_3395_groups_0 = const()[name = string("op_3395_groups_0"), val = int32(1)]; tensor layers_11_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140968064))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140961088))))[name = string("layers_11_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_3395_cast_fp16 = conv(dilations = var_3395_dilations_0, groups = var_3395_groups_0, pad = var_3395_pad_0, pad_type = var_3395_pad_type_0, strides = var_3395_strides_0, weight = layers_11_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_213_cast_fp16)[name = string("op_3395_cast_fp16")]; tensor query_cast_fp16 = add(x = var_3389_cast_fp16, y = var_3395_cast_fp16)[name = string("query_cast_fp16")]; tensor var_3398 = const()[name = string("op_3398"), val = tensor([1, 12, 64, -1])]; tensor mh_q_cast_fp16 = reshape(shape = var_3398, x = query_cast_fp16)[name = string("mh_q_cast_fp16")]; fp16 var_3400_to_fp16 = const()[name = string("op_3400_to_fp16"), val = fp16(0x1p-3)]; tensor var_3401_cast_fp16 = mul(x = mh_q_cast_fp16, y = var_3400_to_fp16)[name = string("op_3401_cast_fp16")]; tensor var_3402 = const()[name = string("op_3402"), val = tensor([1, 12, 64, -1])]; tensor var_3403_cast_fp16 = reshape(shape = var_3402, x = obj_215_cast_fp16)[name = string("op_3403_cast_fp16")]; bool mh_w_93_transpose_x_0 = const()[name = string("mh_w_93_transpose_x_0"), val = bool(true)]; bool mh_w_93_transpose_y_0 = const()[name = string("mh_w_93_transpose_y_0"), val = bool(false)]; tensor mh_w_93_cast_fp16 = matmul(transpose_x = mh_w_93_transpose_x_0, transpose_y = mh_w_93_transpose_y_0, x = var_3401_cast_fp16, y = var_3403_cast_fp16)[name = string("mh_w_93_cast_fp16")]; tensor mh_w_cast_fp16 = add(x = mh_w_93_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_cast_fp16")]; tensor obj_221_cast_fp16 = softmax(axis = var_3225, x = mh_w_cast_fp16)[name = string("obj_221_cast_fp16")]; tensor var_3412 = const()[name = string("op_3412"), val = tensor([1, 12, 64, -1])]; tensor var_3413_cast_fp16 = reshape(shape = var_3412, x = obj_217_cast_fp16)[name = string("op_3413_cast_fp16")]; bool attn_transpose_x_0 = const()[name = string("attn_transpose_x_0"), val = bool(false)]; bool attn_transpose_y_0 = const()[name = string("attn_transpose_y_0"), val = bool(true)]; tensor attn_cast_fp16 = matmul(transpose_x = attn_transpose_x_0, transpose_y = attn_transpose_y_0, x = var_3413_cast_fp16, y = obj_221_cast_fp16)[name = string("attn_cast_fp16")]; tensor var_3416 = const()[name = string("op_3416"), val = tensor([1, 768, 1, -1])]; tensor input_113_cast_fp16 = reshape(shape = var_3416, x = attn_cast_fp16)[name = string("input_113_cast_fp16")]; string var_3426_pad_type_0 = const()[name = string("op_3426_pad_type_0"), val = string("valid")]; tensor var_3426_strides_0 = const()[name = string("op_3426_strides_0"), val = tensor([1, 1])]; tensor var_3426_pad_0 = const()[name = string("op_3426_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3426_dilations_0 = const()[name = string("op_3426_dilations_0"), val = tensor([1, 1])]; int32 var_3426_groups_0 = const()[name = string("op_3426_groups_0"), val = int32(1)]; tensor layers_11_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141041856))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141336832))))[name = string("layers_11_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")]; tensor layers_11_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_11_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141336960)))]; tensor var_3426_cast_fp16 = conv(bias = layers_11_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_3426_dilations_0, groups = var_3426_groups_0, pad = var_3426_pad_0, pad_type = var_3426_pad_type_0, strides = var_3426_strides_0, weight = layers_11_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_113_cast_fp16)[name = string("op_3426_cast_fp16")]; string var_3432_pad_type_0 = const()[name = string("op_3432_pad_type_0"), val = string("valid")]; tensor var_3432_strides_0 = const()[name = string("op_3432_strides_0"), val = tensor([1, 1])]; tensor var_3432_pad_0 = const()[name = string("op_3432_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3432_dilations_0 = const()[name = string("op_3432_dilations_0"), val = tensor([1, 1])]; int32 var_3432_groups_0 = const()[name = string("op_3432_groups_0"), val = int32(1)]; tensor layers_11_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141352000))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141338560))))[name = string("layers_11_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")]; tensor var_3432_cast_fp16 = conv(dilations = var_3432_dilations_0, groups = var_3432_groups_0, pad = var_3432_pad_0, pad_type = var_3432_pad_type_0, strides = var_3432_strides_0, weight = layers_11_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_113_cast_fp16)[name = string("op_3432_cast_fp16")]; tensor obj_219_cast_fp16 = add(x = var_3426_cast_fp16, y = var_3432_cast_fp16)[name = string("obj_219_cast_fp16")]; tensor inputs_71_cast_fp16 = add(x = inputs_69_cast_fp16, y = obj_219_cast_fp16)[name = string("inputs_71_cast_fp16")]; tensor out_71_axes_0 = const()[name = string("out_71_axes_0"), val = tensor([1])]; fp16 var_3446_to_fp16 = const()[name = string("op_3446_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_71_cast_fp16 = layer_norm(axes = out_71_axes_0, epsilon = var_3446_to_fp16, x = inputs_71_cast_fp16)[name = string("out_71_cast_fp16")]; tensor input_115_gamma_0_to_fp16 = const()[name = string("input_115_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141425792)))]; tensor input_115_beta_0_to_fp16 = const()[name = string("input_115_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141427392)))]; fp16 input_115_epsilon_0_to_fp16 = const()[name = string("input_115_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; tensor input_115_cast_fp16 = batch_norm(beta = input_115_beta_0_to_fp16, epsilon = input_115_epsilon_0_to_fp16, gamma = input_115_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_71_cast_fp16)[name = string("input_115_cast_fp16")]; string var_3464_pad_type_0 = const()[name = string("op_3464_pad_type_0"), val = string("valid")]; tensor var_3464_strides_0 = const()[name = string("op_3464_strides_0"), val = tensor([1, 1])]; tensor var_3464_pad_0 = const()[name = string("op_3464_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3464_dilations_0 = const()[name = string("op_3464_dilations_0"), val = tensor([1, 1])]; int32 var_3464_groups_0 = const()[name = string("op_3464_groups_0"), val = int32(1)]; tensor layers_11_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141428992))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142608704))))[name = string("layers_11_fc1_inlier_module_weight_to_fp16_palettized")]; tensor layers_11_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_11_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142608832)))]; tensor var_3464_cast_fp16 = conv(bias = layers_11_fc1_inlier_module_bias_to_fp16, dilations = var_3464_dilations_0, groups = var_3464_groups_0, pad = var_3464_pad_0, pad_type = var_3464_pad_type_0, strides = var_3464_strides_0, weight = layers_11_fc1_inlier_module_weight_to_fp16_palettized, x = input_115_cast_fp16)[name = string("op_3464_cast_fp16")]; string var_3470_pad_type_0 = const()[name = string("op_3470_pad_type_0"), val = string("valid")]; tensor var_3470_strides_0 = const()[name = string("op_3470_strides_0"), val = tensor([1, 1])]; tensor var_3470_pad_0 = const()[name = string("op_3470_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3470_dilations_0 = const()[name = string("op_3470_dilations_0"), val = tensor([1, 1])]; int32 var_3470_groups_0 = const()[name = string("op_3470_groups_0"), val = int32(1)]; tensor layers_11_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142640768))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142615040))))[name = string("layers_11_fc1_outlier_module_weight_to_fp16_sparsified")]; tensor var_3470_cast_fp16 = conv(dilations = var_3470_dilations_0, groups = var_3470_groups_0, pad = var_3470_pad_0, pad_type = var_3470_pad_type_0, strides = var_3470_strides_0, weight = layers_11_fc1_outlier_module_weight_to_fp16_sparsified, x = input_115_cast_fp16)[name = string("op_3470_cast_fp16")]; tensor input_117_cast_fp16 = add(x = var_3464_cast_fp16, y = var_3470_cast_fp16)[name = string("input_117_cast_fp16")]; string input_mode_0 = const()[name = string("input_mode_0"), val = string("EXACT")]; tensor input_cast_fp16 = gelu(mode = input_mode_0, x = input_117_cast_fp16)[name = string("input_cast_fp16")]; string var_3481_pad_type_0 = const()[name = string("op_3481_pad_type_0"), val = string("valid")]; tensor var_3481_strides_0 = const()[name = string("op_3481_strides_0"), val = tensor([1, 1])]; tensor var_3481_pad_0 = const()[name = string("op_3481_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3481_dilations_0 = const()[name = string("op_3481_dilations_0"), val = tensor([1, 1])]; int32 var_3481_groups_0 = const()[name = string("op_3481_groups_0"), val = int32(1)]; tensor layers_11_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142935744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144115456))))[name = string("layers_11_fc2_inlier_module_weight_to_fp16_palettized")]; tensor layers_11_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_11_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144115584)))]; tensor var_3481_cast_fp16 = conv(bias = layers_11_fc2_inlier_module_bias_to_fp16, dilations = var_3481_dilations_0, groups = var_3481_groups_0, pad = var_3481_pad_0, pad_type = var_3481_pad_type_0, strides = var_3481_strides_0, weight = layers_11_fc2_inlier_module_weight_to_fp16_palettized, x = input_cast_fp16)[name = string("op_3481_cast_fp16")]; string var_3487_pad_type_0 = const()[name = string("op_3487_pad_type_0"), val = string("valid")]; tensor var_3487_strides_0 = const()[name = string("op_3487_strides_0"), val = tensor([1, 1])]; tensor var_3487_pad_0 = const()[name = string("op_3487_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3487_dilations_0 = const()[name = string("op_3487_dilations_0"), val = tensor([1, 1])]; int32 var_3487_groups_0 = const()[name = string("op_3487_groups_0"), val = int32(1)]; tensor layers_11_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144151104))), nonzero_data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144117184))))[name = string("layers_11_fc2_outlier_module_weight_to_fp16_sparsified")]; tensor var_3487_cast_fp16 = conv(dilations = var_3487_dilations_0, groups = var_3487_groups_0, pad = var_3487_pad_0, pad_type = var_3487_pad_type_0, strides = var_3487_strides_0, weight = layers_11_fc2_outlier_module_weight_to_fp16_sparsified, x = input_cast_fp16)[name = string("op_3487_cast_fp16")]; tensor hidden_states_25_cast_fp16 = add(x = var_3481_cast_fp16, y = var_3487_cast_fp16)[name = string("hidden_states_25_cast_fp16")]; tensor inputs_cast_fp16 = add(x = inputs_71_cast_fp16, y = hidden_states_25_cast_fp16)[name = string("inputs_cast_fp16")]; tensor out_axes_0 = const()[name = string("out_axes_0"), val = tensor([1])]; fp16 var_3507_to_fp16 = const()[name = string("op_3507_to_fp16"), val = fp16(0x1.5p-17)]; tensor out_cast_fp16 = layer_norm(axes = out_axes_0, epsilon = var_3507_to_fp16, x = inputs_cast_fp16)[name = string("out_cast_fp16")]; tensor hidden_states_gamma_0_to_fp16 = const()[name = string("hidden_states_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144446080)))]; tensor hidden_states_beta_0_to_fp16 = const()[name = string("hidden_states_beta_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144447680)))]; fp16 hidden_states_epsilon_0_to_fp16 = const()[name = string("hidden_states_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)]; tensor hidden_states_cast_fp16 = batch_norm(beta = hidden_states_beta_0_to_fp16, epsilon = hidden_states_epsilon_0_to_fp16, gamma = hidden_states_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_cast_fp16)[name = string("hidden_states_cast_fp16")]; tensor var_3518_axes_0 = const()[name = string("op_3518_axes_0"), val = tensor([2])]; tensor var_3518_cast_fp16 = squeeze(axes = var_3518_axes_0, x = hidden_states_cast_fp16)[name = string("op_3518_cast_fp16")]; tensor var_3521_perm_0 = const()[name = string("op_3521_perm_0"), val = tensor([0, 2, 1])]; tensor linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144449280)))]; tensor var_3521_cast_fp16 = transpose(perm = var_3521_perm_0, x = var_3518_cast_fp16)[name = string("transpose_0")]; tensor logits = linear(bias = linear_0_bias_0_to_fp16, weight = embed_tokens_weight_to_fp16, x = var_3521_cast_fp16)[name = string("linear_0_cast_fp16")]; int32 var_3525 = const()[name = string("op_3525"), val = int32(1)]; bool obj_225_interleave_0 = const()[name = string("obj_225_interleave_0"), val = bool(false)]; tensor key_cache_updates = concat(axis = var_3525, interleave = obj_225_interleave_0, values = (current_key_1_cast_fp16, current_key_3_cast_fp16, current_key_5_cast_fp16, current_key_7_cast_fp16, current_key_9_cast_fp16, current_key_11_cast_fp16, current_key_13_cast_fp16, current_key_15_cast_fp16, current_key_17_cast_fp16, current_key_19_cast_fp16, current_key_21_cast_fp16, current_key_cast_fp16))[name = string("obj_225_cast_fp16")]; int32 var_3528 = const()[name = string("op_3528"), val = int32(1)]; bool obj_227_interleave_0 = const()[name = string("obj_227_interleave_0"), val = bool(false)]; tensor value_cache_updates = concat(axis = var_3528, interleave = obj_227_interleave_0, values = (current_value_1_cast_fp16, current_value_3_cast_fp16, current_value_5_cast_fp16, current_value_7_cast_fp16, current_value_9_cast_fp16, current_value_11_cast_fp16, current_value_13_cast_fp16, current_value_15_cast_fp16, current_value_17_cast_fp16, current_value_19_cast_fp16, current_value_21_cast_fp16, current_value_cast_fp16))[name = string("obj_227_cast_fp16")]; tensor var_3539_begin_0 = const()[name = string("op_3539_begin_0"), val = tensor([0, 6, 0, 0])]; tensor var_3539_end_0 = const()[name = string("op_3539_end_0"), val = tensor([1, 7, 1, 1536])]; tensor var_3539_end_mask_0 = const()[name = string("op_3539_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3539_cast_fp16 = slice_by_index(begin = var_3539_begin_0, end = var_3539_end_0, end_mask = var_3539_end_mask_0, x = obj_131_cast_fp16)[name = string("op_3539_cast_fp16")]; tensor var_3542_begin_0 = const()[name = string("op_3542_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3542_end_0 = const()[name = string("op_3542_end_0"), val = tensor([1, 1, 1, 1536])]; tensor var_3542_end_mask_0 = const()[name = string("op_3542_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3542_squeeze_mask_0 = const()[name = string("op_3542_squeeze_mask_0"), val = tensor([false, false, true, false])]; tensor var_3542_cast_fp16 = slice_by_index(begin = var_3542_begin_0, end = var_3542_end_0, end_mask = var_3542_end_mask_0, squeeze_mask = var_3542_squeeze_mask_0, x = var_3539_cast_fp16)[name = string("op_3542_cast_fp16")]; tensor var_3557_begin_0 = const()[name = string("op_3557_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3557_end_0 = const()[name = string("op_3557_end_0"), val = tensor([1, 1, 1, 1536])]; tensor var_3557_end_mask_0 = const()[name = string("op_3557_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3557_cast_fp16 = slice_by_index(begin = var_3557_begin_0, end = var_3557_end_0, end_mask = var_3557_end_mask_0, x = obj_149_cast_fp16)[name = string("op_3557_cast_fp16")]; tensor var_3560_begin_0 = const()[name = string("op_3560_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3560_end_0 = const()[name = string("op_3560_end_0"), val = tensor([1, 1, 1, 1536])]; tensor var_3560_end_mask_0 = const()[name = string("op_3560_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3560_squeeze_mask_0 = const()[name = string("op_3560_squeeze_mask_0"), val = tensor([false, false, true, false])]; tensor var_3560_cast_fp16 = slice_by_index(begin = var_3560_begin_0, end = var_3560_end_0, end_mask = var_3560_end_mask_0, squeeze_mask = var_3560_squeeze_mask_0, x = var_3557_cast_fp16)[name = string("op_3560_cast_fp16")]; tensor var_3575_begin_0 = const()[name = string("op_3575_begin_0"), val = tensor([0, 3, 0, 0])]; tensor var_3575_end_0 = const()[name = string("op_3575_end_0"), val = tensor([1, 4, 1, 1536])]; tensor var_3575_end_mask_0 = const()[name = string("op_3575_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3575_cast_fp16 = slice_by_index(begin = var_3575_begin_0, end = var_3575_end_0, end_mask = var_3575_end_mask_0, x = obj_149_cast_fp16)[name = string("op_3575_cast_fp16")]; tensor var_3578_begin_0 = const()[name = string("op_3578_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3578_end_0 = const()[name = string("op_3578_end_0"), val = tensor([1, 1, 1, 1536])]; tensor var_3578_end_mask_0 = const()[name = string("op_3578_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3578_squeeze_mask_0 = const()[name = string("op_3578_squeeze_mask_0"), val = tensor([false, false, true, false])]; tensor var_3578_cast_fp16 = slice_by_index(begin = var_3578_begin_0, end = var_3578_end_0, end_mask = var_3578_end_mask_0, squeeze_mask = var_3578_squeeze_mask_0, x = var_3575_cast_fp16)[name = string("op_3578_cast_fp16")]; tensor var_3593_begin_0 = const()[name = string("op_3593_begin_0"), val = tensor([0, 8, 0, 0])]; tensor var_3593_end_0 = const()[name = string("op_3593_end_0"), val = tensor([1, 9, 1, 1536])]; tensor var_3593_end_mask_0 = const()[name = string("op_3593_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3593_cast_fp16 = slice_by_index(begin = var_3593_begin_0, end = var_3593_end_0, end_mask = var_3593_end_mask_0, x = obj_149_cast_fp16)[name = string("op_3593_cast_fp16")]; tensor var_3596_begin_0 = const()[name = string("op_3596_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3596_end_0 = const()[name = string("op_3596_end_0"), val = tensor([1, 1, 1, 1536])]; tensor var_3596_end_mask_0 = const()[name = string("op_3596_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3596_squeeze_mask_0 = const()[name = string("op_3596_squeeze_mask_0"), val = tensor([false, false, true, false])]; tensor var_3596_cast_fp16 = slice_by_index(begin = var_3596_begin_0, end = var_3596_end_0, end_mask = var_3596_end_mask_0, squeeze_mask = var_3596_squeeze_mask_0, x = var_3593_cast_fp16)[name = string("op_3596_cast_fp16")]; tensor var_3611_begin_0 = const()[name = string("op_3611_begin_0"), val = tensor([0, 2, 0, 0])]; tensor var_3611_end_0 = const()[name = string("op_3611_end_0"), val = tensor([1, 3, 1, 1536])]; tensor var_3611_end_mask_0 = const()[name = string("op_3611_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3611_cast_fp16 = slice_by_index(begin = var_3611_begin_0, end = var_3611_end_0, end_mask = var_3611_end_mask_0, x = obj_167_cast_fp16)[name = string("op_3611_cast_fp16")]; tensor var_3614_begin_0 = const()[name = string("op_3614_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3614_end_0 = const()[name = string("op_3614_end_0"), val = tensor([1, 1, 1, 1536])]; tensor var_3614_end_mask_0 = const()[name = string("op_3614_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3614_squeeze_mask_0 = const()[name = string("op_3614_squeeze_mask_0"), val = tensor([false, false, true, false])]; tensor var_3614_cast_fp16 = slice_by_index(begin = var_3614_begin_0, end = var_3614_end_0, end_mask = var_3614_end_mask_0, squeeze_mask = var_3614_squeeze_mask_0, x = var_3611_cast_fp16)[name = string("op_3614_cast_fp16")]; tensor var_3629_begin_0 = const()[name = string("op_3629_begin_0"), val = tensor([0, 5, 0, 0])]; tensor var_3629_end_0 = const()[name = string("op_3629_end_0"), val = tensor([1, 6, 1, 1536])]; tensor var_3629_end_mask_0 = const()[name = string("op_3629_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3629_cast_fp16 = slice_by_index(begin = var_3629_begin_0, end = var_3629_end_0, end_mask = var_3629_end_mask_0, x = obj_167_cast_fp16)[name = string("op_3629_cast_fp16")]; tensor var_3632_begin_0 = const()[name = string("op_3632_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3632_end_0 = const()[name = string("op_3632_end_0"), val = tensor([1, 1, 1, 1536])]; tensor var_3632_end_mask_0 = const()[name = string("op_3632_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3632_squeeze_mask_0 = const()[name = string("op_3632_squeeze_mask_0"), val = tensor([false, false, true, false])]; tensor var_3632_cast_fp16 = slice_by_index(begin = var_3632_begin_0, end = var_3632_end_0, end_mask = var_3632_end_mask_0, squeeze_mask = var_3632_squeeze_mask_0, x = var_3629_cast_fp16)[name = string("op_3632_cast_fp16")]; tensor var_3647_begin_0 = const()[name = string("op_3647_begin_0"), val = tensor([0, 7, 0, 0])]; tensor var_3647_end_0 = const()[name = string("op_3647_end_0"), val = tensor([1, 8, 1, 1536])]; tensor var_3647_end_mask_0 = const()[name = string("op_3647_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3647_cast_fp16 = slice_by_index(begin = var_3647_begin_0, end = var_3647_end_0, end_mask = var_3647_end_mask_0, x = obj_167_cast_fp16)[name = string("op_3647_cast_fp16")]; tensor var_3650_begin_0 = const()[name = string("op_3650_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3650_end_0 = const()[name = string("op_3650_end_0"), val = tensor([1, 1, 1, 1536])]; tensor var_3650_end_mask_0 = const()[name = string("op_3650_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3650_squeeze_mask_0 = const()[name = string("op_3650_squeeze_mask_0"), val = tensor([false, false, true, false])]; tensor var_3650_cast_fp16 = slice_by_index(begin = var_3650_begin_0, end = var_3650_end_0, end_mask = var_3650_end_mask_0, squeeze_mask = var_3650_squeeze_mask_0, x = var_3647_cast_fp16)[name = string("op_3650_cast_fp16")]; tensor var_3665_begin_0 = const()[name = string("op_3665_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3665_end_0 = const()[name = string("op_3665_end_0"), val = tensor([1, 1, 1, 1536])]; tensor var_3665_end_mask_0 = const()[name = string("op_3665_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3665_cast_fp16 = slice_by_index(begin = var_3665_begin_0, end = var_3665_end_0, end_mask = var_3665_end_mask_0, x = obj_185_cast_fp16)[name = string("op_3665_cast_fp16")]; tensor var_3668_begin_0 = const()[name = string("op_3668_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3668_end_0 = const()[name = string("op_3668_end_0"), val = tensor([1, 1, 1, 1536])]; tensor var_3668_end_mask_0 = const()[name = string("op_3668_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3668_squeeze_mask_0 = const()[name = string("op_3668_squeeze_mask_0"), val = tensor([false, false, true, false])]; tensor var_3668_cast_fp16 = slice_by_index(begin = var_3668_begin_0, end = var_3668_end_0, end_mask = var_3668_end_mask_0, squeeze_mask = var_3668_squeeze_mask_0, x = var_3665_cast_fp16)[name = string("op_3668_cast_fp16")]; tensor var_3683_begin_0 = const()[name = string("op_3683_begin_0"), val = tensor([0, 4, 0, 0])]; tensor var_3683_end_0 = const()[name = string("op_3683_end_0"), val = tensor([1, 5, 1, 1536])]; tensor var_3683_end_mask_0 = const()[name = string("op_3683_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3683_cast_fp16 = slice_by_index(begin = var_3683_begin_0, end = var_3683_end_0, end_mask = var_3683_end_mask_0, x = obj_185_cast_fp16)[name = string("op_3683_cast_fp16")]; tensor var_3686_begin_0 = const()[name = string("op_3686_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3686_end_0 = const()[name = string("op_3686_end_0"), val = tensor([1, 1, 1, 1536])]; tensor var_3686_end_mask_0 = const()[name = string("op_3686_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3686_squeeze_mask_0 = const()[name = string("op_3686_squeeze_mask_0"), val = tensor([false, false, true, false])]; tensor var_3686_cast_fp16 = slice_by_index(begin = var_3686_begin_0, end = var_3686_end_0, end_mask = var_3686_end_mask_0, squeeze_mask = var_3686_squeeze_mask_0, x = var_3683_cast_fp16)[name = string("op_3686_cast_fp16")]; tensor var_3701_begin_0 = const()[name = string("op_3701_begin_0"), val = tensor([0, 8, 0, 0])]; tensor var_3701_end_0 = const()[name = string("op_3701_end_0"), val = tensor([1, 9, 1, 1536])]; tensor var_3701_end_mask_0 = const()[name = string("op_3701_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3701_cast_fp16 = slice_by_index(begin = var_3701_begin_0, end = var_3701_end_0, end_mask = var_3701_end_mask_0, x = obj_185_cast_fp16)[name = string("op_3701_cast_fp16")]; tensor var_3704_begin_0 = const()[name = string("op_3704_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3704_end_0 = const()[name = string("op_3704_end_0"), val = tensor([1, 1, 1, 1536])]; tensor var_3704_end_mask_0 = const()[name = string("op_3704_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3704_squeeze_mask_0 = const()[name = string("op_3704_squeeze_mask_0"), val = tensor([false, false, true, false])]; tensor var_3704_cast_fp16 = slice_by_index(begin = var_3704_begin_0, end = var_3704_end_0, end_mask = var_3704_end_mask_0, squeeze_mask = var_3704_squeeze_mask_0, x = var_3701_cast_fp16)[name = string("op_3704_cast_fp16")]; tensor var_3719_begin_0 = const()[name = string("op_3719_begin_0"), val = tensor([0, 10, 0, 0])]; tensor var_3719_end_0 = const()[name = string("op_3719_end_0"), val = tensor([1, 11, 1, 1536])]; tensor var_3719_end_mask_0 = const()[name = string("op_3719_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3719_cast_fp16 = slice_by_index(begin = var_3719_begin_0, end = var_3719_end_0, end_mask = var_3719_end_mask_0, x = obj_185_cast_fp16)[name = string("op_3719_cast_fp16")]; tensor var_3722_begin_0 = const()[name = string("op_3722_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3722_end_0 = const()[name = string("op_3722_end_0"), val = tensor([1, 1, 1, 1536])]; tensor var_3722_end_mask_0 = const()[name = string("op_3722_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3722_squeeze_mask_0 = const()[name = string("op_3722_squeeze_mask_0"), val = tensor([false, false, true, false])]; tensor var_3722_cast_fp16 = slice_by_index(begin = var_3722_begin_0, end = var_3722_end_0, end_mask = var_3722_end_mask_0, squeeze_mask = var_3722_squeeze_mask_0, x = var_3719_cast_fp16)[name = string("op_3722_cast_fp16")]; tensor var_3737_begin_0 = const()[name = string("op_3737_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3737_end_0 = const()[name = string("op_3737_end_0"), val = tensor([1, 1, 1, 1536])]; tensor var_3737_end_mask_0 = const()[name = string("op_3737_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3737_cast_fp16 = slice_by_index(begin = var_3737_begin_0, end = var_3737_end_0, end_mask = var_3737_end_mask_0, x = obj_203_cast_fp16)[name = string("op_3737_cast_fp16")]; tensor var_3740_begin_0 = const()[name = string("op_3740_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3740_end_0 = const()[name = string("op_3740_end_0"), val = tensor([1, 1, 1, 1536])]; tensor var_3740_end_mask_0 = const()[name = string("op_3740_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3740_squeeze_mask_0 = const()[name = string("op_3740_squeeze_mask_0"), val = tensor([false, false, true, false])]; tensor var_3740_cast_fp16 = slice_by_index(begin = var_3740_begin_0, end = var_3740_end_0, end_mask = var_3740_end_mask_0, squeeze_mask = var_3740_squeeze_mask_0, x = var_3737_cast_fp16)[name = string("op_3740_cast_fp16")]; tensor var_3755_begin_0 = const()[name = string("op_3755_begin_0"), val = tensor([0, 1, 0, 0])]; tensor var_3755_end_0 = const()[name = string("op_3755_end_0"), val = tensor([1, 2, 1, 1536])]; tensor var_3755_end_mask_0 = const()[name = string("op_3755_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3755_cast_fp16 = slice_by_index(begin = var_3755_begin_0, end = var_3755_end_0, end_mask = var_3755_end_mask_0, x = obj_203_cast_fp16)[name = string("op_3755_cast_fp16")]; tensor var_3758_begin_0 = const()[name = string("op_3758_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3758_end_0 = const()[name = string("op_3758_end_0"), val = tensor([1, 1, 1, 1536])]; tensor var_3758_end_mask_0 = const()[name = string("op_3758_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3758_squeeze_mask_0 = const()[name = string("op_3758_squeeze_mask_0"), val = tensor([false, false, true, false])]; tensor var_3758_cast_fp16 = slice_by_index(begin = var_3758_begin_0, end = var_3758_end_0, end_mask = var_3758_end_mask_0, squeeze_mask = var_3758_squeeze_mask_0, x = var_3755_cast_fp16)[name = string("op_3758_cast_fp16")]; tensor var_3773_begin_0 = const()[name = string("op_3773_begin_0"), val = tensor([0, 2, 0, 0])]; tensor var_3773_end_0 = const()[name = string("op_3773_end_0"), val = tensor([1, 3, 1, 1536])]; tensor var_3773_end_mask_0 = const()[name = string("op_3773_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3773_cast_fp16 = slice_by_index(begin = var_3773_begin_0, end = var_3773_end_0, end_mask = var_3773_end_mask_0, x = obj_203_cast_fp16)[name = string("op_3773_cast_fp16")]; tensor var_3776_begin_0 = const()[name = string("op_3776_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3776_end_0 = const()[name = string("op_3776_end_0"), val = tensor([1, 1, 1, 1536])]; tensor var_3776_end_mask_0 = const()[name = string("op_3776_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3776_squeeze_mask_0 = const()[name = string("op_3776_squeeze_mask_0"), val = tensor([false, false, true, false])]; tensor var_3776_cast_fp16 = slice_by_index(begin = var_3776_begin_0, end = var_3776_end_0, end_mask = var_3776_end_mask_0, squeeze_mask = var_3776_squeeze_mask_0, x = var_3773_cast_fp16)[name = string("op_3776_cast_fp16")]; tensor var_3791_begin_0 = const()[name = string("op_3791_begin_0"), val = tensor([0, 3, 0, 0])]; tensor var_3791_end_0 = const()[name = string("op_3791_end_0"), val = tensor([1, 4, 1, 1536])]; tensor var_3791_end_mask_0 = const()[name = string("op_3791_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3791_cast_fp16 = slice_by_index(begin = var_3791_begin_0, end = var_3791_end_0, end_mask = var_3791_end_mask_0, x = obj_203_cast_fp16)[name = string("op_3791_cast_fp16")]; tensor var_3794_begin_0 = const()[name = string("op_3794_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3794_end_0 = const()[name = string("op_3794_end_0"), val = tensor([1, 1, 1, 1536])]; tensor var_3794_end_mask_0 = const()[name = string("op_3794_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3794_squeeze_mask_0 = const()[name = string("op_3794_squeeze_mask_0"), val = tensor([false, false, true, false])]; tensor var_3794_cast_fp16 = slice_by_index(begin = var_3794_begin_0, end = var_3794_end_0, end_mask = var_3794_end_mask_0, squeeze_mask = var_3794_squeeze_mask_0, x = var_3791_cast_fp16)[name = string("op_3794_cast_fp16")]; tensor var_3809_begin_0 = const()[name = string("op_3809_begin_0"), val = tensor([0, 6, 0, 0])]; tensor var_3809_end_0 = const()[name = string("op_3809_end_0"), val = tensor([1, 7, 1, 1536])]; tensor var_3809_end_mask_0 = const()[name = string("op_3809_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3809_cast_fp16 = slice_by_index(begin = var_3809_begin_0, end = var_3809_end_0, end_mask = var_3809_end_mask_0, x = obj_203_cast_fp16)[name = string("op_3809_cast_fp16")]; tensor var_3812_begin_0 = const()[name = string("op_3812_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3812_end_0 = const()[name = string("op_3812_end_0"), val = tensor([1, 1, 1, 1536])]; tensor var_3812_end_mask_0 = const()[name = string("op_3812_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3812_squeeze_mask_0 = const()[name = string("op_3812_squeeze_mask_0"), val = tensor([false, false, true, false])]; tensor var_3812_cast_fp16 = slice_by_index(begin = var_3812_begin_0, end = var_3812_end_0, end_mask = var_3812_end_mask_0, squeeze_mask = var_3812_squeeze_mask_0, x = var_3809_cast_fp16)[name = string("op_3812_cast_fp16")]; tensor var_3827_begin_0 = const()[name = string("op_3827_begin_0"), val = tensor([0, 11, 0, 0])]; tensor var_3827_end_0 = const()[name = string("op_3827_end_0"), val = tensor([1, 12, 1, 1536])]; tensor var_3827_end_mask_0 = const()[name = string("op_3827_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3827_cast_fp16 = slice_by_index(begin = var_3827_begin_0, end = var_3827_end_0, end_mask = var_3827_end_mask_0, x = obj_203_cast_fp16)[name = string("op_3827_cast_fp16")]; tensor var_3830_begin_0 = const()[name = string("op_3830_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3830_end_0 = const()[name = string("op_3830_end_0"), val = tensor([1, 1, 1, 1536])]; tensor var_3830_end_mask_0 = const()[name = string("op_3830_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3830_squeeze_mask_0 = const()[name = string("op_3830_squeeze_mask_0"), val = tensor([false, false, true, false])]; tensor var_3830_cast_fp16 = slice_by_index(begin = var_3830_begin_0, end = var_3830_end_0, end_mask = var_3830_end_mask_0, squeeze_mask = var_3830_squeeze_mask_0, x = var_3827_cast_fp16)[name = string("op_3830_cast_fp16")]; tensor var_3845_begin_0 = const()[name = string("op_3845_begin_0"), val = tensor([0, 2, 0, 0])]; tensor var_3845_end_0 = const()[name = string("op_3845_end_0"), val = tensor([1, 3, 1, 1536])]; tensor var_3845_end_mask_0 = const()[name = string("op_3845_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3845_cast_fp16 = slice_by_index(begin = var_3845_begin_0, end = var_3845_end_0, end_mask = var_3845_end_mask_0, x = obj_221_cast_fp16)[name = string("op_3845_cast_fp16")]; tensor var_3848_begin_0 = const()[name = string("op_3848_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3848_end_0 = const()[name = string("op_3848_end_0"), val = tensor([1, 1, 1, 1536])]; tensor var_3848_end_mask_0 = const()[name = string("op_3848_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3848_squeeze_mask_0 = const()[name = string("op_3848_squeeze_mask_0"), val = tensor([false, false, true, false])]; tensor var_3848_cast_fp16 = slice_by_index(begin = var_3848_begin_0, end = var_3848_end_0, end_mask = var_3848_end_mask_0, squeeze_mask = var_3848_squeeze_mask_0, x = var_3845_cast_fp16)[name = string("op_3848_cast_fp16")]; tensor var_3863_begin_0 = const()[name = string("op_3863_begin_0"), val = tensor([0, 4, 0, 0])]; tensor var_3863_end_0 = const()[name = string("op_3863_end_0"), val = tensor([1, 5, 1, 1536])]; tensor var_3863_end_mask_0 = const()[name = string("op_3863_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3863_cast_fp16 = slice_by_index(begin = var_3863_begin_0, end = var_3863_end_0, end_mask = var_3863_end_mask_0, x = obj_221_cast_fp16)[name = string("op_3863_cast_fp16")]; tensor var_3866_begin_0 = const()[name = string("op_3866_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3866_end_0 = const()[name = string("op_3866_end_0"), val = tensor([1, 1, 1, 1536])]; tensor var_3866_end_mask_0 = const()[name = string("op_3866_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3866_squeeze_mask_0 = const()[name = string("op_3866_squeeze_mask_0"), val = tensor([false, false, true, false])]; tensor var_3866_cast_fp16 = slice_by_index(begin = var_3866_begin_0, end = var_3866_end_0, end_mask = var_3866_end_mask_0, squeeze_mask = var_3866_squeeze_mask_0, x = var_3863_cast_fp16)[name = string("op_3866_cast_fp16")]; int32 var_3873 = const()[name = string("op_3873"), val = int32(1)]; bool var_3874_interleave_0 = const()[name = string("op_3874_interleave_0"), val = bool(false)]; tensor var_3874_cast_fp16 = concat(axis = var_3873, interleave = var_3874_interleave_0, values = (var_3542_cast_fp16, var_3560_cast_fp16, var_3578_cast_fp16, var_3596_cast_fp16, var_3614_cast_fp16, var_3632_cast_fp16, var_3650_cast_fp16, var_3668_cast_fp16, var_3686_cast_fp16, var_3704_cast_fp16, var_3722_cast_fp16, var_3740_cast_fp16, var_3758_cast_fp16, var_3776_cast_fp16, var_3794_cast_fp16, var_3812_cast_fp16, var_3830_cast_fp16, var_3848_cast_fp16, var_3866_cast_fp16))[name = string("op_3874_cast_fp16")]; bool var_3877 = const()[name = string("op_3877"), val = bool(false)]; tensor obj_axes_0 = const()[name = string("obj_axes_0"), val = tensor([1])]; tensor alignment_heads_weights = reduce_mean(axes = obj_axes_0, keep_dims = var_3877, x = var_3874_cast_fp16)[name = string("obj_cast_fp16")]; } -> (logits, key_cache_updates, value_cache_updates, alignment_heads_weights); }