#loc = loc(unknown) #loc308 = loc("Cast_0") #loc309 = loc("Transpose_9") #loc310 = loc("Transpose_10") #loc311 = loc("Transpose_11") #loc312 = loc("Transpose_12") module attributes { llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-linux-gnu", "onnx-mlir.symbol-postfix" = "onnxmodel.onnx.mlir", vaimlconf.device = "stx", vaimlconf.device_models = "${vaimlconf.install_dir}/data/deviceModels", vaimlconf.install_dir = "/usr/local/lib/python3.10/dist-packages/flexml/flexml_extras", vaimlconf.library_metadata = ["${vaimlconf.install_dir}/data/libraryMetadata/L1", "${vaimlconf.install_dir}/data/libraryMetadata/L2", "${vaimlconf.install_dir}/../../vitis_mllib/L1/metadata", "${vaimlconf.install_dir}/../../vitis_mllib/L2/metadata", "${vaimlconf.install_dir}/share/microkernel-tiling/tiling-recipe-specs"], vaimlconf.single_core_compiler = "chess"} { func.func private @forward_outlined_part_0(%arg0: tensor<1x180x320x4xbf16> loc("Cast_0"), %arg1: tensor<1x16x90x160xbf16> loc("Transpose_9"), %arg2: tensor<1x20x45x80xbf16> loc("Transpose_10"), %arg3: tensor<1x40x23x40xbf16> loc("Transpose_11"), %arg4: tensor<1x64x12x20xbf16> loc("Transpose_12")) -> (tensor<1x16x90x160xbf16>, tensor<1x20x45x80xbf16>, tensor<1x40x23x40xbf16>, tensor<1x64x12x20xbf16>, tensor<1x3x180x320xbf16>, tensor<1x1x180x320xbf16>) attributes {aie_partition = 0 : i32, kernel} { %0 = xten_nn.subgraph (%arg5 = %arg0: tensor<1x180x320x4xbf16>) attributes { LayerName = "Div_2", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "386", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 180, 320, 4]> : vector<4xindex> } ], OutputName = "Div_2", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "387", l3_extend_end = dense<[0, 
4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 180, 320, 4]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x180x320x4xbf16>) attributes { LayerName = "Div_2", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "386", Port = "data_io.ifm", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 180, 320, 4]> : vector<4xindex> } ], OutputName = "Div_2", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "387", Port = "data_io.ofm", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 180, 320, 4]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.906250e-03 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.906250e-03> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_2", OutputName = "Div_2", shift = 0 : i8} : (tensor<1x180x320x4xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x180x320x4xbf16> loc(#loc1) xten_nn.output %463 : tensor<1x180x320x4xbf16> loc(#loc1) } -> tensor<1x180x320x4xbf16> loc(#loc1) xten_nn.output %461 : tensor<1x180x320x4xbf16> loc(#loc1) } -> tensor<1x180x320x4xbf16> loc(#loc1) %1 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_443/biases"} -> tensor<4xbf16> loc(#loc) %2 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_443/weights"} -> tensor<4x16x1x1xbf16> loc(#loc) %3 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_441/biases"} -> tensor<16xbf16> loc(#loc) %4 = xten_nn.load_external_const {file = 
"constants.h5", key = "Conv_441/weights"} -> tensor<16x16x3x3xbf16> loc(#loc) %5 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_439/biases"} -> tensor<16xbf16> loc(#loc) %6 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_439/weights"} -> tensor<16x35x3x3xbf16> loc(#loc) %7 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_428/biases"} -> tensor<16xbf16> loc(#loc) %8 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_428/weights"} -> tensor<16x32x3x3xbf16> loc(#loc) %9 = xten_nn.load_external_const {file = "constants.h5", key = "Sub_431/Constant_0_0"} -> tensor<1x16x90x160xbf16> loc(#loc2) %10 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_423/biases"} -> tensor<32xbf16> loc(#loc) %11 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_423/weights"} -> tensor<32x32x3x3xbf16> loc(#loc) %12 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_419/biases"} -> tensor<32xbf16> loc(#loc) %13 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_419/weights"} -> tensor<32x59x3x3xbf16> loc(#loc) %14 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_408/biases"} -> tensor<20xbf16> loc(#loc) %15 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_408/weights"} -> tensor<20x40x3x3xbf16> loc(#loc) %16 = xten_nn.load_external_const {file = "constants.h5", key = "Sub_411/Constant_0_0"} -> tensor<1x20x45x80xbf16> loc(#loc3) %17 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_403/biases"} -> tensor<40xbf16> loc(#loc) %18 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_403/weights"} -> tensor<40x40x3x3xbf16> loc(#loc) %19 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_399/biases"} -> tensor<40xbf16> loc(#loc) %20 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_399/weights"} -> tensor<40x107x3x3xbf16> loc(#loc) %21 = xten_nn.load_external_const {file = 
"constants.h5", key = "Conv_382/biases"} -> tensor<40xbf16> loc(#loc) %22 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_382/weights"} -> tensor<40x80x3x3xbf16> loc(#loc) %23 = xten_nn.load_external_const {file = "constants.h5", key = "Sub_385/Constant_0_0"} -> tensor<1x40x23x40xbf16> loc(#loc4) %24 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_377/biases"} -> tensor<80xbf16> loc(#loc) %25 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_377/weights"} -> tensor<80x80x3x3xbf16> loc(#loc) %26 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_373/biases"} -> tensor<80xbf16> loc(#loc) %27 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_373/weights"} -> tensor<80x171x3x3xbf16> loc(#loc) %28 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_356/biases"} -> tensor<64xbf16> loc(#loc) %29 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_356/weights"} -> tensor<64x128x3x3xbf16> loc(#loc) %30 = xten_nn.load_external_const {file = "constants.h5", key = "Sub_359/Constant_0_0"} -> tensor<1x64x12x20xbf16> loc(#loc5) %31 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_351/biases"} -> tensor<128xbf16> loc(#loc) %32 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_351/weights"} -> tensor<128x128x3x3xbf16> loc(#loc) %33 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_340/biases"} -> tensor<128xbf16> loc(#loc) %34 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_340/weights"} -> tensor<128x960x1x1xbf16> loc(#loc) %35 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_343/biases"} -> tensor<128xbf16> loc(#loc) %36 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_343/weights"} -> tensor<128x960x1x1xbf16> loc(#loc) %37 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_331/biases"} -> tensor<960xbf16> loc(#loc) %38 = xten_nn.load_external_const {file = 
"constants.h5", key = "Conv_331/weights"} -> tensor<960x160x1x1xbf16> loc(#loc) %39 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_329/biases"} -> tensor<160xbf16> loc(#loc) %40 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_329/weights"} -> tensor<160x960x1x1xbf16> loc(#loc) %41 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_320/biases"} -> tensor<960xbf16> loc(#loc) %42 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_320/weights"} -> tensor<960x240x1x1xbf16> loc(#loc) %43 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_318/biases"} -> tensor<240xbf16> loc(#loc) %44 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_318/weights"} -> tensor<240x960x1x1xbf16> loc(#loc) %45 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_308/biases"} -> tensor<960xbf16> loc(#loc) %46 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_308/weights"} -> tensor<960x1x9x9xbf16> loc(#loc) %47 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_299/biases"} -> tensor<960xbf16> loc(#loc) %48 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_299/weights"} -> tensor<960x160x1x1xbf16> loc(#loc) %49 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_297/biases"} -> tensor<160xbf16> loc(#loc) %50 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_297/weights"} -> tensor<160x960x1x1xbf16> loc(#loc) %51 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_288/biases"} -> tensor<960xbf16> loc(#loc) %52 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_288/weights"} -> tensor<960x240x1x1xbf16> loc(#loc) %53 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_286/biases"} -> tensor<240xbf16> loc(#loc) %54 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_286/weights"} -> tensor<240x960x1x1xbf16> loc(#loc) %55 = xten_nn.load_external_const {file = 
"constants.h5", key = "Conv_276/biases"} -> tensor<960xbf16> loc(#loc) %56 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_276/weights"} -> tensor<960x1x9x9xbf16> loc(#loc) %57 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_267/biases"} -> tensor<960xbf16> loc(#loc) %58 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_267/weights"} -> tensor<960x160x1x1xbf16> loc(#loc) %59 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_266/biases"} -> tensor<160xbf16> loc(#loc) %60 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_266/weights"} -> tensor<160x672x1x1xbf16> loc(#loc) %61 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_257/biases"} -> tensor<672xbf16> loc(#loc) %62 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_257/weights"} -> tensor<672x168x1x1xbf16> loc(#loc) %63 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_255/biases"} -> tensor<168xbf16> loc(#loc) %64 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_255/weights"} -> tensor<168x672x1x1xbf16> loc(#loc) %65 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_245/biases"} -> tensor<672xbf16> loc(#loc) %66 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_245/weights"} -> tensor<672x1x9x9xbf16> loc(#loc) %67 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_236/biases"} -> tensor<672xbf16> loc(#loc) %68 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_236/weights"} -> tensor<672x112x1x1xbf16> loc(#loc) %69 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_234/biases"} -> tensor<112xbf16> loc(#loc) %70 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_234/weights"} -> tensor<112x672x1x1xbf16> loc(#loc) %71 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_225/biases"} -> tensor<672xbf16> loc(#loc) %72 = xten_nn.load_external_const {file = 
"constants.h5", key = "Conv_225/weights"} -> tensor<672x168x1x1xbf16> loc(#loc) %73 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_223/biases"} -> tensor<168xbf16> loc(#loc) %74 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_223/weights"} -> tensor<168x672x1x1xbf16> loc(#loc) %75 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_213/biases"} -> tensor<672xbf16> loc(#loc) %76 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_213/weights"} -> tensor<672x1x3x3xbf16> loc(#loc) %77 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_204/biases"} -> tensor<672xbf16> loc(#loc) %78 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_204/weights"} -> tensor<672x112x1x1xbf16> loc(#loc) %79 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_203/biases"} -> tensor<112xbf16> loc(#loc) %80 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_203/weights"} -> tensor<112x480x1x1xbf16> loc(#loc) %81 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_194/biases"} -> tensor<480xbf16> loc(#loc) %82 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_194/weights"} -> tensor<480x120x1x1xbf16> loc(#loc) %83 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_192/biases"} -> tensor<120xbf16> loc(#loc) %84 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_192/weights"} -> tensor<120x480x1x1xbf16> loc(#loc) %85 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_182/biases"} -> tensor<480xbf16> loc(#loc) %86 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_182/weights"} -> tensor<480x1x3x3xbf16> loc(#loc) %87 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_173/biases"} -> tensor<480xbf16> loc(#loc) %88 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_173/weights"} -> tensor<480x80x1x1xbf16> loc(#loc) %89 = xten_nn.load_external_const {file = 
"constants.h5", key = "Conv_171/biases"} -> tensor<80xbf16> loc(#loc) %90 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_171/weights"} -> tensor<80x184x1x1xbf16> loc(#loc) %91 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_162/biases"} -> tensor<184xbf16> loc(#loc) %92 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_162/weights"} -> tensor<184x1x3x3xbf16> loc(#loc) %93 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_153/biases"} -> tensor<184xbf16> loc(#loc) %94 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_153/weights"} -> tensor<184x80x1x1xbf16> loc(#loc) %95 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_151/biases"} -> tensor<80xbf16> loc(#loc) %96 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_151/weights"} -> tensor<80x184x1x1xbf16> loc(#loc) %97 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_142/biases"} -> tensor<184xbf16> loc(#loc) %98 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_142/weights"} -> tensor<184x1x3x3xbf16> loc(#loc) %99 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_133/biases"} -> tensor<184xbf16> loc(#loc) %100 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_133/weights"} -> tensor<184x80x1x1xbf16> loc(#loc) %101 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_131/biases"} -> tensor<80xbf16> loc(#loc) %102 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_131/weights"} -> tensor<80x200x1x1xbf16> loc(#loc) %103 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_122/biases"} -> tensor<200xbf16> loc(#loc) %104 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_122/weights"} -> tensor<200x1x3x3xbf16> loc(#loc) %105 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_113/biases"} -> tensor<200xbf16> loc(#loc) %106 = xten_nn.load_external_const {file = 
"constants.h5", key = "Conv_113/weights"} -> tensor<200x80x1x1xbf16> loc(#loc) %107 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_112/biases"} -> tensor<80xbf16> loc(#loc) %108 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_112/weights"} -> tensor<80x240x1x1xbf16> loc(#loc) %109 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_103/biases"} -> tensor<240xbf16> loc(#loc) %110 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_103/weights"} -> tensor<240x1x3x3xbf16> loc(#loc) %111 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_94/biases"} -> tensor<240xbf16> loc(#loc) %112 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_94/weights"} -> tensor<240x40x1x1xbf16> loc(#loc) %113 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_92/biases"} -> tensor<40xbf16> loc(#loc) %114 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_92/weights"} -> tensor<40x120x1x1xbf16> loc(#loc) %115 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_83/biases"} -> tensor<120xbf16> loc(#loc) %116 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_83/weights"} -> tensor<120x32x1x1xbf16> loc(#loc) %117 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_81/biases"} -> tensor<32xbf16> loc(#loc) %118 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_81/weights"} -> tensor<32x120x1x1xbf16> loc(#loc) %119 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_78/biases"} -> tensor<120xbf16> loc(#loc) %120 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_78/weights"} -> tensor<120x1x5x5xbf16> loc(#loc) %121 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_76/biases"} -> tensor<120xbf16> loc(#loc) %122 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_76/weights"} -> tensor<120x40x1x1xbf16> loc(#loc) %123 = xten_nn.load_external_const {file = 
"constants.h5", key = "Conv_74/biases"} -> tensor<40xbf16> loc(#loc) %124 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_74/weights"} -> tensor<40x120x1x1xbf16> loc(#loc) %125 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_65/biases"} -> tensor<120xbf16> loc(#loc) %126 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_65/weights"} -> tensor<120x32x1x1xbf16> loc(#loc) %127 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_63/biases"} -> tensor<32xbf16> loc(#loc) %128 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_63/weights"} -> tensor<32x120x1x1xbf16> loc(#loc) %129 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_60/biases"} -> tensor<120xbf16> loc(#loc) %130 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_60/weights"} -> tensor<120x1x5x5xbf16> loc(#loc) %131 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_58/biases"} -> tensor<120xbf16> loc(#loc) %132 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_58/weights"} -> tensor<120x40x1x1xbf16> loc(#loc) %133 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_57/biases"} -> tensor<40xbf16> loc(#loc) %134 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_57/weights"} -> tensor<40x72x1x1xbf16> loc(#loc) %135 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_48/biases"} -> tensor<72xbf16> loc(#loc) %136 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_48/weights"} -> tensor<72x24x1x1xbf16> loc(#loc) %137 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_46/biases"} -> tensor<24xbf16> loc(#loc) %138 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_46/weights"} -> tensor<24x72x1x1xbf16> loc(#loc) %139 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_43/biases"} -> tensor<72xbf16> loc(#loc) %140 = xten_nn.load_external_const {file = "constants.h5", key = 
"Conv_43/weights"} -> tensor<72x1x5x5xbf16> loc(#loc) %141 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_41/biases"} -> tensor<72xbf16> loc(#loc) %142 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_41/weights"} -> tensor<72x24x1x1xbf16> loc(#loc) %143 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_39/biases"} -> tensor<24xbf16> loc(#loc) %144 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_39/weights"} -> tensor<24x72x1x1xbf16> loc(#loc) %145 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_37/biases"} -> tensor<72xbf16> loc(#loc) %146 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_37/weights"} -> tensor<72x1x3x3xbf16> loc(#loc) %147 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_35/biases"} -> tensor<72xbf16> loc(#loc) %148 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_35/weights"} -> tensor<72x24x1x1xbf16> loc(#loc) %149 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_34/biases"} -> tensor<24xbf16> loc(#loc) %150 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_34/weights"} -> tensor<24x64x1x1xbf16> loc(#loc) %151 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_32/biases"} -> tensor<64xbf16> loc(#loc) %152 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_32/weights"} -> tensor<64x1x3x3xbf16> loc(#loc) %153 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_30/biases"} -> tensor<64xbf16> loc(#loc) %154 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_30/weights"} -> tensor<64x16x1x1xbf16> loc(#loc) %155 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_28/biases"} -> tensor<16xbf16> loc(#loc) %156 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_28/weights"} -> tensor<16x16x1x1xbf16> loc(#loc) %157 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_26/biases"} -> 
tensor<16xbf16> loc(#loc) %158 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_26/weights"} -> tensor<16x1x3x3xbf16> loc(#loc) %159 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_17/biases"} -> tensor<16xbf16> loc(#loc) %160 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_17/weights"} -> tensor<16x3x3x3xbf16> loc(#loc) %161 = xten_nn.load_external_const {file = "constants.h5", key = "Div_16/Constant_1_0"} -> tensor<1x3x180x320xbf16> loc(#loc6) %162 = xten_nn.load_external_const {file = "constants.h5", key = "Sub_14/Constant_1_0"} -> tensor<1x3x180x320xbf16> loc(#loc320) %163 = xten_nn.subgraph (%arg5 = %0: tensor<1x180x320x4xbf16>) attributes { LayerName = "Slice_7_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "387", Port = "data_io.ifm", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 180, 320, 4]> : vector<4xindex> } ], OutputName = "Slice_7", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "392", Port = "data_io.ofm", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 180, 320, 3]> : vector<4xindex> } ], Specializes = "SliceHCWC8Adf", With = { config.aie_arch = "aie2p", config.axis_letter = "W", config.dim_c = 184 : ui32, config.dim_h = 320 : ui32, config.dim_w = 4 : ui32, config.dtype = "bfloat16", config.end = 3 : ui32, config.start = 0 : ui32, config.step = 1 : ui32 }} { %461 = tosa.slice %arg5 { LayerName = "Slice_7", OutputName = "Slice_7", size = array, start = array} : (tensor<1x180x320x4xbf16>) -> tensor<1x180x320x3xbf16> loc(#loc9) xten_nn.output %461 : tensor<1x180x320x3xbf16> loc(#loc9) } -> tensor<1x180x320x3xbf16> loc(#loc9) %164 = 
xten_nn.subgraph (%arg5 = %163: tensor<1x180x320x3xbf16>) attributes { LayerName = "CompilerGenerated_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ifm", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 180, 320, 3]> : vector<4xindex> } ], OutputName = "CompilerGenerated_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ofm", l3_extend_end = dense<[0, 4, 0, 5]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 180, 320, 3]> : vector<4xindex> } ], Specializes = "BufferPadAdf", With = { config.aie_arch = "aie2p", config.dim_0 = 320 : ui32, config.dim_0_padded = 320 : ui32, config.dim_1 = 23 : ui32, config.dim_1_padded = 23 : ui32, config.dim_2 = 3 : ui32, config.dim_2_padded = 8 : ui32, config.dim_3 = 8 : ui32, config.dim_3_padded = 8 : ui32, config.dtype = "bfloat16" }} { xten_nn.output %arg5 : tensor<1x180x320x3xbf16> loc(#loc10) } -> tensor<1x180x320x3xbf16> loc(#loc10) %165 = xten_nn.subgraph (%arg5 = %164: tensor<1x180x320x3xbf16>) attributes { LayerName = "Slice_7_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "387", Port = "data_io.ifm", l3_extend_end = dense<[0, 4, 0, 5]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 180, 320, 3]> : vector<4xindex> } ], OutputName = "Add_445_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "911", Port = "data_io.ofm", l3_extend_end = dense<[0, 5, 4, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, 
l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], Specializes = "Transpose4dAdf", With = { config.aie_arch = "aie2p", config.dim_0 = 320 : ui32, config.dim_1 = 23 : ui32, config.dim_2 = 8 : ui32, config.dim_3 = 8 : ui32, config.dtype = "bfloat16", config.perm = 10 : ui32 }} { %461 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc11) %462 = tosa.transpose %arg5, %461 : (tensor<1x180x320x3xbf16>, tensor<4xi32>) -> tensor<1x3x180x320xbf16> loc(#loc322) xten_nn.output %462 : tensor<1x3x180x320xbf16> loc(#loc322) } -> tensor<1x3x180x320xbf16> loc(#loc321) %166 = xten_nn.subgraph (%arg5 = %165: tensor<1x3x180x320xbf16>) attributes { LayerName = "CompilerGenerated_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ifm", l3_extend_end = dense<[0, 5, 4, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], OutputName = "CompilerGenerated_Duplicated#1", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ofm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], Specializes = "BufferUnpadAdf", With = { config.aie_arch = "aie2p", config.dim_0 = 184 : ui32, config.dim_0_unpadded = 180 : ui32, config.dim_1 = 1 : ui32, config.dim_1_unpadded = 1 : ui32, config.dim_2 = 320 : ui32, config.dim_2_unpadded = 320 : ui32, config.dim_3 = 8 : ui32, config.dim_3_unpadded = 8 : ui32, config.dtype = "bfloat16" }} { xten_nn.output %arg5 : tensor<1x3x180x320xbf16> loc(#loc10) } -> tensor<1x3x180x320xbf16> loc(#loc10) %167 = xten_nn.subgraph (%arg5 = %166: tensor<1x3x180x320xbf16>) attributes { LayerName = "AveragePool_346", Operands = [ { 
CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "393", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], OutputName = "AveragePool_346", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "778", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 90, 160]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x3x180x320xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], HWPaddingNotCounted = [[0, 0], [0, 0]], LayerName = "AveragePool_346", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "393", Port = "data_io.ifm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], OutputName = "AveragePool_346", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "778", Port = "data_io.ofm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 90, 160]> : vector<4xindex> } ], Specializes = "AvgPool2dBf16", With = { config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.dtype = "bfloat16", config.ksize = 2 : ui8, config.stride_log2 = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc12) %464 = tosa.transpose %arg6, %463 : (tensor<1x3x180x320xbf16>, tensor<4xi32>) -> tensor<1x180x320x3xbf16> loc(#loc12) %465 = tosa.avg_pool2d %464 { PartOfLayerName = 
"AveragePool_346", PartOfOutputName = "AveragePool_346", acc_type = f32, kernel = array, pad = array, stride = array} : (tensor<1x180x320x3xbf16>) -> tensor<1x90x160x3xbf16> loc(#loc12) %466 = tosa.transpose %465, %462 : (tensor<1x90x160x3xbf16>, tensor<4xi32>) -> tensor<1x3x90x160xbf16> loc(#loc12) xten_nn.output %466 : tensor<1x3x90x160xbf16> loc(#loc12) } -> tensor<1x3x90x160xbf16> loc(#loc12) xten_nn.output %461 : tensor<1x3x90x160xbf16> loc(#loc12) } -> tensor<1x3x90x160xbf16> loc(#loc12) %168 = xten_nn.subgraph (%arg5 = %167: tensor<1x3x90x160xbf16>) attributes { LayerName = "AveragePool_347", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "778", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 90, 160]> : vector<4xindex> } ], OutputName = "AveragePool_347", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "779", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 45, 80]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x3x90x160xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], HWPaddingNotCounted = [[0, 0], [0, 0]], LayerName = "AveragePool_347", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "778", Port = "data_io.ifm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 90, 160]> : vector<4xindex> } ], OutputName = "AveragePool_347", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "779", Port = "data_io.ofm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 45, 80]> 
: vector<4xindex> } ], Specializes = "AvgPool2dBf16", With = { config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.dtype = "bfloat16", config.ksize = 2 : ui8, config.stride_log2 = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc13) %464 = tosa.transpose %arg6, %463 : (tensor<1x3x90x160xbf16>, tensor<4xi32>) -> tensor<1x90x160x3xbf16> loc(#loc13) %465 = tosa.avg_pool2d %464 { PartOfLayerName = "AveragePool_347", PartOfOutputName = "AveragePool_347", acc_type = f32, kernel = array, pad = array, stride = array} : (tensor<1x90x160x3xbf16>) -> tensor<1x45x80x3xbf16> loc(#loc13) %466 = tosa.transpose %465, %462 : (tensor<1x45x80x3xbf16>, tensor<4xi32>) -> tensor<1x3x45x80xbf16> loc(#loc13) xten_nn.output %466 : tensor<1x3x45x80xbf16> loc(#loc13) } -> tensor<1x3x45x80xbf16> loc(#loc13) xten_nn.output %461 : tensor<1x3x45x80xbf16> loc(#loc13) } -> tensor<1x3x45x80xbf16> loc(#loc13) %169 = xten_nn.subgraph (%arg5 = %168: tensor<1x3x45x80xbf16>) attributes { LayerName = "AveragePool_348", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "779", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 45, 80]> : vector<4xindex> } ], OutputName = "AveragePool_348", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "780", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x3x45x80xbf16>) attributes { Dilations = array, HWPadding = [[0, 1], [0, 0]], 
HWPaddingNotCounted = [[0, 1], [0, 0]], LayerName = "AveragePool_348", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "779", Port = "data_io.ifm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 45, 80]> : vector<4xindex> } ], OutputName = "AveragePool_348", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "780", Port = "data_io.ofm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 23, 40]> : vector<4xindex> } ], Specializes = "AvgPool2dBf16", With = { config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.dtype = "bfloat16", config.ksize = 2 : ui8, config.stride_log2 = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc14) %464 = tosa.transpose %arg6, %463 : (tensor<1x3x45x80xbf16>, tensor<4xi32>) -> tensor<1x45x80x3xbf16> loc(#loc14) %465 = tosa.avg_pool2d %464 { PartOfLayerName = "AveragePool_348", PartOfOutputName = "AveragePool_348", acc_type = f32, kernel = array, pad = array, stride = array} : (tensor<1x45x80x3xbf16>) -> tensor<1x23x40x3xbf16> loc(#loc14) %466 = tosa.transpose %465, %462 : (tensor<1x23x40x3xbf16>, tensor<4xi32>) -> tensor<1x3x23x40xbf16> loc(#loc14) xten_nn.output %466 : tensor<1x3x23x40xbf16> loc(#loc14) } -> tensor<1x3x23x40xbf16> loc(#loc14) xten_nn.output %461 : tensor<1x3x23x40xbf16> loc(#loc14) } -> tensor<1x3x23x40xbf16> loc(#loc14) %170 = xten_nn.subgraph (%arg5 = %166: tensor<1x3x180x320xbf16>, %arg6 = %162: tensor<1x3x180x320xbf16>) attributes { LayerName = "Sub_14", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "393", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = 
dense<[1, 3, 180, 320]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "392", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], OutputName = "Sub_14", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "399", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x3x180x320xbf16>, %arg8 = %arg6: tensor<1x3x180x320xbf16>) attributes { LayerName = "Sub_14", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "393", Port = "data_io.ifm1", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "392", Port = "data_io.ifm2", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], OutputName = "Sub_14", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "399", Port = "data_io.ofm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], Specializes = "AddBf16", Traits = { Binary = true, Elementwise = true }, With = { config.act = 0 : ui8, config.act_type = "LINEAR", config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.add %arg7, %arg8 {LayerName = "Sub_14", OutputName = "Initializer_398"} : (tensor<1x3x180x320xbf16>, 
tensor<1x3x180x320xbf16>) -> tensor<1x3x180x320xbf16> loc(#loc320) xten_nn.output %462 : tensor<1x3x180x320xbf16> loc(#loc320) } -> tensor<1x3x180x320xbf16> loc(#loc320) xten_nn.output %461 : tensor<1x3x180x320xbf16> loc(#loc320) } -> tensor<1x3x180x320xbf16> loc(#loc320) %171 = xten_nn.subgraph (%arg5 = %170: tensor<1x3x180x320xbf16>, %arg6 = %161: tensor<1x3x180x320xbf16>) attributes { LayerName = "Div_16", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "399", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "393", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], OutputName = "Div_16", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "401", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x3x180x320xbf16>, %arg8 = %arg6: tensor<1x3x180x320xbf16>) attributes { LayerName = "Div_16", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "399", Port = "data_io.ifm1", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "393", Port = "data_io.ifm2", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], OutputName = "Div_16", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", 
L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "401", Port = "data_io.ofm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { OutputName = "Div_16", PartOfLayerName = "Div_16", shift = 0 : i8} : (tensor<1x3x180x320xbf16>, tensor<1x3x180x320xbf16>) -> tensor<1x3x180x320xbf16> loc(#loc6) xten_nn.output %462 : tensor<1x3x180x320xbf16> loc(#loc6) } -> tensor<1x3x180x320xbf16> loc(#loc6) xten_nn.output %461 : tensor<1x3x180x320xbf16> loc(#loc6) } -> tensor<1x3x180x320xbf16> loc(#loc6) %172 = xten_nn.subgraph (%arg5 = %171: tensor<1x3x180x320xbf16>, %arg6 = %160: tensor<16x3x3x3xbf16>, %arg7 = %159: tensor<16xbf16>) attributes { LayerName = "Conv_17", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "401", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> }, { Name = "399", UnknownDataFormat = true, l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[16, 3, 3, 3]> : vector<4xindex> }, { Name = "929", UnknownDataFormat = true } ], OutputName = "Conv_17", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "928", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x3x180x320xbf16>, %arg9 = %arg6: tensor<16x3x3x3xbf16>, %arg10 = %arg7: tensor<16xbf16>) attributes { Dilations = array, 
HWPadding = [[1, 0], [1, 0]], LayerName = "Conv_17", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "401", Port = "data_io.ifm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> }, { Name = "399", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[16, 3, 3, 3]> : vector<4xindex> }, { Name = "929", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_17", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "928", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 3 : ui8, config.ksize.width = 3 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 2 : ui8, config.stride_w = 2 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = tosa.transpose %arg9, %463 : (tensor<16x3x3x3xbf16>, tensor<4xi32>) -> tensor<16x3x3x3xbf16> loc(#loc15) %465 = tosa.transpose %arg8, %463 : (tensor<1x3x180x320xbf16>, tensor<4xi32>) -> tensor<1x180x320x3xbf16> loc(#loc15) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_17", 
PartOfOutputName = "Conv_17", dilation = array, pad = array, stride = array} : (tensor<1x180x320x3xbf16>, tensor<16x3x3x3xbf16>, tensor<16xbf16>) -> tensor<1x90x160x16xbf16> loc(#loc15) %467 = tosa.transpose %466, %462 : (tensor<1x90x160x16xbf16>, tensor<4xi32>) -> tensor<1x16x90x160xbf16> loc(#loc15) xten_nn.output %467 : tensor<1x16x90x160xbf16> loc(#loc15) } -> tensor<1x16x90x160xbf16> loc(#loc15) xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc15) } -> tensor<1x16x90x160xbf16> loc(#loc15) %173 = xten_nn.subgraph (%arg5 = %172: tensor<1x16x90x160xbf16>) attributes { LayerName = "Add_19", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "928", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Add_19", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "405", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x16x90x160xbf16>) attributes { LayerName = "Add_19", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "928", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Add_19", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "405", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", 
config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_19", OutputName = "Add_19"} : (tensor<1x16x90x160xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc16) xten_nn.output %463 : tensor<1x16x90x160xbf16> loc(#loc16) } -> tensor<1x16x90x160xbf16> loc(#loc16) xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc16) } -> tensor<1x16x90x160xbf16> loc(#loc16) %174 = xten_nn.subgraph (%arg5 = %173: tensor<1x16x90x160xbf16>) attributes { LayerName = "Clip_22", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "405", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Clip_22", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "408", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x16x90x160xbf16>) attributes { LayerName = "Clip_22", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "405", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Clip_22", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "408", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], Specializes 
= "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_22", OutputName = "Clip_22", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x16x90x160xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc17) xten_nn.output %462 : tensor<1x16x90x160xbf16> loc(#loc17) } -> tensor<1x16x90x160xbf16> loc(#loc17) xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc17) } -> tensor<1x16x90x160xbf16> loc(#loc17) %175 = xten_nn.subgraph (%arg5 = %174: tensor<1x16x90x160xbf16>) attributes { LayerName = "Div_24", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "408", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Div_24", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "410", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x16x90x160xbf16>) attributes { LayerName = "Div_24", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "408", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Div_24", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "410", 
Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_24", OutputName = "Div_24", shift = 0 : i8} : (tensor<1x16x90x160xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc18) xten_nn.output %463 : tensor<1x16x90x160xbf16> loc(#loc18) } -> tensor<1x16x90x160xbf16> loc(#loc18) xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc18) } -> tensor<1x16x90x160xbf16> loc(#loc18) %176 = xten_nn.subgraph (%arg5 = %172: tensor<1x16x90x160xbf16>, %arg6 = %175: tensor<1x16x90x160xbf16>) attributes { LayerName = "Mul_25", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "928", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "401", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Mul_25", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "411", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x16x90x160xbf16>, %arg8 = %arg6: tensor<1x16x90x160xbf16>) attributes { 
LayerName = "Mul_25", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "928", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "401", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Mul_25", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "411", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_25", OutputName = "Mul_25", shift = 0 : i8} : (tensor<1x16x90x160xbf16>, tensor<1x16x90x160xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc19) xten_nn.output %462 : tensor<1x16x90x160xbf16> loc(#loc19) } -> tensor<1x16x90x160xbf16> loc(#loc19) xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc19) } -> tensor<1x16x90x160xbf16> loc(#loc19) %177 = xten_nn.subgraph (%arg5 = %176: tensor<1x16x90x160xbf16>, %arg6 = %158: tensor<16x1x3x3xbf16>, %arg7 = %157: tensor<16xbf16>) attributes { LayerName = "Conv_26", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "411", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "928", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[16, 1, 3, 3]> : vector<4xindex> }, { Name = "411", UnknownDataFormat = true 
} ], OutputName = "Relu_27", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "414", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x16x90x160xbf16>, %arg9 = %arg6: tensor<16x1x3x3xbf16>, %arg10 = %arg7: tensor<16xbf16>) attributes { Dilations = array, HWPadding = [[1, 1], [1, 1]], LayerName = "Conv_26", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "411", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "928", Port = "data_io.wts", SubPort = "wts_data", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[16, 1, 3, 3]> : vector<4xindex> }, { Name = "411", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_27", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "414", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], Specializes = "DepthwiseConv2dBf16", Traits = { NonNegativeOut = true }, With = { config.act = 1 : ui8, config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.kernel_height = 3 : ui8, config.kernel_width = 3 : ui8, config.stride = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = 
"tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc323) %465 = tosa.transpose %arg9, %464 : (tensor<16x1x3x3xbf16>, tensor<4xi32>) -> tensor<3x3x16x1xbf16> loc(#loc323) %466 = tosa.transpose %arg8, %463 : (tensor<1x16x90x160xbf16>, tensor<4xi32>) -> tensor<1x90x160x16xbf16> loc(#loc323) %467 = tosa.depthwise_conv2d %466, %465, %arg10 { PartOfLayerName = "Conv_26", PartOfOutputName = "Conv_26", dilation = array, pad = array, stride = array} : (tensor<1x90x160x16xbf16>, tensor<3x3x16x1xbf16>, tensor<16xbf16>) -> tensor<1x90x160x16xbf16> loc(#loc20) %468 = tosa.clamp %467 { LayerName = "Relu_27", OutputName = "Relu_27", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x90x160x16xbf16>) -> tensor<1x90x160x16xbf16> loc(#loc21) %469 = tosa.transpose %468, %462 : (tensor<1x90x160x16xbf16>, tensor<4xi32>) -> tensor<1x16x90x160xbf16> loc(#loc323) xten_nn.output %469 : tensor<1x16x90x160xbf16> loc(#loc21) } -> tensor<1x16x90x160xbf16> loc(#loc323) xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc323) } -> tensor<1x16x90x160xbf16> loc(#loc323) %178 = xten_nn.subgraph (%arg5 = %177: tensor<1x16x90x160xbf16>, %arg6 = %156: tensor<16x16x1x1xbf16>, %arg7 = %155: tensor<16xbf16>, %arg8 = %176: tensor<1x16x90x160xbf16>) attributes { LayerName = "Conv_28", OfmShare = 3 : index, Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "414", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { Name = "931", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[16, 16, 1, 1]> : vector<4xindex> }, { Name = "935", UnknownDataFormat = true }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "414", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], 
OutputName = "Add_29", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "417", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg9 = %arg5: tensor<1x16x90x160xbf16>, %arg10 = %arg6: tensor<16x16x1x1xbf16>, %arg11 = %arg7: tensor<16xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_28", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "414", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { Name = "931", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[16, 16, 1, 1]> : vector<4xindex> }, { Name = "935", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_28", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "934", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 
1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %463 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc22) %465 = tosa.reshape %arg10 {new_shape = array} : (tensor<16x16x1x1xbf16>) -> tensor<16x1x1x16xbf16> loc(#loc22) %466 = tosa.transpose %arg9, %464 : (tensor<1x16x90x160xbf16>, tensor<4xi32>) -> tensor<1x90x160x16xbf16> loc(#loc22) %467 = tosa.conv2d %466, %465, %arg11 { PartOfLayerName = "Conv_28", PartOfOutputName = "Conv_28", dilation = array, pad = array, stride = array} : (tensor<1x90x160x16xbf16>, tensor<16x1x1x16xbf16>, tensor<16xbf16>) -> tensor<1x90x160x16xbf16> loc(#loc22) %468 = tosa.transpose %467, %463 : (tensor<1x90x160x16xbf16>, tensor<4xi32>) -> tensor<1x16x90x160xbf16> loc(#loc22) xten_nn.output %468 : tensor<1x16x90x160xbf16> loc(#loc22) } -> tensor<1x16x90x160xbf16> loc(#loc22) %462 = xten_nn.subgraph (%arg9 = %461: tensor<1x16x90x160xbf16>, %arg10 = %arg8: tensor<1x16x90x160xbf16>) attributes { LayerName = "Add_29", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "934", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "414", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Add_29", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "417", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], Specializes = "AddBf16", Traits = { Binary = true, Elementwise = true }, With = { config.act = 0 
: ui8, config.act_type = "LINEAR", config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %463 = tosa.add %arg9, %arg10 {LayerName = "Add_29", OutputName = "Add_29"} : (tensor<1x16x90x160xbf16>, tensor<1x16x90x160xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc23) xten_nn.output %463 : tensor<1x16x90x160xbf16> loc(#loc23) } -> tensor<1x16x90x160xbf16> loc(#loc23) xten_nn.output %462 : tensor<1x16x90x160xbf16> loc(#loc23) } -> tensor<1x16x90x160xbf16> loc(#loc324) %179 = xten_nn.subgraph (%arg5 = %178: tensor<1x16x90x160xbf16>, %arg6 = %154: tensor<64x16x1x1xbf16>, %arg7 = %153: tensor<64xbf16>) attributes { LayerName = "Conv_30", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "417", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { Name = "934", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[64, 16, 1, 1]> : vector<4xindex> }, { Name = "417", UnknownDataFormat = true } ], OutputName = "Relu_31", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "420", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 90, 160]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x16x90x160xbf16>, %arg9 = %arg6: tensor<64x16x1x1xbf16>, %arg10 = %arg7: tensor<64xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_30", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "417", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { Name 
= "934", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[64, 16, 1, 1]> : vector<4xindex> }, { Name = "417", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_31", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "420", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 90, 160]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true, NonNegativeOut = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 1 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 0.000000e+00 : bf16, config.lrelu_alpha_kernel = 0.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc325) %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<64x16x1x1xbf16>) -> tensor<64x1x1x16xbf16> loc(#loc325) %465 = tosa.transpose %arg8, %463 : (tensor<1x16x90x160xbf16>, tensor<4xi32>) -> tensor<1x90x160x16xbf16> loc(#loc325) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_30", PartOfOutputName = "Conv_30", dilation = array, pad = array, stride = array} : (tensor<1x90x160x16xbf16>, tensor<64x1x1x16xbf16>, tensor<64xbf16>) -> tensor<1x90x160x64xbf16> loc(#loc24) %467 = tosa.clamp %466 { LayerName = "Relu_31", OutputName = "Relu_31", max_fp = 3.40282347E+38 : f32, max_int 
= 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x90x160x64xbf16>) -> tensor<1x90x160x64xbf16> loc(#loc25) %468 = tosa.transpose %467, %462 : (tensor<1x90x160x64xbf16>, tensor<4xi32>) -> tensor<1x64x90x160xbf16> loc(#loc325) xten_nn.output %468 : tensor<1x64x90x160xbf16> loc(#loc25) } -> tensor<1x64x90x160xbf16> loc(#loc325) xten_nn.output %461 : tensor<1x64x90x160xbf16> loc(#loc325) } -> tensor<1x64x90x160xbf16> loc(#loc325) %180 = xten_nn.subgraph (%arg5 = %179: tensor<1x64x90x160xbf16>, %arg6 = %152: tensor<64x1x3x3xbf16>, %arg7 = %151: tensor<64xbf16>) attributes { LayerName = "Conv_32", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "420", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 90, 160]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "937", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[64, 1, 3, 3]> : vector<4xindex> }, { Name = "941", UnknownDataFormat = true } ], OutputName = "Relu_33", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "423", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 45, 80]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x64x90x160xbf16>, %arg9 = %arg6: tensor<64x1x3x3xbf16>, %arg10 = %arg7: tensor<64xbf16>) attributes { Dilations = array, HWPadding = [[1, 0], [1, 0]], LayerName = "Conv_32", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "420", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 90, 160]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", 
L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "937", Port = "data_io.wts", SubPort = "wts_data", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[64, 1, 3, 3]> : vector<4xindex> }, { Name = "941", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_33", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "423", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 45, 80]> : vector<4xindex> } ], Specializes = "DepthwiseConv2dBf16", Traits = { NonNegativeOut = true }, With = { config.act = 1 : ui8, config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.kernel_height = 3 : ui8, config.kernel_width = 3 : ui8, config.stride = 2 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc326) %465 = tosa.transpose %arg9, %464 : (tensor<64x1x3x3xbf16>, tensor<4xi32>) -> tensor<3x3x64x1xbf16> loc(#loc326) %466 = tosa.transpose %arg8, %463 : (tensor<1x64x90x160xbf16>, tensor<4xi32>) -> tensor<1x90x160x64xbf16> loc(#loc326) %467 = tosa.depthwise_conv2d %466, %465, %arg10 { PartOfLayerName = "Conv_32", PartOfOutputName = "Conv_32", dilation = array, pad = array, stride = array} : (tensor<1x90x160x64xbf16>, tensor<3x3x64x1xbf16>, tensor<64xbf16>) -> tensor<1x45x80x64xbf16> loc(#loc26) %468 = tosa.clamp %467 { LayerName = "Relu_33", OutputName = "Relu_33", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x45x80x64xbf16>) -> tensor<1x45x80x64xbf16> loc(#loc27) %469 = tosa.transpose %468, %462 : (tensor<1x45x80x64xbf16>, tensor<4xi32>) -> 
tensor<1x64x45x80xbf16> loc(#loc326) xten_nn.output %469 : tensor<1x64x45x80xbf16> loc(#loc27) } -> tensor<1x64x45x80xbf16> loc(#loc326) xten_nn.output %461 : tensor<1x64x45x80xbf16> loc(#loc326) } -> tensor<1x64x45x80xbf16> loc(#loc326) %181 = xten_nn.subgraph (%arg5 = %180: tensor<1x64x45x80xbf16>, %arg6 = %150: tensor<24x64x1x1xbf16>, %arg7 = %149: tensor<24xbf16>) attributes { LayerName = "Conv_34", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "423", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 45, 80]> : vector<4xindex> }, { Name = "940", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[24, 64, 1, 1]> : vector<4xindex> }, { Name = "944", UnknownDataFormat = true } ], OutputName = "Conv_34", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "943", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 24, 45, 80]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x64x45x80xbf16>, %arg9 = %arg6: tensor<24x64x1x1xbf16>, %arg10 = %arg7: tensor<24xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_34", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "423", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 45, 80]> : vector<4xindex> }, { Name = "940", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[24, 64, 1, 1]> : vector<4xindex> }, { Name = "944", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_34", 
Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "943", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 24, 45, 80]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc28) %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<24x64x1x1xbf16>) -> tensor<24x1x1x64xbf16> loc(#loc28) %465 = tosa.transpose %arg8, %463 : (tensor<1x64x45x80xbf16>, tensor<4xi32>) -> tensor<1x45x80x64xbf16> loc(#loc28) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_34", PartOfOutputName = "Conv_34", dilation = array, pad = array, stride = array} : (tensor<1x45x80x64xbf16>, tensor<24x1x1x64xbf16>, tensor<24xbf16>) -> tensor<1x45x80x24xbf16> loc(#loc28) %467 = tosa.transpose %466, %462 : (tensor<1x45x80x24xbf16>, tensor<4xi32>) -> tensor<1x24x45x80xbf16> loc(#loc28) xten_nn.output %467 : tensor<1x24x45x80xbf16> loc(#loc28) } -> tensor<1x24x45x80xbf16> loc(#loc28) xten_nn.output %461 : tensor<1x24x45x80xbf16> loc(#loc28) } -> tensor<1x24x45x80xbf16> loc(#loc28) %182 = xten_nn.subgraph (%arg5 = %181: tensor<1x24x45x80xbf16>, %arg6 = %148: tensor<72x24x1x1xbf16>, %arg7 = %147: 
tensor<72xbf16>) attributes { LayerName = "Conv_35", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "943", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 24, 45, 80]> : vector<4xindex> }, { Name = "423", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[72, 24, 1, 1]> : vector<4xindex> }, { Name = "947", UnknownDataFormat = true } ], OutputName = "Relu_36", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "428", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 45, 80]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x24x45x80xbf16>, %arg9 = %arg6: tensor<72x24x1x1xbf16>, %arg10 = %arg7: tensor<72xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_35", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "943", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 24, 45, 80]> : vector<4xindex> }, { Name = "423", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[72, 24, 1, 1]> : vector<4xindex> }, { Name = "947", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_36", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "428", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 45, 80]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true, NonNegativeOut = true }, With = { 
config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 1 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 0.000000e+00 : bf16, config.lrelu_alpha_kernel = 0.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc327) %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<72x24x1x1xbf16>) -> tensor<72x1x1x24xbf16> loc(#loc327) %465 = tosa.transpose %arg8, %463 : (tensor<1x24x45x80xbf16>, tensor<4xi32>) -> tensor<1x45x80x24xbf16> loc(#loc327) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_35", PartOfOutputName = "Conv_35", dilation = array, pad = array, stride = array} : (tensor<1x45x80x24xbf16>, tensor<72x1x1x24xbf16>, tensor<72xbf16>) -> tensor<1x45x80x72xbf16> loc(#loc29) %467 = tosa.clamp %466 { LayerName = "Relu_36", OutputName = "Relu_36", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x45x80x72xbf16>) -> tensor<1x45x80x72xbf16> loc(#loc30) %468 = tosa.transpose %467, %462 : (tensor<1x45x80x72xbf16>, tensor<4xi32>) -> tensor<1x72x45x80xbf16> loc(#loc327) xten_nn.output %468 : tensor<1x72x45x80xbf16> loc(#loc30) } -> tensor<1x72x45x80xbf16> loc(#loc327) xten_nn.output %461 : tensor<1x72x45x80xbf16> loc(#loc327) } -> tensor<1x72x45x80xbf16> loc(#loc327) %183 = xten_nn.subgraph (%arg5 = %182: tensor<1x72x45x80xbf16>, %arg6 = %146: tensor<72x1x3x3xbf16>, %arg7 = %145: tensor<72xbf16>) attributes { LayerName = "Conv_37", Operands = [ { CurrentDataFormat = 
"NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "428", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 45, 80]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "946", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[72, 1, 3, 3]> : vector<4xindex> }, { Name = "950", UnknownDataFormat = true } ], OutputName = "Relu_38", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "431", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 45, 80]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x72x45x80xbf16>, %arg9 = %arg6: tensor<72x1x3x3xbf16>, %arg10 = %arg7: tensor<72xbf16>) attributes { Dilations = array, HWPadding = [[1, 1], [1, 1]], LayerName = "Conv_37", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "428", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 45, 80]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "946", Port = "data_io.wts", SubPort = "wts_data", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[72, 1, 3, 3]> : vector<4xindex> }, { Name = "950", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_38", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "431", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 45, 80]> : vector<4xindex> } ], Specializes = "DepthwiseConv2dBf16", Traits = { NonNegativeOut = true }, With = { config.act = 
1 : ui8, config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.kernel_height = 3 : ui8, config.kernel_width = 3 : ui8, config.stride = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc328) %465 = tosa.transpose %arg9, %464 : (tensor<72x1x3x3xbf16>, tensor<4xi32>) -> tensor<3x3x72x1xbf16> loc(#loc328) %466 = tosa.transpose %arg8, %463 : (tensor<1x72x45x80xbf16>, tensor<4xi32>) -> tensor<1x45x80x72xbf16> loc(#loc328) %467 = tosa.depthwise_conv2d %466, %465, %arg10 { PartOfLayerName = "Conv_37", PartOfOutputName = "Conv_37", dilation = array, pad = array, stride = array} : (tensor<1x45x80x72xbf16>, tensor<3x3x72x1xbf16>, tensor<72xbf16>) -> tensor<1x45x80x72xbf16> loc(#loc31) %468 = tosa.clamp %467 { LayerName = "Relu_38", OutputName = "Relu_38", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x45x80x72xbf16>) -> tensor<1x45x80x72xbf16> loc(#loc32) %469 = tosa.transpose %468, %462 : (tensor<1x45x80x72xbf16>, tensor<4xi32>) -> tensor<1x72x45x80xbf16> loc(#loc328) xten_nn.output %469 : tensor<1x72x45x80xbf16> loc(#loc32) } -> tensor<1x72x45x80xbf16> loc(#loc328) xten_nn.output %461 : tensor<1x72x45x80xbf16> loc(#loc328) } -> tensor<1x72x45x80xbf16> loc(#loc328) %184 = xten_nn.subgraph (%arg5 = %183: tensor<1x72x45x80xbf16>, %arg6 = %144: tensor<24x72x1x1xbf16>, %arg7 = %143: tensor<24xbf16>, %arg8 = %181: tensor<1x24x45x80xbf16>) attributes { LayerName = "Conv_39", OfmShare = 3 : index, Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "431", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 45, 80]> : vector<4xindex> }, { Name = 
"949", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[24, 72, 1, 1]> : vector<4xindex> }, { Name = "953", UnknownDataFormat = true }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "431", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 24, 45, 80]> : vector<4xindex> } ], OutputName = "Add_40", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "434", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 24, 45, 80]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg9 = %arg5: tensor<1x72x45x80xbf16>, %arg10 = %arg6: tensor<24x72x1x1xbf16>, %arg11 = %arg7: tensor<24xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_39", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "431", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 45, 80]> : vector<4xindex> }, { Name = "949", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[24, 72, 1, 1]> : vector<4xindex> }, { Name = "953", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_39", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "952", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 24, 45, 80]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = 
"RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %463 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc33) %465 = tosa.reshape %arg10 {new_shape = array} : (tensor<24x72x1x1xbf16>) -> tensor<24x1x1x72xbf16> loc(#loc33) %466 = tosa.transpose %arg9, %464 : (tensor<1x72x45x80xbf16>, tensor<4xi32>) -> tensor<1x45x80x72xbf16> loc(#loc33) %467 = tosa.conv2d %466, %465, %arg11 { PartOfLayerName = "Conv_39", PartOfOutputName = "Conv_39", dilation = array, pad = array, stride = array} : (tensor<1x45x80x72xbf16>, tensor<24x1x1x72xbf16>, tensor<24xbf16>) -> tensor<1x45x80x24xbf16> loc(#loc33) %468 = tosa.transpose %467, %463 : (tensor<1x45x80x24xbf16>, tensor<4xi32>) -> tensor<1x24x45x80xbf16> loc(#loc33) xten_nn.output %468 : tensor<1x24x45x80xbf16> loc(#loc33) } -> tensor<1x24x45x80xbf16> loc(#loc33) %462 = xten_nn.subgraph (%arg9 = %461: tensor<1x24x45x80xbf16>, %arg10 = %arg8: tensor<1x24x45x80xbf16>) attributes { LayerName = "Add_40", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "952", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 24, 45, 80]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "431", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 24, 45, 80]> : vector<4xindex> } ], OutputName = "Add_40", 
Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "434", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 24, 45, 80]> : vector<4xindex> } ], Specializes = "AddBf16", Traits = { Binary = true, Elementwise = true }, With = { config.act = 0 : ui8, config.act_type = "LINEAR", config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %463 = tosa.add %arg9, %arg10 {LayerName = "Add_40", OutputName = "Add_40"} : (tensor<1x24x45x80xbf16>, tensor<1x24x45x80xbf16>) -> tensor<1x24x45x80xbf16> loc(#loc34) xten_nn.output %463 : tensor<1x24x45x80xbf16> loc(#loc34) } -> tensor<1x24x45x80xbf16> loc(#loc34) xten_nn.output %462 : tensor<1x24x45x80xbf16> loc(#loc34) } -> tensor<1x24x45x80xbf16> loc(#loc329) %185 = xten_nn.subgraph (%arg5 = %184: tensor<1x24x45x80xbf16>, %arg6 = %142: tensor<72x24x1x1xbf16>, %arg7 = %141: tensor<72xbf16>) attributes { LayerName = "Conv_41", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "434", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 24, 45, 80]> : vector<4xindex> }, { Name = "952", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[72, 24, 1, 1]> : vector<4xindex> }, { Name = "434", UnknownDataFormat = true } ], OutputName = "Relu_42", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "437", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 45, 80]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x24x45x80xbf16>, %arg9 = %arg6: tensor<72x24x1x1xbf16>, %arg10 = %arg7: 
tensor<72xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_41", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "434", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 24, 45, 80]> : vector<4xindex> }, { Name = "952", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[72, 24, 1, 1]> : vector<4xindex> }, { Name = "434", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_42", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "437", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 45, 80]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true, NonNegativeOut = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 1 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 0.000000e+00 : bf16, config.lrelu_alpha_kernel = 0.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc330) %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<72x24x1x1xbf16>) -> tensor<72x1x1x24xbf16> loc(#loc330) %465 = tosa.transpose %arg8, %463 : (tensor<1x24x45x80xbf16>, tensor<4xi32>) -> tensor<1x45x80x24xbf16> loc(#loc330) %466 = tosa.conv2d %465, %464, 
%arg10 { PartOfLayerName = "Conv_41", PartOfOutputName = "Conv_41", dilation = array, pad = array, stride = array} : (tensor<1x45x80x24xbf16>, tensor<72x1x1x24xbf16>, tensor<72xbf16>) -> tensor<1x45x80x72xbf16> loc(#loc35) %467 = tosa.clamp %466 { LayerName = "Relu_42", OutputName = "Relu_42", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x45x80x72xbf16>) -> tensor<1x45x80x72xbf16> loc(#loc36) %468 = tosa.transpose %467, %462 : (tensor<1x45x80x72xbf16>, tensor<4xi32>) -> tensor<1x72x45x80xbf16> loc(#loc330) xten_nn.output %468 : tensor<1x72x45x80xbf16> loc(#loc36) } -> tensor<1x72x45x80xbf16> loc(#loc330) xten_nn.output %461 : tensor<1x72x45x80xbf16> loc(#loc330) } -> tensor<1x72x45x80xbf16> loc(#loc330) %186 = xten_nn.subgraph (%arg5 = %185: tensor<1x72x45x80xbf16>, %arg6 = %140: tensor<72x1x5x5xbf16>, %arg7 = %139: tensor<72xbf16>) attributes { LayerName = "Conv_43", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "437", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 45, 80]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "955", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[72, 1, 5, 5]> : vector<4xindex> }, { Name = "959", UnknownDataFormat = true } ], OutputName = "Relu_44", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "440", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x72x45x80xbf16>, %arg9 = %arg6: tensor<72x1x5x5xbf16>, %arg10 = %arg7: tensor<72xbf16>) attributes { Dilations = 
array, HWPadding = [[2, 2], [2, 1]], LayerName = "Conv_43", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "437", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 45, 80]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "955", Port = "data_io.wts", SubPort = "wts_data", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[72, 1, 5, 5]> : vector<4xindex> }, { Name = "959", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_44", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "440", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 23, 40]> : vector<4xindex> } ], Specializes = "DepthwiseConv2dBf16", Traits = { NonNegativeOut = true }, With = { config.act = 1 : ui8, config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.kernel_height = 5 : ui8, config.kernel_width = 5 : ui8, config.stride = 2 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc331) %465 = tosa.transpose %arg9, %464 : (tensor<72x1x5x5xbf16>, tensor<4xi32>) -> tensor<5x5x72x1xbf16> loc(#loc331) %466 = tosa.transpose %arg8, %463 : (tensor<1x72x45x80xbf16>, tensor<4xi32>) -> tensor<1x45x80x72xbf16> loc(#loc331) %467 = tosa.depthwise_conv2d %466, %465, %arg10 { PartOfLayerName = "Conv_43", PartOfOutputName = "Conv_43", dilation = array, pad = array, stride = array} : (tensor<1x45x80x72xbf16>, tensor<5x5x72x1xbf16>, tensor<72xbf16>) -> tensor<1x23x40x72xbf16> loc(#loc37) %468 = tosa.clamp %467 
{ LayerName = "Relu_44", OutputName = "Relu_44", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x23x40x72xbf16>) -> tensor<1x23x40x72xbf16> loc(#loc38) %469 = tosa.transpose %468, %462 : (tensor<1x23x40x72xbf16>, tensor<4xi32>) -> tensor<1x72x23x40xbf16> loc(#loc331) xten_nn.output %469 : tensor<1x72x23x40xbf16> loc(#loc38) } -> tensor<1x72x23x40xbf16> loc(#loc331) xten_nn.output %461 : tensor<1x72x23x40xbf16> loc(#loc331) } -> tensor<1x72x23x40xbf16> loc(#loc331) %187 = xten_nn.subgraph (%arg5 = %186: tensor<1x72x23x40xbf16>) attributes { LayerName = "GlobalAveragePool_45_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "440", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 23, 40]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_45_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "441", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 920]> : vector<4xindex> } ], Specializes = "Transpose4dAdf", With = { config.aie_arch = "aie2p", config.dim_0 = 23 : ui32, config.dim_1 = 9 : ui32, config.dim_2 = 40 : ui32, config.dim_3 = 8 : ui32, config.dtype = "bfloat16", config.perm = 6 : ui32 }} { %461 = tosa.reshape %arg5 {new_shape = array} : (tensor<1x72x23x40xbf16>) -> tensor<1x72x1x920xbf16> loc(#loc39) xten_nn.output %461 : tensor<1x72x1x920xbf16> loc(#loc39) } -> tensor<1x72x1x920xbf16> loc(#loc39) %188 = xten_nn.subgraph (%arg5 = %187: tensor<1x72x1x920xbf16>) attributes { LayerName = "GlobalAveragePool_45_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", 
L3Vectorization = "C:8", Name = "440", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 920]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_45_Duplicated#1", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "441", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x72x1x920xbf16>) attributes { LayerName = "GlobalAveragePool_45_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "440", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 920]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_45_Duplicated#1", PadValue = 0.000000e+00 : bf16, Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "441", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> } ], Specializes = "ReduceMeanC8Bf16", Traits = { Reduce = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.full_channel = 72 : ui32, config.full_height = 1 : ui32, config.full_width = 920 : ui32, config.reduce_dim = "W" }} { %462 = xten_nn.reduce_mean %arg6 {axes = array, keepdims = 1 : i64} : (tensor<1x72x1x920xbf16>) -> tensor<1x72x1x1xbf16> loc(#loc39) xten_nn.output %462 : tensor<1x72x1x1xbf16> loc(#loc39) } -> tensor<1x72x1x1xbf16> loc(#loc39) xten_nn.output %461 : tensor<1x72x1x1xbf16> loc(#loc39) } -> tensor<1x72x1x1xbf16> loc(#loc39) %189 = xten_nn.subgraph (%arg5 = %188: tensor<1x72x1x1xbf16>, %arg6 = %138: tensor<24x72x1x1xbf16>, %arg7 = %137: 
tensor<24xbf16>) attributes { LayerName = "Conv_46", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "441", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> }, { Name = "440", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[24, 72, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.4.block.2.fc1.weight", UnknownDataFormat = true } ], OutputName = "Relu_47", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "443", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 24, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x72x1x1xbf16>, %arg9 = %arg6: tensor<24x72x1x1xbf16>, %arg10 = %arg7: tensor<24xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_46", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "441", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> }, { Name = "440", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[24, 72, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.4.block.2.fc1.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_47", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "443", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 24, 1, 1]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { 
AllowDMAOptimization = true, NonNegativeOut = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 1 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 0.000000e+00 : bf16, config.lrelu_alpha_kernel = 0.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<24x72x1x1xbf16>) -> tensor<24x1x1x72xbf16> loc(#loc332) %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x72x1x1xbf16>) -> tensor<1x1x1x72xbf16> loc(#loc332) %464 = tosa.conv2d %463, %462, %arg10 { PartOfLayerName = "Conv_46", PartOfOutputName = "Conv_46", dilation = array, pad = array, stride = array} : (tensor<1x1x1x72xbf16>, tensor<24x1x1x72xbf16>, tensor<24xbf16>) -> tensor<1x1x1x24xbf16> loc(#loc40) %465 = tosa.clamp %464 { LayerName = "Relu_47", OutputName = "Relu_47", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x1x1x24xbf16>) -> tensor<1x1x1x24xbf16> loc(#loc41) %466 = tosa.reshape %465 {new_shape = array} : (tensor<1x1x1x24xbf16>) -> tensor<1x24x1x1xbf16> loc(#loc332) xten_nn.output %466 : tensor<1x24x1x1xbf16> loc(#loc41) } -> tensor<1x24x1x1xbf16> loc(#loc332) xten_nn.output %461 : tensor<1x24x1x1xbf16> loc(#loc332) } -> tensor<1x24x1x1xbf16> loc(#loc332) %190 = xten_nn.subgraph (%arg5 = %189: tensor<1x24x1x1xbf16>, %arg6 = %136: tensor<72x24x1x1xbf16>, %arg7 = %135: tensor<72xbf16>) attributes { LayerName = "Conv_48", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "443", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 24, 1, 1]> : vector<4xindex> }, { 
Name = "442", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[72, 24, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.4.block.2.fc2.weight", UnknownDataFormat = true } ], OutputName = "Conv_48", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "444", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x24x1x1xbf16>, %arg9 = %arg6: tensor<72x24x1x1xbf16>, %arg10 = %arg7: tensor<72xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_48", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "443", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 24, 1, 1]> : vector<4xindex> }, { Name = "442", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[72, 24, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.4.block.2.fc2.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_48", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "444", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : 
ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<72x24x1x1xbf16>) -> tensor<72x1x1x24xbf16> loc(#loc42) %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x24x1x1xbf16>) -> tensor<1x1x1x24xbf16> loc(#loc42) %464 = tosa.conv2d %463, %462, %arg10 { PartOfLayerName = "Conv_48", PartOfOutputName = "Conv_48", dilation = array, pad = array, stride = array} : (tensor<1x1x1x24xbf16>, tensor<72x1x1x24xbf16>, tensor<72xbf16>) -> tensor<1x1x1x72xbf16> loc(#loc42) %465 = tosa.reshape %464 {new_shape = array} : (tensor<1x1x1x72xbf16>) -> tensor<1x72x1x1xbf16> loc(#loc42) xten_nn.output %465 : tensor<1x72x1x1xbf16> loc(#loc42) } -> tensor<1x72x1x1xbf16> loc(#loc42) xten_nn.output %461 : tensor<1x72x1x1xbf16> loc(#loc42) } -> tensor<1x72x1x1xbf16> loc(#loc42) %191 = xten_nn.subgraph (%arg5 = %190: tensor<1x72x1x1xbf16>) attributes { LayerName = "Add_50", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "444", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> } ], OutputName = "Add_50", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "446", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x72x1x1xbf16>) attributes { LayerName = "Add_50", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", 
L3Vectorization = "C:8", Name = "444", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> } ], OutputName = "Add_50", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "446", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_50", OutputName = "Add_50"} : (tensor<1x72x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x72x1x1xbf16> loc(#loc43) xten_nn.output %463 : tensor<1x72x1x1xbf16> loc(#loc43) } -> tensor<1x72x1x1xbf16> loc(#loc43) xten_nn.output %461 : tensor<1x72x1x1xbf16> loc(#loc43) } -> tensor<1x72x1x1xbf16> loc(#loc43) %192 = xten_nn.subgraph (%arg5 = %191: tensor<1x72x1x1xbf16>) attributes { LayerName = "Clip_53", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "446", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> } ], OutputName = "Clip_53", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "449", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x72x1x1xbf16>) attributes { LayerName 
= "Clip_53", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "446", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> } ], OutputName = "Clip_53", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "449", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_53", OutputName = "Clip_53", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x72x1x1xbf16>) -> tensor<1x72x1x1xbf16> loc(#loc44) xten_nn.output %462 : tensor<1x72x1x1xbf16> loc(#loc44) } -> tensor<1x72x1x1xbf16> loc(#loc44) xten_nn.output %461 : tensor<1x72x1x1xbf16> loc(#loc44) } -> tensor<1x72x1x1xbf16> loc(#loc44) %193 = xten_nn.subgraph (%arg5 = %192: tensor<1x72x1x1xbf16>) attributes { LayerName = "Div_55", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "449", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> } ], OutputName = "Div_55", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "451", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = 
"flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x72x1x1xbf16>) attributes { LayerName = "Div_55", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "449", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> } ], OutputName = "Div_55", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "451", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_55", OutputName = "Div_55", shift = 0 : i8} : (tensor<1x72x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x72x1x1xbf16> loc(#loc45) xten_nn.output %463 : tensor<1x72x1x1xbf16> loc(#loc45) } -> tensor<1x72x1x1xbf16> loc(#loc45) xten_nn.output %461 : tensor<1x72x1x1xbf16> loc(#loc45) } -> tensor<1x72x1x1xbf16> loc(#loc45) %194 = xten_nn.subgraph (%arg5 = %193: tensor<1x72x1x1xbf16>) attributes { LayerName = "Mul_56_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "451", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> } ], OutputName = "Mul_56_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "452", Port = 
"data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 23, 40]> : vector<4xindex> } ], Specializes = "TileAdf", With = { config.aie_arch = "aie2p", config.dtype = "bfloat16", config.i_dim_c = 72 : ui32, config.i_dim_h = 1 : ui32, config.i_dim_n = 1 : ui32, config.i_dim_w = 1 : ui32, config.rep_dim_c = 1 : ui32, config.rep_dim_h = 23 : ui32, config.rep_dim_w = 40 : ui32 }} { %461 = tosa.tile %arg5 {multiples = array} : (tensor<1x72x1x1xbf16>) -> tensor<1x72x23x40xbf16> loc(#loc46) xten_nn.output %461 : tensor<1x72x23x40xbf16> loc(#loc46) } -> tensor<1x72x23x40xbf16> loc(#loc46) %195 = xten_nn.subgraph (%arg5 = %194: tensor<1x72x23x40xbf16>, %arg6 = %186: tensor<1x72x23x40xbf16>) attributes { LayerName = "Mul_56_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "451", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "449", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 23, 40]> : vector<4xindex> } ], OutputName = "Mul_56_Duplicated#1", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "452", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x72x23x40xbf16>, %arg8 = %arg6: tensor<1x72x23x40xbf16>) attributes { LayerName = "Mul_56_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "451", Port = "data_io.ifm1", l3_extend_end = dense<0> : 
vector<4xindex>, l3_tile_count = dense<[1, 72, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "449", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 23, 40]> : vector<4xindex> } ], OutputName = "Mul_56_Duplicated#1", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "452", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 23, 40]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_56", OutputName = "Mul_56", shift = 0 : i8} : (tensor<1x72x23x40xbf16>, tensor<1x72x23x40xbf16>) -> tensor<1x72x23x40xbf16> loc(#loc46) xten_nn.output %462 : tensor<1x72x23x40xbf16> loc(#loc46) } -> tensor<1x72x23x40xbf16> loc(#loc46) xten_nn.output %461 : tensor<1x72x23x40xbf16> loc(#loc46) } -> tensor<1x72x23x40xbf16> loc(#loc46) %196 = xten_nn.subgraph (%arg5 = %195: tensor<1x72x23x40xbf16>, %arg6 = %134: tensor<40x72x1x1xbf16>, %arg7 = %133: tensor<40xbf16>) attributes { LayerName = "Conv_57", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "452", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 23, 40]> : vector<4xindex> }, { Name = "451", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[40, 72, 1, 1]> : vector<4xindex> }, { Name = "452", UnknownDataFormat = true } ], OutputName = "Conv_57", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "961", l3_extend_end = dense<0> : 
vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x72x23x40xbf16>, %arg9 = %arg6: tensor<40x72x1x1xbf16>, %arg10 = %arg7: tensor<40xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_57", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "452", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 23, 40]> : vector<4xindex> }, { Name = "451", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[40, 72, 1, 1]> : vector<4xindex> }, { Name = "452", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_57", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "961", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> 
: () -> tensor<4xi32> loc(#loc47) %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<40x72x1x1xbf16>) -> tensor<40x1x1x72xbf16> loc(#loc47) %465 = tosa.transpose %arg8, %463 : (tensor<1x72x23x40xbf16>, tensor<4xi32>) -> tensor<1x23x40x72xbf16> loc(#loc47) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_57", PartOfOutputName = "Conv_57", dilation = array, pad = array, stride = array} : (tensor<1x23x40x72xbf16>, tensor<40x1x1x72xbf16>, tensor<40xbf16>) -> tensor<1x23x40x40xbf16> loc(#loc47) %467 = tosa.transpose %466, %462 : (tensor<1x23x40x40xbf16>, tensor<4xi32>) -> tensor<1x40x23x40xbf16> loc(#loc47) xten_nn.output %467 : tensor<1x40x23x40xbf16> loc(#loc47) } -> tensor<1x40x23x40xbf16> loc(#loc47) xten_nn.output %461 : tensor<1x40x23x40xbf16> loc(#loc47) } -> tensor<1x40x23x40xbf16> loc(#loc47) %197 = xten_nn.subgraph (%arg5 = %196: tensor<1x40x23x40xbf16>, %arg6 = %132: tensor<120x40x1x1xbf16>, %arg7 = %131: tensor<120xbf16>) attributes { LayerName = "Conv_58", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "961", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { Name = "452", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[120, 40, 1, 1]> : vector<4xindex> }, { Name = "965", UnknownDataFormat = true } ], OutputName = "Relu_59", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "457", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x40x23x40xbf16>, %arg9 = %arg6: tensor<120x40x1x1xbf16>, %arg10 = %arg7: tensor<120xbf16>) attributes { Dilations = 
array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_58", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "961", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { Name = "452", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[120, 40, 1, 1]> : vector<4xindex> }, { Name = "965", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_59", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "457", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true, NonNegativeOut = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 1 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 0.000000e+00 : bf16, config.lrelu_alpha_kernel = 0.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc333) %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<120x40x1x1xbf16>) -> tensor<120x1x1x40xbf16> loc(#loc333) %465 = tosa.transpose %arg8, %463 : (tensor<1x40x23x40xbf16>, tensor<4xi32>) -> tensor<1x23x40x40xbf16> loc(#loc333) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_58", 
PartOfOutputName = "Conv_58", dilation = array, pad = array, stride = array} : (tensor<1x23x40x40xbf16>, tensor<120x1x1x40xbf16>, tensor<120xbf16>) -> tensor<1x23x40x120xbf16> loc(#loc48) %467 = tosa.clamp %466 { LayerName = "Relu_59", OutputName = "Relu_59", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x23x40x120xbf16>) -> tensor<1x23x40x120xbf16> loc(#loc49) %468 = tosa.transpose %467, %462 : (tensor<1x23x40x120xbf16>, tensor<4xi32>) -> tensor<1x120x23x40xbf16> loc(#loc333) xten_nn.output %468 : tensor<1x120x23x40xbf16> loc(#loc49) } -> tensor<1x120x23x40xbf16> loc(#loc333) xten_nn.output %461 : tensor<1x120x23x40xbf16> loc(#loc333) } -> tensor<1x120x23x40xbf16> loc(#loc333) %198 = xten_nn.subgraph (%arg5 = %197: tensor<1x120x23x40xbf16>, %arg6 = %130: tensor<120x1x5x5xbf16>, %arg7 = %129: tensor<120xbf16>) attributes { LayerName = "Conv_60", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "457", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "964", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[120, 1, 5, 5]> : vector<4xindex> }, { Name = "968", UnknownDataFormat = true } ], OutputName = "Relu_61", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "460", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x120x23x40xbf16>, %arg9 = %arg6: tensor<120x1x5x5xbf16>, %arg10 = %arg7: tensor<120xbf16>) attributes { Dilations = array, HWPadding = 
[[2, 2], [2, 2]], LayerName = "Conv_60", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "457", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "964", Port = "data_io.wts", SubPort = "wts_data", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[120, 1, 5, 5]> : vector<4xindex> }, { Name = "968", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_61", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "460", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> } ], Specializes = "DepthwiseConv2dBf16", Traits = { NonNegativeOut = true }, With = { config.act = 1 : ui8, config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.kernel_height = 5 : ui8, config.kernel_width = 5 : ui8, config.stride = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc334) %465 = tosa.transpose %arg9, %464 : (tensor<120x1x5x5xbf16>, tensor<4xi32>) -> tensor<5x5x120x1xbf16> loc(#loc334) %466 = tosa.transpose %arg8, %463 : (tensor<1x120x23x40xbf16>, tensor<4xi32>) -> tensor<1x23x40x120xbf16> loc(#loc334) %467 = tosa.depthwise_conv2d %466, %465, %arg10 { PartOfLayerName = "Conv_60", PartOfOutputName = "Conv_60", dilation = array, pad = array, stride = array} : (tensor<1x23x40x120xbf16>, tensor<5x5x120x1xbf16>, tensor<120xbf16>) -> tensor<1x23x40x120xbf16> loc(#loc50) %468 = tosa.clamp %467 { 
LayerName = "Relu_61", OutputName = "Relu_61", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x23x40x120xbf16>) -> tensor<1x23x40x120xbf16> loc(#loc51) %469 = tosa.transpose %468, %462 : (tensor<1x23x40x120xbf16>, tensor<4xi32>) -> tensor<1x120x23x40xbf16> loc(#loc334) xten_nn.output %469 : tensor<1x120x23x40xbf16> loc(#loc51) } -> tensor<1x120x23x40xbf16> loc(#loc334) xten_nn.output %461 : tensor<1x120x23x40xbf16> loc(#loc334) } -> tensor<1x120x23x40xbf16> loc(#loc334) %199 = xten_nn.subgraph (%arg5 = %198: tensor<1x120x23x40xbf16>) attributes { LayerName = "GlobalAveragePool_62_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "460", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_62_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "461", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 920]> : vector<4xindex> } ], Specializes = "Transpose4dAdf", With = { config.aie_arch = "aie2p", config.dim_0 = 23 : ui32, config.dim_1 = 15 : ui32, config.dim_2 = 40 : ui32, config.dim_3 = 8 : ui32, config.dtype = "bfloat16", config.perm = 6 : ui32 }} { %461 = tosa.reshape %arg5 {new_shape = array} : (tensor<1x120x23x40xbf16>) -> tensor<1x120x1x920xbf16> loc(#loc52) xten_nn.output %461 : tensor<1x120x1x920xbf16> loc(#loc52) } -> tensor<1x120x1x920xbf16> loc(#loc52) %200 = xten_nn.subgraph (%arg5 = %199: tensor<1x120x1x920xbf16>) attributes { LayerName = "GlobalAveragePool_62_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = 
"HCWN", L3Vectorization = "C:8", Name = "460", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 920]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_62_Duplicated#1", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "461", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x120x1x920xbf16>) attributes { LayerName = "GlobalAveragePool_62_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "460", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 920]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_62_Duplicated#1", PadValue = 0.000000e+00 : bf16, Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "461", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], Specializes = "ReduceMeanC8Bf16", Traits = { Reduce = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.full_channel = 120 : ui32, config.full_height = 1 : ui32, config.full_width = 920 : ui32, config.reduce_dim = "W" }} { %462 = xten_nn.reduce_mean %arg6 {axes = array, keepdims = 1 : i64} : (tensor<1x120x1x920xbf16>) -> tensor<1x120x1x1xbf16> loc(#loc52) xten_nn.output %462 : tensor<1x120x1x1xbf16> loc(#loc52) } -> tensor<1x120x1x1xbf16> loc(#loc52) xten_nn.output %461 : tensor<1x120x1x1xbf16> loc(#loc52) } -> tensor<1x120x1x1xbf16> loc(#loc52) %201 = xten_nn.subgraph (%arg5 = %200: tensor<1x120x1x1xbf16>, %arg6 = %128: tensor<32x120x1x1xbf16>, 
%arg7 = %127: tensor<32xbf16>) attributes { LayerName = "Conv_63", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "461", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> }, { Name = "460", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[32, 120, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.5.block.2.fc1.weight", UnknownDataFormat = true } ], OutputName = "Relu_64", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "463", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x120x1x1xbf16>, %arg9 = %arg6: tensor<32x120x1x1xbf16>, %arg10 = %arg7: tensor<32xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_63", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "461", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> }, { Name = "460", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[32, 120, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.5.block.2.fc1.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_64", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "463", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 1, 1]> : vector<4xindex> } ], Specializes = 
"Conv2DBf16", Traits = { AllowDMAOptimization = true, NonNegativeOut = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 1 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 0.000000e+00 : bf16, config.lrelu_alpha_kernel = 0.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<32x120x1x1xbf16>) -> tensor<32x1x1x120xbf16> loc(#loc335) %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x120x1x1xbf16>) -> tensor<1x1x1x120xbf16> loc(#loc335) %464 = tosa.conv2d %463, %462, %arg10 { PartOfLayerName = "Conv_63", PartOfOutputName = "Conv_63", dilation = array, pad = array, stride = array} : (tensor<1x1x1x120xbf16>, tensor<32x1x1x120xbf16>, tensor<32xbf16>) -> tensor<1x1x1x32xbf16> loc(#loc53) %465 = tosa.clamp %464 { LayerName = "Relu_64", OutputName = "Relu_64", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x1x1x32xbf16>) -> tensor<1x1x1x32xbf16> loc(#loc54) %466 = tosa.reshape %465 {new_shape = array} : (tensor<1x1x1x32xbf16>) -> tensor<1x32x1x1xbf16> loc(#loc335) xten_nn.output %466 : tensor<1x32x1x1xbf16> loc(#loc54) } -> tensor<1x32x1x1xbf16> loc(#loc335) xten_nn.output %461 : tensor<1x32x1x1xbf16> loc(#loc335) } -> tensor<1x32x1x1xbf16> loc(#loc335) %202 = xten_nn.subgraph (%arg5 = %201: tensor<1x32x1x1xbf16>, %arg6 = %126: tensor<120x32x1x1xbf16>, %arg7 = %125: tensor<120xbf16>) attributes { LayerName = "Conv_65", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "463", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 
32, 1, 1]> : vector<4xindex> }, { Name = "462", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[120, 32, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.5.block.2.fc2.weight", UnknownDataFormat = true } ], OutputName = "Conv_65", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "464", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x32x1x1xbf16>, %arg9 = %arg6: tensor<120x32x1x1xbf16>, %arg10 = %arg7: tensor<120xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_65", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "463", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 1, 1]> : vector<4xindex> }, { Name = "462", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[120, 32, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.5.block.2.fc2.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_65", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "464", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = 
"chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<120x32x1x1xbf16>) -> tensor<120x1x1x32xbf16> loc(#loc55) %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x32x1x1xbf16>) -> tensor<1x1x1x32xbf16> loc(#loc55) %464 = tosa.conv2d %463, %462, %arg10 { PartOfLayerName = "Conv_65", PartOfOutputName = "Conv_65", dilation = array, pad = array, stride = array} : (tensor<1x1x1x32xbf16>, tensor<120x1x1x32xbf16>, tensor<120xbf16>) -> tensor<1x1x1x120xbf16> loc(#loc55) %465 = tosa.reshape %464 {new_shape = array} : (tensor<1x1x1x120xbf16>) -> tensor<1x120x1x1xbf16> loc(#loc55) xten_nn.output %465 : tensor<1x120x1x1xbf16> loc(#loc55) } -> tensor<1x120x1x1xbf16> loc(#loc55) xten_nn.output %461 : tensor<1x120x1x1xbf16> loc(#loc55) } -> tensor<1x120x1x1xbf16> loc(#loc55) %203 = xten_nn.subgraph (%arg5 = %202: tensor<1x120x1x1xbf16>) attributes { LayerName = "Add_67", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "464", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], OutputName = "Add_67", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "466", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x120x1x1xbf16>) attributes { LayerName = "Add_67", Operands = [ { 
CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "464", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], OutputName = "Add_67", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "466", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_67", OutputName = "Add_67"} : (tensor<1x120x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x120x1x1xbf16> loc(#loc56) xten_nn.output %463 : tensor<1x120x1x1xbf16> loc(#loc56) } -> tensor<1x120x1x1xbf16> loc(#loc56) xten_nn.output %461 : tensor<1x120x1x1xbf16> loc(#loc56) } -> tensor<1x120x1x1xbf16> loc(#loc56) %204 = xten_nn.subgraph (%arg5 = %203: tensor<1x120x1x1xbf16>) attributes { LayerName = "Clip_70", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "466", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], OutputName = "Clip_70", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "469", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph 
(%arg6 = %arg5: tensor<1x120x1x1xbf16>) attributes { LayerName = "Clip_70", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "466", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], OutputName = "Clip_70", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "469", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_70", OutputName = "Clip_70", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x120x1x1xbf16>) -> tensor<1x120x1x1xbf16> loc(#loc57) xten_nn.output %462 : tensor<1x120x1x1xbf16> loc(#loc57) } -> tensor<1x120x1x1xbf16> loc(#loc57) xten_nn.output %461 : tensor<1x120x1x1xbf16> loc(#loc57) } -> tensor<1x120x1x1xbf16> loc(#loc57) %205 = xten_nn.subgraph (%arg5 = %204: tensor<1x120x1x1xbf16>) attributes { LayerName = "Div_72", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "469", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], OutputName = "Div_72", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "471", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], memory_configuration = { 
L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x120x1x1xbf16>) attributes { LayerName = "Div_72", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "469", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], OutputName = "Div_72", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "471", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_72", OutputName = "Div_72", shift = 0 : i8} : (tensor<1x120x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x120x1x1xbf16> loc(#loc58) xten_nn.output %463 : tensor<1x120x1x1xbf16> loc(#loc58) } -> tensor<1x120x1x1xbf16> loc(#loc58) xten_nn.output %461 : tensor<1x120x1x1xbf16> loc(#loc58) } -> tensor<1x120x1x1xbf16> loc(#loc58) %206 = xten_nn.subgraph (%arg5 = %205: tensor<1x120x1x1xbf16>) attributes { LayerName = "Mul_73_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "471", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], OutputName = "Mul_73_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { 
CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "472", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> } ], Specializes = "TileAdf", With = { config.aie_arch = "aie2p", config.dtype = "bfloat16", config.i_dim_c = 120 : ui32, config.i_dim_h = 1 : ui32, config.i_dim_n = 1 : ui32, config.i_dim_w = 1 : ui32, config.rep_dim_c = 1 : ui32, config.rep_dim_h = 23 : ui32, config.rep_dim_w = 40 : ui32 }} { %461 = tosa.tile %arg5 {multiples = array} : (tensor<1x120x1x1xbf16>) -> tensor<1x120x23x40xbf16> loc(#loc59) xten_nn.output %461 : tensor<1x120x23x40xbf16> loc(#loc59) } -> tensor<1x120x23x40xbf16> loc(#loc59) %207 = xten_nn.subgraph (%arg5 = %206: tensor<1x120x23x40xbf16>, %arg6 = %198: tensor<1x120x23x40xbf16>) attributes { LayerName = "Mul_73_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "471", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "469", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> } ], OutputName = "Mul_73_Duplicated#1", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "472", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x120x23x40xbf16>, %arg8 = %arg6: tensor<1x120x23x40xbf16>) attributes { LayerName = "Mul_73_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, 
L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "471", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "469", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> } ], OutputName = "Mul_73_Duplicated#1", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "472", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_73", OutputName = "Mul_73", shift = 0 : i8} : (tensor<1x120x23x40xbf16>, tensor<1x120x23x40xbf16>) -> tensor<1x120x23x40xbf16> loc(#loc59) xten_nn.output %462 : tensor<1x120x23x40xbf16> loc(#loc59) } -> tensor<1x120x23x40xbf16> loc(#loc59) xten_nn.output %461 : tensor<1x120x23x40xbf16> loc(#loc59) } -> tensor<1x120x23x40xbf16> loc(#loc59) %208 = xten_nn.subgraph (%arg5 = %207: tensor<1x120x23x40xbf16>, %arg6 = %124: tensor<40x120x1x1xbf16>, %arg7 = %123: tensor<40xbf16>, %arg8 = %196: tensor<1x40x23x40xbf16>) attributes { LayerName = "Conv_74", OfmShare = 3 : index, Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "472", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> }, { Name = "471", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[40, 120, 1, 1]> : vector<4xindex> }, { Name = "472", UnknownDataFormat = true }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", 
L3Vectorization = "C:8", Name = "472", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], OutputName = "Add_75", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "475", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg9 = %arg5: tensor<1x120x23x40xbf16>, %arg10 = %arg6: tensor<40x120x1x1xbf16>, %arg11 = %arg7: tensor<40xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_74", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "472", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> }, { Name = "471", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[40, 120, 1, 1]> : vector<4xindex> }, { Name = "472", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_74", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "970", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = 
"bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %463 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc60) %465 = tosa.reshape %arg10 {new_shape = array} : (tensor<40x120x1x1xbf16>) -> tensor<40x1x1x120xbf16> loc(#loc60) %466 = tosa.transpose %arg9, %464 : (tensor<1x120x23x40xbf16>, tensor<4xi32>) -> tensor<1x23x40x120xbf16> loc(#loc60) %467 = tosa.conv2d %466, %465, %arg11 { PartOfLayerName = "Conv_74", PartOfOutputName = "Conv_74", dilation = array, pad = array, stride = array} : (tensor<1x23x40x120xbf16>, tensor<40x1x1x120xbf16>, tensor<40xbf16>) -> tensor<1x23x40x40xbf16> loc(#loc60) %468 = tosa.transpose %467, %463 : (tensor<1x23x40x40xbf16>, tensor<4xi32>) -> tensor<1x40x23x40xbf16> loc(#loc60) xten_nn.output %468 : tensor<1x40x23x40xbf16> loc(#loc60) } -> tensor<1x40x23x40xbf16> loc(#loc60) %462 = xten_nn.subgraph (%arg9 = %461: tensor<1x40x23x40xbf16>, %arg10 = %arg8: tensor<1x40x23x40xbf16>) attributes { LayerName = "Add_75", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "970", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "472", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], OutputName = "Add_75", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "475", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = 
dense<[1, 40, 23, 40]> : vector<4xindex> } ], Specializes = "AddBf16", Traits = { Binary = true, Elementwise = true }, With = { config.act = 0 : ui8, config.act_type = "LINEAR", config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %463 = tosa.add %arg9, %arg10 {LayerName = "Add_75", OutputName = "Add_75"} : (tensor<1x40x23x40xbf16>, tensor<1x40x23x40xbf16>) -> tensor<1x40x23x40xbf16> loc(#loc61) xten_nn.output %463 : tensor<1x40x23x40xbf16> loc(#loc61) } -> tensor<1x40x23x40xbf16> loc(#loc61) xten_nn.output %462 : tensor<1x40x23x40xbf16> loc(#loc61) } -> tensor<1x40x23x40xbf16> loc(#loc336) %209 = xten_nn.subgraph (%arg5 = %208: tensor<1x40x23x40xbf16>, %arg6 = %122: tensor<120x40x1x1xbf16>, %arg7 = %121: tensor<120xbf16>) attributes { LayerName = "Conv_76", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "475", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { Name = "970", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[120, 40, 1, 1]> : vector<4xindex> }, { Name = "475", UnknownDataFormat = true } ], OutputName = "Relu_77", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "478", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x40x23x40xbf16>, %arg9 = %arg6: tensor<120x40x1x1xbf16>, %arg10 = %arg7: tensor<120xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_76", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = 
"475", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { Name = "970", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[120, 40, 1, 1]> : vector<4xindex> }, { Name = "475", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_77", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "478", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true, NonNegativeOut = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 1 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 0.000000e+00 : bf16, config.lrelu_alpha_kernel = 0.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc337) %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<120x40x1x1xbf16>) -> tensor<120x1x1x40xbf16> loc(#loc337) %465 = tosa.transpose %arg8, %463 : (tensor<1x40x23x40xbf16>, tensor<4xi32>) -> tensor<1x23x40x40xbf16> loc(#loc337) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_76", PartOfOutputName = "Conv_76", dilation = array, pad = array, stride = array} : (tensor<1x23x40x40xbf16>, tensor<120x1x1x40xbf16>, tensor<120xbf16>) -> 
tensor<1x23x40x120xbf16> loc(#loc62) %467 = tosa.clamp %466 { LayerName = "Relu_77", OutputName = "Relu_77", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x23x40x120xbf16>) -> tensor<1x23x40x120xbf16> loc(#loc63) %468 = tosa.transpose %467, %462 : (tensor<1x23x40x120xbf16>, tensor<4xi32>) -> tensor<1x120x23x40xbf16> loc(#loc337) xten_nn.output %468 : tensor<1x120x23x40xbf16> loc(#loc63) } -> tensor<1x120x23x40xbf16> loc(#loc337) xten_nn.output %461 : tensor<1x120x23x40xbf16> loc(#loc337) } -> tensor<1x120x23x40xbf16> loc(#loc337) %210 = xten_nn.subgraph (%arg5 = %209: tensor<1x120x23x40xbf16>, %arg6 = %120: tensor<120x1x5x5xbf16>, %arg7 = %119: tensor<120xbf16>) attributes { LayerName = "Conv_78", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "478", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "973", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[120, 1, 5, 5]> : vector<4xindex> }, { Name = "977", UnknownDataFormat = true } ], OutputName = "Relu_79", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "481", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x120x23x40xbf16>, %arg9 = %arg6: tensor<120x1x5x5xbf16>, %arg10 = %arg7: tensor<120xbf16>) attributes { Dilations = array, HWPadding = [[2, 2], [2, 2]], LayerName = "Conv_78", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "478", Port 
= "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "973", Port = "data_io.wts", SubPort = "wts_data", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[120, 1, 5, 5]> : vector<4xindex> }, { Name = "977", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_79", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "481", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> } ], Specializes = "DepthwiseConv2dBf16", Traits = { NonNegativeOut = true }, With = { config.act = 1 : ui8, config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.kernel_height = 5 : ui8, config.kernel_width = 5 : ui8, config.stride = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc338) %465 = tosa.transpose %arg9, %464 : (tensor<120x1x5x5xbf16>, tensor<4xi32>) -> tensor<5x5x120x1xbf16> loc(#loc338) %466 = tosa.transpose %arg8, %463 : (tensor<1x120x23x40xbf16>, tensor<4xi32>) -> tensor<1x23x40x120xbf16> loc(#loc338) %467 = tosa.depthwise_conv2d %466, %465, %arg10 { PartOfLayerName = "Conv_78", PartOfOutputName = "Conv_78", dilation = array, pad = array, stride = array} : (tensor<1x23x40x120xbf16>, tensor<5x5x120x1xbf16>, tensor<120xbf16>) -> tensor<1x23x40x120xbf16> loc(#loc64) %468 = tosa.clamp %467 { LayerName = "Relu_79", OutputName = "Relu_79", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : 
(tensor<1x23x40x120xbf16>) -> tensor<1x23x40x120xbf16> loc(#loc65) %469 = tosa.transpose %468, %462 : (tensor<1x23x40x120xbf16>, tensor<4xi32>) -> tensor<1x120x23x40xbf16> loc(#loc338) xten_nn.output %469 : tensor<1x120x23x40xbf16> loc(#loc65) } -> tensor<1x120x23x40xbf16> loc(#loc338) xten_nn.output %461 : tensor<1x120x23x40xbf16> loc(#loc338) } -> tensor<1x120x23x40xbf16> loc(#loc338) %211 = xten_nn.subgraph (%arg5 = %210: tensor<1x120x23x40xbf16>) attributes { LayerName = "GlobalAveragePool_80_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "481", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_80_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "482", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 920]> : vector<4xindex> } ], Specializes = "Transpose4dAdf", With = { config.aie_arch = "aie2p", config.dim_0 = 23 : ui32, config.dim_1 = 15 : ui32, config.dim_2 = 40 : ui32, config.dim_3 = 8 : ui32, config.dtype = "bfloat16", config.perm = 6 : ui32 }} { %461 = tosa.reshape %arg5 {new_shape = array} : (tensor<1x120x23x40xbf16>) -> tensor<1x120x1x920xbf16> loc(#loc66) xten_nn.output %461 : tensor<1x120x1x920xbf16> loc(#loc66) } -> tensor<1x120x1x920xbf16> loc(#loc66) %212 = xten_nn.subgraph (%arg5 = %211: tensor<1x120x1x920xbf16>) attributes { LayerName = "GlobalAveragePool_80_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "481", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 920]> : vector<4xindex> } ], 
OutputName = "GlobalAveragePool_80_Duplicated#1", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "482", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x120x1x920xbf16>) attributes { LayerName = "GlobalAveragePool_80_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "481", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 920]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_80_Duplicated#1", PadValue = 0.000000e+00 : bf16, Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "482", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], Specializes = "ReduceMeanC8Bf16", Traits = { Reduce = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.full_channel = 120 : ui32, config.full_height = 1 : ui32, config.full_width = 920 : ui32, config.reduce_dim = "W" }} { %462 = xten_nn.reduce_mean %arg6 {axes = array, keepdims = 1 : i64} : (tensor<1x120x1x920xbf16>) -> tensor<1x120x1x1xbf16> loc(#loc66) xten_nn.output %462 : tensor<1x120x1x1xbf16> loc(#loc66) } -> tensor<1x120x1x1xbf16> loc(#loc66) xten_nn.output %461 : tensor<1x120x1x1xbf16> loc(#loc66) } -> tensor<1x120x1x1xbf16> loc(#loc66) %213 = xten_nn.subgraph (%arg5 = %212: tensor<1x120x1x1xbf16>, %arg6 = %118: tensor<32x120x1x1xbf16>, %arg7 = %117: tensor<32xbf16>) attributes { LayerName = "Conv_81", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", 
Name = "482", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> }, { Name = "481", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[32, 120, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.6.block.2.fc1.weight", UnknownDataFormat = true } ], OutputName = "Relu_82", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "484", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x120x1x1xbf16>, %arg9 = %arg6: tensor<32x120x1x1xbf16>, %arg10 = %arg7: tensor<32xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_81", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "482", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> }, { Name = "481", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[32, 120, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.6.block.2.fc1.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_82", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "484", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 1, 1]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true, NonNegativeOut = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 1 : 
ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 0.000000e+00 : bf16, config.lrelu_alpha_kernel = 0.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<32x120x1x1xbf16>) -> tensor<32x1x1x120xbf16> loc(#loc339) %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x120x1x1xbf16>) -> tensor<1x1x1x120xbf16> loc(#loc339) %464 = tosa.conv2d %463, %462, %arg10 { PartOfLayerName = "Conv_81", PartOfOutputName = "Conv_81", dilation = array, pad = array, stride = array} : (tensor<1x1x1x120xbf16>, tensor<32x1x1x120xbf16>, tensor<32xbf16>) -> tensor<1x1x1x32xbf16> loc(#loc67) %465 = tosa.clamp %464 { LayerName = "Relu_82", OutputName = "Relu_82", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x1x1x32xbf16>) -> tensor<1x1x1x32xbf16> loc(#loc68) %466 = tosa.reshape %465 {new_shape = array} : (tensor<1x1x1x32xbf16>) -> tensor<1x32x1x1xbf16> loc(#loc339) xten_nn.output %466 : tensor<1x32x1x1xbf16> loc(#loc68) } -> tensor<1x32x1x1xbf16> loc(#loc339) xten_nn.output %461 : tensor<1x32x1x1xbf16> loc(#loc339) } -> tensor<1x32x1x1xbf16> loc(#loc339) %214 = xten_nn.subgraph (%arg5 = %213: tensor<1x32x1x1xbf16>, %arg6 = %116: tensor<120x32x1x1xbf16>, %arg7 = %115: tensor<120xbf16>) attributes { LayerName = "Conv_83", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "484", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 1, 1]> : vector<4xindex> }, { Name = "483", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[120, 32, 1, 1]> : 
vector<4xindex> }, { Name = "backbone.features.6.block.2.fc2.weight", UnknownDataFormat = true } ], OutputName = "Conv_83", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "485", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x32x1x1xbf16>, %arg9 = %arg6: tensor<120x32x1x1xbf16>, %arg10 = %arg7: tensor<120xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_83", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "484", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 1, 1]> : vector<4xindex> }, { Name = "483", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[120, 32, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.6.block.2.fc2.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_83", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "485", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", 
config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<120x32x1x1xbf16>) -> tensor<120x1x1x32xbf16> loc(#loc69) %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x32x1x1xbf16>) -> tensor<1x1x1x32xbf16> loc(#loc69) %464 = tosa.conv2d %463, %462, %arg10 { PartOfLayerName = "Conv_83", PartOfOutputName = "Conv_83", dilation = array, pad = array, stride = array} : (tensor<1x1x1x32xbf16>, tensor<120x1x1x32xbf16>, tensor<120xbf16>) -> tensor<1x1x1x120xbf16> loc(#loc69) %465 = tosa.reshape %464 {new_shape = array} : (tensor<1x1x1x120xbf16>) -> tensor<1x120x1x1xbf16> loc(#loc69) xten_nn.output %465 : tensor<1x120x1x1xbf16> loc(#loc69) } -> tensor<1x120x1x1xbf16> loc(#loc69) xten_nn.output %461 : tensor<1x120x1x1xbf16> loc(#loc69) } -> tensor<1x120x1x1xbf16> loc(#loc69) %215 = xten_nn.subgraph (%arg5 = %214: tensor<1x120x1x1xbf16>) attributes { LayerName = "Add_85", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "485", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], OutputName = "Add_85", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "487", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x120x1x1xbf16>) attributes { LayerName = "Add_85", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "485", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, 
l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], OutputName = "Add_85", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "487", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_85", OutputName = "Add_85"} : (tensor<1x120x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x120x1x1xbf16> loc(#loc70) xten_nn.output %463 : tensor<1x120x1x1xbf16> loc(#loc70) } -> tensor<1x120x1x1xbf16> loc(#loc70) xten_nn.output %461 : tensor<1x120x1x1xbf16> loc(#loc70) } -> tensor<1x120x1x1xbf16> loc(#loc70) %216 = xten_nn.subgraph (%arg5 = %215: tensor<1x120x1x1xbf16>) attributes { LayerName = "Clip_88", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "487", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], OutputName = "Clip_88", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "490", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x120x1x1xbf16>) attributes { LayerName = "Clip_88", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", 
L3Vectorization = "C:8", Name = "487", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], OutputName = "Clip_88", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "490", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_88", OutputName = "Clip_88", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x120x1x1xbf16>) -> tensor<1x120x1x1xbf16> loc(#loc71) xten_nn.output %462 : tensor<1x120x1x1xbf16> loc(#loc71) } -> tensor<1x120x1x1xbf16> loc(#loc71) xten_nn.output %461 : tensor<1x120x1x1xbf16> loc(#loc71) } -> tensor<1x120x1x1xbf16> loc(#loc71) %217 = xten_nn.subgraph (%arg5 = %216: tensor<1x120x1x1xbf16>) attributes { LayerName = "Div_90", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "490", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], OutputName = "Div_90", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "492", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: 
tensor<1x120x1x1xbf16>) attributes { LayerName = "Div_90", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "490", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], OutputName = "Div_90", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "492", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_90", OutputName = "Div_90", shift = 0 : i8} : (tensor<1x120x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x120x1x1xbf16> loc(#loc72) xten_nn.output %463 : tensor<1x120x1x1xbf16> loc(#loc72) } -> tensor<1x120x1x1xbf16> loc(#loc72) xten_nn.output %461 : tensor<1x120x1x1xbf16> loc(#loc72) } -> tensor<1x120x1x1xbf16> loc(#loc72) %218 = xten_nn.subgraph (%arg5 = %217: tensor<1x120x1x1xbf16>) attributes { LayerName = "Mul_91_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "492", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], OutputName = "Mul_91_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "493", Port = "data_io.ofm", l3_extend_end = dense<0> : 
vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> } ], Specializes = "TileAdf", With = { config.aie_arch = "aie2p", config.dtype = "bfloat16", config.i_dim_c = 120 : ui32, config.i_dim_h = 1 : ui32, config.i_dim_n = 1 : ui32, config.i_dim_w = 1 : ui32, config.rep_dim_c = 1 : ui32, config.rep_dim_h = 23 : ui32, config.rep_dim_w = 40 : ui32 }} { %461 = tosa.tile %arg5 {multiples = array} : (tensor<1x120x1x1xbf16>) -> tensor<1x120x23x40xbf16> loc(#loc73) xten_nn.output %461 : tensor<1x120x23x40xbf16> loc(#loc73) } -> tensor<1x120x23x40xbf16> loc(#loc73) %219 = xten_nn.subgraph (%arg5 = %218: tensor<1x120x23x40xbf16>, %arg6 = %210: tensor<1x120x23x40xbf16>) attributes { LayerName = "Mul_91_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "492", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "490", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> } ], OutputName = "Mul_91_Duplicated#1", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "493", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x120x23x40xbf16>, %arg8 = %arg6: tensor<1x120x23x40xbf16>) attributes { LayerName = "Mul_91_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "492", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = 
dense<[1, 120, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "490", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> } ], OutputName = "Mul_91_Duplicated#1", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "493", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_91", OutputName = "Mul_91", shift = 0 : i8} : (tensor<1x120x23x40xbf16>, tensor<1x120x23x40xbf16>) -> tensor<1x120x23x40xbf16> loc(#loc73) xten_nn.output %462 : tensor<1x120x23x40xbf16> loc(#loc73) } -> tensor<1x120x23x40xbf16> loc(#loc73) xten_nn.output %461 : tensor<1x120x23x40xbf16> loc(#loc73) } -> tensor<1x120x23x40xbf16> loc(#loc73) %220 = xten_nn.subgraph (%arg5 = %219: tensor<1x120x23x40xbf16>, %arg6 = %114: tensor<40x120x1x1xbf16>, %arg7 = %113: tensor<40xbf16>, %arg8 = %208: tensor<1x40x23x40xbf16>) attributes { LayerName = "Conv_92", OfmShare = 3 : index, Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "493", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> }, { Name = "492", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[40, 120, 1, 1]> : vector<4xindex> }, { Name = "493", UnknownDataFormat = true }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "493", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], 
OutputName = "Add_93", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "496", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg9 = %arg5: tensor<1x120x23x40xbf16>, %arg10 = %arg6: tensor<40x120x1x1xbf16>, %arg11 = %arg7: tensor<40xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_92", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "493", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> }, { Name = "492", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[40, 120, 1, 1]> : vector<4xindex> }, { Name = "493", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_92", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "979", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 
1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %463 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc74) %465 = tosa.reshape %arg10 {new_shape = array} : (tensor<40x120x1x1xbf16>) -> tensor<40x1x1x120xbf16> loc(#loc74) %466 = tosa.transpose %arg9, %464 : (tensor<1x120x23x40xbf16>, tensor<4xi32>) -> tensor<1x23x40x120xbf16> loc(#loc74) %467 = tosa.conv2d %466, %465, %arg11 { PartOfLayerName = "Conv_92", PartOfOutputName = "Conv_92", dilation = array, pad = array, stride = array} : (tensor<1x23x40x120xbf16>, tensor<40x1x1x120xbf16>, tensor<40xbf16>) -> tensor<1x23x40x40xbf16> loc(#loc74) %468 = tosa.transpose %467, %463 : (tensor<1x23x40x40xbf16>, tensor<4xi32>) -> tensor<1x40x23x40xbf16> loc(#loc74) xten_nn.output %468 : tensor<1x40x23x40xbf16> loc(#loc74) } -> tensor<1x40x23x40xbf16> loc(#loc74) %462 = xten_nn.subgraph (%arg9 = %461: tensor<1x40x23x40xbf16>, %arg10 = %arg8: tensor<1x40x23x40xbf16>) attributes { LayerName = "Add_93", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "979", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "493", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], OutputName = "Add_93", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "496", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], Specializes = "AddBf16", Traits = { Binary = true, Elementwise = true }, With = { config.act = 0 : ui8, 
config.act_type = "LINEAR", config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %463 = tosa.add %arg9, %arg10 {LayerName = "Add_93", OutputName = "Add_93"} : (tensor<1x40x23x40xbf16>, tensor<1x40x23x40xbf16>) -> tensor<1x40x23x40xbf16> loc(#loc75) xten_nn.output %463 : tensor<1x40x23x40xbf16> loc(#loc75) } -> tensor<1x40x23x40xbf16> loc(#loc75) xten_nn.output %462 : tensor<1x40x23x40xbf16> loc(#loc75) } -> tensor<1x40x23x40xbf16> loc(#loc340) %221 = xten_nn.subgraph (%arg5 = %220: tensor<1x40x23x40xbf16>, %arg6 = %112: tensor<240x40x1x1xbf16>, %arg7 = %111: tensor<240xbf16>) attributes { LayerName = "Conv_94", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "496", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { Name = "979", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[240, 40, 1, 1]> : vector<4xindex> }, { Name = "496", UnknownDataFormat = true } ], OutputName = "Conv_94", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "982", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x40x23x40xbf16>, %arg9 = %arg6: tensor<240x40x1x1xbf16>, %arg10 = %arg7: tensor<240xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_94", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "496", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { Name = "979", Port 
= "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[240, 40, 1, 1]> : vector<4xindex> }, { Name = "496", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_94", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "982", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc76) %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<240x40x1x1xbf16>) -> tensor<240x1x1x40xbf16> loc(#loc76) %465 = tosa.transpose %arg8, %463 : (tensor<1x40x23x40xbf16>, tensor<4xi32>) -> tensor<1x23x40x40xbf16> loc(#loc76) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_94", PartOfOutputName = "Conv_94", dilation = array, pad = array, stride = array} : (tensor<1x23x40x40xbf16>, tensor<240x1x1x40xbf16>, tensor<240xbf16>) -> tensor<1x23x40x240xbf16> loc(#loc76) %467 = tosa.transpose %466, %462 : (tensor<1x23x40x240xbf16>, tensor<4xi32>) -> tensor<1x240x23x40xbf16> loc(#loc76) xten_nn.output %467 : 
tensor<1x240x23x40xbf16> loc(#loc76) } -> tensor<1x240x23x40xbf16> loc(#loc76) xten_nn.output %461 : tensor<1x240x23x40xbf16> loc(#loc76) } -> tensor<1x240x23x40xbf16> loc(#loc76) %222 = xten_nn.subgraph (%arg5 = %221: tensor<1x240x23x40xbf16>) attributes { LayerName = "Add_96", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "982", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> } ], OutputName = "Add_96", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "500", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x240x23x40xbf16>) attributes { LayerName = "Add_96", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "982", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> } ], OutputName = "Add_96", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "500", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_96", OutputName = "Add_96"} 
: (tensor<1x240x23x40xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x240x23x40xbf16> loc(#loc77) xten_nn.output %463 : tensor<1x240x23x40xbf16> loc(#loc77) } -> tensor<1x240x23x40xbf16> loc(#loc77) xten_nn.output %461 : tensor<1x240x23x40xbf16> loc(#loc77) } -> tensor<1x240x23x40xbf16> loc(#loc77) %223 = xten_nn.subgraph (%arg5 = %222: tensor<1x240x23x40xbf16>) attributes { LayerName = "Clip_99", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "500", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> } ], OutputName = "Clip_99", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "503", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x240x23x40xbf16>) attributes { LayerName = "Clip_99", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "500", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> } ], OutputName = "Clip_99", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "503", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 
= tosa.clamp %arg6 { LayerName = "Clip_99", OutputName = "Clip_99", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x240x23x40xbf16>) -> tensor<1x240x23x40xbf16> loc(#loc78) xten_nn.output %462 : tensor<1x240x23x40xbf16> loc(#loc78) } -> tensor<1x240x23x40xbf16> loc(#loc78) xten_nn.output %461 : tensor<1x240x23x40xbf16> loc(#loc78) } -> tensor<1x240x23x40xbf16> loc(#loc78) %224 = xten_nn.subgraph (%arg5 = %223: tensor<1x240x23x40xbf16>) attributes { LayerName = "Div_101", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "503", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> } ], OutputName = "Div_101", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "505", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x240x23x40xbf16>) attributes { LayerName = "Div_101", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "503", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> } ], OutputName = "Div_101", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "505", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, 
config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_101", OutputName = "Div_101", shift = 0 : i8} : (tensor<1x240x23x40xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x240x23x40xbf16> loc(#loc79) xten_nn.output %463 : tensor<1x240x23x40xbf16> loc(#loc79) } -> tensor<1x240x23x40xbf16> loc(#loc79) xten_nn.output %461 : tensor<1x240x23x40xbf16> loc(#loc79) } -> tensor<1x240x23x40xbf16> loc(#loc79) %225 = xten_nn.subgraph (%arg5 = %221: tensor<1x240x23x40xbf16>, %arg6 = %224: tensor<1x240x23x40xbf16>) attributes { LayerName = "Mul_102", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "982", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "496", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> } ], OutputName = "Mul_102", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "506", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x240x23x40xbf16>, %arg8 = %arg6: tensor<1x240x23x40xbf16>) attributes { LayerName = "Mul_102", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "982", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = 
false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "496", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> } ], OutputName = "Mul_102", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "506", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_102", OutputName = "Mul_102", shift = 0 : i8} : (tensor<1x240x23x40xbf16>, tensor<1x240x23x40xbf16>) -> tensor<1x240x23x40xbf16> loc(#loc80) xten_nn.output %462 : tensor<1x240x23x40xbf16> loc(#loc80) } -> tensor<1x240x23x40xbf16> loc(#loc80) xten_nn.output %461 : tensor<1x240x23x40xbf16> loc(#loc80) } -> tensor<1x240x23x40xbf16> loc(#loc80) %226 = xten_nn.subgraph (%arg5 = %225: tensor<1x240x23x40xbf16>, %arg6 = %110: tensor<240x1x3x3xbf16>, %arg7 = %109: tensor<240xbf16>) attributes { LayerName = "Conv_103", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "506", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "982", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[240, 1, 3, 3]> : vector<4xindex> }, { Name = "506", UnknownDataFormat = true } ], OutputName = "Conv_103", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "985", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : 
vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x240x23x40xbf16>, %arg9 = %arg6: tensor<240x1x3x3xbf16>, %arg10 = %arg7: tensor<240xbf16>) attributes { Dilations = array, HWPadding = [[1, 1], [1, 0]], LayerName = "Conv_103", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "506", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "982", Port = "data_io.wts", SubPort = "wts_data", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[240, 1, 3, 3]> : vector<4xindex> }, { Name = "506", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_103", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "985", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> } ], Specializes = "DepthwiseConv2dBf16", With = { config.act = 0 : ui8, config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.kernel_height = 3 : ui8, config.kernel_width = 3 : ui8, config.stride = 2 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc81) %465 = tosa.transpose %arg9, %464 : (tensor<240x1x3x3xbf16>, tensor<4xi32>) -> tensor<3x3x240x1xbf16> loc(#loc81) %466 = tosa.transpose %arg8, %463 : (tensor<1x240x23x40xbf16>, tensor<4xi32>) -> tensor<1x23x40x240xbf16> loc(#loc81) %467 = 
tosa.depthwise_conv2d %466, %465, %arg10 { PartOfLayerName = "Conv_103", PartOfOutputName = "Conv_103", dilation = array, pad = array, stride = array} : (tensor<1x23x40x240xbf16>, tensor<3x3x240x1xbf16>, tensor<240xbf16>) -> tensor<1x12x20x240xbf16> loc(#loc81) %468 = tosa.transpose %467, %462 : (tensor<1x12x20x240xbf16>, tensor<4xi32>) -> tensor<1x240x12x20xbf16> loc(#loc81) xten_nn.output %468 : tensor<1x240x12x20xbf16> loc(#loc81) } -> tensor<1x240x12x20xbf16> loc(#loc81) xten_nn.output %461 : tensor<1x240x12x20xbf16> loc(#loc81) } -> tensor<1x240x12x20xbf16> loc(#loc81) %227 = xten_nn.subgraph (%arg5 = %226: tensor<1x240x12x20xbf16>) attributes { LayerName = "Add_105", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "985", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> } ], OutputName = "Add_105", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "510", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x240x12x20xbf16>) attributes { LayerName = "Add_105", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "985", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> } ], OutputName = "Add_105", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "510", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits 
= { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_105", OutputName = "Add_105"} : (tensor<1x240x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x240x12x20xbf16> loc(#loc82) xten_nn.output %463 : tensor<1x240x12x20xbf16> loc(#loc82) } -> tensor<1x240x12x20xbf16> loc(#loc82) xten_nn.output %461 : tensor<1x240x12x20xbf16> loc(#loc82) } -> tensor<1x240x12x20xbf16> loc(#loc82) %228 = xten_nn.subgraph (%arg5 = %227: tensor<1x240x12x20xbf16>) attributes { LayerName = "Clip_108", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "510", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_108", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "513", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x240x12x20xbf16>) attributes { LayerName = "Clip_108", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "510", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_108", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "513", Port = "data_io.ofm", l3_extend_end = dense<0> : 
vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_108", OutputName = "Clip_108", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x240x12x20xbf16>) -> tensor<1x240x12x20xbf16> loc(#loc83) xten_nn.output %462 : tensor<1x240x12x20xbf16> loc(#loc83) } -> tensor<1x240x12x20xbf16> loc(#loc83) xten_nn.output %461 : tensor<1x240x12x20xbf16> loc(#loc83) } -> tensor<1x240x12x20xbf16> loc(#loc83) %229 = xten_nn.subgraph (%arg5 = %228: tensor<1x240x12x20xbf16>) attributes { LayerName = "Div_110", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "513", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> } ], OutputName = "Div_110", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "515", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x240x12x20xbf16>) attributes { LayerName = "Div_110", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "513", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> } ], OutputName = "Div_110", Reason = "MllibKernel", Results = 
[ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "515", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_110", OutputName = "Div_110", shift = 0 : i8} : (tensor<1x240x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x240x12x20xbf16> loc(#loc84) xten_nn.output %463 : tensor<1x240x12x20xbf16> loc(#loc84) } -> tensor<1x240x12x20xbf16> loc(#loc84) xten_nn.output %461 : tensor<1x240x12x20xbf16> loc(#loc84) } -> tensor<1x240x12x20xbf16> loc(#loc84) %230 = xten_nn.subgraph (%arg5 = %226: tensor<1x240x12x20xbf16>, %arg6 = %229: tensor<1x240x12x20xbf16>) attributes { LayerName = "Mul_111", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "985", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "506", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_111", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "516", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = 
xten_nn.subgraph (%arg7 = %arg5: tensor<1x240x12x20xbf16>, %arg8 = %arg6: tensor<1x240x12x20xbf16>) attributes { LayerName = "Mul_111", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "985", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "506", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_111", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "516", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_111", OutputName = "Mul_111", shift = 0 : i8} : (tensor<1x240x12x20xbf16>, tensor<1x240x12x20xbf16>) -> tensor<1x240x12x20xbf16> loc(#loc85) xten_nn.output %462 : tensor<1x240x12x20xbf16> loc(#loc85) } -> tensor<1x240x12x20xbf16> loc(#loc85) xten_nn.output %461 : tensor<1x240x12x20xbf16> loc(#loc85) } -> tensor<1x240x12x20xbf16> loc(#loc85) %231 = xten_nn.subgraph (%arg5 = %230: tensor<1x240x12x20xbf16>, %arg6 = %108: tensor<80x240x1x1xbf16>, %arg7 = %107: tensor<80xbf16>) attributes { LayerName = "Conv_112", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "516", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> }, { Name = "985", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[80, 
240, 1, 1]> : vector<4xindex> }, { Name = "516", UnknownDataFormat = true } ], OutputName = "Conv_112", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "988", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x240x12x20xbf16>, %arg9 = %arg6: tensor<80x240x1x1xbf16>, %arg10 = %arg7: tensor<80xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_112", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "516", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> }, { Name = "985", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[80, 240, 1, 1]> : vector<4xindex> }, { Name = "516", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_112", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "988", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : 
ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc86) %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<80x240x1x1xbf16>) -> tensor<80x1x1x240xbf16> loc(#loc86) %465 = tosa.transpose %arg8, %463 : (tensor<1x240x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x240xbf16> loc(#loc86) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_112", PartOfOutputName = "Conv_112", dilation = array, pad = array, stride = array} : (tensor<1x12x20x240xbf16>, tensor<80x1x1x240xbf16>, tensor<80xbf16>) -> tensor<1x12x20x80xbf16> loc(#loc86) %467 = tosa.transpose %466, %462 : (tensor<1x12x20x80xbf16>, tensor<4xi32>) -> tensor<1x80x12x20xbf16> loc(#loc86) xten_nn.output %467 : tensor<1x80x12x20xbf16> loc(#loc86) } -> tensor<1x80x12x20xbf16> loc(#loc86) xten_nn.output %461 : tensor<1x80x12x20xbf16> loc(#loc86) } -> tensor<1x80x12x20xbf16> loc(#loc86) %232 = xten_nn.subgraph (%arg5 = %231: tensor<1x80x12x20xbf16>, %arg6 = %106: tensor<200x80x1x1xbf16>, %arg7 = %105: tensor<200xbf16>) attributes { LayerName = "Conv_113", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "988", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> }, { Name = "516", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[200, 80, 1, 1]> : vector<4xindex> }, { Name = "992", UnknownDataFormat = true } ], OutputName = "Conv_113", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "991", l3_extend_end = dense<0> : vector<4xindex>, 
l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x80x12x20xbf16>, %arg9 = %arg6: tensor<200x80x1x1xbf16>, %arg10 = %arg7: tensor<200xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_113", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "988", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> }, { Name = "516", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[200, 80, 1, 1]> : vector<4xindex> }, { Name = "992", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_113", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "991", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> 
tensor<4xi32> loc(#loc87) %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<200x80x1x1xbf16>) -> tensor<200x1x1x80xbf16> loc(#loc87) %465 = tosa.transpose %arg8, %463 : (tensor<1x80x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x80xbf16> loc(#loc87) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_113", PartOfOutputName = "Conv_113", dilation = array, pad = array, stride = array} : (tensor<1x12x20x80xbf16>, tensor<200x1x1x80xbf16>, tensor<200xbf16>) -> tensor<1x12x20x200xbf16> loc(#loc87) %467 = tosa.transpose %466, %462 : (tensor<1x12x20x200xbf16>, tensor<4xi32>) -> tensor<1x200x12x20xbf16> loc(#loc87) xten_nn.output %467 : tensor<1x200x12x20xbf16> loc(#loc87) } -> tensor<1x200x12x20xbf16> loc(#loc87) xten_nn.output %461 : tensor<1x200x12x20xbf16> loc(#loc87) } -> tensor<1x200x12x20xbf16> loc(#loc87) %233 = xten_nn.subgraph (%arg5 = %232: tensor<1x200x12x20xbf16>) attributes { LayerName = "Add_115", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "991", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], OutputName = "Add_115", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "522", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x200x12x20xbf16>) attributes { LayerName = "Add_115", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "991", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], OutputName = "Add_115", Reason = "MllibKernel", Results = [ { CurrentDataFormat = 
"NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "522", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_115", OutputName = "Add_115"} : (tensor<1x200x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x200x12x20xbf16> loc(#loc88) xten_nn.output %463 : tensor<1x200x12x20xbf16> loc(#loc88) } -> tensor<1x200x12x20xbf16> loc(#loc88) xten_nn.output %461 : tensor<1x200x12x20xbf16> loc(#loc88) } -> tensor<1x200x12x20xbf16> loc(#loc88) %234 = xten_nn.subgraph (%arg5 = %233: tensor<1x200x12x20xbf16>) attributes { LayerName = "Clip_118", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "522", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_118", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "525", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x200x12x20xbf16>) attributes { LayerName = "Clip_118", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "522", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = 
dense<[1, 200, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_118", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "525", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_118", OutputName = "Clip_118", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x200x12x20xbf16>) -> tensor<1x200x12x20xbf16> loc(#loc89) xten_nn.output %462 : tensor<1x200x12x20xbf16> loc(#loc89) } -> tensor<1x200x12x20xbf16> loc(#loc89) xten_nn.output %461 : tensor<1x200x12x20xbf16> loc(#loc89) } -> tensor<1x200x12x20xbf16> loc(#loc89) %235 = xten_nn.subgraph (%arg5 = %234: tensor<1x200x12x20xbf16>) attributes { LayerName = "Div_120", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "525", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], OutputName = "Div_120", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "527", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x200x12x20xbf16>) attributes { LayerName = "Div_120", Operands = [ { CurrentDataFormat = "NCHW", 
L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "525", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], OutputName = "Div_120", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "527", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_120", OutputName = "Div_120", shift = 0 : i8} : (tensor<1x200x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x200x12x20xbf16> loc(#loc90) xten_nn.output %463 : tensor<1x200x12x20xbf16> loc(#loc90) } -> tensor<1x200x12x20xbf16> loc(#loc90) xten_nn.output %461 : tensor<1x200x12x20xbf16> loc(#loc90) } -> tensor<1x200x12x20xbf16> loc(#loc90) %236 = xten_nn.subgraph (%arg5 = %232: tensor<1x200x12x20xbf16>, %arg6 = %235: tensor<1x200x12x20xbf16>) attributes { LayerName = "Mul_121", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "991", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "988", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_121", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = 
"528", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x200x12x20xbf16>, %arg8 = %arg6: tensor<1x200x12x20xbf16>) attributes { LayerName = "Mul_121", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "991", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "988", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_121", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "528", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_121", OutputName = "Mul_121", shift = 0 : i8} : (tensor<1x200x12x20xbf16>, tensor<1x200x12x20xbf16>) -> tensor<1x200x12x20xbf16> loc(#loc91) xten_nn.output %462 : tensor<1x200x12x20xbf16> loc(#loc91) } -> tensor<1x200x12x20xbf16> loc(#loc91) xten_nn.output %461 : tensor<1x200x12x20xbf16> loc(#loc91) } -> tensor<1x200x12x20xbf16> loc(#loc91) %237 = xten_nn.subgraph (%arg5 = %236: tensor<1x200x12x20xbf16>, %arg6 = %104: tensor<200x1x3x3xbf16>, %arg7 = %103: tensor<200xbf16>) attributes { LayerName = "Conv_122", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = 
"C:8", Name = "528", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "991", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[200, 1, 3, 3]> : vector<4xindex> }, { Name = "528", UnknownDataFormat = true } ], OutputName = "Conv_122", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "994", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x200x12x20xbf16>, %arg9 = %arg6: tensor<200x1x3x3xbf16>, %arg10 = %arg7: tensor<200xbf16>) attributes { Dilations = array, HWPadding = [[1, 1], [1, 1]], LayerName = "Conv_122", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "528", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "991", Port = "data_io.wts", SubPort = "wts_data", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[200, 1, 3, 3]> : vector<4xindex> }, { Name = "528", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_122", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "994", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], Specializes = "DepthwiseConv2dBf16", With = { config.act = 0 : ui8, config.aie_arch = "aie2p", config.batch_size = 1 : ui8, 
config.compiler = "chess", config.kernel_height = 3 : ui8, config.kernel_width = 3 : ui8, config.stride = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc92) %465 = tosa.transpose %arg9, %464 : (tensor<200x1x3x3xbf16>, tensor<4xi32>) -> tensor<3x3x200x1xbf16> loc(#loc92) %466 = tosa.transpose %arg8, %463 : (tensor<1x200x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x200xbf16> loc(#loc92) %467 = tosa.depthwise_conv2d %466, %465, %arg10 { PartOfLayerName = "Conv_122", PartOfOutputName = "Conv_122", dilation = array, pad = array, stride = array} : (tensor<1x12x20x200xbf16>, tensor<3x3x200x1xbf16>, tensor<200xbf16>) -> tensor<1x12x20x200xbf16> loc(#loc92) %468 = tosa.transpose %467, %462 : (tensor<1x12x20x200xbf16>, tensor<4xi32>) -> tensor<1x200x12x20xbf16> loc(#loc92) xten_nn.output %468 : tensor<1x200x12x20xbf16> loc(#loc92) } -> tensor<1x200x12x20xbf16> loc(#loc92) xten_nn.output %461 : tensor<1x200x12x20xbf16> loc(#loc92) } -> tensor<1x200x12x20xbf16> loc(#loc92) %238 = xten_nn.subgraph (%arg5 = %237: tensor<1x200x12x20xbf16>) attributes { LayerName = "Add_124", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "994", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], OutputName = "Add_124", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "532", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { 
%461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x200x12x20xbf16>) attributes { LayerName = "Add_124", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "994", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], OutputName = "Add_124", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "532", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_124", OutputName = "Add_124"} : (tensor<1x200x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x200x12x20xbf16> loc(#loc93) xten_nn.output %463 : tensor<1x200x12x20xbf16> loc(#loc93) } -> tensor<1x200x12x20xbf16> loc(#loc93) xten_nn.output %461 : tensor<1x200x12x20xbf16> loc(#loc93) } -> tensor<1x200x12x20xbf16> loc(#loc93) %239 = xten_nn.subgraph (%arg5 = %238: tensor<1x200x12x20xbf16>) attributes { LayerName = "Clip_127", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "532", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_127", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "535", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } 
], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x200x12x20xbf16>) attributes { LayerName = "Clip_127", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "532", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_127", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "535", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_127", OutputName = "Clip_127", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x200x12x20xbf16>) -> tensor<1x200x12x20xbf16> loc(#loc94) xten_nn.output %462 : tensor<1x200x12x20xbf16> loc(#loc94) } -> tensor<1x200x12x20xbf16> loc(#loc94) xten_nn.output %461 : tensor<1x200x12x20xbf16> loc(#loc94) } -> tensor<1x200x12x20xbf16> loc(#loc94) %240 = xten_nn.subgraph (%arg5 = %239: tensor<1x200x12x20xbf16>) attributes { LayerName = "Div_129", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "535", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], OutputName = "Div_129", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = 
"HCWN", L3Vectorization = "C:8", Name = "537", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x200x12x20xbf16>) attributes { LayerName = "Div_129", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "535", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], OutputName = "Div_129", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "537", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_129", OutputName = "Div_129", shift = 0 : i8} : (tensor<1x200x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x200x12x20xbf16> loc(#loc95) xten_nn.output %463 : tensor<1x200x12x20xbf16> loc(#loc95) } -> tensor<1x200x12x20xbf16> loc(#loc95) xten_nn.output %461 : tensor<1x200x12x20xbf16> loc(#loc95) } -> tensor<1x200x12x20xbf16> loc(#loc95) %241 = xten_nn.subgraph (%arg5 = %237: tensor<1x200x12x20xbf16>, %arg6 = %240: tensor<1x200x12x20xbf16>) attributes { LayerName = "Mul_130", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "994", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : 
vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "528", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_130", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "538", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x200x12x20xbf16>, %arg8 = %arg6: tensor<1x200x12x20xbf16>) attributes { LayerName = "Mul_130", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "994", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "528", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_130", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "538", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_130", OutputName = "Mul_130", shift = 0 : i8} : (tensor<1x200x12x20xbf16>, tensor<1x200x12x20xbf16>) -> tensor<1x200x12x20xbf16> loc(#loc96) xten_nn.output %462 : 
tensor<1x200x12x20xbf16> loc(#loc96) } -> tensor<1x200x12x20xbf16> loc(#loc96) xten_nn.output %461 : tensor<1x200x12x20xbf16> loc(#loc96) } -> tensor<1x200x12x20xbf16> loc(#loc96) %242 = xten_nn.subgraph (%arg5 = %241: tensor<1x200x12x20xbf16>, %arg6 = %102: tensor<80x200x1x1xbf16>, %arg7 = %101: tensor<80xbf16>, %arg8 = %231: tensor<1x80x12x20xbf16>) attributes { LayerName = "Conv_131", OfmShare = 3 : index, Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "538", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> }, { Name = "994", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[80, 200, 1, 1]> : vector<4xindex> }, { Name = "538", UnknownDataFormat = true }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "538", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> } ], OutputName = "Add_132", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "541", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg9 = %arg5: tensor<1x200x12x20xbf16>, %arg10 = %arg6: tensor<80x200x1x1xbf16>, %arg11 = %arg7: tensor<80xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_131", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "538", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> }, { Name = "994", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, 
l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[80, 200, 1, 1]> : vector<4xindex> }, { Name = "538", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_131", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "997", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %463 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc97) %465 = tosa.reshape %arg10 {new_shape = array} : (tensor<80x200x1x1xbf16>) -> tensor<80x1x1x200xbf16> loc(#loc97) %466 = tosa.transpose %arg9, %464 : (tensor<1x200x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x200xbf16> loc(#loc97) %467 = tosa.conv2d %466, %465, %arg11 { PartOfLayerName = "Conv_131", PartOfOutputName = "Conv_131", dilation = array, pad = array, stride = array} : (tensor<1x12x20x200xbf16>, tensor<80x1x1x200xbf16>, tensor<80xbf16>) -> tensor<1x12x20x80xbf16> loc(#loc97) %468 = tosa.transpose %467, %463 : (tensor<1x12x20x80xbf16>, tensor<4xi32>) -> tensor<1x80x12x20xbf16> loc(#loc97) xten_nn.output %468 : tensor<1x80x12x20xbf16> loc(#loc97) } -> tensor<1x80x12x20xbf16> 
loc(#loc97) %462 = xten_nn.subgraph (%arg9 = %461: tensor<1x80x12x20xbf16>, %arg10 = %arg8: tensor<1x80x12x20xbf16>) attributes { LayerName = "Add_132", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "997", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "538", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> } ], OutputName = "Add_132", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "541", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> } ], Specializes = "AddBf16", Traits = { Binary = true, Elementwise = true }, With = { config.act = 0 : ui8, config.act_type = "LINEAR", config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %463 = tosa.add %arg9, %arg10 {LayerName = "Add_132", OutputName = "Add_132"} : (tensor<1x80x12x20xbf16>, tensor<1x80x12x20xbf16>) -> tensor<1x80x12x20xbf16> loc(#loc98) xten_nn.output %463 : tensor<1x80x12x20xbf16> loc(#loc98) } -> tensor<1x80x12x20xbf16> loc(#loc98) xten_nn.output %462 : tensor<1x80x12x20xbf16> loc(#loc98) } -> tensor<1x80x12x20xbf16> loc(#loc341) %243 = xten_nn.subgraph (%arg5 = %242: tensor<1x80x12x20xbf16>, %arg6 = %100: tensor<184x80x1x1xbf16>, %arg7 = %99: tensor<184xbf16>) attributes { LayerName = "Conv_133", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "541", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> }, { Name = "997", UnknownDataFormat = true, l3_extend_end = dense<0> : 
vector<4xindex>, l3_tile_count = dense<[184, 80, 1, 1]> : vector<4xindex> }, { Name = "541", UnknownDataFormat = true } ], OutputName = "Conv_133", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1000", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x80x12x20xbf16>, %arg9 = %arg6: tensor<184x80x1x1xbf16>, %arg10 = %arg7: tensor<184xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_133", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "541", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> }, { Name = "997", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[184, 80, 1, 1]> : vector<4xindex> }, { Name = "541", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_133", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1000", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", 
config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc99) %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<184x80x1x1xbf16>) -> tensor<184x1x1x80xbf16> loc(#loc99) %465 = tosa.transpose %arg8, %463 : (tensor<1x80x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x80xbf16> loc(#loc99) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_133", PartOfOutputName = "Conv_133", dilation = array, pad = array, stride = array} : (tensor<1x12x20x80xbf16>, tensor<184x1x1x80xbf16>, tensor<184xbf16>) -> tensor<1x12x20x184xbf16> loc(#loc99) %467 = tosa.transpose %466, %462 : (tensor<1x12x20x184xbf16>, tensor<4xi32>) -> tensor<1x184x12x20xbf16> loc(#loc99) xten_nn.output %467 : tensor<1x184x12x20xbf16> loc(#loc99) } -> tensor<1x184x12x20xbf16> loc(#loc99) xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc99) } -> tensor<1x184x12x20xbf16> loc(#loc99) %244 = xten_nn.subgraph (%arg5 = %243: tensor<1x184x12x20xbf16>) attributes { LayerName = "Add_135", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1000", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Add_135", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "545", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph 
(%arg6 = %arg5: tensor<1x184x12x20xbf16>) attributes { LayerName = "Add_135", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1000", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Add_135", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "545", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_135", OutputName = "Add_135"} : (tensor<1x184x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc100) xten_nn.output %463 : tensor<1x184x12x20xbf16> loc(#loc100) } -> tensor<1x184x12x20xbf16> loc(#loc100) xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc100) } -> tensor<1x184x12x20xbf16> loc(#loc100) %245 = xten_nn.subgraph (%arg5 = %244: tensor<1x184x12x20xbf16>) attributes { LayerName = "Clip_138", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "545", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_138", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "548", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], 
memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x184x12x20xbf16>) attributes { LayerName = "Clip_138", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "545", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_138", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "548", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_138", OutputName = "Clip_138", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x184x12x20xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc101) xten_nn.output %462 : tensor<1x184x12x20xbf16> loc(#loc101) } -> tensor<1x184x12x20xbf16> loc(#loc101) xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc101) } -> tensor<1x184x12x20xbf16> loc(#loc101) %246 = xten_nn.subgraph (%arg5 = %245: tensor<1x184x12x20xbf16>) attributes { LayerName = "Div_140", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "548", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Div_140", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = 
"HCWN", L3Vectorization = "C:8", Name = "550", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x184x12x20xbf16>) attributes { LayerName = "Div_140", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "548", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Div_140", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "550", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_140", OutputName = "Div_140", shift = 0 : i8} : (tensor<1x184x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc102) xten_nn.output %463 : tensor<1x184x12x20xbf16> loc(#loc102) } -> tensor<1x184x12x20xbf16> loc(#loc102) xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc102) } -> tensor<1x184x12x20xbf16> loc(#loc102) %247 = xten_nn.subgraph (%arg5 = %243: tensor<1x184x12x20xbf16>, %arg6 = %246: tensor<1x184x12x20xbf16>) attributes { LayerName = "Mul_141", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1000", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : 
vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "541", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_141", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "551", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x184x12x20xbf16>, %arg8 = %arg6: tensor<1x184x12x20xbf16>) attributes { LayerName = "Mul_141", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1000", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "541", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_141", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "551", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_141", OutputName = "Mul_141", shift = 0 : i8} : (tensor<1x184x12x20xbf16>, tensor<1x184x12x20xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc103) xten_nn.output %462 : 
tensor<1x184x12x20xbf16> loc(#loc103) } -> tensor<1x184x12x20xbf16> loc(#loc103) xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc103) } -> tensor<1x184x12x20xbf16> loc(#loc103) %248 = xten_nn.subgraph (%arg5 = %247: tensor<1x184x12x20xbf16>, %arg6 = %98: tensor<184x1x3x3xbf16>, %arg7 = %97: tensor<184xbf16>) attributes { LayerName = "Conv_142", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "551", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1000", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[184, 1, 3, 3]> : vector<4xindex> }, { Name = "551", UnknownDataFormat = true } ], OutputName = "Conv_142", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1003", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x184x12x20xbf16>, %arg9 = %arg6: tensor<184x1x3x3xbf16>, %arg10 = %arg7: tensor<184xbf16>) attributes { Dilations = array, HWPadding = [[1, 1], [1, 1]], LayerName = "Conv_142", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "551", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1000", Port = "data_io.wts", SubPort = "wts_data", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[184, 1, 3, 3]> : vector<4xindex> }, { Name = "551", Port = "data_io.wts", SubPort = "bias", 
UnknownDataFormat = true } ], OutputName = "Conv_142", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1003", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], Specializes = "DepthwiseConv2dBf16", With = { config.act = 0 : ui8, config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.kernel_height = 3 : ui8, config.kernel_width = 3 : ui8, config.stride = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc104) %465 = tosa.transpose %arg9, %464 : (tensor<184x1x3x3xbf16>, tensor<4xi32>) -> tensor<3x3x184x1xbf16> loc(#loc104) %466 = tosa.transpose %arg8, %463 : (tensor<1x184x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x184xbf16> loc(#loc104) %467 = tosa.depthwise_conv2d %466, %465, %arg10 { PartOfLayerName = "Conv_142", PartOfOutputName = "Conv_142", dilation = array, pad = array, stride = array} : (tensor<1x12x20x184xbf16>, tensor<3x3x184x1xbf16>, tensor<184xbf16>) -> tensor<1x12x20x184xbf16> loc(#loc104) %468 = tosa.transpose %467, %462 : (tensor<1x12x20x184xbf16>, tensor<4xi32>) -> tensor<1x184x12x20xbf16> loc(#loc104) xten_nn.output %468 : tensor<1x184x12x20xbf16> loc(#loc104) } -> tensor<1x184x12x20xbf16> loc(#loc104) xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc104) } -> tensor<1x184x12x20xbf16> loc(#loc104) %249 = xten_nn.subgraph (%arg5 = %248: tensor<1x184x12x20xbf16>) attributes { LayerName = "Add_144", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1003", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : 
vector<4xindex> } ], OutputName = "Add_144", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "555", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x184x12x20xbf16>) attributes { LayerName = "Add_144", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1003", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Add_144", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "555", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_144", OutputName = "Add_144"} : (tensor<1x184x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc105) xten_nn.output %463 : tensor<1x184x12x20xbf16> loc(#loc105) } -> tensor<1x184x12x20xbf16> loc(#loc105) xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc105) } -> tensor<1x184x12x20xbf16> loc(#loc105) %250 = xten_nn.subgraph (%arg5 = %249: tensor<1x184x12x20xbf16>) attributes { LayerName = "Clip_147", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", 
L3Vectorization = "C:8", Name = "555", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_147", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "558", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x184x12x20xbf16>) attributes { LayerName = "Clip_147", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "555", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_147", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "558", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_147", OutputName = "Clip_147", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x184x12x20xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc106) xten_nn.output %462 : tensor<1x184x12x20xbf16> loc(#loc106) } -> tensor<1x184x12x20xbf16> loc(#loc106) xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc106) } -> tensor<1x184x12x20xbf16> loc(#loc106) %251 = 
xten_nn.subgraph (%arg5 = %250: tensor<1x184x12x20xbf16>) attributes { LayerName = "Div_149", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "558", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Div_149", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "560", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x184x12x20xbf16>) attributes { LayerName = "Div_149", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "558", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Div_149", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "560", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_149", OutputName = "Div_149", shift = 0 : i8} : (tensor<1x184x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc107) xten_nn.output %463 : tensor<1x184x12x20xbf16> loc(#loc107) } -> 
tensor<1x184x12x20xbf16> loc(#loc107) xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc107) } -> tensor<1x184x12x20xbf16> loc(#loc107) %252 = xten_nn.subgraph (%arg5 = %248: tensor<1x184x12x20xbf16>, %arg6 = %251: tensor<1x184x12x20xbf16>) attributes { LayerName = "Mul_150", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1003", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "551", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_150", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "561", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x184x12x20xbf16>, %arg8 = %arg6: tensor<1x184x12x20xbf16>) attributes { LayerName = "Mul_150", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1003", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "551", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_150", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "561", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = 
dense<[1, 184, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_150", OutputName = "Mul_150", shift = 0 : i8} : (tensor<1x184x12x20xbf16>, tensor<1x184x12x20xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc108) xten_nn.output %462 : tensor<1x184x12x20xbf16> loc(#loc108) } -> tensor<1x184x12x20xbf16> loc(#loc108) xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc108) } -> tensor<1x184x12x20xbf16> loc(#loc108) %253 = xten_nn.subgraph (%arg5 = %252: tensor<1x184x12x20xbf16>, %arg6 = %96: tensor<80x184x1x1xbf16>, %arg7 = %95: tensor<80xbf16>, %arg8 = %242: tensor<1x80x12x20xbf16>) attributes { LayerName = "Conv_151", OfmShare = 3 : index, Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "561", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> }, { Name = "1003", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[80, 184, 1, 1]> : vector<4xindex> }, { Name = "561", UnknownDataFormat = true }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "561", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> } ], OutputName = "Add_152", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "564", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg9 = %arg5: tensor<1x184x12x20xbf16>, %arg10 = %arg6: 
tensor<80x184x1x1xbf16>, %arg11 = %arg7: tensor<80xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_151", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "561", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> }, { Name = "1003", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[80, 184, 1, 1]> : vector<4xindex> }, { Name = "561", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_151", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1006", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %463 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc109) %465 = tosa.reshape %arg10 {new_shape = array} : (tensor<80x184x1x1xbf16>) -> tensor<80x1x1x184xbf16> loc(#loc109) %466 = tosa.transpose %arg9, %464 : (tensor<1x184x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x184xbf16> loc(#loc109) 
%467 = tosa.conv2d %466, %465, %arg11 { PartOfLayerName = "Conv_151", PartOfOutputName = "Conv_151", dilation = array, pad = array, stride = array} : (tensor<1x12x20x184xbf16>, tensor<80x1x1x184xbf16>, tensor<80xbf16>) -> tensor<1x12x20x80xbf16> loc(#loc109) %468 = tosa.transpose %467, %463 : (tensor<1x12x20x80xbf16>, tensor<4xi32>) -> tensor<1x80x12x20xbf16> loc(#loc109) xten_nn.output %468 : tensor<1x80x12x20xbf16> loc(#loc109) } -> tensor<1x80x12x20xbf16> loc(#loc109) %462 = xten_nn.subgraph (%arg9 = %461: tensor<1x80x12x20xbf16>, %arg10 = %arg8: tensor<1x80x12x20xbf16>) attributes { LayerName = "Add_152", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1006", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "561", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> } ], OutputName = "Add_152", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "564", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> } ], Specializes = "AddBf16", Traits = { Binary = true, Elementwise = true }, With = { config.act = 0 : ui8, config.act_type = "LINEAR", config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %463 = tosa.add %arg9, %arg10 {LayerName = "Add_152", OutputName = "Add_152"} : (tensor<1x80x12x20xbf16>, tensor<1x80x12x20xbf16>) -> tensor<1x80x12x20xbf16> loc(#loc110) xten_nn.output %463 : tensor<1x80x12x20xbf16> loc(#loc110) } -> tensor<1x80x12x20xbf16> loc(#loc110) xten_nn.output %462 : tensor<1x80x12x20xbf16> loc(#loc110) } -> tensor<1x80x12x20xbf16> 
loc(#loc342) %254 = xten_nn.subgraph (%arg5 = %253: tensor<1x80x12x20xbf16>, %arg6 = %94: tensor<184x80x1x1xbf16>, %arg7 = %93: tensor<184xbf16>) attributes { LayerName = "Conv_153", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "564", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> }, { Name = "1006", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[184, 80, 1, 1]> : vector<4xindex> }, { Name = "564", UnknownDataFormat = true } ], OutputName = "Conv_153", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1009", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x80x12x20xbf16>, %arg9 = %arg6: tensor<184x80x1x1xbf16>, %arg10 = %arg7: tensor<184xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_153", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "564", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> }, { Name = "1006", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[184, 80, 1, 1]> : vector<4xindex> }, { Name = "564", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_153", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1009", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = 
dense<[1, 184, 12, 20]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc111) %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<184x80x1x1xbf16>) -> tensor<184x1x1x80xbf16> loc(#loc111) %465 = tosa.transpose %arg8, %463 : (tensor<1x80x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x80xbf16> loc(#loc111) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_153", PartOfOutputName = "Conv_153", dilation = array, pad = array, stride = array} : (tensor<1x12x20x80xbf16>, tensor<184x1x1x80xbf16>, tensor<184xbf16>) -> tensor<1x12x20x184xbf16> loc(#loc111) %467 = tosa.transpose %466, %462 : (tensor<1x12x20x184xbf16>, tensor<4xi32>) -> tensor<1x184x12x20xbf16> loc(#loc111) xten_nn.output %467 : tensor<1x184x12x20xbf16> loc(#loc111) } -> tensor<1x184x12x20xbf16> loc(#loc111) xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc111) } -> tensor<1x184x12x20xbf16> loc(#loc111) %255 = xten_nn.subgraph (%arg5 = %254: tensor<1x184x12x20xbf16>) attributes { LayerName = "Add_155", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1009", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> 
} ], OutputName = "Add_155", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "568", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x184x12x20xbf16>) attributes { LayerName = "Add_155", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1009", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Add_155", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "568", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_155", OutputName = "Add_155"} : (tensor<1x184x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc112) xten_nn.output %463 : tensor<1x184x12x20xbf16> loc(#loc112) } -> tensor<1x184x12x20xbf16> loc(#loc112) xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc112) } -> tensor<1x184x12x20xbf16> loc(#loc112) %256 = xten_nn.subgraph (%arg5 = %255: tensor<1x184x12x20xbf16>) attributes { LayerName = "Clip_158", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization 
= "C:8", Name = "568", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_158", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "571", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x184x12x20xbf16>) attributes { LayerName = "Clip_158", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "568", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_158", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "571", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_158", OutputName = "Clip_158", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x184x12x20xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc113) xten_nn.output %462 : tensor<1x184x12x20xbf16> loc(#loc113) } -> tensor<1x184x12x20xbf16> loc(#loc113) xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc113) } -> tensor<1x184x12x20xbf16> loc(#loc113) %257 = xten_nn.subgraph (%arg5 
= %256: tensor<1x184x12x20xbf16>) attributes { LayerName = "Div_160", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "571", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Div_160", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "573", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x184x12x20xbf16>) attributes { LayerName = "Div_160", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "571", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Div_160", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "573", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_160", OutputName = "Div_160", shift = 0 : i8} : (tensor<1x184x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc114) xten_nn.output %463 : tensor<1x184x12x20xbf16> loc(#loc114) } -> tensor<1x184x12x20xbf16> 
loc(#loc114) xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc114) } -> tensor<1x184x12x20xbf16> loc(#loc114) %258 = xten_nn.subgraph (%arg5 = %254: tensor<1x184x12x20xbf16>, %arg6 = %257: tensor<1x184x12x20xbf16>) attributes { LayerName = "Mul_161", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1009", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "564", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_161", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "574", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x184x12x20xbf16>, %arg8 = %arg6: tensor<1x184x12x20xbf16>) attributes { LayerName = "Mul_161", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1009", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "564", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_161", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "574", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : 
vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_161", OutputName = "Mul_161", shift = 0 : i8} : (tensor<1x184x12x20xbf16>, tensor<1x184x12x20xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc115) xten_nn.output %462 : tensor<1x184x12x20xbf16> loc(#loc115) } -> tensor<1x184x12x20xbf16> loc(#loc115) xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc115) } -> tensor<1x184x12x20xbf16> loc(#loc115) %259 = xten_nn.subgraph (%arg5 = %258: tensor<1x184x12x20xbf16>, %arg6 = %92: tensor<184x1x3x3xbf16>, %arg7 = %91: tensor<184xbf16>) attributes { LayerName = "Conv_162", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "574", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1009", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[184, 1, 3, 3]> : vector<4xindex> }, { Name = "574", UnknownDataFormat = true } ], OutputName = "Conv_162", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1012", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x184x12x20xbf16>, %arg9 = %arg6: tensor<184x1x3x3xbf16>, %arg10 = %arg7: tensor<184xbf16>) attributes { Dilations = array, HWPadding = [[1, 1], [1, 1]], LayerName = "Conv_162", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", 
Name = "574", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1009", Port = "data_io.wts", SubPort = "wts_data", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[184, 1, 3, 3]> : vector<4xindex> }, { Name = "574", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_162", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1012", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], Specializes = "DepthwiseConv2dBf16", With = { config.act = 0 : ui8, config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.kernel_height = 3 : ui8, config.kernel_width = 3 : ui8, config.stride = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc116) %465 = tosa.transpose %arg9, %464 : (tensor<184x1x3x3xbf16>, tensor<4xi32>) -> tensor<3x3x184x1xbf16> loc(#loc116) %466 = tosa.transpose %arg8, %463 : (tensor<1x184x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x184xbf16> loc(#loc116) %467 = tosa.depthwise_conv2d %466, %465, %arg10 { PartOfLayerName = "Conv_162", PartOfOutputName = "Conv_162", dilation = array, pad = array, stride = array} : (tensor<1x12x20x184xbf16>, tensor<3x3x184x1xbf16>, tensor<184xbf16>) -> tensor<1x12x20x184xbf16> loc(#loc116) %468 = tosa.transpose %467, %462 : (tensor<1x12x20x184xbf16>, tensor<4xi32>) -> tensor<1x184x12x20xbf16> loc(#loc116) xten_nn.output %468 : tensor<1x184x12x20xbf16> loc(#loc116) } -> 
tensor<1x184x12x20xbf16> loc(#loc116) xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc116) } -> tensor<1x184x12x20xbf16> loc(#loc116) %260 = xten_nn.subgraph (%arg5 = %259: tensor<1x184x12x20xbf16>) attributes { LayerName = "Add_164", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1012", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Add_164", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "578", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x184x12x20xbf16>) attributes { LayerName = "Add_164", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1012", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Add_164", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "578", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_164", OutputName = "Add_164"} : (tensor<1x184x12x20xbf16>, 
tensor<1x1x1x1xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc117) xten_nn.output %463 : tensor<1x184x12x20xbf16> loc(#loc117) } -> tensor<1x184x12x20xbf16> loc(#loc117) xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc117) } -> tensor<1x184x12x20xbf16> loc(#loc117) %261 = xten_nn.subgraph (%arg5 = %260: tensor<1x184x12x20xbf16>) attributes { LayerName = "Clip_167", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "578", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_167", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "581", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x184x12x20xbf16>) attributes { LayerName = "Clip_167", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "578", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_167", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "581", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { 
LayerName = "Clip_167", OutputName = "Clip_167", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x184x12x20xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc118) xten_nn.output %462 : tensor<1x184x12x20xbf16> loc(#loc118) } -> tensor<1x184x12x20xbf16> loc(#loc118) xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc118) } -> tensor<1x184x12x20xbf16> loc(#loc118) %262 = xten_nn.subgraph (%arg5 = %261: tensor<1x184x12x20xbf16>) attributes { LayerName = "Div_169", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "581", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Div_169", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "583", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x184x12x20xbf16>) attributes { LayerName = "Div_169", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "581", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Div_169", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "583", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 
1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_169", OutputName = "Div_169", shift = 0 : i8} : (tensor<1x184x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc119) xten_nn.output %463 : tensor<1x184x12x20xbf16> loc(#loc119) } -> tensor<1x184x12x20xbf16> loc(#loc119) xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc119) } -> tensor<1x184x12x20xbf16> loc(#loc119) %263 = xten_nn.subgraph (%arg5 = %259: tensor<1x184x12x20xbf16>, %arg6 = %262: tensor<1x184x12x20xbf16>) attributes { LayerName = "Mul_170", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1012", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "574", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_170", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "584", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x184x12x20xbf16>, %arg8 = %arg6: tensor<1x184x12x20xbf16>) attributes { LayerName = "Mul_170", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1012", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, 
L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "574", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_170", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "584", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_170", OutputName = "Mul_170", shift = 0 : i8} : (tensor<1x184x12x20xbf16>, tensor<1x184x12x20xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc120) xten_nn.output %462 : tensor<1x184x12x20xbf16> loc(#loc120) } -> tensor<1x184x12x20xbf16> loc(#loc120) xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc120) } -> tensor<1x184x12x20xbf16> loc(#loc120) %264 = xten_nn.subgraph (%arg5 = %263: tensor<1x184x12x20xbf16>, %arg6 = %90: tensor<80x184x1x1xbf16>, %arg7 = %89: tensor<80xbf16>, %arg8 = %253: tensor<1x80x12x20xbf16>) attributes { LayerName = "Conv_171", OfmShare = 3 : index, Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "584", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> }, { Name = "1012", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[80, 184, 1, 1]> : vector<4xindex> }, { Name = "584", UnknownDataFormat = true }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "584", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> } ], OutputName = "Add_172", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = 
"InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "587", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg9 = %arg5: tensor<1x184x12x20xbf16>, %arg10 = %arg6: tensor<80x184x1x1xbf16>, %arg11 = %arg7: tensor<80xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_171", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "584", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> }, { Name = "1012", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[80, 184, 1, 1]> : vector<4xindex> }, { Name = "584", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_171", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1015", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %463 = 
"tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc121) %465 = tosa.reshape %arg10 {new_shape = array} : (tensor<80x184x1x1xbf16>) -> tensor<80x1x1x184xbf16> loc(#loc121) %466 = tosa.transpose %arg9, %464 : (tensor<1x184x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x184xbf16> loc(#loc121) %467 = tosa.conv2d %466, %465, %arg11 { PartOfLayerName = "Conv_171", PartOfOutputName = "Conv_171", dilation = array, pad = array, stride = array} : (tensor<1x12x20x184xbf16>, tensor<80x1x1x184xbf16>, tensor<80xbf16>) -> tensor<1x12x20x80xbf16> loc(#loc121) %468 = tosa.transpose %467, %463 : (tensor<1x12x20x80xbf16>, tensor<4xi32>) -> tensor<1x80x12x20xbf16> loc(#loc121) xten_nn.output %468 : tensor<1x80x12x20xbf16> loc(#loc121) } -> tensor<1x80x12x20xbf16> loc(#loc121) %462 = xten_nn.subgraph (%arg9 = %461: tensor<1x80x12x20xbf16>, %arg10 = %arg8: tensor<1x80x12x20xbf16>) attributes { LayerName = "Add_172", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1015", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "584", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> } ], OutputName = "Add_172", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "587", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> } ], Specializes = "AddBf16", Traits = { Binary = true, Elementwise = true }, With = { config.act = 0 : ui8, config.act_type = "LINEAR", config.aie_arch = "aie2p", config.compiler = 
"chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %463 = tosa.add %arg9, %arg10 {LayerName = "Add_172", OutputName = "Add_172"} : (tensor<1x80x12x20xbf16>, tensor<1x80x12x20xbf16>) -> tensor<1x80x12x20xbf16> loc(#loc122) xten_nn.output %463 : tensor<1x80x12x20xbf16> loc(#loc122) } -> tensor<1x80x12x20xbf16> loc(#loc122) xten_nn.output %462 : tensor<1x80x12x20xbf16> loc(#loc122) } -> tensor<1x80x12x20xbf16> loc(#loc343) %265 = xten_nn.subgraph (%arg5 = %264: tensor<1x80x12x20xbf16>, %arg6 = %88: tensor<480x80x1x1xbf16>, %arg7 = %87: tensor<480xbf16>) attributes { LayerName = "Conv_173", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "587", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> }, { Name = "1015", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[480, 80, 1, 1]> : vector<4xindex> }, { Name = "587", UnknownDataFormat = true } ], OutputName = "Conv_173", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1018", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x80x12x20xbf16>, %arg9 = %arg6: tensor<480x80x1x1xbf16>, %arg10 = %arg7: tensor<480xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_173", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "587", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> }, { Name = "1015", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = 
true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[480, 80, 1, 1]> : vector<4xindex> }, { Name = "587", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_173", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1018", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc123) %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<480x80x1x1xbf16>) -> tensor<480x1x1x80xbf16> loc(#loc123) %465 = tosa.transpose %arg8, %463 : (tensor<1x80x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x80xbf16> loc(#loc123) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_173", PartOfOutputName = "Conv_173", dilation = array, pad = array, stride = array} : (tensor<1x12x20x80xbf16>, tensor<480x1x1x80xbf16>, tensor<480xbf16>) -> tensor<1x12x20x480xbf16> loc(#loc123) %467 = tosa.transpose %466, %462 : (tensor<1x12x20x480xbf16>, tensor<4xi32>) -> tensor<1x480x12x20xbf16> loc(#loc123) xten_nn.output %467 : tensor<1x480x12x20xbf16> loc(#loc123) } -> 
tensor<1x480x12x20xbf16> loc(#loc123) xten_nn.output %461 : tensor<1x480x12x20xbf16> loc(#loc123) } -> tensor<1x480x12x20xbf16> loc(#loc123) %266 = xten_nn.subgraph (%arg5 = %265: tensor<1x480x12x20xbf16>) attributes { LayerName = "Add_175", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1018", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], OutputName = "Add_175", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "591", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x480x12x20xbf16>) attributes { LayerName = "Add_175", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1018", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], OutputName = "Add_175", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "591", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_175", OutputName = "Add_175"} : (tensor<1x480x12x20xbf16>, 
tensor<1x1x1x1xbf16>) -> tensor<1x480x12x20xbf16> loc(#loc124) xten_nn.output %463 : tensor<1x480x12x20xbf16> loc(#loc124) } -> tensor<1x480x12x20xbf16> loc(#loc124) xten_nn.output %461 : tensor<1x480x12x20xbf16> loc(#loc124) } -> tensor<1x480x12x20xbf16> loc(#loc124) %267 = xten_nn.subgraph (%arg5 = %266: tensor<1x480x12x20xbf16>) attributes { LayerName = "Clip_178", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "591", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_178", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "594", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x480x12x20xbf16>) attributes { LayerName = "Clip_178", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "591", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_178", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "594", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { 
LayerName = "Clip_178", OutputName = "Clip_178", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x480x12x20xbf16>) -> tensor<1x480x12x20xbf16> loc(#loc125) xten_nn.output %462 : tensor<1x480x12x20xbf16> loc(#loc125) } -> tensor<1x480x12x20xbf16> loc(#loc125) xten_nn.output %461 : tensor<1x480x12x20xbf16> loc(#loc125) } -> tensor<1x480x12x20xbf16> loc(#loc125) %268 = xten_nn.subgraph (%arg5 = %267: tensor<1x480x12x20xbf16>) attributes { LayerName = "Div_180", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "594", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], OutputName = "Div_180", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "596", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x480x12x20xbf16>) attributes { LayerName = "Div_180", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "594", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], OutputName = "Div_180", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "596", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 
1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_180", OutputName = "Div_180", shift = 0 : i8} : (tensor<1x480x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x480x12x20xbf16> loc(#loc126) xten_nn.output %463 : tensor<1x480x12x20xbf16> loc(#loc126) } -> tensor<1x480x12x20xbf16> loc(#loc126) xten_nn.output %461 : tensor<1x480x12x20xbf16> loc(#loc126) } -> tensor<1x480x12x20xbf16> loc(#loc126) %269 = xten_nn.subgraph (%arg5 = %265: tensor<1x480x12x20xbf16>, %arg6 = %268: tensor<1x480x12x20xbf16>) attributes { LayerName = "Mul_181", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1018", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "587", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_181", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "597", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x480x12x20xbf16>, %arg8 = %arg6: tensor<1x480x12x20xbf16>) attributes { LayerName = "Mul_181", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1018", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, 
L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "587", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_181", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "597", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_181", OutputName = "Mul_181", shift = 0 : i8} : (tensor<1x480x12x20xbf16>, tensor<1x480x12x20xbf16>) -> tensor<1x480x12x20xbf16> loc(#loc127) xten_nn.output %462 : tensor<1x480x12x20xbf16> loc(#loc127) } -> tensor<1x480x12x20xbf16> loc(#loc127) xten_nn.output %461 : tensor<1x480x12x20xbf16> loc(#loc127) } -> tensor<1x480x12x20xbf16> loc(#loc127) %270 = xten_nn.subgraph (%arg5 = %269: tensor<1x480x12x20xbf16>, %arg6 = %86: tensor<480x1x3x3xbf16>, %arg7 = %85: tensor<480xbf16>) attributes { LayerName = "Conv_182", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "597", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1018", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[480, 1, 3, 3]> : vector<4xindex> }, { Name = "597", UnknownDataFormat = true } ], OutputName = "Conv_182", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1021", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : 
vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x480x12x20xbf16>, %arg9 = %arg6: tensor<480x1x3x3xbf16>, %arg10 = %arg7: tensor<480xbf16>) attributes { Dilations = array, HWPadding = [[1, 1], [1, 1]], LayerName = "Conv_182", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "597", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1018", Port = "data_io.wts", SubPort = "wts_data", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[480, 1, 3, 3]> : vector<4xindex> }, { Name = "597", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_182", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1021", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], Specializes = "DepthwiseConv2dBf16", With = { config.act = 0 : ui8, config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.kernel_height = 3 : ui8, config.kernel_width = 3 : ui8, config.stride = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc128) %465 = tosa.transpose %arg9, %464 : (tensor<480x1x3x3xbf16>, tensor<4xi32>) -> tensor<3x3x480x1xbf16> loc(#loc128) %466 = tosa.transpose %arg8, %463 : (tensor<1x480x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x480xbf16> loc(#loc128) %467 = 
tosa.depthwise_conv2d %466, %465, %arg10 { PartOfLayerName = "Conv_182", PartOfOutputName = "Conv_182", dilation = array, pad = array, stride = array} : (tensor<1x12x20x480xbf16>, tensor<3x3x480x1xbf16>, tensor<480xbf16>) -> tensor<1x12x20x480xbf16> loc(#loc128) %468 = tosa.transpose %467, %462 : (tensor<1x12x20x480xbf16>, tensor<4xi32>) -> tensor<1x480x12x20xbf16> loc(#loc128) xten_nn.output %468 : tensor<1x480x12x20xbf16> loc(#loc128) } -> tensor<1x480x12x20xbf16> loc(#loc128) xten_nn.output %461 : tensor<1x480x12x20xbf16> loc(#loc128) } -> tensor<1x480x12x20xbf16> loc(#loc128) %271 = xten_nn.subgraph (%arg5 = %270: tensor<1x480x12x20xbf16>) attributes { LayerName = "Add_184", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1021", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], OutputName = "Add_184", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "601", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x480x12x20xbf16>) attributes { LayerName = "Add_184", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1021", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], OutputName = "Add_184", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "601", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], Specializes = 
"AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_184", OutputName = "Add_184"} : (tensor<1x480x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x480x12x20xbf16> loc(#loc129) xten_nn.output %463 : tensor<1x480x12x20xbf16> loc(#loc129) } -> tensor<1x480x12x20xbf16> loc(#loc129) xten_nn.output %461 : tensor<1x480x12x20xbf16> loc(#loc129) } -> tensor<1x480x12x20xbf16> loc(#loc129) %272 = xten_nn.subgraph (%arg5 = %271: tensor<1x480x12x20xbf16>) attributes { LayerName = "Clip_187", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "601", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_187", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "604", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x480x12x20xbf16>) attributes { LayerName = "Clip_187", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "601", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_187", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "604", Port = 
"data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_187", OutputName = "Clip_187", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x480x12x20xbf16>) -> tensor<1x480x12x20xbf16> loc(#loc130) xten_nn.output %462 : tensor<1x480x12x20xbf16> loc(#loc130) } -> tensor<1x480x12x20xbf16> loc(#loc130) xten_nn.output %461 : tensor<1x480x12x20xbf16> loc(#loc130) } -> tensor<1x480x12x20xbf16> loc(#loc130) %273 = xten_nn.subgraph (%arg5 = %272: tensor<1x480x12x20xbf16>) attributes { LayerName = "Div_189", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "604", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], OutputName = "Div_189", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "606", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x480x12x20xbf16>) attributes { LayerName = "Div_189", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "604", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], OutputName 
= "Div_189", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "606", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_189", OutputName = "Div_189", shift = 0 : i8} : (tensor<1x480x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x480x12x20xbf16> loc(#loc131) xten_nn.output %463 : tensor<1x480x12x20xbf16> loc(#loc131) } -> tensor<1x480x12x20xbf16> loc(#loc131) xten_nn.output %461 : tensor<1x480x12x20xbf16> loc(#loc131) } -> tensor<1x480x12x20xbf16> loc(#loc131) %274 = xten_nn.subgraph (%arg5 = %270: tensor<1x480x12x20xbf16>, %arg6 = %273: tensor<1x480x12x20xbf16>) attributes { LayerName = "Mul_190_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1021", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "597", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_190", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "607", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = 
{feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x480x12x20xbf16>, %arg8 = %arg6: tensor<1x480x12x20xbf16>) attributes { LayerName = "Mul_190_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1021", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "597", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_190", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "607", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_190", OutputName = "Mul_190", shift = 0 : i8} : (tensor<1x480x12x20xbf16>, tensor<1x480x12x20xbf16>) -> tensor<1x480x12x20xbf16> loc(#loc132) xten_nn.output %462 : tensor<1x480x12x20xbf16> loc(#loc132) } -> tensor<1x480x12x20xbf16> loc(#loc132) xten_nn.output %461 : tensor<1x480x12x20xbf16> loc(#loc132) } -> tensor<1x480x12x20xbf16> loc(#loc132) %275 = xten_nn.subgraph (%arg5 = %274: tensor<1x480x12x20xbf16>) attributes { LayerName = "Mul_190_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1021", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } 
], OutputName = "GlobalAveragePool_191_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "608", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 240]> : vector<4xindex> } ], Specializes = "Transpose4dAdf", With = { config.aie_arch = "aie2p", config.dim_0 = 12 : ui32, config.dim_1 = 60 : ui32, config.dim_2 = 20 : ui32, config.dim_3 = 8 : ui32, config.dtype = "bfloat16", config.perm = 6 : ui32 }} { %461 = tosa.reshape %arg5 {new_shape = array} : (tensor<1x480x12x20xbf16>) -> tensor<1x480x1x240xbf16> loc(#loc344) xten_nn.output %461 : tensor<1x480x1x240xbf16> loc(#loc344) } -> tensor<1x480x1x240xbf16> loc(#loc344) %276 = xten_nn.subgraph (%arg5 = %275: tensor<1x480x1x240xbf16>) attributes { LayerName = "GlobalAveragePool_191", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "607", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 240]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_191_Duplicated#1", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "608", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x480x1x240xbf16>) attributes { LayerName = "GlobalAveragePool_191", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "607", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 240]> : vector<4xindex> } ], OutputName 
= "GlobalAveragePool_191_Duplicated#1", PadValue = 0.000000e+00 : bf16, Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "608", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> } ], Specializes = "ReduceMeanC8Bf16", Traits = { Reduce = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.full_channel = 480 : ui32, config.full_height = 1 : ui32, config.full_width = 240 : ui32, config.reduce_dim = "W" }} { %462 = xten_nn.reduce_mean %arg6 {axes = array, keepdims = 1 : i64} : (tensor<1x480x1x240xbf16>) -> tensor<1x480x1x1xbf16> loc(#loc133) xten_nn.output %462 : tensor<1x480x1x1xbf16> loc(#loc133) } -> tensor<1x480x1x1xbf16> loc(#loc133) xten_nn.output %461 : tensor<1x480x1x1xbf16> loc(#loc133) } -> tensor<1x480x1x1xbf16> loc(#loc133) %277 = xten_nn.subgraph (%arg5 = %276: tensor<1x480x1x1xbf16>, %arg6 = %84: tensor<120x480x1x1xbf16>, %arg7 = %83: tensor<120xbf16>) attributes { LayerName = "Conv_192", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "608", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> }, { Name = "607", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[120, 480, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.11.block.2.fc1.weight", UnknownDataFormat = true } ], OutputName = "Relu_193", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "610", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: 
tensor<1x480x1x1xbf16>, %arg9 = %arg6: tensor<120x480x1x1xbf16>, %arg10 = %arg7: tensor<120xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_192", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "608", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> }, { Name = "607", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[120, 480, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.11.block.2.fc1.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_193", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "610", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true, NonNegativeOut = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 1 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 0.000000e+00 : bf16, config.lrelu_alpha_kernel = 0.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<120x480x1x1xbf16>) -> tensor<120x1x1x480xbf16> loc(#loc345) %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x480x1x1xbf16>) -> tensor<1x1x1x480xbf16> loc(#loc345) %464 = tosa.conv2d %463, %462, %arg10 { PartOfLayerName = "Conv_192", PartOfOutputName = "Conv_192", dilation = array, 
pad = array, stride = array} : (tensor<1x1x1x480xbf16>, tensor<120x1x1x480xbf16>, tensor<120xbf16>) -> tensor<1x1x1x120xbf16> loc(#loc134) %465 = tosa.clamp %464 { LayerName = "Relu_193", OutputName = "Relu_193", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x1x1x120xbf16>) -> tensor<1x1x1x120xbf16> loc(#loc135) %466 = tosa.reshape %465 {new_shape = array} : (tensor<1x1x1x120xbf16>) -> tensor<1x120x1x1xbf16> loc(#loc345) xten_nn.output %466 : tensor<1x120x1x1xbf16> loc(#loc135) } -> tensor<1x120x1x1xbf16> loc(#loc345) xten_nn.output %461 : tensor<1x120x1x1xbf16> loc(#loc345) } -> tensor<1x120x1x1xbf16> loc(#loc345) %278 = xten_nn.subgraph (%arg5 = %277: tensor<1x120x1x1xbf16>, %arg6 = %82: tensor<480x120x1x1xbf16>, %arg7 = %81: tensor<480xbf16>) attributes { LayerName = "Conv_194", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "610", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> }, { Name = "609", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[480, 120, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.11.block.2.fc2.weight", UnknownDataFormat = true } ], OutputName = "Conv_194", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "611", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x120x1x1xbf16>, %arg9 = %arg6: tensor<480x120x1x1xbf16>, %arg10 = %arg7: tensor<480xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_194", Operands = [ { CurrentDataFormat = 
"NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "610", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> }, { Name = "609", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[480, 120, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.11.block.2.fc2.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_194", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "611", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<480x120x1x1xbf16>) -> tensor<480x1x1x120xbf16> loc(#loc136) %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x120x1x1xbf16>) -> tensor<1x1x1x120xbf16> loc(#loc136) %464 = tosa.conv2d %463, %462, %arg10 { PartOfLayerName = "Conv_194", PartOfOutputName = "Conv_194", dilation = array, pad = array, stride = array} : (tensor<1x1x1x120xbf16>, tensor<480x1x1x120xbf16>, tensor<480xbf16>) -> tensor<1x1x1x480xbf16> loc(#loc136) %465 = tosa.reshape %464 {new_shape = array} : (tensor<1x1x1x480xbf16>) -> tensor<1x480x1x1xbf16> 
loc(#loc136) xten_nn.output %465 : tensor<1x480x1x1xbf16> loc(#loc136) } -> tensor<1x480x1x1xbf16> loc(#loc136) xten_nn.output %461 : tensor<1x480x1x1xbf16> loc(#loc136) } -> tensor<1x480x1x1xbf16> loc(#loc136) %279 = xten_nn.subgraph (%arg5 = %278: tensor<1x480x1x1xbf16>) attributes { LayerName = "Add_196", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "611", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> } ], OutputName = "Add_196", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "613", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x480x1x1xbf16>) attributes { LayerName = "Add_196", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "611", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> } ], OutputName = "Add_196", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "613", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_196", 
OutputName = "Add_196"} : (tensor<1x480x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x480x1x1xbf16> loc(#loc137) xten_nn.output %463 : tensor<1x480x1x1xbf16> loc(#loc137) } -> tensor<1x480x1x1xbf16> loc(#loc137) xten_nn.output %461 : tensor<1x480x1x1xbf16> loc(#loc137) } -> tensor<1x480x1x1xbf16> loc(#loc137) %280 = xten_nn.subgraph (%arg5 = %279: tensor<1x480x1x1xbf16>) attributes { LayerName = "Clip_199", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "613", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> } ], OutputName = "Clip_199", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "616", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x480x1x1xbf16>) attributes { LayerName = "Clip_199", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "613", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> } ], OutputName = "Clip_199", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "616", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 
}} { %462 = tosa.clamp %arg6 { LayerName = "Clip_199", OutputName = "Clip_199", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x480x1x1xbf16>) -> tensor<1x480x1x1xbf16> loc(#loc138) xten_nn.output %462 : tensor<1x480x1x1xbf16> loc(#loc138) } -> tensor<1x480x1x1xbf16> loc(#loc138) xten_nn.output %461 : tensor<1x480x1x1xbf16> loc(#loc138) } -> tensor<1x480x1x1xbf16> loc(#loc138) %281 = xten_nn.subgraph (%arg5 = %280: tensor<1x480x1x1xbf16>) attributes { LayerName = "Div_201", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "616", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> } ], OutputName = "Div_201", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "618", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x480x1x1xbf16>) attributes { LayerName = "Div_201", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "616", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> } ], OutputName = "Div_201", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "618", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar 
= 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_201", OutputName = "Div_201", shift = 0 : i8} : (tensor<1x480x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x480x1x1xbf16> loc(#loc139) xten_nn.output %463 : tensor<1x480x1x1xbf16> loc(#loc139) } -> tensor<1x480x1x1xbf16> loc(#loc139) xten_nn.output %461 : tensor<1x480x1x1xbf16> loc(#loc139) } -> tensor<1x480x1x1xbf16> loc(#loc139) %282 = xten_nn.subgraph (%arg5 = %281: tensor<1x480x1x1xbf16>) attributes { LayerName = "Mul_202_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "618", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> } ], OutputName = "Mul_202_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "619", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], Specializes = "TileAdf", With = { config.aie_arch = "aie2p", config.dtype = "bfloat16", config.i_dim_c = 480 : ui32, config.i_dim_h = 1 : ui32, config.i_dim_n = 1 : ui32, config.i_dim_w = 1 : ui32, config.rep_dim_c = 1 : ui32, config.rep_dim_h = 12 : ui32, config.rep_dim_w = 20 : ui32 }} { %461 = tosa.tile %arg5 {multiples = array} : (tensor<1x480x1x1xbf16>) -> tensor<1x480x12x20xbf16> loc(#loc140) xten_nn.output %461 : tensor<1x480x12x20xbf16> loc(#loc140) } -> tensor<1x480x12x20xbf16> loc(#loc140) %283 = xten_nn.subgraph (%arg5 = %282: tensor<1x480x12x20xbf16>, %arg6 = %274: tensor<1x480x12x20xbf16>) attributes { LayerName = 
"Mul_202_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "618", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "616", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_202_Duplicated#1", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "619", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x480x12x20xbf16>, %arg8 = %arg6: tensor<1x480x12x20xbf16>) attributes { LayerName = "Mul_202_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "618", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "616", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_202_Duplicated#1", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "619", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", 
config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_202", OutputName = "Mul_202", shift = 0 : i8} : (tensor<1x480x12x20xbf16>, tensor<1x480x12x20xbf16>) -> tensor<1x480x12x20xbf16> loc(#loc140) xten_nn.output %462 : tensor<1x480x12x20xbf16> loc(#loc140) } -> tensor<1x480x12x20xbf16> loc(#loc140) xten_nn.output %461 : tensor<1x480x12x20xbf16> loc(#loc140) } -> tensor<1x480x12x20xbf16> loc(#loc140) %284 = xten_nn.subgraph (%arg5 = %283: tensor<1x480x12x20xbf16>, %arg6 = %80: tensor<112x480x1x1xbf16>, %arg7 = %79: tensor<112xbf16>) attributes { LayerName = "Conv_203", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "619", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> }, { Name = "618", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[112, 480, 1, 1]> : vector<4xindex> }, { Name = "619", UnknownDataFormat = true } ], OutputName = "Conv_203", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1024", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 112, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x480x12x20xbf16>, %arg9 = %arg6: tensor<112x480x1x1xbf16>, %arg10 = %arg7: tensor<112xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_203", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "619", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> }, { Name = "618", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, 
l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[112, 480, 1, 1]> : vector<4xindex> }, { Name = "619", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_203", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1024", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 112, 12, 20]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc141) %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<112x480x1x1xbf16>) -> tensor<112x1x1x480xbf16> loc(#loc141) %465 = tosa.transpose %arg8, %463 : (tensor<1x480x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x480xbf16> loc(#loc141) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_203", PartOfOutputName = "Conv_203", dilation = array, pad = array, stride = array} : (tensor<1x12x20x480xbf16>, tensor<112x1x1x480xbf16>, tensor<112xbf16>) -> tensor<1x12x20x112xbf16> loc(#loc141) %467 = tosa.transpose %466, %462 : (tensor<1x12x20x112xbf16>, tensor<4xi32>) -> tensor<1x112x12x20xbf16> loc(#loc141) xten_nn.output %467 : tensor<1x112x12x20xbf16> loc(#loc141) } -> 
tensor<1x112x12x20xbf16> loc(#loc141) xten_nn.output %461 : tensor<1x112x12x20xbf16> loc(#loc141) } -> tensor<1x112x12x20xbf16> loc(#loc141) %285 = xten_nn.subgraph (%arg5 = %284: tensor<1x112x12x20xbf16>, %arg6 = %78: tensor<672x112x1x1xbf16>, %arg7 = %77: tensor<672xbf16>) attributes { LayerName = "Conv_204", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1024", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 112, 12, 20]> : vector<4xindex> }, { Name = "619", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[672, 112, 1, 1]> : vector<4xindex> }, { Name = "1028", UnknownDataFormat = true } ], OutputName = "Conv_204", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1027", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x112x12x20xbf16>, %arg9 = %arg6: tensor<672x112x1x1xbf16>, %arg10 = %arg7: tensor<672xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_204", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1024", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 112, 12, 20]> : vector<4xindex> }, { Name = "619", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[672, 112, 1, 1]> : vector<4xindex> }, { Name = "1028", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_204", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = 
"HCWN", L3Vectorization = "C:8", Name = "1027", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc142) %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<672x112x1x1xbf16>) -> tensor<672x1x1x112xbf16> loc(#loc142) %465 = tosa.transpose %arg8, %463 : (tensor<1x112x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x112xbf16> loc(#loc142) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_204", PartOfOutputName = "Conv_204", dilation = array, pad = array, stride = array} : (tensor<1x12x20x112xbf16>, tensor<672x1x1x112xbf16>, tensor<672xbf16>) -> tensor<1x12x20x672xbf16> loc(#loc142) %467 = tosa.transpose %466, %462 : (tensor<1x12x20x672xbf16>, tensor<4xi32>) -> tensor<1x672x12x20xbf16> loc(#loc142) xten_nn.output %467 : tensor<1x672x12x20xbf16> loc(#loc142) } -> tensor<1x672x12x20xbf16> loc(#loc142) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc142) } -> tensor<1x672x12x20xbf16> loc(#loc142) %286 = xten_nn.subgraph (%arg5 = %285: tensor<1x672x12x20xbf16>) attributes { LayerName = "Add_206", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", 
L3Vectorization = "C:8", Name = "1027", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Add_206", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "625", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x12x20xbf16>) attributes { LayerName = "Add_206", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1027", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Add_206", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "625", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_206", OutputName = "Add_206"} : (tensor<1x672x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc143) xten_nn.output %463 : tensor<1x672x12x20xbf16> loc(#loc143) } -> tensor<1x672x12x20xbf16> loc(#loc143) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc143) } -> tensor<1x672x12x20xbf16> loc(#loc143) %287 = xten_nn.subgraph (%arg5 = %286: 
tensor<1x672x12x20xbf16>) attributes { LayerName = "Clip_209", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "625", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_209", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "628", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x12x20xbf16>) attributes { LayerName = "Clip_209", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "625", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_209", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "628", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_209", OutputName = "Clip_209", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x672x12x20xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc144) xten_nn.output %462 : tensor<1x672x12x20xbf16> loc(#loc144) } -> tensor<1x672x12x20xbf16> 
loc(#loc144) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc144) } -> tensor<1x672x12x20xbf16> loc(#loc144) %288 = xten_nn.subgraph (%arg5 = %287: tensor<1x672x12x20xbf16>) attributes { LayerName = "Div_211", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "628", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Div_211", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "630", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x12x20xbf16>) attributes { LayerName = "Div_211", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "628", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Div_211", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "630", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_211", OutputName = "Div_211", shift = 0 : i8} : (tensor<1x672x12x20xbf16>, 
tensor<1x1x1x1xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc145) xten_nn.output %463 : tensor<1x672x12x20xbf16> loc(#loc145) } -> tensor<1x672x12x20xbf16> loc(#loc145) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc145) } -> tensor<1x672x12x20xbf16> loc(#loc145) %289 = xten_nn.subgraph (%arg5 = %285: tensor<1x672x12x20xbf16>, %arg6 = %288: tensor<1x672x12x20xbf16>) attributes { LayerName = "Mul_212", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1027", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1024", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_212", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "631", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x672x12x20xbf16>, %arg8 = %arg6: tensor<1x672x12x20xbf16>) attributes { LayerName = "Mul_212", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1027", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1024", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_212", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = 
"HCWN", L3Vectorization = "C:8", Name = "631", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_212", OutputName = "Mul_212", shift = 0 : i8} : (tensor<1x672x12x20xbf16>, tensor<1x672x12x20xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc146) xten_nn.output %462 : tensor<1x672x12x20xbf16> loc(#loc146) } -> tensor<1x672x12x20xbf16> loc(#loc146) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc146) } -> tensor<1x672x12x20xbf16> loc(#loc146) %290 = xten_nn.subgraph (%arg5 = %289: tensor<1x672x12x20xbf16>, %arg6 = %76: tensor<672x1x3x3xbf16>, %arg7 = %75: tensor<672xbf16>) attributes { LayerName = "Conv_213", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "631", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1027", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[672, 1, 3, 3]> : vector<4xindex> }, { Name = "631", UnknownDataFormat = true } ], OutputName = "Conv_213", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1030", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x672x12x20xbf16>, %arg9 = %arg6: tensor<672x1x3x3xbf16>, %arg10 = %arg7: tensor<672xbf16>) attributes { Dilations = 
array, HWPadding = [[1, 1], [1, 1]], LayerName = "Conv_213", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "631", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1027", Port = "data_io.wts", SubPort = "wts_data", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[672, 1, 3, 3]> : vector<4xindex> }, { Name = "631", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_213", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1030", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "DepthwiseConv2dBf16", With = { config.act = 0 : ui8, config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.kernel_height = 3 : ui8, config.kernel_width = 3 : ui8, config.stride = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc147) %465 = tosa.transpose %arg9, %464 : (tensor<672x1x3x3xbf16>, tensor<4xi32>) -> tensor<3x3x672x1xbf16> loc(#loc147) %466 = tosa.transpose %arg8, %463 : (tensor<1x672x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x672xbf16> loc(#loc147) %467 = tosa.depthwise_conv2d %466, %465, %arg10 { PartOfLayerName = "Conv_213", PartOfOutputName = "Conv_213", dilation = array, pad = array, stride = array} : (tensor<1x12x20x672xbf16>, tensor<3x3x672x1xbf16>, tensor<672xbf16>) -> tensor<1x12x20x672xbf16> loc(#loc147) %468 = tosa.transpose %467, %462 : 
(tensor<1x12x20x672xbf16>, tensor<4xi32>) -> tensor<1x672x12x20xbf16> loc(#loc147) xten_nn.output %468 : tensor<1x672x12x20xbf16> loc(#loc147) } -> tensor<1x672x12x20xbf16> loc(#loc147) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc147) } -> tensor<1x672x12x20xbf16> loc(#loc147) %291 = xten_nn.subgraph (%arg5 = %290: tensor<1x672x12x20xbf16>) attributes { LayerName = "Add_215", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1030", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Add_215", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "635", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x12x20xbf16>) attributes { LayerName = "Add_215", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1030", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Add_215", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "635", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : 
tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_215", OutputName = "Add_215"} : (tensor<1x672x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc148) xten_nn.output %463 : tensor<1x672x12x20xbf16> loc(#loc148) } -> tensor<1x672x12x20xbf16> loc(#loc148) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc148) } -> tensor<1x672x12x20xbf16> loc(#loc148) %292 = xten_nn.subgraph (%arg5 = %291: tensor<1x672x12x20xbf16>) attributes { LayerName = "Clip_218", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "635", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_218", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "638", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x12x20xbf16>) attributes { LayerName = "Clip_218", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "635", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_218", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "638", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 
0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_218", OutputName = "Clip_218", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x672x12x20xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc149) xten_nn.output %462 : tensor<1x672x12x20xbf16> loc(#loc149) } -> tensor<1x672x12x20xbf16> loc(#loc149) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc149) } -> tensor<1x672x12x20xbf16> loc(#loc149) %293 = xten_nn.subgraph (%arg5 = %292: tensor<1x672x12x20xbf16>) attributes { LayerName = "Div_220", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "638", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Div_220", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "640", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x12x20xbf16>) attributes { LayerName = "Div_220", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "638", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Div_220", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "640", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = 
"MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_220", OutputName = "Div_220", shift = 0 : i8} : (tensor<1x672x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc150) xten_nn.output %463 : tensor<1x672x12x20xbf16> loc(#loc150) } -> tensor<1x672x12x20xbf16> loc(#loc150) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc150) } -> tensor<1x672x12x20xbf16> loc(#loc150) %294 = xten_nn.subgraph (%arg5 = %290: tensor<1x672x12x20xbf16>, %arg6 = %293: tensor<1x672x12x20xbf16>) attributes { LayerName = "Mul_221_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1030", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "631", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_221", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "641", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x672x12x20xbf16>, %arg8 = %arg6: tensor<1x672x12x20xbf16>) attributes { LayerName = "Mul_221_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", 
L3Vectorization = "C:8", Name = "1030", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "631", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_221", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "641", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_221", OutputName = "Mul_221", shift = 0 : i8} : (tensor<1x672x12x20xbf16>, tensor<1x672x12x20xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc151) xten_nn.output %462 : tensor<1x672x12x20xbf16> loc(#loc151) } -> tensor<1x672x12x20xbf16> loc(#loc151) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc151) } -> tensor<1x672x12x20xbf16> loc(#loc151) %295 = xten_nn.subgraph (%arg5 = %294: tensor<1x672x12x20xbf16>) attributes { LayerName = "Mul_221_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1030", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_222_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "642", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, 
l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 240]> : vector<4xindex> } ], Specializes = "Transpose4dAdf", With = { config.aie_arch = "aie2p", config.dim_0 = 12 : ui32, config.dim_1 = 84 : ui32, config.dim_2 = 20 : ui32, config.dim_3 = 8 : ui32, config.dtype = "bfloat16", config.perm = 6 : ui32 }} { %461 = tosa.reshape %arg5 {new_shape = array} : (tensor<1x672x12x20xbf16>) -> tensor<1x672x1x240xbf16> loc(#loc346) xten_nn.output %461 : tensor<1x672x1x240xbf16> loc(#loc346) } -> tensor<1x672x1x240xbf16> loc(#loc346) %296 = xten_nn.subgraph (%arg5 = %295: tensor<1x672x1x240xbf16>) attributes { LayerName = "GlobalAveragePool_222", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "641", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 240]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_222_Duplicated#1", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "642", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x1x240xbf16>) attributes { LayerName = "GlobalAveragePool_222", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "641", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 240]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_222_Duplicated#1", PadValue = 0.000000e+00 : bf16, Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "642", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 
1, 1]> : vector<4xindex> } ], Specializes = "ReduceMeanC8Bf16", Traits = { Reduce = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.full_channel = 672 : ui32, config.full_height = 1 : ui32, config.full_width = 240 : ui32, config.reduce_dim = "W" }} { %462 = xten_nn.reduce_mean %arg6 {axes = array, keepdims = 1 : i64} : (tensor<1x672x1x240xbf16>) -> tensor<1x672x1x1xbf16> loc(#loc152) xten_nn.output %462 : tensor<1x672x1x1xbf16> loc(#loc152) } -> tensor<1x672x1x1xbf16> loc(#loc152) xten_nn.output %461 : tensor<1x672x1x1xbf16> loc(#loc152) } -> tensor<1x672x1x1xbf16> loc(#loc152) %297 = xten_nn.subgraph (%arg5 = %296: tensor<1x672x1x1xbf16>, %arg6 = %74: tensor<168x672x1x1xbf16>, %arg7 = %73: tensor<168xbf16>) attributes { LayerName = "Conv_223", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "642", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> }, { Name = "641", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[168, 672, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.12.block.2.fc1.weight", UnknownDataFormat = true } ], OutputName = "Relu_224", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "644", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 168, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x672x1x1xbf16>, %arg9 = %arg6: tensor<168x672x1x1xbf16>, %arg10 = %arg7: tensor<168xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_223", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "642", Port = 
"data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> }, { Name = "641", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[168, 672, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.12.block.2.fc1.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_224", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "644", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 168, 1, 1]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true, NonNegativeOut = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 1 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 0.000000e+00 : bf16, config.lrelu_alpha_kernel = 0.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<168x672x1x1xbf16>) -> tensor<168x1x1x672xbf16> loc(#loc347) %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x672x1x1xbf16>) -> tensor<1x1x1x672xbf16> loc(#loc347) %464 = tosa.conv2d %463, %462, %arg10 { PartOfLayerName = "Conv_223", PartOfOutputName = "Conv_223", dilation = array, pad = array, stride = array} : (tensor<1x1x1x672xbf16>, tensor<168x1x1x672xbf16>, tensor<168xbf16>) -> tensor<1x1x1x168xbf16> loc(#loc153) %465 = tosa.clamp %464 { LayerName = "Relu_224", OutputName = "Relu_224", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : 
f32, min_int = 0 : i64} : (tensor<1x1x1x168xbf16>) -> tensor<1x1x1x168xbf16> loc(#loc154) %466 = tosa.reshape %465 {new_shape = array} : (tensor<1x1x1x168xbf16>) -> tensor<1x168x1x1xbf16> loc(#loc347) xten_nn.output %466 : tensor<1x168x1x1xbf16> loc(#loc154) } -> tensor<1x168x1x1xbf16> loc(#loc347) xten_nn.output %461 : tensor<1x168x1x1xbf16> loc(#loc347) } -> tensor<1x168x1x1xbf16> loc(#loc347) %298 = xten_nn.subgraph (%arg5 = %297: tensor<1x168x1x1xbf16>, %arg6 = %72: tensor<672x168x1x1xbf16>, %arg7 = %71: tensor<672xbf16>) attributes { LayerName = "Conv_225", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "644", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 168, 1, 1]> : vector<4xindex> }, { Name = "643", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[672, 168, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.12.block.2.fc2.weight", UnknownDataFormat = true } ], OutputName = "Conv_225", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "645", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x168x1x1xbf16>, %arg9 = %arg6: tensor<672x168x1x1xbf16>, %arg10 = %arg7: tensor<672xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_225", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "644", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 168, 1, 1]> : vector<4xindex> }, { Name = "643", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = 
dense<0> : vector<4xindex>, l3_tile_count = dense<[672, 168, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.12.block.2.fc2.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_225", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "645", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<672x168x1x1xbf16>) -> tensor<672x1x1x168xbf16> loc(#loc155) %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x168x1x1xbf16>) -> tensor<1x1x1x168xbf16> loc(#loc155) %464 = tosa.conv2d %463, %462, %arg10 { PartOfLayerName = "Conv_225", PartOfOutputName = "Conv_225", dilation = array, pad = array, stride = array} : (tensor<1x1x1x168xbf16>, tensor<672x1x1x168xbf16>, tensor<672xbf16>) -> tensor<1x1x1x672xbf16> loc(#loc155) %465 = tosa.reshape %464 {new_shape = array} : (tensor<1x1x1x672xbf16>) -> tensor<1x672x1x1xbf16> loc(#loc155) xten_nn.output %465 : tensor<1x672x1x1xbf16> loc(#loc155) } -> tensor<1x672x1x1xbf16> loc(#loc155) xten_nn.output %461 : tensor<1x672x1x1xbf16> loc(#loc155) } -> tensor<1x672x1x1xbf16> loc(#loc155) %299 = xten_nn.subgraph (%arg5 = %298: tensor<1x672x1x1xbf16>) attributes { LayerName = 
"Add_227", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "645", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], OutputName = "Add_227", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "647", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x1x1xbf16>) attributes { LayerName = "Add_227", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "645", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], OutputName = "Add_227", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "647", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_227", OutputName = "Add_227"} : (tensor<1x672x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x672x1x1xbf16> loc(#loc156) xten_nn.output %463 : tensor<1x672x1x1xbf16> loc(#loc156) } -> tensor<1x672x1x1xbf16> loc(#loc156) xten_nn.output %461 : tensor<1x672x1x1xbf16> loc(#loc156) } -> tensor<1x672x1x1xbf16> 
loc(#loc156) %300 = xten_nn.subgraph (%arg5 = %299: tensor<1x672x1x1xbf16>) attributes { LayerName = "Clip_230", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "647", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], OutputName = "Clip_230", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "650", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x1x1xbf16>) attributes { LayerName = "Clip_230", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "647", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], OutputName = "Clip_230", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "650", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_230", OutputName = "Clip_230", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x672x1x1xbf16>) -> tensor<1x672x1x1xbf16> loc(#loc157) xten_nn.output %462 : tensor<1x672x1x1xbf16> 
loc(#loc157) } -> tensor<1x672x1x1xbf16> loc(#loc157) xten_nn.output %461 : tensor<1x672x1x1xbf16> loc(#loc157) } -> tensor<1x672x1x1xbf16> loc(#loc157) %301 = xten_nn.subgraph (%arg5 = %300: tensor<1x672x1x1xbf16>) attributes { LayerName = "Div_232", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "650", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], OutputName = "Div_232", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "652", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x1x1xbf16>) attributes { LayerName = "Div_232", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "650", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], OutputName = "Div_232", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "652", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_232", OutputName = "Div_232", shift = 0 : i8} : 
(tensor<1x672x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x672x1x1xbf16> loc(#loc158) xten_nn.output %463 : tensor<1x672x1x1xbf16> loc(#loc158) } -> tensor<1x672x1x1xbf16> loc(#loc158) xten_nn.output %461 : tensor<1x672x1x1xbf16> loc(#loc158) } -> tensor<1x672x1x1xbf16> loc(#loc158) %302 = xten_nn.subgraph (%arg5 = %301: tensor<1x672x1x1xbf16>) attributes { LayerName = "Mul_233_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "652", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], OutputName = "Mul_233_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "653", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "TileAdf", With = { config.aie_arch = "aie2p", config.dtype = "bfloat16", config.i_dim_c = 672 : ui32, config.i_dim_h = 1 : ui32, config.i_dim_n = 1 : ui32, config.i_dim_w = 1 : ui32, config.rep_dim_c = 1 : ui32, config.rep_dim_h = 12 : ui32, config.rep_dim_w = 20 : ui32 }} { %461 = tosa.tile %arg5 {multiples = array} : (tensor<1x672x1x1xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc159) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc159) } -> tensor<1x672x12x20xbf16> loc(#loc159) %303 = xten_nn.subgraph (%arg5 = %302: tensor<1x672x12x20xbf16>, %arg6 = %294: tensor<1x672x12x20xbf16>) attributes { LayerName = "Mul_233_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "652", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = 
"HCWN", L3Vectorization = "C:8", Name = "650", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_233_Duplicated#1", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "653", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x672x12x20xbf16>, %arg8 = %arg6: tensor<1x672x12x20xbf16>) attributes { LayerName = "Mul_233_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "652", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "650", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_233_Duplicated#1", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "653", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_233", OutputName = "Mul_233", shift = 0 : i8} : (tensor<1x672x12x20xbf16>, tensor<1x672x12x20xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc159) xten_nn.output %462 : tensor<1x672x12x20xbf16> loc(#loc159) } 
-> tensor<1x672x12x20xbf16> loc(#loc159) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc159) } -> tensor<1x672x12x20xbf16> loc(#loc159) %304 = xten_nn.subgraph (%arg5 = %303: tensor<1x672x12x20xbf16>, %arg6 = %70: tensor<112x672x1x1xbf16>, %arg7 = %69: tensor<112xbf16>, %arg8 = %284: tensor<1x112x12x20xbf16>) attributes { LayerName = "Conv_234", OfmShare = 3 : index, Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "653", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> }, { Name = "652", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[112, 672, 1, 1]> : vector<4xindex> }, { Name = "653", UnknownDataFormat = true }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "653", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 112, 12, 20]> : vector<4xindex> } ], OutputName = "Add_235", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "656", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 112, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg9 = %arg5: tensor<1x672x12x20xbf16>, %arg10 = %arg6: tensor<112x672x1x1xbf16>, %arg11 = %arg7: tensor<112xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_234", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "653", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> }, { Name = "652", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : 
vector<4xindex>, l3_tile_count = dense<[112, 672, 1, 1]> : vector<4xindex> }, { Name = "653", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_234", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1033", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 112, 12, 20]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %463 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc160) %465 = tosa.reshape %arg10 {new_shape = array} : (tensor<112x672x1x1xbf16>) -> tensor<112x1x1x672xbf16> loc(#loc160) %466 = tosa.transpose %arg9, %464 : (tensor<1x672x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x672xbf16> loc(#loc160) %467 = tosa.conv2d %466, %465, %arg11 { PartOfLayerName = "Conv_234", PartOfOutputName = "Conv_234", dilation = array, pad = array, stride = array} : (tensor<1x12x20x672xbf16>, tensor<112x1x1x672xbf16>, tensor<112xbf16>) -> tensor<1x12x20x112xbf16> loc(#loc160) %468 = tosa.transpose %467, %463 : (tensor<1x12x20x112xbf16>, tensor<4xi32>) -> tensor<1x112x12x20xbf16> loc(#loc160) xten_nn.output %468 : tensor<1x112x12x20xbf16> loc(#loc160) } -> tensor<1x112x12x20xbf16> loc(#loc160) %462 
= xten_nn.subgraph (%arg9 = %461: tensor<1x112x12x20xbf16>, %arg10 = %arg8: tensor<1x112x12x20xbf16>) attributes { LayerName = "Add_235", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1033", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 112, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "653", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 112, 12, 20]> : vector<4xindex> } ], OutputName = "Add_235", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "656", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 112, 12, 20]> : vector<4xindex> } ], Specializes = "AddBf16", Traits = { Binary = true, Elementwise = true }, With = { config.act = 0 : ui8, config.act_type = "LINEAR", config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %463 = tosa.add %arg9, %arg10 {LayerName = "Add_235", OutputName = "Add_235"} : (tensor<1x112x12x20xbf16>, tensor<1x112x12x20xbf16>) -> tensor<1x112x12x20xbf16> loc(#loc161) xten_nn.output %463 : tensor<1x112x12x20xbf16> loc(#loc161) } -> tensor<1x112x12x20xbf16> loc(#loc161) xten_nn.output %462 : tensor<1x112x12x20xbf16> loc(#loc161) } -> tensor<1x112x12x20xbf16> loc(#loc348) %305 = xten_nn.subgraph (%arg5 = %304: tensor<1x112x12x20xbf16>, %arg6 = %68: tensor<672x112x1x1xbf16>, %arg7 = %67: tensor<672xbf16>) attributes { LayerName = "Conv_236", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "656", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 112, 12, 20]> : vector<4xindex> }, { Name = "1033", UnknownDataFormat = true, l3_extend_end = dense<0> : 
vector<4xindex>, l3_tile_count = dense<[672, 112, 1, 1]> : vector<4xindex> }, { Name = "656", UnknownDataFormat = true } ], OutputName = "Conv_236", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1036", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x112x12x20xbf16>, %arg9 = %arg6: tensor<672x112x1x1xbf16>, %arg10 = %arg7: tensor<672xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_236", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "656", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 112, 12, 20]> : vector<4xindex> }, { Name = "1033", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[672, 112, 1, 1]> : vector<4xindex> }, { Name = "656", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_236", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1036", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", 
config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc162) %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<672x112x1x1xbf16>) -> tensor<672x1x1x112xbf16> loc(#loc162) %465 = tosa.transpose %arg8, %463 : (tensor<1x112x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x112xbf16> loc(#loc162) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_236", PartOfOutputName = "Conv_236", dilation = array, pad = array, stride = array} : (tensor<1x12x20x112xbf16>, tensor<672x1x1x112xbf16>, tensor<672xbf16>) -> tensor<1x12x20x672xbf16> loc(#loc162) %467 = tosa.transpose %466, %462 : (tensor<1x12x20x672xbf16>, tensor<4xi32>) -> tensor<1x672x12x20xbf16> loc(#loc162) xten_nn.output %467 : tensor<1x672x12x20xbf16> loc(#loc162) } -> tensor<1x672x12x20xbf16> loc(#loc162) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc162) } -> tensor<1x672x12x20xbf16> loc(#loc162) %306 = xten_nn.subgraph (%arg5 = %305: tensor<1x672x12x20xbf16>) attributes { LayerName = "Add_238", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1036", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Add_238", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "660", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = 
xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x12x20xbf16>) attributes { LayerName = "Add_238", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1036", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Add_238", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "660", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_238", OutputName = "Add_238"} : (tensor<1x672x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc163) xten_nn.output %463 : tensor<1x672x12x20xbf16> loc(#loc163) } -> tensor<1x672x12x20xbf16> loc(#loc163) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc163) } -> tensor<1x672x12x20xbf16> loc(#loc163) %307 = xten_nn.subgraph (%arg5 = %306: tensor<1x672x12x20xbf16>) attributes { LayerName = "Clip_241", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "660", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_241", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "663", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } 
], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x12x20xbf16>) attributes { LayerName = "Clip_241", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "660", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_241", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "663", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_241", OutputName = "Clip_241", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x672x12x20xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc164) xten_nn.output %462 : tensor<1x672x12x20xbf16> loc(#loc164) } -> tensor<1x672x12x20xbf16> loc(#loc164) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc164) } -> tensor<1x672x12x20xbf16> loc(#loc164) %308 = xten_nn.subgraph (%arg5 = %307: tensor<1x672x12x20xbf16>) attributes { LayerName = "Div_243", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "663", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Div_243", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = 
"HCWN", L3Vectorization = "C:8", Name = "665", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x12x20xbf16>) attributes { LayerName = "Div_243", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "663", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Div_243", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "665", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_243", OutputName = "Div_243", shift = 0 : i8} : (tensor<1x672x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc165) xten_nn.output %463 : tensor<1x672x12x20xbf16> loc(#loc165) } -> tensor<1x672x12x20xbf16> loc(#loc165) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc165) } -> tensor<1x672x12x20xbf16> loc(#loc165) %309 = xten_nn.subgraph (%arg5 = %305: tensor<1x672x12x20xbf16>, %arg6 = %308: tensor<1x672x12x20xbf16>) attributes { LayerName = "Mul_244", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1036", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : 
vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "656", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_244", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "666", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x672x12x20xbf16>, %arg8 = %arg6: tensor<1x672x12x20xbf16>) attributes { LayerName = "Mul_244", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1036", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "656", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_244", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "666", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_244", OutputName = "Mul_244", shift = 0 : i8} : (tensor<1x672x12x20xbf16>, tensor<1x672x12x20xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc166) xten_nn.output %462 : 
tensor<1x672x12x20xbf16> loc(#loc166) } -> tensor<1x672x12x20xbf16> loc(#loc166) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc166) } -> tensor<1x672x12x20xbf16> loc(#loc166) %310 = xten_nn.subgraph (%arg5 = %309: tensor<1x672x12x20xbf16>, %arg6 = %66: tensor<672x1x9x9xbf16>, %arg7 = %65: tensor<672xbf16>) attributes { LayerName = "Conv_245", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "666", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1036", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[672, 1, 9, 9]> : vector<4xindex> }, { Name = "666", UnknownDataFormat = true } ], OutputName = "Conv_245", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1039", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x672x12x20xbf16>, %arg9 = %arg6: tensor<672x1x9x9xbf16>, %arg10 = %arg7: tensor<672xbf16>) attributes { Dilations = array, HWPadding = [[4, 4], [4, 4]], LayerName = "Conv_245", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "666", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1036", Port = "data_io.wts", SubPort = "wts_data", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[672, 1, 9, 9]> : vector<4xindex> }, { Name = "666", Port = "data_io.wts", SubPort = "bias", 
UnknownDataFormat = true } ], OutputName = "Conv_245", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1039", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "DepthwiseConv2dBf16", With = { config.act = 0 : ui8, config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.kernel_height = 9 : ui8, config.kernel_width = 9 : ui8, config.stride = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc167) %465 = tosa.transpose %arg9, %464 : (tensor<672x1x9x9xbf16>, tensor<4xi32>) -> tensor<9x9x672x1xbf16> loc(#loc167) %466 = tosa.transpose %arg8, %463 : (tensor<1x672x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x672xbf16> loc(#loc167) %467 = tosa.depthwise_conv2d %466, %465, %arg10 { PartOfLayerName = "Conv_245", PartOfOutputName = "Conv_245", dilation = array, pad = array, stride = array} : (tensor<1x12x20x672xbf16>, tensor<9x9x672x1xbf16>, tensor<672xbf16>) -> tensor<1x12x20x672xbf16> loc(#loc167) %468 = tosa.transpose %467, %462 : (tensor<1x12x20x672xbf16>, tensor<4xi32>) -> tensor<1x672x12x20xbf16> loc(#loc167) xten_nn.output %468 : tensor<1x672x12x20xbf16> loc(#loc167) } -> tensor<1x672x12x20xbf16> loc(#loc167) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc167) } -> tensor<1x672x12x20xbf16> loc(#loc167) %311 = xten_nn.subgraph (%arg5 = %310: tensor<1x672x12x20xbf16>) attributes { LayerName = "Add_247", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1039", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : 
vector<4xindex> } ], OutputName = "Add_247", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "670", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x12x20xbf16>) attributes { LayerName = "Add_247", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1039", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Add_247", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "670", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_247", OutputName = "Add_247"} : (tensor<1x672x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc168) xten_nn.output %463 : tensor<1x672x12x20xbf16> loc(#loc168) } -> tensor<1x672x12x20xbf16> loc(#loc168) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc168) } -> tensor<1x672x12x20xbf16> loc(#loc168) %312 = xten_nn.subgraph (%arg5 = %311: tensor<1x672x12x20xbf16>) attributes { LayerName = "Clip_250", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", 
L3Vectorization = "C:8", Name = "670", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_250", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "673", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x12x20xbf16>) attributes { LayerName = "Clip_250", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "670", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_250", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "673", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_250", OutputName = "Clip_250", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x672x12x20xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc169) xten_nn.output %462 : tensor<1x672x12x20xbf16> loc(#loc169) } -> tensor<1x672x12x20xbf16> loc(#loc169) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc169) } -> tensor<1x672x12x20xbf16> loc(#loc169) %313 = 
xten_nn.subgraph (%arg5 = %312: tensor<1x672x12x20xbf16>) attributes { LayerName = "Div_252", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "673", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Div_252", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "675", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x12x20xbf16>) attributes { LayerName = "Div_252", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "673", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Div_252", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "675", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_252", OutputName = "Div_252", shift = 0 : i8} : (tensor<1x672x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc170) xten_nn.output %463 : tensor<1x672x12x20xbf16> loc(#loc170) } -> 
tensor<1x672x12x20xbf16> loc(#loc170) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc170) } -> tensor<1x672x12x20xbf16> loc(#loc170) %314 = xten_nn.subgraph (%arg5 = %310: tensor<1x672x12x20xbf16>, %arg6 = %313: tensor<1x672x12x20xbf16>) attributes { LayerName = "Mul_253_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1039", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "666", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_253", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "676", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x672x12x20xbf16>, %arg8 = %arg6: tensor<1x672x12x20xbf16>) attributes { LayerName = "Mul_253_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1039", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "666", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_253", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "676", Port = "data_io.ofm", l3_extend_end = dense<0> : 
vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_253", OutputName = "Mul_253", shift = 0 : i8} : (tensor<1x672x12x20xbf16>, tensor<1x672x12x20xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc171) xten_nn.output %462 : tensor<1x672x12x20xbf16> loc(#loc171) } -> tensor<1x672x12x20xbf16> loc(#loc171) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc171) } -> tensor<1x672x12x20xbf16> loc(#loc171) %315 = xten_nn.subgraph (%arg5 = %314: tensor<1x672x12x20xbf16>) attributes { LayerName = "Mul_253_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1039", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_254_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "677", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 240]> : vector<4xindex> } ], Specializes = "Transpose4dAdf", With = { config.aie_arch = "aie2p", config.dim_0 = 12 : ui32, config.dim_1 = 84 : ui32, config.dim_2 = 20 : ui32, config.dim_3 = 8 : ui32, config.dtype = "bfloat16", config.perm = 6 : ui32 }} { %461 = tosa.reshape %arg5 {new_shape = array} : (tensor<1x672x12x20xbf16>) -> tensor<1x672x1x240xbf16> loc(#loc349) xten_nn.output %461 : tensor<1x672x1x240xbf16> loc(#loc349) } -> tensor<1x672x1x240xbf16> loc(#loc349) %316 = xten_nn.subgraph (%arg5 = %315: 
tensor<1x672x1x240xbf16>) attributes { LayerName = "GlobalAveragePool_254", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "676", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 240]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_254_Duplicated#1", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "677", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x1x240xbf16>) attributes { LayerName = "GlobalAveragePool_254", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "676", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 240]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_254_Duplicated#1", PadValue = 0.000000e+00 : bf16, Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "677", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], Specializes = "ReduceMeanC8Bf16", Traits = { Reduce = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.full_channel = 672 : ui32, config.full_height = 1 : ui32, config.full_width = 240 : ui32, config.reduce_dim = "W" }} { %462 = xten_nn.reduce_mean %arg6 {axes = array, keepdims = 1 : i64} : (tensor<1x672x1x240xbf16>) -> tensor<1x672x1x1xbf16> loc(#loc172) xten_nn.output %462 : tensor<1x672x1x1xbf16> loc(#loc172) } -> tensor<1x672x1x1xbf16> loc(#loc172) xten_nn.output %461 : tensor<1x672x1x1xbf16> loc(#loc172) } -> 
tensor<1x672x1x1xbf16> loc(#loc172) %317 = xten_nn.subgraph (%arg5 = %316: tensor<1x672x1x1xbf16>, %arg6 = %64: tensor<168x672x1x1xbf16>, %arg7 = %63: tensor<168xbf16>) attributes { LayerName = "Conv_255", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "677", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> }, { Name = "676", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[168, 672, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.13.block.2.fc1.weight", UnknownDataFormat = true } ], OutputName = "Relu_256", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "679", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 168, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x672x1x1xbf16>, %arg9 = %arg6: tensor<168x672x1x1xbf16>, %arg10 = %arg7: tensor<168xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_255", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "677", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> }, { Name = "676", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[168, 672, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.13.block.2.fc1.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_256", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "679", 
Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 168, 1, 1]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true, NonNegativeOut = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 1 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 0.000000e+00 : bf16, config.lrelu_alpha_kernel = 0.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<168x672x1x1xbf16>) -> tensor<168x1x1x672xbf16> loc(#loc350) %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x672x1x1xbf16>) -> tensor<1x1x1x672xbf16> loc(#loc350) %464 = tosa.conv2d %463, %462, %arg10 { PartOfLayerName = "Conv_255", PartOfOutputName = "Conv_255", dilation = array, pad = array, stride = array} : (tensor<1x1x1x672xbf16>, tensor<168x1x1x672xbf16>, tensor<168xbf16>) -> tensor<1x1x1x168xbf16> loc(#loc173) %465 = tosa.clamp %464 { LayerName = "Relu_256", OutputName = "Relu_256", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x1x1x168xbf16>) -> tensor<1x1x1x168xbf16> loc(#loc174) %466 = tosa.reshape %465 {new_shape = array} : (tensor<1x1x1x168xbf16>) -> tensor<1x168x1x1xbf16> loc(#loc350) xten_nn.output %466 : tensor<1x168x1x1xbf16> loc(#loc174) } -> tensor<1x168x1x1xbf16> loc(#loc350) xten_nn.output %461 : tensor<1x168x1x1xbf16> loc(#loc350) } -> tensor<1x168x1x1xbf16> loc(#loc350) %318 = xten_nn.subgraph (%arg5 = %317: tensor<1x168x1x1xbf16>, %arg6 = %62: tensor<672x168x1x1xbf16>, %arg7 = %61: tensor<672xbf16>) attributes { LayerName = "Conv_257", Operands = [ { 
CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "679", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 168, 1, 1]> : vector<4xindex> }, { Name = "678", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[672, 168, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.13.block.2.fc2.weight", UnknownDataFormat = true } ], OutputName = "Conv_257", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "680", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x168x1x1xbf16>, %arg9 = %arg6: tensor<672x168x1x1xbf16>, %arg10 = %arg7: tensor<672xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_257", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "679", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 168, 1, 1]> : vector<4xindex> }, { Name = "678", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[672, 168, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.13.block.2.fc2.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_257", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "680", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { 
config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<672x168x1x1xbf16>) -> tensor<672x1x1x168xbf16> loc(#loc175) %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x168x1x1xbf16>) -> tensor<1x1x1x168xbf16> loc(#loc175) %464 = tosa.conv2d %463, %462, %arg10 { PartOfLayerName = "Conv_257", PartOfOutputName = "Conv_257", dilation = array, pad = array, stride = array} : (tensor<1x1x1x168xbf16>, tensor<672x1x1x168xbf16>, tensor<672xbf16>) -> tensor<1x1x1x672xbf16> loc(#loc175) %465 = tosa.reshape %464 {new_shape = array} : (tensor<1x1x1x672xbf16>) -> tensor<1x672x1x1xbf16> loc(#loc175) xten_nn.output %465 : tensor<1x672x1x1xbf16> loc(#loc175) } -> tensor<1x672x1x1xbf16> loc(#loc175) xten_nn.output %461 : tensor<1x672x1x1xbf16> loc(#loc175) } -> tensor<1x672x1x1xbf16> loc(#loc175) %319 = xten_nn.subgraph (%arg5 = %318: tensor<1x672x1x1xbf16>) attributes { LayerName = "Add_259", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "680", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], OutputName = "Add_259", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "682", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = 
{layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x1x1xbf16>) attributes { LayerName = "Add_259", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "680", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], OutputName = "Add_259", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "682", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_259", OutputName = "Add_259"} : (tensor<1x672x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x672x1x1xbf16> loc(#loc176) xten_nn.output %463 : tensor<1x672x1x1xbf16> loc(#loc176) } -> tensor<1x672x1x1xbf16> loc(#loc176) xten_nn.output %461 : tensor<1x672x1x1xbf16> loc(#loc176) } -> tensor<1x672x1x1xbf16> loc(#loc176) %320 = xten_nn.subgraph (%arg5 = %319: tensor<1x672x1x1xbf16>) attributes { LayerName = "Clip_262", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "682", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], OutputName = "Clip_262", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "685", l3_extend_end = dense<0> : 
vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x1x1xbf16>) attributes { LayerName = "Clip_262", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "682", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], OutputName = "Clip_262", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "685", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_262", OutputName = "Clip_262", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x672x1x1xbf16>) -> tensor<1x672x1x1xbf16> loc(#loc177) xten_nn.output %462 : tensor<1x672x1x1xbf16> loc(#loc177) } -> tensor<1x672x1x1xbf16> loc(#loc177) xten_nn.output %461 : tensor<1x672x1x1xbf16> loc(#loc177) } -> tensor<1x672x1x1xbf16> loc(#loc177) %321 = xten_nn.subgraph (%arg5 = %320: tensor<1x672x1x1xbf16>) attributes { LayerName = "Div_264", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "685", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], OutputName = "Div_264", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", 
Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "687", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x1x1xbf16>) attributes { LayerName = "Div_264", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "685", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], OutputName = "Div_264", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "687", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_264", OutputName = "Div_264", shift = 0 : i8} : (tensor<1x672x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x672x1x1xbf16> loc(#loc178) xten_nn.output %463 : tensor<1x672x1x1xbf16> loc(#loc178) } -> tensor<1x672x1x1xbf16> loc(#loc178) xten_nn.output %461 : tensor<1x672x1x1xbf16> loc(#loc178) } -> tensor<1x672x1x1xbf16> loc(#loc178) %322 = xten_nn.subgraph (%arg5 = %321: tensor<1x672x1x1xbf16>) attributes { LayerName = "Mul_265_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "687", Port = "data_io.ifm", l3_extend_end = dense<0> : 
vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], OutputName = "Mul_265_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "688", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "TileAdf", With = { config.aie_arch = "aie2p", config.dtype = "bfloat16", config.i_dim_c = 672 : ui32, config.i_dim_h = 1 : ui32, config.i_dim_n = 1 : ui32, config.i_dim_w = 1 : ui32, config.rep_dim_c = 1 : ui32, config.rep_dim_h = 12 : ui32, config.rep_dim_w = 20 : ui32 }} { %461 = tosa.tile %arg5 {multiples = array} : (tensor<1x672x1x1xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc179) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc179) } -> tensor<1x672x12x20xbf16> loc(#loc179) %323 = xten_nn.subgraph (%arg5 = %322: tensor<1x672x12x20xbf16>, %arg6 = %314: tensor<1x672x12x20xbf16>) attributes { LayerName = "Mul_265_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "687", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "685", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_265_Duplicated#1", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "688", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = 
"single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x672x12x20xbf16>, %arg8 = %arg6: tensor<1x672x12x20xbf16>) attributes { LayerName = "Mul_265_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "687", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "685", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_265_Duplicated#1", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "688", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_265", OutputName = "Mul_265", shift = 0 : i8} : (tensor<1x672x12x20xbf16>, tensor<1x672x12x20xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc179) xten_nn.output %462 : tensor<1x672x12x20xbf16> loc(#loc179) } -> tensor<1x672x12x20xbf16> loc(#loc179) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc179) } -> tensor<1x672x12x20xbf16> loc(#loc179) %324 = xten_nn.subgraph (%arg5 = %323: tensor<1x672x12x20xbf16>, %arg6 = %60: tensor<160x672x1x1xbf16>, %arg7 = %59: tensor<160xbf16>) attributes { LayerName = "Conv_266", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "688", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> }, { Name = "687", UnknownDataFormat = true, 
l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[160, 672, 1, 1]> : vector<4xindex> }, { Name = "688", UnknownDataFormat = true } ], OutputName = "Conv_266", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1042", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x672x12x20xbf16>, %arg9 = %arg6: tensor<160x672x1x1xbf16>, %arg10 = %arg7: tensor<160xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_266", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "688", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> }, { Name = "687", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[160, 672, 1, 1]> : vector<4xindex> }, { Name = "688", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_266", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1042", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", 
config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc180) %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<160x672x1x1xbf16>) -> tensor<160x1x1x672xbf16> loc(#loc180) %465 = tosa.transpose %arg8, %463 : (tensor<1x672x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x672xbf16> loc(#loc180) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_266", PartOfOutputName = "Conv_266", dilation = array, pad = array, stride = array} : (tensor<1x12x20x672xbf16>, tensor<160x1x1x672xbf16>, tensor<160xbf16>) -> tensor<1x12x20x160xbf16> loc(#loc180) %467 = tosa.transpose %466, %462 : (tensor<1x12x20x160xbf16>, tensor<4xi32>) -> tensor<1x160x12x20xbf16> loc(#loc180) xten_nn.output %467 : tensor<1x160x12x20xbf16> loc(#loc180) } -> tensor<1x160x12x20xbf16> loc(#loc180) xten_nn.output %461 : tensor<1x160x12x20xbf16> loc(#loc180) } -> tensor<1x160x12x20xbf16> loc(#loc180) %325 = xten_nn.subgraph (%arg5 = %324: tensor<1x160x12x20xbf16>, %arg6 = %58: tensor<960x160x1x1xbf16>, %arg7 = %57: tensor<960xbf16>) attributes { LayerName = "Conv_267", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1042", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> }, { Name = "688", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[960, 160, 1, 1]> : vector<4xindex> }, { Name = "1046", UnknownDataFormat = true } ], OutputName = "Conv_267", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", 
L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1045", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x160x12x20xbf16>, %arg9 = %arg6: tensor<960x160x1x1xbf16>, %arg10 = %arg7: tensor<960xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_267", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1042", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> }, { Name = "688", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[960, 160, 1, 1]> : vector<4xindex> }, { Name = "1046", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_267", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1045", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : 
tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc181) %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<960x160x1x1xbf16>) -> tensor<960x1x1x160xbf16> loc(#loc181) %465 = tosa.transpose %arg8, %463 : (tensor<1x160x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x160xbf16> loc(#loc181) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_267", PartOfOutputName = "Conv_267", dilation = array, pad = array, stride = array} : (tensor<1x12x20x160xbf16>, tensor<960x1x1x160xbf16>, tensor<960xbf16>) -> tensor<1x12x20x960xbf16> loc(#loc181) %467 = tosa.transpose %466, %462 : (tensor<1x12x20x960xbf16>, tensor<4xi32>) -> tensor<1x960x12x20xbf16> loc(#loc181) xten_nn.output %467 : tensor<1x960x12x20xbf16> loc(#loc181) } -> tensor<1x960x12x20xbf16> loc(#loc181) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc181) } -> tensor<1x960x12x20xbf16> loc(#loc181) %326 = xten_nn.subgraph (%arg5 = %325: tensor<1x960x12x20xbf16>) attributes { LayerName = "Add_269", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1045", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Add_269", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "694", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { LayerName = "Add_269", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1045", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, 
l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Add_269", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "694", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_269", OutputName = "Add_269"} : (tensor<1x960x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc182) xten_nn.output %463 : tensor<1x960x12x20xbf16> loc(#loc182) } -> tensor<1x960x12x20xbf16> loc(#loc182) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc182) } -> tensor<1x960x12x20xbf16> loc(#loc182) %327 = xten_nn.subgraph (%arg5 = %326: tensor<1x960x12x20xbf16>) attributes { LayerName = "Clip_272", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "694", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_272", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "697", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { LayerName = "Clip_272", Operands = [ { CurrentDataFormat = "NCHW", 
L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "694", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_272", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "697", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_272", OutputName = "Clip_272", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x960x12x20xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc183) xten_nn.output %462 : tensor<1x960x12x20xbf16> loc(#loc183) } -> tensor<1x960x12x20xbf16> loc(#loc183) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc183) } -> tensor<1x960x12x20xbf16> loc(#loc183) %328 = xten_nn.subgraph (%arg5 = %327: tensor<1x960x12x20xbf16>) attributes { LayerName = "Div_274", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "697", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Div_274", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "699", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} 
}} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { LayerName = "Div_274", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "697", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Div_274", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "699", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_274", OutputName = "Div_274", shift = 0 : i8} : (tensor<1x960x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc184) xten_nn.output %463 : tensor<1x960x12x20xbf16> loc(#loc184) } -> tensor<1x960x12x20xbf16> loc(#loc184) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc184) } -> tensor<1x960x12x20xbf16> loc(#loc184) %329 = xten_nn.subgraph (%arg5 = %325: tensor<1x960x12x20xbf16>, %arg6 = %328: tensor<1x960x12x20xbf16>) attributes { LayerName = "Mul_275", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1045", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1042", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_275", Overlay = 
"4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "700", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x960x12x20xbf16>, %arg8 = %arg6: tensor<1x960x12x20xbf16>) attributes { LayerName = "Mul_275", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1045", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1042", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_275", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "700", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_275", OutputName = "Mul_275", shift = 0 : i8} : (tensor<1x960x12x20xbf16>, tensor<1x960x12x20xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc185) xten_nn.output %462 : tensor<1x960x12x20xbf16> loc(#loc185) } -> tensor<1x960x12x20xbf16> loc(#loc185) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc185) } -> tensor<1x960x12x20xbf16> loc(#loc185) %330 = xten_nn.subgraph (%arg5 = %329: tensor<1x960x12x20xbf16>, %arg6 = %56: 
tensor<960x1x9x9xbf16>, %arg7 = %55: tensor<960xbf16>) attributes { LayerName = "Conv_276", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "700", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1045", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[960, 1, 9, 9]> : vector<4xindex> }, { Name = "700", UnknownDataFormat = true } ], OutputName = "Conv_276", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1048", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x960x12x20xbf16>, %arg9 = %arg6: tensor<960x1x9x9xbf16>, %arg10 = %arg7: tensor<960xbf16>) attributes { Dilations = array, HWPadding = [[4, 4], [4, 4]], LayerName = "Conv_276", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "700", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1045", Port = "data_io.wts", SubPort = "wts_data", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[960, 1, 9, 9]> : vector<4xindex> }, { Name = "700", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_276", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1048", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count 
// NOTE(review): auto-generated lowered IR (xten_nn/tosa, AIE backend). Annotations only — they
// will not survive regeneration of this dump.
// NOTE(review): the `array` attributes below (dilation/pad/stride/new_shape/axes) appear to have
// lost their `<i64: ...>` payload, possibly during export — confirm against the original .mlir.
// Tail of %330: DepthwiseConv2dBf16 "Conv_276" (9x9 kernel, stride 1) on a 1x960x12x20 feature
// map, with NCHW<->NHWC transposes wrapped around tosa.depthwise_conv2d.
// %331 begins on this line: "Add_278" adds scalar 3.0 — first step of a hard-swish-style
// activation chain: clip(x + 3, 0, 6) * ~1/6, then multiply by x (see %331..%334).
= dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "DepthwiseConv2dBf16", With = { config.act = 0 : ui8, config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.kernel_height = 9 : ui8, config.kernel_width = 9 : ui8, config.stride = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc186) %465 = tosa.transpose %arg9, %464 : (tensor<960x1x9x9xbf16>, tensor<4xi32>) -> tensor<9x9x960x1xbf16> loc(#loc186) %466 = tosa.transpose %arg8, %463 : (tensor<1x960x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x960xbf16> loc(#loc186) %467 = tosa.depthwise_conv2d %466, %465, %arg10 { PartOfLayerName = "Conv_276", PartOfOutputName = "Conv_276", dilation = array, pad = array, stride = array} : (tensor<1x12x20x960xbf16>, tensor<9x9x960x1xbf16>, tensor<960xbf16>) -> tensor<1x12x20x960xbf16> loc(#loc186) %468 = tosa.transpose %467, %462 : (tensor<1x12x20x960xbf16>, tensor<4xi32>) -> tensor<1x960x12x20xbf16> loc(#loc186) xten_nn.output %468 : tensor<1x960x12x20xbf16> loc(#loc186) } -> tensor<1x960x12x20xbf16> loc(#loc186) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc186) } -> tensor<1x960x12x20xbf16> loc(#loc186) %331 = xten_nn.subgraph (%arg5 = %330: tensor<1x960x12x20xbf16>) attributes { LayerName = "Add_278", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1048", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Add_278", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "704", l3_extend_end = dense<0> : vector<4xindex>, 
// Inner kernel of %331 (AddAttributeBroadcastingBf16, scalar 3.0). %332 "Clip_281" begins near
// the end of this line.
l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { LayerName = "Add_278", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1048", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Add_278", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "704", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_278", OutputName = "Add_278"} : (tensor<1x960x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc187) xten_nn.output %463 : tensor<1x960x12x20xbf16> loc(#loc187) } -> tensor<1x960x12x20xbf16> loc(#loc187) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc187) } -> tensor<1x960x12x20xbf16> loc(#loc187) %332 = xten_nn.subgraph (%arg5 = %331: tensor<1x960x12x20xbf16>) attributes { LayerName = "Clip_281", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "704", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_281", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { 
// %332 inner kernel: ClipBf16 clamps to [0, 6]. %333 "Div_283" begins at the end of this line.
CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "707", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { LayerName = "Clip_281", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "704", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_281", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "707", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_281", OutputName = "Clip_281", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x960x12x20xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc188) xten_nn.output %462 : tensor<1x960x12x20xbf16> loc(#loc188) } -> tensor<1x960x12x20xbf16> loc(#loc188) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc188) } -> tensor<1x960x12x20xbf16> loc(#loc188) %333 = xten_nn.subgraph (%arg5 = %332: tensor<1x960x12x20xbf16>) attributes { LayerName = "Div_283", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "707", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 
// %333 inner kernel: multiply by 1.660160e-01 (bf16 rounding of 1/6). %331..%333 together compute
// hard-sigmoid(x) = clip(x + 3, 0, 6) / 6 of the depthwise-conv output %330.
// %334 "Mul_284" begins at the end of this line: elementwise %330 * hard-sigmoid(%330), i.e. the
// hard-swish of %330.
12, 20]> : vector<4xindex> } ], OutputName = "Div_283", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "709", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { LayerName = "Div_283", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "707", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Div_283", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "709", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_283", OutputName = "Div_283", shift = 0 : i8} : (tensor<1x960x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc189) xten_nn.output %463 : tensor<1x960x12x20xbf16> loc(#loc189) } -> tensor<1x960x12x20xbf16> loc(#loc189) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc189) } -> tensor<1x960x12x20xbf16> loc(#loc189) %334 = xten_nn.subgraph (%arg5 = %330: tensor<1x960x12x20xbf16>, %arg6 = %333: tensor<1x960x12x20xbf16>) attributes { LayerName = 
// %334 continued: MulBf16 binary elementwise kernel metadata (ifm1 = %330, ifm2 = %333).
"Mul_284_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1048", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "700", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_284", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "710", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x960x12x20xbf16>, %arg8 = %arg6: tensor<1x960x12x20xbf16>) attributes { LayerName = "Mul_284_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1048", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "700", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_284", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "710", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} 
// %334 inner tosa.mul. %335 begins: Transpose4dAdf templated reshape 1x960x12x20 -> 1x960x1x240,
// flattening H*W (12*20 = 240) so the following reduce-mean can average over a single axis.
{ %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_284", OutputName = "Mul_284", shift = 0 : i8} : (tensor<1x960x12x20xbf16>, tensor<1x960x12x20xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc190) xten_nn.output %462 : tensor<1x960x12x20xbf16> loc(#loc190) } -> tensor<1x960x12x20xbf16> loc(#loc190) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc190) } -> tensor<1x960x12x20xbf16> loc(#loc190) %335 = xten_nn.subgraph (%arg5 = %334: tensor<1x960x12x20xbf16>) attributes { LayerName = "Mul_284_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1048", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_285_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "711", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 240]> : vector<4xindex> } ], Specializes = "Transpose4dAdf", With = { config.aie_arch = "aie2p", config.dim_0 = 12 : ui32, config.dim_1 = 120 : ui32, config.dim_2 = 20 : ui32, config.dim_3 = 8 : ui32, config.dtype = "bfloat16", config.perm = 6 : ui32 }} { %461 = tosa.reshape %arg5 {new_shape = array} : (tensor<1x960x12x20xbf16>) -> tensor<1x960x1x240xbf16> loc(#loc351) xten_nn.output %461 : tensor<1x960x1x240xbf16> loc(#loc351) } -> tensor<1x960x1x240xbf16> loc(#loc351) %336 = xten_nn.subgraph (%arg5 = %335: tensor<1x960x1x240xbf16>) attributes { LayerName = "GlobalAveragePool_285", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "710", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 240]> : vector<4xindex> } ], OutputName = 
// %336: ReduceMeanC8Bf16 over W (the flattened 240 elements) -> 1x960x1x1 global average pool.
"GlobalAveragePool_285_Duplicated#1", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "711", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x1x240xbf16>) attributes { LayerName = "GlobalAveragePool_285", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "710", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 240]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_285_Duplicated#1", PadValue = 0.000000e+00 : bf16, Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "711", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], Specializes = "ReduceMeanC8Bf16", Traits = { Reduce = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.full_channel = 960 : ui32, config.full_height = 1 : ui32, config.full_width = 240 : ui32, config.reduce_dim = "W" }} { %462 = xten_nn.reduce_mean %arg6 {axes = array, keepdims = 1 : i64} : (tensor<1x960x1x240xbf16>) -> tensor<1x960x1x1xbf16> loc(#loc191) xten_nn.output %462 : tensor<1x960x1x1xbf16> loc(#loc191) } -> tensor<1x960x1x1xbf16> loc(#loc191) xten_nn.output %461 : tensor<1x960x1x1xbf16> loc(#loc191) } -> tensor<1x960x1x1xbf16> loc(#loc191) %337 = xten_nn.subgraph (%arg5 = %336: tensor<1x960x1x1xbf16>, %arg6 = %54: tensor<240x960x1x1xbf16>, %arg7 = %53: tensor<240xbf16>) attributes { LayerName = "Conv_286", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "711", 
// %337: "Conv_286" 1x1 conv 960 -> 240 channels with fused ReLU ("Relu_287") on the pooled
// vector — consistent with the squeeze stage of a squeeze-and-excitation block (TODO confirm
// against the original model graph).
l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> }, { Name = "710", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[240, 960, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.14.block.2.fc1.weight", UnknownDataFormat = true } ], OutputName = "Relu_287", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "713", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x960x1x1xbf16>, %arg9 = %arg6: tensor<240x960x1x1xbf16>, %arg10 = %arg7: tensor<240xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_286", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "711", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> }, { Name = "710", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[240, 960, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.14.block.2.fc1.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_287", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "713", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 1, 1]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true, NonNegativeOut = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 1 : ui8, 
// %337 inner kernel body (reshapes + tosa.conv2d + ReLU clamp). %338 "Conv_288" begins at the
// end of this line: 1x1 conv 240 -> 960 channels (excitation expansion).
config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 0.000000e+00 : bf16, config.lrelu_alpha_kernel = 0.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<240x960x1x1xbf16>) -> tensor<240x1x1x960xbf16> loc(#loc352) %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x960x1x1xbf16>) -> tensor<1x1x1x960xbf16> loc(#loc352) %464 = tosa.conv2d %463, %462, %arg10 { PartOfLayerName = "Conv_286", PartOfOutputName = "Conv_286", dilation = array, pad = array, stride = array} : (tensor<1x1x1x960xbf16>, tensor<240x1x1x960xbf16>, tensor<240xbf16>) -> tensor<1x1x1x240xbf16> loc(#loc192) %465 = tosa.clamp %464 { LayerName = "Relu_287", OutputName = "Relu_287", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x1x1x240xbf16>) -> tensor<1x1x1x240xbf16> loc(#loc193) %466 = tosa.reshape %465 {new_shape = array} : (tensor<1x1x1x240xbf16>) -> tensor<1x240x1x1xbf16> loc(#loc352) xten_nn.output %466 : tensor<1x240x1x1xbf16> loc(#loc193) } -> tensor<1x240x1x1xbf16> loc(#loc352) xten_nn.output %461 : tensor<1x240x1x1xbf16> loc(#loc352) } -> tensor<1x240x1x1xbf16> loc(#loc352) %338 = xten_nn.subgraph (%arg5 = %337: tensor<1x240x1x1xbf16>, %arg6 = %52: tensor<960x240x1x1xbf16>, %arg7 = %51: tensor<960xbf16>) attributes { LayerName = "Conv_288", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "713", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 1, 1]> : vector<4xindex> }, { Name = "712", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[960, 
// %338 continued: Conv2DBf16 kernel metadata (config.act = 0; its tosa body below has no clamp,
// i.e. linear output despite act_type = "RELU").
240, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.14.block.2.fc2.weight", UnknownDataFormat = true } ], OutputName = "Conv_288", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "714", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x240x1x1xbf16>, %arg9 = %arg6: tensor<960x240x1x1xbf16>, %arg10 = %arg7: tensor<960xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_288", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "713", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 1, 1]> : vector<4xindex> }, { Name = "712", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[960, 240, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.14.block.2.fc2.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_288", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "714", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = 
// %338 inner body: reshapes + tosa.conv2d (no activation clamp). %339 "Add_290" begins: scalar
// +3.0, start of a hard-sigmoid gate on the 1x960x1x1 excitation vector (%339..%341).
"bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<960x240x1x1xbf16>) -> tensor<960x1x1x240xbf16> loc(#loc194) %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x240x1x1xbf16>) -> tensor<1x1x1x240xbf16> loc(#loc194) %464 = tosa.conv2d %463, %462, %arg10 { PartOfLayerName = "Conv_288", PartOfOutputName = "Conv_288", dilation = array, pad = array, stride = array} : (tensor<1x1x1x240xbf16>, tensor<960x1x1x240xbf16>, tensor<960xbf16>) -> tensor<1x1x1x960xbf16> loc(#loc194) %465 = tosa.reshape %464 {new_shape = array} : (tensor<1x1x1x960xbf16>) -> tensor<1x960x1x1xbf16> loc(#loc194) xten_nn.output %465 : tensor<1x960x1x1xbf16> loc(#loc194) } -> tensor<1x960x1x1xbf16> loc(#loc194) xten_nn.output %461 : tensor<1x960x1x1xbf16> loc(#loc194) } -> tensor<1x960x1x1xbf16> loc(#loc194) %339 = xten_nn.subgraph (%arg5 = %338: tensor<1x960x1x1xbf16>) attributes { LayerName = "Add_290", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "714", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], OutputName = "Add_290", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "716", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x1x1xbf16>) attributes { LayerName = "Add_290", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "714", Port = "data_io.ifm", 
// %339 inner kernel (add 3.0). %340 "Clip_293" begins near the end of this line.
l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], OutputName = "Add_290", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "716", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_290", OutputName = "Add_290"} : (tensor<1x960x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x960x1x1xbf16> loc(#loc195) xten_nn.output %463 : tensor<1x960x1x1xbf16> loc(#loc195) } -> tensor<1x960x1x1xbf16> loc(#loc195) xten_nn.output %461 : tensor<1x960x1x1xbf16> loc(#loc195) } -> tensor<1x960x1x1xbf16> loc(#loc195) %340 = xten_nn.subgraph (%arg5 = %339: tensor<1x960x1x1xbf16>) attributes { LayerName = "Clip_293", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "716", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], OutputName = "Clip_293", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "719", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x1x1xbf16>) attributes { LayerName = "Clip_293", Operands = [ { 
// %340 inner kernel: clamp to [0, 6]. %341 "Div_295" begins at the end of this line.
CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "716", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], OutputName = "Clip_293", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "719", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_293", OutputName = "Clip_293", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x960x1x1xbf16>) -> tensor<1x960x1x1xbf16> loc(#loc196) xten_nn.output %462 : tensor<1x960x1x1xbf16> loc(#loc196) } -> tensor<1x960x1x1xbf16> loc(#loc196) xten_nn.output %461 : tensor<1x960x1x1xbf16> loc(#loc196) } -> tensor<1x960x1x1xbf16> loc(#loc196) %341 = xten_nn.subgraph (%arg5 = %340: tensor<1x960x1x1xbf16>) attributes { LayerName = "Div_295", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "719", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], OutputName = "Div_295", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "721", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = 
// %341 inner kernel: multiply by 1.660160e-01 (bf16 ≈ 1/6), completing the gate hard-sigmoid.
// %342 begins at the end of this line: TileAdf broadcast of the 1x960x1x1 gate back to the
// 1x960x12x20 feature-map shape.
"flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x1x1xbf16>) attributes { LayerName = "Div_295", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "719", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], OutputName = "Div_295", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "721", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_295", OutputName = "Div_295", shift = 0 : i8} : (tensor<1x960x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x960x1x1xbf16> loc(#loc197) xten_nn.output %463 : tensor<1x960x1x1xbf16> loc(#loc197) } -> tensor<1x960x1x1xbf16> loc(#loc197) xten_nn.output %461 : tensor<1x960x1x1xbf16> loc(#loc197) } -> tensor<1x960x1x1xbf16> loc(#loc197) %342 = xten_nn.subgraph (%arg5 = %341: tensor<1x960x1x1xbf16>) attributes { LayerName = "Mul_296_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "721", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], OutputName = "Mul_296_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", 
// %342: tosa.tile replicates the gate to 1x960x12x20. %343 "Mul_296" begins: elementwise
// gate * features (%334) — channel-wise rescaling of the hard-swish output.
Name = "722", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "TileAdf", With = { config.aie_arch = "aie2p", config.dtype = "bfloat16", config.i_dim_c = 960 : ui32, config.i_dim_h = 1 : ui32, config.i_dim_n = 1 : ui32, config.i_dim_w = 1 : ui32, config.rep_dim_c = 1 : ui32, config.rep_dim_h = 12 : ui32, config.rep_dim_w = 20 : ui32 }} { %461 = tosa.tile %arg5 {multiples = array} : (tensor<1x960x1x1xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc198) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc198) } -> tensor<1x960x12x20xbf16> loc(#loc198) %343 = xten_nn.subgraph (%arg5 = %342: tensor<1x960x12x20xbf16>, %arg6 = %334: tensor<1x960x12x20xbf16>) attributes { LayerName = "Mul_296_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "721", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "719", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_296_Duplicated#1", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "722", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x960x12x20xbf16>, %arg8 = %arg6: tensor<1x960x12x20xbf16>) attributes { LayerName = "Mul_296_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "721", Port = 
// %343 inner tosa.mul. %344 "Conv_297"/"Add_298" begins at the end of this line: 1x1 projection
// conv 960 -> 160 channels followed by a fused residual add with %324 (note OfmShare = 3).
"data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "719", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_296_Duplicated#1", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "722", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_296", OutputName = "Mul_296", shift = 0 : i8} : (tensor<1x960x12x20xbf16>, tensor<1x960x12x20xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc198) xten_nn.output %462 : tensor<1x960x12x20xbf16> loc(#loc198) } -> tensor<1x960x12x20xbf16> loc(#loc198) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc198) } -> tensor<1x960x12x20xbf16> loc(#loc198) %344 = xten_nn.subgraph (%arg5 = %343: tensor<1x960x12x20xbf16>, %arg6 = %50: tensor<160x960x1x1xbf16>, %arg7 = %49: tensor<160xbf16>, %arg8 = %324: tensor<1x160x12x20xbf16>) attributes { LayerName = "Conv_297", OfmShare = 3 : index, Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "722", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { Name = "721", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[160, 960, 1, 1]> : vector<4xindex> }, { Name = "722", UnknownDataFormat = true }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "722", l3_extend_end = 
// %344 continued: Conv2DBf16 projection kernel metadata ("Conv_297", output name "Add_298").
dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> } ], OutputName = "Add_298", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "725", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg9 = %arg5: tensor<1x960x12x20xbf16>, %arg10 = %arg6: tensor<160x960x1x1xbf16>, %arg11 = %arg7: tensor<160xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_297", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "722", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { Name = "721", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[160, 960, 1, 1]> : vector<4xindex> }, { Name = "722", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_297", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1051", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, 
// %344 inner conv body (transposes + tosa.conv2d), then its second nested subgraph: the fused
// "Add_298" residual add (%461 conv output + %arg8, which is %324 from an earlier block).
config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %463 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc199) %465 = tosa.reshape %arg10 {new_shape = array} : (tensor<160x960x1x1xbf16>) -> tensor<160x1x1x960xbf16> loc(#loc199) %466 = tosa.transpose %arg9, %464 : (tensor<1x960x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x960xbf16> loc(#loc199) %467 = tosa.conv2d %466, %465, %arg11 { PartOfLayerName = "Conv_297", PartOfOutputName = "Conv_297", dilation = array, pad = array, stride = array} : (tensor<1x12x20x960xbf16>, tensor<160x1x1x960xbf16>, tensor<160xbf16>) -> tensor<1x12x20x160xbf16> loc(#loc199) %468 = tosa.transpose %467, %463 : (tensor<1x12x20x160xbf16>, tensor<4xi32>) -> tensor<1x160x12x20xbf16> loc(#loc199) xten_nn.output %468 : tensor<1x160x12x20xbf16> loc(#loc199) } -> tensor<1x160x12x20xbf16> loc(#loc199) %462 = xten_nn.subgraph (%arg9 = %461: tensor<1x160x12x20xbf16>, %arg10 = %arg8: tensor<1x160x12x20xbf16>) attributes { LayerName = "Add_298", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1051", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "722", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> } ], OutputName = "Add_298", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "725", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 160, 12, 20]> : 
// "Add_298" inner tosa.add. %345 "Conv_299" begins: 1x1 expansion conv 160 -> 960 channels.
vector<4xindex> } ], Specializes = "AddBf16", Traits = { Binary = true, Elementwise = true }, With = { config.act = 0 : ui8, config.act_type = "LINEAR", config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %463 = tosa.add %arg9, %arg10 {LayerName = "Add_298", OutputName = "Add_298"} : (tensor<1x160x12x20xbf16>, tensor<1x160x12x20xbf16>) -> tensor<1x160x12x20xbf16> loc(#loc200) xten_nn.output %463 : tensor<1x160x12x20xbf16> loc(#loc200) } -> tensor<1x160x12x20xbf16> loc(#loc200) xten_nn.output %462 : tensor<1x160x12x20xbf16> loc(#loc200) } -> tensor<1x160x12x20xbf16> loc(#loc353) %345 = xten_nn.subgraph (%arg5 = %344: tensor<1x160x12x20xbf16>, %arg6 = %48: tensor<960x160x1x1xbf16>, %arg7 = %47: tensor<960xbf16>) attributes { LayerName = "Conv_299", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "725", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> }, { Name = "1051", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[960, 160, 1, 1]> : vector<4xindex> }, { Name = "725", UnknownDataFormat = true } ], OutputName = "Conv_299", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1054", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x160x12x20xbf16>, %arg9 = %arg6: tensor<960x160x1x1xbf16>, %arg10 = %arg7: tensor<960xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_299", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "725", 
// %345 continued: Conv2DBf16 kernel body begins (reshape weights, transpose ifm to NHWC,
// tosa.conv2d).
Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> }, { Name = "1051", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[960, 160, 1, 1]> : vector<4xindex> }, { Name = "725", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_299", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1054", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc201) %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<960x160x1x1xbf16>) -> tensor<960x1x1x160xbf16> loc(#loc201) %465 = tosa.transpose %arg8, %463 : (tensor<1x160x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x160xbf16> loc(#loc201) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_299", PartOfOutputName = "Conv_299", dilation = array, pad = array, stride = array} : (tensor<1x12x20x160xbf16>, tensor<960x1x1x160xbf16>, tensor<960xbf16>) -> 
// %345 inner body ends (transpose back to NCHW). %346 "Add_301" begins: scalar +3.0, the start of
// another hard-sigmoid/hard-swish chain (continues past this excerpt).
tensor<1x12x20x960xbf16> loc(#loc201) %467 = tosa.transpose %466, %462 : (tensor<1x12x20x960xbf16>, tensor<4xi32>) -> tensor<1x960x12x20xbf16> loc(#loc201) xten_nn.output %467 : tensor<1x960x12x20xbf16> loc(#loc201) } -> tensor<1x960x12x20xbf16> loc(#loc201) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc201) } -> tensor<1x960x12x20xbf16> loc(#loc201) %346 = xten_nn.subgraph (%arg5 = %345: tensor<1x960x12x20xbf16>) attributes { LayerName = "Add_301", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1054", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Add_301", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "729", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { LayerName = "Add_301", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1054", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Add_301", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "729", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { 
%462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_301", OutputName = "Add_301"} : (tensor<1x960x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc202) xten_nn.output %463 : tensor<1x960x12x20xbf16> loc(#loc202) } -> tensor<1x960x12x20xbf16> loc(#loc202) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc202) } -> tensor<1x960x12x20xbf16> loc(#loc202) %347 = xten_nn.subgraph (%arg5 = %346: tensor<1x960x12x20xbf16>) attributes { LayerName = "Clip_304", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "729", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_304", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "732", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { LayerName = "Clip_304", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "729", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_304", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "732", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", 
config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_304", OutputName = "Clip_304", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x960x12x20xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc203) xten_nn.output %462 : tensor<1x960x12x20xbf16> loc(#loc203) } -> tensor<1x960x12x20xbf16> loc(#loc203) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc203) } -> tensor<1x960x12x20xbf16> loc(#loc203) %348 = xten_nn.subgraph (%arg5 = %347: tensor<1x960x12x20xbf16>) attributes { LayerName = "Div_306", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "732", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Div_306", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "734", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { LayerName = "Div_306", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "732", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Div_306", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "734", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : 
vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_306", OutputName = "Div_306", shift = 0 : i8} : (tensor<1x960x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc204) xten_nn.output %463 : tensor<1x960x12x20xbf16> loc(#loc204) } -> tensor<1x960x12x20xbf16> loc(#loc204) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc204) } -> tensor<1x960x12x20xbf16> loc(#loc204) %349 = xten_nn.subgraph (%arg5 = %345: tensor<1x960x12x20xbf16>, %arg6 = %348: tensor<1x960x12x20xbf16>) attributes { LayerName = "Mul_307", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1054", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "725", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_307", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "735", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x960x12x20xbf16>, %arg8 = %arg6: tensor<1x960x12x20xbf16>) attributes { LayerName = "Mul_307", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat 
= "HCWN", L3Vectorization = "C:8", Name = "1054", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "725", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_307", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "735", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_307", OutputName = "Mul_307", shift = 0 : i8} : (tensor<1x960x12x20xbf16>, tensor<1x960x12x20xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc205) xten_nn.output %462 : tensor<1x960x12x20xbf16> loc(#loc205) } -> tensor<1x960x12x20xbf16> loc(#loc205) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc205) } -> tensor<1x960x12x20xbf16> loc(#loc205) %350 = xten_nn.subgraph (%arg5 = %349: tensor<1x960x12x20xbf16>, %arg6 = %46: tensor<960x1x9x9xbf16>, %arg7 = %45: tensor<960xbf16>) attributes { LayerName = "Conv_308", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "735", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1054", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[960, 1, 9, 9]> : vector<4xindex> }, { Name = "735", UnknownDataFormat = true } ], OutputName = "Conv_308", Overlay = 
"4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1057", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x960x12x20xbf16>, %arg9 = %arg6: tensor<960x1x9x9xbf16>, %arg10 = %arg7: tensor<960xbf16>) attributes { Dilations = array, HWPadding = [[4, 4], [4, 4]], LayerName = "Conv_308", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "735", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1054", Port = "data_io.wts", SubPort = "wts_data", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[960, 1, 9, 9]> : vector<4xindex> }, { Name = "735", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_308", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1057", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "DepthwiseConv2dBf16", With = { config.act = 0 : ui8, config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.kernel_height = 9 : ui8, config.kernel_width = 9 : ui8, config.stride = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> 
tensor<4xi32> loc(#loc206) %465 = tosa.transpose %arg9, %464 : (tensor<960x1x9x9xbf16>, tensor<4xi32>) -> tensor<9x9x960x1xbf16> loc(#loc206) %466 = tosa.transpose %arg8, %463 : (tensor<1x960x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x960xbf16> loc(#loc206) %467 = tosa.depthwise_conv2d %466, %465, %arg10 { PartOfLayerName = "Conv_308", PartOfOutputName = "Conv_308", dilation = array, pad = array, stride = array} : (tensor<1x12x20x960xbf16>, tensor<9x9x960x1xbf16>, tensor<960xbf16>) -> tensor<1x12x20x960xbf16> loc(#loc206) %468 = tosa.transpose %467, %462 : (tensor<1x12x20x960xbf16>, tensor<4xi32>) -> tensor<1x960x12x20xbf16> loc(#loc206) xten_nn.output %468 : tensor<1x960x12x20xbf16> loc(#loc206) } -> tensor<1x960x12x20xbf16> loc(#loc206) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc206) } -> tensor<1x960x12x20xbf16> loc(#loc206) %351 = xten_nn.subgraph (%arg5 = %350: tensor<1x960x12x20xbf16>) attributes { LayerName = "Add_310", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1057", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Add_310", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "739", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { LayerName = "Add_310", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1057", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Add_310", Reason = "MllibKernel", Results = 
[ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "739", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_310", OutputName = "Add_310"} : (tensor<1x960x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc207) xten_nn.output %463 : tensor<1x960x12x20xbf16> loc(#loc207) } -> tensor<1x960x12x20xbf16> loc(#loc207) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc207) } -> tensor<1x960x12x20xbf16> loc(#loc207) %352 = xten_nn.subgraph (%arg5 = %351: tensor<1x960x12x20xbf16>) attributes { LayerName = "Clip_313", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "739", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_313", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "742", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { LayerName = "Clip_313", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "739", Port = "data_io.ifm", l3_extend_end = dense<0> : 
vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_313", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "742", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_313", OutputName = "Clip_313", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x960x12x20xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc208) xten_nn.output %462 : tensor<1x960x12x20xbf16> loc(#loc208) } -> tensor<1x960x12x20xbf16> loc(#loc208) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc208) } -> tensor<1x960x12x20xbf16> loc(#loc208) %353 = xten_nn.subgraph (%arg5 = %352: tensor<1x960x12x20xbf16>) attributes { LayerName = "Div_315", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "742", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Div_315", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "744", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { LayerName = "Div_315", 
Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "742", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Div_315", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "744", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_315", OutputName = "Div_315", shift = 0 : i8} : (tensor<1x960x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc209) xten_nn.output %463 : tensor<1x960x12x20xbf16> loc(#loc209) } -> tensor<1x960x12x20xbf16> loc(#loc209) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc209) } -> tensor<1x960x12x20xbf16> loc(#loc209) %354 = xten_nn.subgraph (%arg5 = %350: tensor<1x960x12x20xbf16>, %arg6 = %353: tensor<1x960x12x20xbf16>) attributes { LayerName = "Mul_316_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1057", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "735", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_316", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = 
"NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "745", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x960x12x20xbf16>, %arg8 = %arg6: tensor<1x960x12x20xbf16>) attributes { LayerName = "Mul_316_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1057", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "735", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_316", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "745", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_316", OutputName = "Mul_316", shift = 0 : i8} : (tensor<1x960x12x20xbf16>, tensor<1x960x12x20xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc210) xten_nn.output %462 : tensor<1x960x12x20xbf16> loc(#loc210) } -> tensor<1x960x12x20xbf16> loc(#loc210) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc210) } -> tensor<1x960x12x20xbf16> loc(#loc210) %355 = xten_nn.subgraph (%arg5 = %354: tensor<1x960x12x20xbf16>) attributes { LayerName = "Mul_316_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, 
L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1057", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_317_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "746", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 240]> : vector<4xindex> } ], Specializes = "Transpose4dAdf", With = { config.aie_arch = "aie2p", config.dim_0 = 12 : ui32, config.dim_1 = 120 : ui32, config.dim_2 = 20 : ui32, config.dim_3 = 8 : ui32, config.dtype = "bfloat16", config.perm = 6 : ui32 }} { %461 = tosa.reshape %arg5 {new_shape = array} : (tensor<1x960x12x20xbf16>) -> tensor<1x960x1x240xbf16> loc(#loc354) xten_nn.output %461 : tensor<1x960x1x240xbf16> loc(#loc354) } -> tensor<1x960x1x240xbf16> loc(#loc354) %356 = xten_nn.subgraph (%arg5 = %355: tensor<1x960x1x240xbf16>) attributes { LayerName = "GlobalAveragePool_317", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "745", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 240]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_317_Duplicated#1", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "746", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x1x240xbf16>) attributes { LayerName = "GlobalAveragePool_317", Operands 
= [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "745", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 240]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_317_Duplicated#1", PadValue = 0.000000e+00 : bf16, Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "746", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], Specializes = "ReduceMeanC8Bf16", Traits = { Reduce = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.full_channel = 960 : ui32, config.full_height = 1 : ui32, config.full_width = 240 : ui32, config.reduce_dim = "W" }} { %462 = xten_nn.reduce_mean %arg6 {axes = array, keepdims = 1 : i64} : (tensor<1x960x1x240xbf16>) -> tensor<1x960x1x1xbf16> loc(#loc211) xten_nn.output %462 : tensor<1x960x1x1xbf16> loc(#loc211) } -> tensor<1x960x1x1xbf16> loc(#loc211) xten_nn.output %461 : tensor<1x960x1x1xbf16> loc(#loc211) } -> tensor<1x960x1x1xbf16> loc(#loc211) %357 = xten_nn.subgraph (%arg5 = %356: tensor<1x960x1x1xbf16>, %arg6 = %44: tensor<240x960x1x1xbf16>, %arg7 = %43: tensor<240xbf16>) attributes { LayerName = "Conv_318", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "746", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> }, { Name = "745", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[240, 960, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.15.block.2.fc1.weight", UnknownDataFormat = true } ], OutputName = "Relu_319", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "748", l3_extend_end = dense<0> : 
vector<4xindex>, l3_tile_count = dense<[1, 240, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x960x1x1xbf16>, %arg9 = %arg6: tensor<240x960x1x1xbf16>, %arg10 = %arg7: tensor<240xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_318", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "746", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> }, { Name = "745", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[240, 960, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.15.block.2.fc1.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_319", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "748", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 1, 1]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true, NonNegativeOut = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 1 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 0.000000e+00 : bf16, config.lrelu_alpha_kernel = 0.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<240x960x1x1xbf16>) -> tensor<240x1x1x960xbf16> 
loc(#loc355) %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x960x1x1xbf16>) -> tensor<1x1x1x960xbf16> loc(#loc355) %464 = tosa.conv2d %463, %462, %arg10 { PartOfLayerName = "Conv_318", PartOfOutputName = "Conv_318", dilation = array, pad = array, stride = array} : (tensor<1x1x1x960xbf16>, tensor<240x1x1x960xbf16>, tensor<240xbf16>) -> tensor<1x1x1x240xbf16> loc(#loc212) %465 = tosa.clamp %464 { LayerName = "Relu_319", OutputName = "Relu_319", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x1x1x240xbf16>) -> tensor<1x1x1x240xbf16> loc(#loc213) %466 = tosa.reshape %465 {new_shape = array} : (tensor<1x1x1x240xbf16>) -> tensor<1x240x1x1xbf16> loc(#loc355) xten_nn.output %466 : tensor<1x240x1x1xbf16> loc(#loc213) } -> tensor<1x240x1x1xbf16> loc(#loc355) xten_nn.output %461 : tensor<1x240x1x1xbf16> loc(#loc355) } -> tensor<1x240x1x1xbf16> loc(#loc355) %358 = xten_nn.subgraph (%arg5 = %357: tensor<1x240x1x1xbf16>, %arg6 = %42: tensor<960x240x1x1xbf16>, %arg7 = %41: tensor<960xbf16>) attributes { LayerName = "Conv_320", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "748", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 1, 1]> : vector<4xindex> }, { Name = "747", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[960, 240, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.15.block.2.fc2.weight", UnknownDataFormat = true } ], OutputName = "Conv_320", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "749", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = 
xten_nn.subgraph (%arg8 = %arg5: tensor<1x240x1x1xbf16>, %arg9 = %arg6: tensor<960x240x1x1xbf16>, %arg10 = %arg7: tensor<960xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_320", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "748", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 1, 1]> : vector<4xindex> }, { Name = "747", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[960, 240, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.15.block.2.fc2.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_320", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "749", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<960x240x1x1xbf16>) -> tensor<960x1x1x240xbf16> loc(#loc214) %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x240x1x1xbf16>) -> tensor<1x1x1x240xbf16> loc(#loc214) %464 = tosa.conv2d %463, %462, %arg10 { PartOfLayerName = "Conv_320", PartOfOutputName = "Conv_320", 
dilation = array, pad = array, stride = array} : (tensor<1x1x1x240xbf16>, tensor<960x1x1x240xbf16>, tensor<960xbf16>) -> tensor<1x1x1x960xbf16> loc(#loc214) %465 = tosa.reshape %464 {new_shape = array} : (tensor<1x1x1x960xbf16>) -> tensor<1x960x1x1xbf16> loc(#loc214) xten_nn.output %465 : tensor<1x960x1x1xbf16> loc(#loc214) } -> tensor<1x960x1x1xbf16> loc(#loc214) xten_nn.output %461 : tensor<1x960x1x1xbf16> loc(#loc214) } -> tensor<1x960x1x1xbf16> loc(#loc214) %359 = xten_nn.subgraph (%arg5 = %358: tensor<1x960x1x1xbf16>) attributes { LayerName = "Add_322", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "749", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], OutputName = "Add_322", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "751", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x1x1xbf16>) attributes { LayerName = "Add_322", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "749", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], OutputName = "Add_322", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "751", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters 
= 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_322", OutputName = "Add_322"} : (tensor<1x960x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x960x1x1xbf16> loc(#loc215) xten_nn.output %463 : tensor<1x960x1x1xbf16> loc(#loc215) } -> tensor<1x960x1x1xbf16> loc(#loc215) xten_nn.output %461 : tensor<1x960x1x1xbf16> loc(#loc215) } -> tensor<1x960x1x1xbf16> loc(#loc215) %360 = xten_nn.subgraph (%arg5 = %359: tensor<1x960x1x1xbf16>) attributes { LayerName = "Clip_325", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "751", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], OutputName = "Clip_325", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "754", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x1x1xbf16>) attributes { LayerName = "Clip_325", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "751", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], OutputName = "Clip_325", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "754", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = 
true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_325", OutputName = "Clip_325", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x960x1x1xbf16>) -> tensor<1x960x1x1xbf16> loc(#loc216) xten_nn.output %462 : tensor<1x960x1x1xbf16> loc(#loc216) } -> tensor<1x960x1x1xbf16> loc(#loc216) xten_nn.output %461 : tensor<1x960x1x1xbf16> loc(#loc216) } -> tensor<1x960x1x1xbf16> loc(#loc216) %361 = xten_nn.subgraph (%arg5 = %360: tensor<1x960x1x1xbf16>) attributes { LayerName = "Div_327", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "754", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], OutputName = "Div_327", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "756", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x1x1xbf16>) attributes { LayerName = "Div_327", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "754", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], OutputName = "Div_327", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "756", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, 
l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_327", OutputName = "Div_327", shift = 0 : i8} : (tensor<1x960x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x960x1x1xbf16> loc(#loc217) xten_nn.output %463 : tensor<1x960x1x1xbf16> loc(#loc217) } -> tensor<1x960x1x1xbf16> loc(#loc217) xten_nn.output %461 : tensor<1x960x1x1xbf16> loc(#loc217) } -> tensor<1x960x1x1xbf16> loc(#loc217) %362 = xten_nn.subgraph (%arg5 = %361: tensor<1x960x1x1xbf16>) attributes { LayerName = "Mul_328_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "756", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], OutputName = "Mul_328_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "757", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "TileAdf", With = { config.aie_arch = "aie2p", config.dtype = "bfloat16", config.i_dim_c = 960 : ui32, config.i_dim_h = 1 : ui32, config.i_dim_n = 1 : ui32, config.i_dim_w = 1 : ui32, config.rep_dim_c = 1 : ui32, config.rep_dim_h = 12 : ui32, config.rep_dim_w = 20 : ui32 }} { %461 = tosa.tile %arg5 {multiples = array} : (tensor<1x960x1x1xbf16>) -> 
tensor<1x960x12x20xbf16> loc(#loc218) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc218) } -> tensor<1x960x12x20xbf16> loc(#loc218) %363 = xten_nn.subgraph (%arg5 = %362: tensor<1x960x12x20xbf16>, %arg6 = %354: tensor<1x960x12x20xbf16>) attributes { LayerName = "Mul_328_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "756", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "754", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_328_Duplicated#1", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "757", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x960x12x20xbf16>, %arg8 = %arg6: tensor<1x960x12x20xbf16>) attributes { LayerName = "Mul_328_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "756", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "754", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_328_Duplicated#1", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "757", Port = "data_io.ofm", 
l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_328", OutputName = "Mul_328", shift = 0 : i8} : (tensor<1x960x12x20xbf16>, tensor<1x960x12x20xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc218) xten_nn.output %462 : tensor<1x960x12x20xbf16> loc(#loc218) } -> tensor<1x960x12x20xbf16> loc(#loc218) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc218) } -> tensor<1x960x12x20xbf16> loc(#loc218) %364 = xten_nn.subgraph (%arg5 = %363: tensor<1x960x12x20xbf16>, %arg6 = %40: tensor<160x960x1x1xbf16>, %arg7 = %39: tensor<160xbf16>, %arg8 = %344: tensor<1x160x12x20xbf16>) attributes { LayerName = "Conv_329", OfmShare = 3 : index, Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "757", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { Name = "756", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[160, 960, 1, 1]> : vector<4xindex> }, { Name = "757", UnknownDataFormat = true }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "757", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> } ], OutputName = "Add_330", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "760", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph 
(%arg9 = %arg5: tensor<1x960x12x20xbf16>, %arg10 = %arg6: tensor<160x960x1x1xbf16>, %arg11 = %arg7: tensor<160xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_329", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "757", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { Name = "756", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[160, 960, 1, 1]> : vector<4xindex> }, { Name = "757", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_329", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1060", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %463 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc219) %465 = tosa.reshape %arg10 {new_shape = array} : (tensor<160x960x1x1xbf16>) -> tensor<160x1x1x960xbf16> loc(#loc219) %466 = tosa.transpose %arg9, %464 : 
(tensor<1x960x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x960xbf16> loc(#loc219) %467 = tosa.conv2d %466, %465, %arg11 { PartOfLayerName = "Conv_329", PartOfOutputName = "Conv_329", dilation = array, pad = array, stride = array} : (tensor<1x12x20x960xbf16>, tensor<160x1x1x960xbf16>, tensor<160xbf16>) -> tensor<1x12x20x160xbf16> loc(#loc219) %468 = tosa.transpose %467, %463 : (tensor<1x12x20x160xbf16>, tensor<4xi32>) -> tensor<1x160x12x20xbf16> loc(#loc219) xten_nn.output %468 : tensor<1x160x12x20xbf16> loc(#loc219) } -> tensor<1x160x12x20xbf16> loc(#loc219) %462 = xten_nn.subgraph (%arg9 = %461: tensor<1x160x12x20xbf16>, %arg10 = %arg8: tensor<1x160x12x20xbf16>) attributes { LayerName = "Add_330", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1060", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "757", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> } ], OutputName = "Add_330", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "760", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> } ], Specializes = "AddBf16", Traits = { Binary = true, Elementwise = true }, With = { config.act = 0 : ui8, config.act_type = "LINEAR", config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %463 = tosa.add %arg9, %arg10 {LayerName = "Add_330", OutputName = "Add_330"} : (tensor<1x160x12x20xbf16>, tensor<1x160x12x20xbf16>) -> tensor<1x160x12x20xbf16> loc(#loc220) xten_nn.output %463 : tensor<1x160x12x20xbf16> loc(#loc220) } -> tensor<1x160x12x20xbf16> 
loc(#loc220) xten_nn.output %462 : tensor<1x160x12x20xbf16> loc(#loc220) } -> tensor<1x160x12x20xbf16> loc(#loc356) %365 = xten_nn.subgraph (%arg5 = %364: tensor<1x160x12x20xbf16>, %arg6 = %38: tensor<960x160x1x1xbf16>, %arg7 = %37: tensor<960xbf16>) attributes { LayerName = "Conv_331", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "760", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> }, { Name = "1060", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[960, 160, 1, 1]> : vector<4xindex> }, { Name = "760", UnknownDataFormat = true } ], OutputName = "Conv_331", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1063", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x160x12x20xbf16>, %arg9 = %arg6: tensor<960x160x1x1xbf16>, %arg10 = %arg7: tensor<960xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_331", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "760", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> }, { Name = "1060", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[960, 160, 1, 1]> : vector<4xindex> }, { Name = "760", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_331", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = 
"C:8", Name = "1063", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc221) %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<960x160x1x1xbf16>) -> tensor<960x1x1x160xbf16> loc(#loc221) %465 = tosa.transpose %arg8, %463 : (tensor<1x160x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x160xbf16> loc(#loc221) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_331", PartOfOutputName = "Conv_331", dilation = array, pad = array, stride = array} : (tensor<1x12x20x160xbf16>, tensor<960x1x1x160xbf16>, tensor<960xbf16>) -> tensor<1x12x20x960xbf16> loc(#loc221) %467 = tosa.transpose %466, %462 : (tensor<1x12x20x960xbf16>, tensor<4xi32>) -> tensor<1x960x12x20xbf16> loc(#loc221) xten_nn.output %467 : tensor<1x960x12x20xbf16> loc(#loc221) } -> tensor<1x960x12x20xbf16> loc(#loc221) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc221) } -> tensor<1x960x12x20xbf16> loc(#loc221) %366 = xten_nn.subgraph (%arg5 = %365: tensor<1x960x12x20xbf16>) attributes { LayerName = "Add_333", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = 
"1063", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Add_333", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "764", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { LayerName = "Add_333", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1063", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Add_333", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "764", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_333", OutputName = "Add_333"} : (tensor<1x960x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc222) xten_nn.output %463 : tensor<1x960x12x20xbf16> loc(#loc222) } -> tensor<1x960x12x20xbf16> loc(#loc222) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc222) } -> tensor<1x960x12x20xbf16> loc(#loc222) %367 = xten_nn.subgraph (%arg5 = %366: tensor<1x960x12x20xbf16>) 
attributes { LayerName = "Clip_336", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "764", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_336", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "767", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { LayerName = "Clip_336", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "764", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_336", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "767", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_336", OutputName = "Clip_336", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x960x12x20xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc223) xten_nn.output %462 : tensor<1x960x12x20xbf16> loc(#loc223) } -> tensor<1x960x12x20xbf16> loc(#loc223) xten_nn.output 
%461 : tensor<1x960x12x20xbf16> loc(#loc223) } -> tensor<1x960x12x20xbf16> loc(#loc223) %368 = xten_nn.subgraph (%arg5 = %367: tensor<1x960x12x20xbf16>) attributes { LayerName = "Div_338", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "767", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Div_338", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "769", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { LayerName = "Div_338", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "767", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Div_338", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "769", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_338", OutputName = "Div_338", shift = 0 : i8} : (tensor<1x960x12x20xbf16>, tensor<1x1x1x1xbf16>) -> 
tensor<1x960x12x20xbf16> loc(#loc224) xten_nn.output %463 : tensor<1x960x12x20xbf16> loc(#loc224) } -> tensor<1x960x12x20xbf16> loc(#loc224) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc224) } -> tensor<1x960x12x20xbf16> loc(#loc224) %369 = xten_nn.subgraph (%arg5 = %365: tensor<1x960x12x20xbf16>, %arg6 = %368: tensor<1x960x12x20xbf16>) attributes { LayerName = "Mul_339_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1063", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "760", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_339", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "770", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x960x12x20xbf16>, %arg8 = %arg6: tensor<1x960x12x20xbf16>) attributes { LayerName = "Mul_339_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1063", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "760", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_339", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = 
"HCWN", L3Vectorization = "C:8", Name = "770", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_339", OutputName = "Mul_339", shift = 0 : i8} : (tensor<1x960x12x20xbf16>, tensor<1x960x12x20xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc225) xten_nn.output %462 : tensor<1x960x12x20xbf16> loc(#loc225) } -> tensor<1x960x12x20xbf16> loc(#loc225) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc225) } -> tensor<1x960x12x20xbf16> loc(#loc225) %370 = xten_nn.subgraph (%arg5 = %369: tensor<1x960x12x20xbf16>) attributes { LayerName = "Mul_339_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1063", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_342_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "774", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 240]> : vector<4xindex> } ], Specializes = "Transpose4dAdf", With = { config.aie_arch = "aie2p", config.dim_0 = 12 : ui32, config.dim_1 = 120 : ui32, config.dim_2 = 20 : ui32, config.dim_3 = 8 : ui32, config.dtype = "bfloat16", config.perm = 6 : ui32 }} { %461 = tosa.reshape %arg5 {new_shape = array} : (tensor<1x960x12x20xbf16>) -> tensor<1x960x1x240xbf16> loc(#loc357) xten_nn.output %461 : tensor<1x960x1x240xbf16> loc(#loc357) } -> 
tensor<1x960x1x240xbf16> loc(#loc357) %371 = xten_nn.subgraph (%arg5 = %370: tensor<1x960x1x240xbf16>) attributes { LayerName = "GlobalAveragePool_342", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "770", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 240]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_342_Duplicated#1", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "774", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x1x240xbf16>) attributes { LayerName = "GlobalAveragePool_342", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "770", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 240]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_342_Duplicated#1", PadValue = 0.000000e+00 : bf16, Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "774", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], Specializes = "ReduceMeanC8Bf16", Traits = { Reduce = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.full_channel = 960 : ui32, config.full_height = 1 : ui32, config.full_width = 240 : ui32, config.reduce_dim = "W" }} { %462 = xten_nn.reduce_mean %arg6 {axes = array, keepdims = 1 : i64} : (tensor<1x960x1x240xbf16>) -> tensor<1x960x1x1xbf16> loc(#loc226) xten_nn.output %462 : tensor<1x960x1x1xbf16> loc(#loc226) } -> tensor<1x960x1x1xbf16> loc(#loc226) 
xten_nn.output %461 : tensor<1x960x1x1xbf16> loc(#loc226) } -> tensor<1x960x1x1xbf16> loc(#loc226) %372 = xten_nn.subgraph (%arg5 = %371: tensor<1x960x1x1xbf16>, %arg6 = %36: tensor<128x960x1x1xbf16>, %arg7 = %35: tensor<128xbf16>) attributes { LayerName = "Conv_343", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "774", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> }, { Name = "770", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[128, 960, 1, 1]> : vector<4xindex> }, { Name = "aspp.aspp2.1.weight", UnknownDataFormat = true } ], OutputName = "Conv_343", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "775", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x960x1x1xbf16>, %arg9 = %arg6: tensor<128x960x1x1xbf16>, %arg10 = %arg7: tensor<128xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_343", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "774", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> }, { Name = "770", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[128, 960, 1, 1]> : vector<4xindex> }, { Name = "aspp.aspp2.1.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_343", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization 
= "C:8", Name = "775", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 1, 1]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<128x960x1x1xbf16>) -> tensor<128x1x1x960xbf16> loc(#loc227) %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x960x1x1xbf16>) -> tensor<1x1x1x960xbf16> loc(#loc227) %464 = tosa.conv2d %463, %462, %arg10 { PartOfLayerName = "Conv_343", PartOfOutputName = "Conv_343", dilation = array, pad = array, stride = array} : (tensor<1x1x1x960xbf16>, tensor<128x1x1x960xbf16>, tensor<128xbf16>) -> tensor<1x1x1x128xbf16> loc(#loc227) %465 = tosa.reshape %464 {new_shape = array} : (tensor<1x1x1x128xbf16>) -> tensor<1x128x1x1xbf16> loc(#loc227) xten_nn.output %465 : tensor<1x128x1x1xbf16> loc(#loc227) } -> tensor<1x128x1x1xbf16> loc(#loc227) xten_nn.output %461 : tensor<1x128x1x1xbf16> loc(#loc227) } -> tensor<1x128x1x1xbf16> loc(#loc227) %373 = xten_nn.subgraph (%arg5 = %372: tensor<1x128x1x1xbf16>) attributes { LayerName = "Sigmoid_344", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "775", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 1, 1]> : vector<4xindex> } ], OutputName = "Sigmoid_344", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results 
= [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "776", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x128x1x1xbf16>) attributes { LayerName = "Sigmoid_344", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "775", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 1, 1]> : vector<4xindex> } ], OutputName = "Sigmoid_344", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "776", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 1, 1]> : vector<4xindex> } ], Specializes = "SigmoidTemplatedBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.ENABLE_FP16_AS_BF16 = 0 : ui8, config.aie_arch = "aie2p", config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.sigmoid %arg6 {LayerName = "Sigmoid_344", OutputName = "Sigmoid_344"} : (tensor<1x128x1x1xbf16>) -> tensor<1x128x1x1xbf16> loc(#loc228) xten_nn.output %462 : tensor<1x128x1x1xbf16> loc(#loc228) } -> tensor<1x128x1x1xbf16> loc(#loc228) xten_nn.output %461 : tensor<1x128x1x1xbf16> loc(#loc228) } -> tensor<1x128x1x1xbf16> loc(#loc228) %374 = xten_nn.subgraph (%arg5 = %373: tensor<1x128x1x1xbf16>) attributes { LayerName = "Mul_345_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "773", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 1, 1]> : vector<4xindex> } ], 
OutputName = "Mul_345_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "777", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> } ], Specializes = "TileAdf", With = { config.aie_arch = "aie2p", config.dtype = "bfloat16", config.i_dim_c = 128 : ui32, config.i_dim_h = 1 : ui32, config.i_dim_n = 1 : ui32, config.i_dim_w = 1 : ui32, config.rep_dim_c = 1 : ui32, config.rep_dim_h = 12 : ui32, config.rep_dim_w = 20 : ui32 }} { %461 = tosa.tile %arg5 {multiples = array} : (tensor<1x128x1x1xbf16>) -> tensor<1x128x12x20xbf16> loc(#loc229) xten_nn.output %461 : tensor<1x128x12x20xbf16> loc(#loc229) } -> tensor<1x128x12x20xbf16> loc(#loc229) %375 = xten_nn.subgraph (%arg5 = %369: tensor<1x960x12x20xbf16>, %arg6 = %34: tensor<128x960x1x1xbf16>, %arg7 = %33: tensor<128xbf16>, %arg8 = %374: tensor<1x128x12x20xbf16>) attributes { LayerName = "Conv_340", OfmShare = 3 : index, Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "770", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { Name = "1063", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[128, 960, 1, 1]> : vector<4xindex> }, { Name = "770", UnknownDataFormat = true }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1066", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_345_Duplicated#1", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "777", l3_extend_end = dense<0> : vector<4xindex>, 
l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg9 = %arg5: tensor<1x960x12x20xbf16>, %arg10 = %arg6: tensor<128x960x1x1xbf16>, %arg11 = %arg7: tensor<128xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_340", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "770", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { Name = "1063", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[128, 960, 1, 1]> : vector<4xindex> }, { Name = "770", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_341", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "773", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true, NonNegativeOut = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 1 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 0.000000e+00 : bf16, config.lrelu_alpha_kernel = 0.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %463 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> 
: tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc359) %465 = tosa.reshape %arg10 {new_shape = array} : (tensor<128x960x1x1xbf16>) -> tensor<128x1x1x960xbf16> loc(#loc359) %466 = tosa.transpose %arg9, %464 : (tensor<1x960x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x960xbf16> loc(#loc359) %467 = tosa.conv2d %466, %465, %arg11 { PartOfLayerName = "Conv_340", PartOfOutputName = "Conv_340", dilation = array, pad = array, stride = array} : (tensor<1x12x20x960xbf16>, tensor<128x1x1x960xbf16>, tensor<128xbf16>) -> tensor<1x12x20x128xbf16> loc(#loc230) %468 = tosa.clamp %467 { LayerName = "Relu_341", OutputName = "Relu_341", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x12x20x128xbf16>) -> tensor<1x12x20x128xbf16> loc(#loc231) %469 = tosa.transpose %468, %463 : (tensor<1x12x20x128xbf16>, tensor<4xi32>) -> tensor<1x128x12x20xbf16> loc(#loc359) xten_nn.output %469 : tensor<1x128x12x20xbf16> loc(#loc231) } -> tensor<1x128x12x20xbf16> loc(#loc359) %462 = xten_nn.subgraph (%arg9 = %461: tensor<1x128x12x20xbf16>, %arg10 = %arg8: tensor<1x128x12x20xbf16>) attributes { LayerName = "Mul_345_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "773", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1066", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_345_Duplicated#1", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "777", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = 
{ Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %463 = tosa.mul %arg9, %arg10 { LayerName = "Mul_345", OutputName = "Mul_345", shift = 0 : i8} : (tensor<1x128x12x20xbf16>, tensor<1x128x12x20xbf16>) -> tensor<1x128x12x20xbf16> loc(#loc229) xten_nn.output %463 : tensor<1x128x12x20xbf16> loc(#loc229) } -> tensor<1x128x12x20xbf16> loc(#loc229) xten_nn.output %462 : tensor<1x128x12x20xbf16> loc(#loc229) } -> tensor<1x128x12x20xbf16> loc(#loc358) %376 = xten_nn.subgraph (%arg5 = %375: tensor<1x128x12x20xbf16>) attributes { LayerName = "Split_349_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "777", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> } ], OutputName = "Split_349_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "781", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], Specializes = "SliceHCWC8Adf", With = { config.aie_arch = "aie2p", config.axis_letter = "C", config.dim_c = 128 : ui32, config.dim_h = 12 : ui32, config.dim_w = 20 : ui32, config.dtype = "bfloat16", config.end = 64 : ui32, config.start = 0 : ui32, config.step = 1 : ui32 }} { %461 = tosa.slice %arg5 { PartOfLayerName = "Split_349", PartOfOutputName = "Split_349", size = array, start = array} : (tensor<1x128x12x20xbf16>) -> tensor<1x64x12x20xbf16> loc(#loc232) xten_nn.output %461 : tensor<1x64x12x20xbf16> loc(#loc232) } -> tensor<1x64x12x20xbf16> loc(#loc232) %377 = xten_nn.subgraph (%arg5 = %375: tensor<1x128x12x20xbf16>) 
attributes { LayerName = "Split_349_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "777", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> } ], OutputName = "Split_349_Duplicated#1", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "781", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], Specializes = "SliceHCWC8Adf", With = { config.aie_arch = "aie2p", config.axis_letter = "C", config.dim_c = 128 : ui32, config.dim_h = 12 : ui32, config.dim_w = 20 : ui32, config.dtype = "bfloat16", config.end = 128 : ui32, config.start = 64 : ui32, config.step = 1 : ui32 }} { %461 = tosa.slice %arg5 { PartOfLayerName = "Split_349", PartOfOutputName = "Split_349", size = array, start = array} : (tensor<1x128x12x20xbf16>) -> tensor<1x64x12x20xbf16> loc(#loc232) xten_nn.output %461 : tensor<1x64x12x20xbf16> loc(#loc232) } -> tensor<1x64x12x20xbf16> loc(#loc232) %378 = xten_nn.subgraph (%arg5 = %377: tensor<1x64x12x20xbf16>, %arg6 = %arg4: tensor<1x64x12x20xbf16>) attributes { Axis = 1 : i32, LayerName = "Concat_350", Op = "Concat", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], 
OutputName = "Concat_350", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "PseudoOp", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "783", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> } ], current_data_format = "NCHW", data_format = "HCWN"} { %461 = tosa.concat %arg5, %arg6 { LayerName = "Concat_350", OutputName = "Concat_350", axis = 1 : i32} : (tensor<1x64x12x20xbf16>, tensor<1x64x12x20xbf16>) -> tensor<1x128x12x20xbf16> loc(#loc233) xten_nn.output %461 : tensor<1x128x12x20xbf16> loc(#loc233) } -> tensor<1x128x12x20xbf16> loc(#loc233) %379 = xten_nn.subgraph (%arg5 = %378: tensor<1x128x12x20xbf16>, %arg6 = %32: tensor<128x128x3x3xbf16>, %arg7 = %31: tensor<128xbf16>) attributes { LayerName = "Conv_351", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "783", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> }, { Name = "397", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[128, 128, 3, 3]> : vector<4xindex> }, { Name = "decoder.decode4.gru.ih.0.weight", UnknownDataFormat = true } ], OutputName = "Conv_351", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "784", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x128x12x20xbf16>, %arg9 = %arg6: tensor<128x128x3x3xbf16>, %arg10 = %arg7: tensor<128xbf16>) attributes { Dilations = array, HWPadding = [[1, 1], [1, 1]], LayerName = 
"Conv_351", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "783", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> }, { Name = "397", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[128, 128, 3, 3]> : vector<4xindex> }, { Name = "decoder.decode4.gru.ih.0.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_351", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "784", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 3 : ui8, config.ksize.width = 3 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = tosa.transpose %arg9, %463 : (tensor<128x128x3x3xbf16>, tensor<4xi32>) -> tensor<128x3x3x128xbf16> loc(#loc234) %465 = tosa.transpose %arg8, %463 : (tensor<1x128x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x128xbf16> loc(#loc234) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_351", PartOfOutputName = "Conv_351", 
dilation = array, pad = array, stride = array} : (tensor<1x12x20x128xbf16>, tensor<128x3x3x128xbf16>, tensor<128xbf16>) -> tensor<1x12x20x128xbf16> loc(#loc234) %467 = tosa.transpose %466, %462 : (tensor<1x12x20x128xbf16>, tensor<4xi32>) -> tensor<1x128x12x20xbf16> loc(#loc234) xten_nn.output %467 : tensor<1x128x12x20xbf16> loc(#loc234) } -> tensor<1x128x12x20xbf16> loc(#loc234) xten_nn.output %461 : tensor<1x128x12x20xbf16> loc(#loc234) } -> tensor<1x128x12x20xbf16> loc(#loc234) %380 = xten_nn.subgraph (%arg5 = %379: tensor<1x128x12x20xbf16>) attributes { LayerName = "Sigmoid_352", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "784", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> } ], OutputName = "Sigmoid_352", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "785", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x128x12x20xbf16>) attributes { LayerName = "Sigmoid_352", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "784", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> } ], OutputName = "Sigmoid_352", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "785", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> } ], Specializes = "SigmoidTemplatedBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { 
config.ENABLE_FP16_AS_BF16 = 0 : ui8, config.aie_arch = "aie2p", config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.sigmoid %arg6 {LayerName = "Sigmoid_352", OutputName = "Sigmoid_352"} : (tensor<1x128x12x20xbf16>) -> tensor<1x128x12x20xbf16> loc(#loc235) xten_nn.output %462 : tensor<1x128x12x20xbf16> loc(#loc235) } -> tensor<1x128x12x20xbf16> loc(#loc235) xten_nn.output %461 : tensor<1x128x12x20xbf16> loc(#loc235) } -> tensor<1x128x12x20xbf16> loc(#loc235) %381 = xten_nn.subgraph (%arg5 = %380: tensor<1x128x12x20xbf16>) attributes { LayerName = "Split_353_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "785", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> } ], OutputName = "Split_353_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "786", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], Specializes = "SliceHCWC8Adf", With = { config.aie_arch = "aie2p", config.axis_letter = "C", config.dim_c = 128 : ui32, config.dim_h = 12 : ui32, config.dim_w = 20 : ui32, config.dtype = "bfloat16", config.end = 64 : ui32, config.start = 0 : ui32, config.step = 1 : ui32 }} { %461 = tosa.slice %arg5 { PartOfLayerName = "Split_353", PartOfOutputName = "Split_353", size = array, start = array} : (tensor<1x128x12x20xbf16>) -> tensor<1x64x12x20xbf16> loc(#loc236) xten_nn.output %461 : tensor<1x64x12x20xbf16> loc(#loc236) } -> tensor<1x64x12x20xbf16> loc(#loc236) %382 = xten_nn.subgraph (%arg5 = %380: tensor<1x128x12x20xbf16>) attributes { 
LayerName = "Split_353_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "785", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> } ], OutputName = "Split_353_Duplicated#1", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "786", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], Specializes = "SliceHCWC8Adf", With = { config.aie_arch = "aie2p", config.axis_letter = "C", config.dim_c = 128 : ui32, config.dim_h = 12 : ui32, config.dim_w = 20 : ui32, config.dtype = "bfloat16", config.end = 128 : ui32, config.start = 64 : ui32, config.step = 1 : ui32 }} { %461 = tosa.slice %arg5 { PartOfLayerName = "Split_353", PartOfOutputName = "Split_353", size = array, start = array} : (tensor<1x128x12x20xbf16>) -> tensor<1x64x12x20xbf16> loc(#loc236) xten_nn.output %461 : tensor<1x64x12x20xbf16> loc(#loc236) } -> tensor<1x64x12x20xbf16> loc(#loc236) %383 = xten_nn.subgraph (%arg5 = %30: tensor<1x64x12x20xbf16>, %arg6 = %382: tensor<1x64x12x20xbf16>) attributes { LayerName = "Sub_359", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "890", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "787", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], OutputName = "Sub_359", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = 
"HCWN", L3Vectorization = "C:8", Name = "793", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x64x12x20xbf16>, %arg8 = %arg6: tensor<1x64x12x20xbf16>) attributes { LayerName = "Sub_359", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "890", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "787", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], OutputName = "Sub_359", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "793", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], Specializes = "SubBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.sub %arg7, %arg8 {LayerName = "Sub_359", OutputName = "Sub_359"} : (tensor<1x64x12x20xbf16>, tensor<1x64x12x20xbf16>) -> tensor<1x64x12x20xbf16> loc(#loc5) xten_nn.output %462 : tensor<1x64x12x20xbf16> loc(#loc5) } -> tensor<1x64x12x20xbf16> loc(#loc5) xten_nn.output %461 : tensor<1x64x12x20xbf16> loc(#loc5) } -> tensor<1x64x12x20xbf16> loc(#loc5) %384 = xten_nn.subgraph (%arg5 = %383: tensor<1x64x12x20xbf16>, %arg6 = %arg4: tensor<1x64x12x20xbf16>) attributes { LayerName = "Mul_360", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count 
= dense<[1, 64, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_360", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x64x12x20xbf16>, %arg8 = %arg6: tensor<1x64x12x20xbf16>) attributes { LayerName = "Mul_360", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_360", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_360", OutputName = "Mul_360", shift = 0 : i8} : (tensor<1x64x12x20xbf16>, tensor<1x64x12x20xbf16>) -> tensor<1x64x12x20xbf16> loc(#loc237) xten_nn.output %462 : tensor<1x64x12x20xbf16> loc(#loc237) } -> tensor<1x64x12x20xbf16> 
loc(#loc237) xten_nn.output %461 : tensor<1x64x12x20xbf16> loc(#loc237) } -> tensor<1x64x12x20xbf16> loc(#loc237) %385 = xten_nn.subgraph (%arg5 = %381: tensor<1x64x12x20xbf16>, %arg6 = %arg4: tensor<1x64x12x20xbf16>) attributes { LayerName = "Mul_354", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "786", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "785", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_354", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "788", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x64x12x20xbf16>, %arg8 = %arg6: tensor<1x64x12x20xbf16>) attributes { LayerName = "Mul_354", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "786", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "785", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_354", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "788", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : 
vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_354", OutputName = "Mul_354", shift = 0 : i8} : (tensor<1x64x12x20xbf16>, tensor<1x64x12x20xbf16>) -> tensor<1x64x12x20xbf16> loc(#loc238) xten_nn.output %462 : tensor<1x64x12x20xbf16> loc(#loc238) } -> tensor<1x64x12x20xbf16> loc(#loc238) xten_nn.output %461 : tensor<1x64x12x20xbf16> loc(#loc238) } -> tensor<1x64x12x20xbf16> loc(#loc238) %386 = xten_nn.subgraph (%arg5 = %377: tensor<1x64x12x20xbf16>, %arg6 = %385: tensor<1x64x12x20xbf16>) attributes { Axis = 1 : i32, LayerName = "Concat_355", Op = "Concat", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "782", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "788", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], OutputName = "Concat_355", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "PseudoOp", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "789", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> } ], current_data_format = "NCHW", data_format = "HCWN"} { %461 = tosa.concat %arg5, %arg6 { LayerName = "Concat_355", OutputName = "Concat_355", axis = 1 : i32} : (tensor<1x64x12x20xbf16>, tensor<1x64x12x20xbf16>) -> tensor<1x128x12x20xbf16> loc(#loc239) xten_nn.output %461 : 
tensor<1x128x12x20xbf16> loc(#loc239) } -> tensor<1x128x12x20xbf16> loc(#loc239) %387 = xten_nn.subgraph (%arg5 = %386: tensor<1x128x12x20xbf16>, %arg6 = %29: tensor<64x128x3x3xbf16>, %arg7 = %28: tensor<64xbf16>) attributes { LayerName = "Conv_356", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "789", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> }, { Name = "788", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[64, 128, 3, 3]> : vector<4xindex> }, { Name = "decoder.decode4.gru.hh.0.weight", UnknownDataFormat = true } ], OutputName = "Conv_356", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "790", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x128x12x20xbf16>, %arg9 = %arg6: tensor<64x128x3x3xbf16>, %arg10 = %arg7: tensor<64xbf16>) attributes { Dilations = array, HWPadding = [[1, 1], [1, 1]], LayerName = "Conv_356", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "789", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> }, { Name = "788", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[64, 128, 3, 3]> : vector<4xindex> }, { Name = "decoder.decode4.gru.hh.0.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_356", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", 
L3Vectorization = "C:8", Name = "790", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 3 : ui8, config.ksize.width = 3 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = tosa.transpose %arg9, %463 : (tensor<64x128x3x3xbf16>, tensor<4xi32>) -> tensor<64x3x3x128xbf16> loc(#loc240) %465 = tosa.transpose %arg8, %463 : (tensor<1x128x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x128xbf16> loc(#loc240) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_356", PartOfOutputName = "Conv_356", dilation = array, pad = array, stride = array} : (tensor<1x12x20x128xbf16>, tensor<64x3x3x128xbf16>, tensor<64xbf16>) -> tensor<1x12x20x64xbf16> loc(#loc240) %467 = tosa.transpose %466, %462 : (tensor<1x12x20x64xbf16>, tensor<4xi32>) -> tensor<1x64x12x20xbf16> loc(#loc240) xten_nn.output %467 : tensor<1x64x12x20xbf16> loc(#loc240) } -> tensor<1x64x12x20xbf16> loc(#loc240) xten_nn.output %461 : tensor<1x64x12x20xbf16> loc(#loc240) } -> tensor<1x64x12x20xbf16> loc(#loc240) %388 = xten_nn.subgraph (%arg5 = %387: tensor<1x64x12x20xbf16>) attributes { LayerName = "Tanh_357", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", 
Name = "790", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], OutputName = "Tanh_357", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "791", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x64x12x20xbf16>) attributes { LayerName = "Tanh_357", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "790", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], OutputName = "Tanh_357", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "791", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], Specializes = "TanhTemplatedBf16", Traits = { Elementwise = true, Unary = true }, With = { config.ENABLE_FP16_AS_BF16 = 0 : ui8, config.aie_arch = "aie2p", config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.tanh %arg6 {LayerName = "Tanh_357", OutputName = "Tanh_357"} : (tensor<1x64x12x20xbf16>) -> tensor<1x64x12x20xbf16> loc(#loc241) xten_nn.output %462 : tensor<1x64x12x20xbf16> loc(#loc241) } -> tensor<1x64x12x20xbf16> loc(#loc241) xten_nn.output %461 : tensor<1x64x12x20xbf16> loc(#loc241) } -> tensor<1x64x12x20xbf16> loc(#loc241) %389 = xten_nn.subgraph (%arg5 = %382: tensor<1x64x12x20xbf16>, %arg6 = %388: tensor<1x64x12x20xbf16>) attributes { LayerName = "Mul_361", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", 
L3Vectorization = "C:8", Name = "787", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "791", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_361", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "795", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x64x12x20xbf16>, %arg8 = %arg6: tensor<1x64x12x20xbf16>) attributes { LayerName = "Mul_361", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "787", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "791", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_361", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "795", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_361", OutputName = "Mul_361", shift = 0 : i8} : 
(tensor<1x64x12x20xbf16>, tensor<1x64x12x20xbf16>) -> tensor<1x64x12x20xbf16> loc(#loc242) xten_nn.output %462 : tensor<1x64x12x20xbf16> loc(#loc242) } -> tensor<1x64x12x20xbf16> loc(#loc242) xten_nn.output %461 : tensor<1x64x12x20xbf16> loc(#loc242) } -> tensor<1x64x12x20xbf16> loc(#loc242) %390 = xten_nn.subgraph (%arg5 = %384: tensor<1x64x12x20xbf16>, %arg6 = %389: tensor<1x64x12x20xbf16>) attributes { LayerName = "Add_362", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "794", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "793", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], OutputName = "Add_362", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "796", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x64x12x20xbf16>, %arg8 = %arg6: tensor<1x64x12x20xbf16>) attributes { LayerName = "Add_362", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "794", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "793", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], OutputName = "Add_362", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", 
L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "796", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], Specializes = "AddBf16", Traits = { Binary = true, Elementwise = true }, With = { config.act = 0 : ui8, config.act_type = "LINEAR", config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.add %arg7, %arg8 {LayerName = "Add_362", OutputName = "Add_362"} : (tensor<1x64x12x20xbf16>, tensor<1x64x12x20xbf16>) -> tensor<1x64x12x20xbf16> loc(#loc243) xten_nn.output %462 : tensor<1x64x12x20xbf16> loc(#loc243) } -> tensor<1x64x12x20xbf16> loc(#loc243) xten_nn.output %461 : tensor<1x64x12x20xbf16> loc(#loc243) } -> tensor<1x64x12x20xbf16> loc(#loc243) %391 = xten_nn.subgraph (%arg5 = %376: tensor<1x64x12x20xbf16>, %arg6 = %390: tensor<1x64x12x20xbf16>) attributes { Axis = 1 : i32, LayerName = "Concat_363", Op = "Concat", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "781", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "777", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], OutputName = "Concat_363", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "PseudoOp", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "797", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> } ], current_data_format = "NCHW", data_format = "HCWN"} { %461 = tosa.concat %arg5, 
%arg6 { LayerName = "Concat_363", OutputName = "Concat_363", axis = 1 : i32} : (tensor<1x64x12x20xbf16>, tensor<1x64x12x20xbf16>) -> tensor<1x128x12x20xbf16> loc(#loc244) xten_nn.output %461 : tensor<1x128x12x20xbf16> loc(#loc244) } -> tensor<1x128x12x20xbf16> loc(#loc244) %392 = xten_nn.subgraph (%arg5 = %391: tensor<1x128x12x20xbf16>) attributes { LayerName = "Resize_365", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "797", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> } ], OutputName = "Resize_365", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "802", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 24, 40]> : vector<4xindex> } ], Specializes = "ResizeAdf", With = { config.co_trans_mode = 1 : ui32, config.dim_0 = 1 : ui32, config.dim_1 = 128 : ui32, config.dim_2 = 12 : ui32, config.dim_3 = 20 : ui32, config.dtype = "bfloat16", config.mode = 1 : ui32, config.nearest_mode = 0 : ui32, config.output_H = 24 : ui32, config.output_W = 40 : ui32 }} { %461 = xten_nn.resize %arg5 { LayerName = "Resize_365", OutputName = "Resize_365", coordinate_transformation_mode = 1 : i64, mode = 1 : i64, nearest_mode = 0 : i64, scales = array} : (tensor<1x128x12x20xbf16>) -> tensor<1x128x24x40xbf16> loc(#loc245) xten_nn.output %461 : tensor<1x128x24x40xbf16> loc(#loc245) } -> tensor<1x128x24x40xbf16> loc(#loc245) %393 = xten_nn.subgraph (%arg5 = %392: tensor<1x128x24x40xbf16>) attributes { LayerName = "Slice_371", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "802", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 24, 40]> : 
vector<4xindex> } ], OutputName = "Slice_371", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "812", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 23, 40]> : vector<4xindex> } ], Specializes = "SliceHCWC8Adf", With = { config.aie_arch = "aie2p", config.axis_letter = "H", config.dim_c = 128 : ui32, config.dim_h = 24 : ui32, config.dim_w = 40 : ui32, config.dtype = "bfloat16", config.end = 23 : ui32, config.start = 0 : ui32, config.step = 1 : ui32 }} { %461 = tosa.slice %arg5 { LayerName = "Slice_371", OutputName = "Slice_371", size = array, start = array} : (tensor<1x128x24x40xbf16>) -> tensor<1x128x23x40xbf16> loc(#loc246) xten_nn.output %461 : tensor<1x128x23x40xbf16> loc(#loc246) } -> tensor<1x128x23x40xbf16> loc(#loc246) %394 = xten_nn.subgraph (%arg5 = %393: tensor<1x128x23x40xbf16>, %arg6 = %220: tensor<1x40x23x40xbf16>, %arg7 = %169: tensor<1x3x23x40xbf16>) attributes { Axis = 1 : i32, LayerName = "Concat_372", Op = "Concat", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "812", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "802", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "496", Port = "data_io.ifm3", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 3, 23, 40]> : vector<4xindex> } ], OutputName = 
"Concat_372", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "PseudoOp", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "813", Port = "data_io.ofm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 171, 23, 40]> : vector<4xindex> } ], current_data_format = "NCHW", data_format = "HCWN"} { %461 = tosa.concat %arg5, %arg6, %arg7 { LayerName = "Concat_372", OutputName = "Concat_372", axis = 1 : i32} : (tensor<1x128x23x40xbf16>, tensor<1x40x23x40xbf16>, tensor<1x3x23x40xbf16>) -> tensor<1x171x23x40xbf16> loc(#loc247) xten_nn.output %461 : tensor<1x171x23x40xbf16> loc(#loc247) } -> tensor<1x171x23x40xbf16> loc(#loc247) %395 = xten_nn.subgraph (%arg5 = %394: tensor<1x171x23x40xbf16>, %arg6 = %27: tensor<80x171x3x3xbf16>, %arg7 = %26: tensor<80xbf16>) attributes { LayerName = "Conv_373", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "813", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 171, 23, 40]> : vector<4xindex> }, { Name = "812", UnknownDataFormat = true, l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[80, 171, 3, 3]> : vector<4xindex> }, { Name = "813", UnknownDataFormat = true } ], OutputName = "Relu_374", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "816", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x171x23x40xbf16>, %arg9 = %arg6: tensor<80x171x3x3xbf16>, %arg10 = %arg7: tensor<80xbf16>) attributes { Dilations = array, HWPadding = [[1, 1], [1, 
1]], LayerName = "Conv_373", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "813", Port = "data_io.ifm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 171, 23, 40]> : vector<4xindex> }, { Name = "812", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[80, 171, 3, 3]> : vector<4xindex> }, { Name = "813", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_374", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "816", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true, NonNegativeOut = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 1 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 3 : ui8, config.ksize.width = 3 : ui8, config.lrelu_alpha = 0.000000e+00 : bf16, config.lrelu_alpha_kernel = 0.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = tosa.transpose %arg9, %463 : (tensor<80x171x3x3xbf16>, tensor<4xi32>) -> tensor<80x3x3x171xbf16> loc(#loc360) %465 = tosa.transpose %arg8, %463 : (tensor<1x171x23x40xbf16>, tensor<4xi32>) -> tensor<1x23x40x171xbf16> loc(#loc360) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_373", 
PartOfOutputName = "Conv_373", dilation = array, pad = array, stride = array} : (tensor<1x23x40x171xbf16>, tensor<80x3x3x171xbf16>, tensor<80xbf16>) -> tensor<1x23x40x80xbf16> loc(#loc248) %467 = tosa.clamp %466 { LayerName = "Relu_374", OutputName = "Relu_374", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x23x40x80xbf16>) -> tensor<1x23x40x80xbf16> loc(#loc249) %468 = tosa.transpose %467, %462 : (tensor<1x23x40x80xbf16>, tensor<4xi32>) -> tensor<1x80x23x40xbf16> loc(#loc360) xten_nn.output %468 : tensor<1x80x23x40xbf16> loc(#loc249) } -> tensor<1x80x23x40xbf16> loc(#loc360) xten_nn.output %461 : tensor<1x80x23x40xbf16> loc(#loc360) } -> tensor<1x80x23x40xbf16> loc(#loc360) %396 = xten_nn.subgraph (%arg5 = %395: tensor<1x80x23x40xbf16>) attributes { LayerName = "Split_375_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "816", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> } ], OutputName = "Split_375_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "817", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], Specializes = "SliceHCWC8Adf", With = { config.aie_arch = "aie2p", config.axis_letter = "C", config.dim_c = 80 : ui32, config.dim_h = 23 : ui32, config.dim_w = 40 : ui32, config.dtype = "bfloat16", config.end = 40 : ui32, config.start = 0 : ui32, config.step = 1 : ui32 }} { %461 = tosa.slice %arg5 { PartOfLayerName = "Split_375", PartOfOutputName = "Split_375", size = array, start = array} : (tensor<1x80x23x40xbf16>) -> 
tensor<1x40x23x40xbf16> loc(#loc250) xten_nn.output %461 : tensor<1x40x23x40xbf16> loc(#loc250) } -> tensor<1x40x23x40xbf16> loc(#loc250) %397 = xten_nn.subgraph (%arg5 = %395: tensor<1x80x23x40xbf16>) attributes { LayerName = "Split_375_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "816", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> } ], OutputName = "Split_375_Duplicated#1", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "817", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], Specializes = "SliceHCWC8Adf", With = { config.aie_arch = "aie2p", config.axis_letter = "C", config.dim_c = 80 : ui32, config.dim_h = 23 : ui32, config.dim_w = 40 : ui32, config.dtype = "bfloat16", config.end = 80 : ui32, config.start = 40 : ui32, config.step = 1 : ui32 }} { %461 = tosa.slice %arg5 { PartOfLayerName = "Split_375", PartOfOutputName = "Split_375", size = array, start = array} : (tensor<1x80x23x40xbf16>) -> tensor<1x40x23x40xbf16> loc(#loc250) xten_nn.output %461 : tensor<1x40x23x40xbf16> loc(#loc250) } -> tensor<1x40x23x40xbf16> loc(#loc250) %398 = xten_nn.subgraph (%arg5 = %397: tensor<1x40x23x40xbf16>, %arg6 = %arg3: tensor<1x40x23x40xbf16>) attributes { Axis = 1 : i32, LayerName = "Concat_376", Op = "Concat", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", 
L3Vectorization = "C:8", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], OutputName = "Concat_376", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "PseudoOp", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "819", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> } ], current_data_format = "NCHW", data_format = "HCWN"} { %461 = tosa.concat %arg5, %arg6 { LayerName = "Concat_376", OutputName = "Concat_376", axis = 1 : i32} : (tensor<1x40x23x40xbf16>, tensor<1x40x23x40xbf16>) -> tensor<1x80x23x40xbf16> loc(#loc251) xten_nn.output %461 : tensor<1x80x23x40xbf16> loc(#loc251) } -> tensor<1x80x23x40xbf16> loc(#loc251) %399 = xten_nn.subgraph (%arg5 = %398: tensor<1x80x23x40xbf16>, %arg6 = %25: tensor<80x80x3x3xbf16>, %arg7 = %24: tensor<80xbf16>) attributes { LayerName = "Conv_377", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "819", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> }, { Name = "396", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[80, 80, 3, 3]> : vector<4xindex> }, { Name = "decoder.decode3.gru.ih.0.weight", UnknownDataFormat = true } ], OutputName = "Conv_377", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "820", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 
= %arg5: tensor<1x80x23x40xbf16>, %arg9 = %arg6: tensor<80x80x3x3xbf16>, %arg10 = %arg7: tensor<80xbf16>) attributes { Dilations = array, HWPadding = [[1, 1], [1, 1]], LayerName = "Conv_377", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "819", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> }, { Name = "396", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[80, 80, 3, 3]> : vector<4xindex> }, { Name = "decoder.decode3.gru.ih.0.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_377", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "820", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 3 : ui8, config.ksize.width = 3 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = tosa.transpose %arg9, %463 : (tensor<80x80x3x3xbf16>, tensor<4xi32>) -> tensor<80x3x3x80xbf16> loc(#loc252) %465 = tosa.transpose %arg8, %463 : 
(tensor<1x80x23x40xbf16>, tensor<4xi32>) -> tensor<1x23x40x80xbf16> loc(#loc252) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_377", PartOfOutputName = "Conv_377", dilation = array, pad = array, stride = array} : (tensor<1x23x40x80xbf16>, tensor<80x3x3x80xbf16>, tensor<80xbf16>) -> tensor<1x23x40x80xbf16> loc(#loc252) %467 = tosa.transpose %466, %462 : (tensor<1x23x40x80xbf16>, tensor<4xi32>) -> tensor<1x80x23x40xbf16> loc(#loc252) xten_nn.output %467 : tensor<1x80x23x40xbf16> loc(#loc252) } -> tensor<1x80x23x40xbf16> loc(#loc252) xten_nn.output %461 : tensor<1x80x23x40xbf16> loc(#loc252) } -> tensor<1x80x23x40xbf16> loc(#loc252) %400 = xten_nn.subgraph (%arg5 = %399: tensor<1x80x23x40xbf16>) attributes { LayerName = "Sigmoid_378", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "820", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> } ], OutputName = "Sigmoid_378", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "821", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x80x23x40xbf16>) attributes { LayerName = "Sigmoid_378", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "820", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> } ], OutputName = "Sigmoid_378", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "821", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 
23, 40]> : vector<4xindex> } ], Specializes = "SigmoidTemplatedBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.ENABLE_FP16_AS_BF16 = 0 : ui8, config.aie_arch = "aie2p", config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.sigmoid %arg6 {LayerName = "Sigmoid_378", OutputName = "Sigmoid_378"} : (tensor<1x80x23x40xbf16>) -> tensor<1x80x23x40xbf16> loc(#loc253) xten_nn.output %462 : tensor<1x80x23x40xbf16> loc(#loc253) } -> tensor<1x80x23x40xbf16> loc(#loc253) xten_nn.output %461 : tensor<1x80x23x40xbf16> loc(#loc253) } -> tensor<1x80x23x40xbf16> loc(#loc253) %401 = xten_nn.subgraph (%arg5 = %400: tensor<1x80x23x40xbf16>) attributes { LayerName = "Split_379_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "821", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> } ], OutputName = "Split_379_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "822", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], Specializes = "SliceHCWC8Adf", With = { config.aie_arch = "aie2p", config.axis_letter = "C", config.dim_c = 80 : ui32, config.dim_h = 23 : ui32, config.dim_w = 40 : ui32, config.dtype = "bfloat16", config.end = 40 : ui32, config.start = 0 : ui32, config.step = 1 : ui32 }} { %461 = tosa.slice %arg5 { PartOfLayerName = "Split_379", PartOfOutputName = "Split_379", size = array, start = array} : (tensor<1x80x23x40xbf16>) -> tensor<1x40x23x40xbf16> loc(#loc254) xten_nn.output %461 : tensor<1x40x23x40xbf16> 
loc(#loc254) } -> tensor<1x40x23x40xbf16> loc(#loc254) %402 = xten_nn.subgraph (%arg5 = %400: tensor<1x80x23x40xbf16>) attributes { LayerName = "Split_379_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "821", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> } ], OutputName = "Split_379_Duplicated#1", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "822", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], Specializes = "SliceHCWC8Adf", With = { config.aie_arch = "aie2p", config.axis_letter = "C", config.dim_c = 80 : ui32, config.dim_h = 23 : ui32, config.dim_w = 40 : ui32, config.dtype = "bfloat16", config.end = 80 : ui32, config.start = 40 : ui32, config.step = 1 : ui32 }} { %461 = tosa.slice %arg5 { PartOfLayerName = "Split_379", PartOfOutputName = "Split_379", size = array, start = array} : (tensor<1x80x23x40xbf16>) -> tensor<1x40x23x40xbf16> loc(#loc254) xten_nn.output %461 : tensor<1x40x23x40xbf16> loc(#loc254) } -> tensor<1x40x23x40xbf16> loc(#loc254) %403 = xten_nn.subgraph (%arg5 = %23: tensor<1x40x23x40xbf16>, %arg6 = %402: tensor<1x40x23x40xbf16>) attributes { LayerName = "Sub_385", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "890", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "823", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], OutputName = "Sub_385", Overlay = 
"4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "829", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x40x23x40xbf16>, %arg8 = %arg6: tensor<1x40x23x40xbf16>) attributes { LayerName = "Sub_385", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "890", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "823", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], OutputName = "Sub_385", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "829", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], Specializes = "SubBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.sub %arg7, %arg8 {LayerName = "Sub_385", OutputName = "Sub_385"} : (tensor<1x40x23x40xbf16>, tensor<1x40x23x40xbf16>) -> tensor<1x40x23x40xbf16> loc(#loc4) xten_nn.output %462 : tensor<1x40x23x40xbf16> loc(#loc4) } -> tensor<1x40x23x40xbf16> loc(#loc4) xten_nn.output %461 : tensor<1x40x23x40xbf16> loc(#loc4) } -> tensor<1x40x23x40xbf16> loc(#loc4) %404 = xten_nn.subgraph (%arg5 = %403: tensor<1x40x23x40xbf16>, %arg6 = %arg3: tensor<1x40x23x40xbf16>) attributes { LayerName = "Mul_386", Operands = [ { 
CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], OutputName = "Mul_386", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x40x23x40xbf16>, %arg8 = %arg6: tensor<1x40x23x40xbf16>) attributes { LayerName = "Mul_386", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], OutputName = "Mul_386", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_386", OutputName = "Mul_386", shift = 0 : i8} : (tensor<1x40x23x40xbf16>, 
tensor<1x40x23x40xbf16>) -> tensor<1x40x23x40xbf16> loc(#loc255) xten_nn.output %462 : tensor<1x40x23x40xbf16> loc(#loc255) } -> tensor<1x40x23x40xbf16> loc(#loc255) xten_nn.output %461 : tensor<1x40x23x40xbf16> loc(#loc255) } -> tensor<1x40x23x40xbf16> loc(#loc255) %405 = xten_nn.subgraph (%arg5 = %401: tensor<1x40x23x40xbf16>, %arg6 = %arg3: tensor<1x40x23x40xbf16>) attributes { LayerName = "Mul_380", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "822", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "821", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], OutputName = "Mul_380", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "824", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x40x23x40xbf16>, %arg8 = %arg6: tensor<1x40x23x40xbf16>) attributes { LayerName = "Mul_380", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "822", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "821", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], OutputName = "Mul_380", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", 
L3Vectorization = "C:8", Name = "824", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_380", OutputName = "Mul_380", shift = 0 : i8} : (tensor<1x40x23x40xbf16>, tensor<1x40x23x40xbf16>) -> tensor<1x40x23x40xbf16> loc(#loc256) xten_nn.output %462 : tensor<1x40x23x40xbf16> loc(#loc256) } -> tensor<1x40x23x40xbf16> loc(#loc256) xten_nn.output %461 : tensor<1x40x23x40xbf16> loc(#loc256) } -> tensor<1x40x23x40xbf16> loc(#loc256) %406 = xten_nn.subgraph (%arg5 = %397: tensor<1x40x23x40xbf16>, %arg6 = %405: tensor<1x40x23x40xbf16>) attributes { Axis = 1 : i32, LayerName = "Concat_381", Op = "Concat", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "818", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "824", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], OutputName = "Concat_381", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "PseudoOp", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "825", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> } ], current_data_format = "NCHW", data_format = "HCWN"} { %461 = tosa.concat %arg5, %arg6 { LayerName = "Concat_381", OutputName = 
"Concat_381", axis = 1 : i32} : (tensor<1x40x23x40xbf16>, tensor<1x40x23x40xbf16>) -> tensor<1x80x23x40xbf16> loc(#loc257) xten_nn.output %461 : tensor<1x80x23x40xbf16> loc(#loc257) } -> tensor<1x80x23x40xbf16> loc(#loc257) %407 = xten_nn.subgraph (%arg5 = %406: tensor<1x80x23x40xbf16>, %arg6 = %22: tensor<40x80x3x3xbf16>, %arg7 = %21: tensor<40xbf16>) attributes { LayerName = "Conv_382", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "825", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> }, { Name = "824", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[40, 80, 3, 3]> : vector<4xindex> }, { Name = "decoder.decode3.gru.hh.0.weight", UnknownDataFormat = true } ], OutputName = "Conv_382", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "826", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x80x23x40xbf16>, %arg9 = %arg6: tensor<40x80x3x3xbf16>, %arg10 = %arg7: tensor<40xbf16>) attributes { Dilations = array, HWPadding = [[1, 1], [1, 1]], LayerName = "Conv_382", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "825", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> }, { Name = "824", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[40, 80, 3, 3]> : vector<4xindex> }, { Name = "decoder.decode3.gru.hh.0.weight", Port = "data_io.wts", SubPort = "bias", 
UnknownDataFormat = true } ], OutputName = "Conv_382", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "826", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 3 : ui8, config.ksize.width = 3 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = tosa.transpose %arg9, %463 : (tensor<40x80x3x3xbf16>, tensor<4xi32>) -> tensor<40x3x3x80xbf16> loc(#loc258) %465 = tosa.transpose %arg8, %463 : (tensor<1x80x23x40xbf16>, tensor<4xi32>) -> tensor<1x23x40x80xbf16> loc(#loc258) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_382", PartOfOutputName = "Conv_382", dilation = array, pad = array, stride = array} : (tensor<1x23x40x80xbf16>, tensor<40x3x3x80xbf16>, tensor<40xbf16>) -> tensor<1x23x40x40xbf16> loc(#loc258) %467 = tosa.transpose %466, %462 : (tensor<1x23x40x40xbf16>, tensor<4xi32>) -> tensor<1x40x23x40xbf16> loc(#loc258) xten_nn.output %467 : tensor<1x40x23x40xbf16> loc(#loc258) } -> tensor<1x40x23x40xbf16> loc(#loc258) xten_nn.output %461 : tensor<1x40x23x40xbf16> loc(#loc258) } -> tensor<1x40x23x40xbf16> loc(#loc258) %408 = xten_nn.subgraph (%arg5 = %407: 
tensor<1x40x23x40xbf16>) attributes { LayerName = "Tanh_383", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "826", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], OutputName = "Tanh_383", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "827", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x40x23x40xbf16>) attributes { LayerName = "Tanh_383", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "826", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], OutputName = "Tanh_383", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "827", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], Specializes = "TanhTemplatedBf16", Traits = { Elementwise = true, Unary = true }, With = { config.ENABLE_FP16_AS_BF16 = 0 : ui8, config.aie_arch = "aie2p", config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.tanh %arg6 {LayerName = "Tanh_383", OutputName = "Tanh_383"} : (tensor<1x40x23x40xbf16>) -> tensor<1x40x23x40xbf16> loc(#loc259) xten_nn.output %462 : tensor<1x40x23x40xbf16> loc(#loc259) } -> tensor<1x40x23x40xbf16> loc(#loc259) xten_nn.output %461 : tensor<1x40x23x40xbf16> loc(#loc259) } -> tensor<1x40x23x40xbf16> loc(#loc259) %409 = xten_nn.subgraph (%arg5 = %402: 
tensor<1x40x23x40xbf16>, %arg6 = %408: tensor<1x40x23x40xbf16>) attributes { LayerName = "Mul_387", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "823", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "827", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], OutputName = "Mul_387", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "831", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x40x23x40xbf16>, %arg8 = %arg6: tensor<1x40x23x40xbf16>) attributes { LayerName = "Mul_387", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "823", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "827", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], OutputName = "Mul_387", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "831", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", 
config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_387", OutputName = "Mul_387", shift = 0 : i8} : (tensor<1x40x23x40xbf16>, tensor<1x40x23x40xbf16>) -> tensor<1x40x23x40xbf16> loc(#loc260) xten_nn.output %462 : tensor<1x40x23x40xbf16> loc(#loc260) } -> tensor<1x40x23x40xbf16> loc(#loc260) xten_nn.output %461 : tensor<1x40x23x40xbf16> loc(#loc260) } -> tensor<1x40x23x40xbf16> loc(#loc260) %410 = xten_nn.subgraph (%arg5 = %404: tensor<1x40x23x40xbf16>, %arg6 = %409: tensor<1x40x23x40xbf16>) attributes { LayerName = "Add_388", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "830", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "829", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], OutputName = "Add_388", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "832", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x40x23x40xbf16>, %arg8 = %arg6: tensor<1x40x23x40xbf16>) attributes { LayerName = "Add_388", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "830", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "829", Port = "data_io.ifm2", l3_extend_end = dense<0> : 
vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], OutputName = "Add_388", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "832", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], Specializes = "AddBf16", Traits = { Binary = true, Elementwise = true }, With = { config.act = 0 : ui8, config.act_type = "LINEAR", config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.add %arg7, %arg8 {LayerName = "Add_388", OutputName = "Add_388"} : (tensor<1x40x23x40xbf16>, tensor<1x40x23x40xbf16>) -> tensor<1x40x23x40xbf16> loc(#loc261) xten_nn.output %462 : tensor<1x40x23x40xbf16> loc(#loc261) } -> tensor<1x40x23x40xbf16> loc(#loc261) xten_nn.output %461 : tensor<1x40x23x40xbf16> loc(#loc261) } -> tensor<1x40x23x40xbf16> loc(#loc261) %411 = xten_nn.subgraph (%arg5 = %396: tensor<1x40x23x40xbf16>, %arg6 = %410: tensor<1x40x23x40xbf16>) attributes { Axis = 1 : i32, LayerName = "Concat_389", Op = "Concat", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "817", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "816", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], OutputName = "Concat_389", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "PseudoOp", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "833", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = 
dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> } ], current_data_format = "NCHW", data_format = "HCWN"} { %461 = tosa.concat %arg5, %arg6 { LayerName = "Concat_389", OutputName = "Concat_389", axis = 1 : i32} : (tensor<1x40x23x40xbf16>, tensor<1x40x23x40xbf16>) -> tensor<1x80x23x40xbf16> loc(#loc262) xten_nn.output %461 : tensor<1x80x23x40xbf16> loc(#loc262) } -> tensor<1x80x23x40xbf16> loc(#loc262) %412 = xten_nn.subgraph (%arg5 = %411: tensor<1x80x23x40xbf16>) attributes { LayerName = "Resize_391", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "833", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> } ], OutputName = "Resize_391", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "838", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 46, 80]> : vector<4xindex> } ], Specializes = "ResizeAdf", With = { config.co_trans_mode = 1 : ui32, config.dim_0 = 1 : ui32, config.dim_1 = 80 : ui32, config.dim_2 = 23 : ui32, config.dim_3 = 40 : ui32, config.dtype = "bfloat16", config.mode = 1 : ui32, config.nearest_mode = 0 : ui32, config.output_H = 46 : ui32, config.output_W = 80 : ui32 }} { %461 = xten_nn.resize %arg5 { LayerName = "Resize_391", OutputName = "Resize_391", coordinate_transformation_mode = 1 : i64, mode = 1 : i64, nearest_mode = 0 : i64, scales = array} : (tensor<1x80x23x40xbf16>) -> tensor<1x80x46x80xbf16> loc(#loc263) xten_nn.output %461 : tensor<1x80x46x80xbf16> loc(#loc263) } -> tensor<1x80x46x80xbf16> loc(#loc263) %413 = xten_nn.subgraph (%arg5 = %412: tensor<1x80x46x80xbf16>) attributes { LayerName = "Slice_397", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "838", 
Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 46, 80]> : vector<4xindex> } ], OutputName = "Slice_397", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "848", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 45, 80]> : vector<4xindex> } ], Specializes = "SliceHCWC8Adf", With = { config.aie_arch = "aie2p", config.axis_letter = "H", config.dim_c = 80 : ui32, config.dim_h = 46 : ui32, config.dim_w = 80 : ui32, config.dtype = "bfloat16", config.end = 45 : ui32, config.start = 0 : ui32, config.step = 1 : ui32 }} { %461 = tosa.slice %arg5 { LayerName = "Slice_397", OutputName = "Slice_397", size = array, start = array} : (tensor<1x80x46x80xbf16>) -> tensor<1x80x45x80xbf16> loc(#loc264) xten_nn.output %461 : tensor<1x80x45x80xbf16> loc(#loc264) } -> tensor<1x80x45x80xbf16> loc(#loc264) %414 = xten_nn.subgraph (%arg5 = %413: tensor<1x80x45x80xbf16>, %arg6 = %184: tensor<1x24x45x80xbf16>, %arg7 = %168: tensor<1x3x45x80xbf16>) attributes { Axis = 1 : i32, LayerName = "Concat_398", Op = "Concat", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "848", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 45, 80]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "838", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 24, 45, 80]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "434", Port = "data_io.ifm3", l3_extend_end = dense<[0, 5, 
0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 3, 45, 80]> : vector<4xindex> } ], OutputName = "Concat_398", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "PseudoOp", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "849", Port = "data_io.ofm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 107, 45, 80]> : vector<4xindex> } ], current_data_format = "NCHW", data_format = "HCWN"} { %461 = tosa.concat %arg5, %arg6, %arg7 { LayerName = "Concat_398", OutputName = "Concat_398", axis = 1 : i32} : (tensor<1x80x45x80xbf16>, tensor<1x24x45x80xbf16>, tensor<1x3x45x80xbf16>) -> tensor<1x107x45x80xbf16> loc(#loc265) xten_nn.output %461 : tensor<1x107x45x80xbf16> loc(#loc265) } -> tensor<1x107x45x80xbf16> loc(#loc265) %415 = xten_nn.subgraph (%arg5 = %414: tensor<1x107x45x80xbf16>, %arg6 = %20: tensor<40x107x3x3xbf16>, %arg7 = %19: tensor<40xbf16>) attributes { LayerName = "Conv_399", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "849", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 107, 45, 80]> : vector<4xindex> }, { Name = "848", UnknownDataFormat = true, l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[40, 107, 3, 3]> : vector<4xindex> }, { Name = "849", UnknownDataFormat = true } ], OutputName = "Relu_400", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "852", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: 
tensor<1x107x45x80xbf16>, %arg9 = %arg6: tensor<40x107x3x3xbf16>, %arg10 = %arg7: tensor<40xbf16>) attributes { Dilations = array, HWPadding = [[1, 1], [1, 1]], LayerName = "Conv_399", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "849", Port = "data_io.ifm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 107, 45, 80]> : vector<4xindex> }, { Name = "848", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[40, 107, 3, 3]> : vector<4xindex> }, { Name = "849", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_400", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "852", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true, NonNegativeOut = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 1 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 3 : ui8, config.ksize.width = 3 : ui8, config.lrelu_alpha = 0.000000e+00 : bf16, config.lrelu_alpha_kernel = 0.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = tosa.transpose %arg9, %463 : (tensor<40x107x3x3xbf16>, tensor<4xi32>) -> tensor<40x3x3x107xbf16> loc(#loc361) %465 = tosa.transpose %arg8, %463 : 
(tensor<1x107x45x80xbf16>, tensor<4xi32>) -> tensor<1x45x80x107xbf16> loc(#loc361) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_399", PartOfOutputName = "Conv_399", dilation = array, pad = array, stride = array} : (tensor<1x45x80x107xbf16>, tensor<40x3x3x107xbf16>, tensor<40xbf16>) -> tensor<1x45x80x40xbf16> loc(#loc266) %467 = tosa.clamp %466 { LayerName = "Relu_400", OutputName = "Relu_400", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x45x80x40xbf16>) -> tensor<1x45x80x40xbf16> loc(#loc267) %468 = tosa.transpose %467, %462 : (tensor<1x45x80x40xbf16>, tensor<4xi32>) -> tensor<1x40x45x80xbf16> loc(#loc361) xten_nn.output %468 : tensor<1x40x45x80xbf16> loc(#loc267) } -> tensor<1x40x45x80xbf16> loc(#loc361) xten_nn.output %461 : tensor<1x40x45x80xbf16> loc(#loc361) } -> tensor<1x40x45x80xbf16> loc(#loc361) %416 = xten_nn.subgraph (%arg5 = %415: tensor<1x40x45x80xbf16>) attributes { LayerName = "Split_401_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "852", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> } ], OutputName = "Split_401_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "853", Port = "data_io.ofm", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], Specializes = "SliceHCWC8Adf", With = { config.aie_arch = "aie2p", config.axis_letter = "C", config.dim_c = 40 : ui32, config.dim_h = 45 : ui32, config.dim_w = 80 : ui32, config.dtype = "bfloat16", config.end = 20 : ui32, config.start = 0 : ui32, config.step = 1 : ui32 }} { 
%461 = tosa.slice %arg5 { PartOfLayerName = "Split_401", PartOfOutputName = "Split_401", size = array, start = array} : (tensor<1x40x45x80xbf16>) -> tensor<1x20x45x80xbf16> loc(#loc268) xten_nn.output %461 : tensor<1x20x45x80xbf16> loc(#loc268) } -> tensor<1x20x45x80xbf16> loc(#loc268) %417 = xten_nn.subgraph (%arg5 = %415: tensor<1x40x45x80xbf16>) attributes { LayerName = "Split_401_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "852", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> } ], OutputName = "Split_401_Duplicated#1", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "853", Port = "data_io.ofm", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], Specializes = "SliceHCWC8Adf", With = { config.aie_arch = "aie2p", config.axis_letter = "C", config.dim_c = 40 : ui32, config.dim_h = 45 : ui32, config.dim_w = 80 : ui32, config.dtype = "bfloat16", config.end = 40 : ui32, config.start = 20 : ui32, config.step = 1 : ui32 }} { %461 = tosa.slice %arg5 { PartOfLayerName = "Split_401", PartOfOutputName = "Split_401", size = array, start = array} : (tensor<1x40x45x80xbf16>) -> tensor<1x20x45x80xbf16> loc(#loc268) xten_nn.output %461 : tensor<1x20x45x80xbf16> loc(#loc268) } -> tensor<1x20x45x80xbf16> loc(#loc268) %418 = xten_nn.subgraph (%arg5 = %417: tensor<1x20x45x80xbf16>, %arg6 = %arg2: tensor<1x20x45x80xbf16>) attributes { LayerName = "Concat_402", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ifm1", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, 
l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ifm2", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], OutputName = "Concat_402", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "855", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> } ], Specializes = "ConcatC8Adf", With = { config.aie_arch = "aie2p", config.dtype = "bfloat16", config.in1_dim_c = 24 : ui32, config.in1_dim_h = 45 : ui32, config.in1_dim_w = 80 : ui32, config.in2_dim_c = 24 : ui32, config.in2_dim_h = 45 : ui32, config.in2_dim_w = 80 : ui32, config.num_eff_concat_input0_size = 20 : ui32, config.num_eff_concat_input0_start = 0 : ui32, config.num_eff_concat_input1_size = 20 : ui32, config.num_eff_concat_input1_start = 0 : ui32 }} { %461 = tosa.concat %arg5, %arg6 { LayerName = "Concat_402", OutputName = "Concat_402", axis = 1 : i32} : (tensor<1x20x45x80xbf16>, tensor<1x20x45x80xbf16>) -> tensor<1x40x45x80xbf16> loc(#loc269) xten_nn.output %461 : tensor<1x40x45x80xbf16> loc(#loc269) } -> tensor<1x40x45x80xbf16> loc(#loc269) %419 = xten_nn.subgraph (%arg5 = %418: tensor<1x40x45x80xbf16>, %arg6 = %18: tensor<40x40x3x3xbf16>, %arg7 = %17: tensor<40xbf16>) attributes { LayerName = "Conv_403", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "855", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> }, { Name = "395", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, 
l3_tile_count = dense<[40, 40, 3, 3]> : vector<4xindex> }, { Name = "decoder.decode2.gru.ih.0.weight", UnknownDataFormat = true } ], OutputName = "Conv_403", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "856", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x40x45x80xbf16>, %arg9 = %arg6: tensor<40x40x3x3xbf16>, %arg10 = %arg7: tensor<40xbf16>) attributes { Dilations = array, HWPadding = [[1, 1], [1, 1]], LayerName = "Conv_403", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "855", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> }, { Name = "395", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[40, 40, 3, 3]> : vector<4xindex> }, { Name = "decoder.decode2.gru.ih.0.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_403", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "856", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", 
config.dtype_wts = "bfloat16", config.ksize.height = 3 : ui8, config.ksize.width = 3 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = tosa.transpose %arg9, %463 : (tensor<40x40x3x3xbf16>, tensor<4xi32>) -> tensor<40x3x3x40xbf16> loc(#loc270) %465 = tosa.transpose %arg8, %463 : (tensor<1x40x45x80xbf16>, tensor<4xi32>) -> tensor<1x45x80x40xbf16> loc(#loc270) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_403", PartOfOutputName = "Conv_403", dilation = array, pad = array, stride = array} : (tensor<1x45x80x40xbf16>, tensor<40x3x3x40xbf16>, tensor<40xbf16>) -> tensor<1x45x80x40xbf16> loc(#loc270) %467 = tosa.transpose %466, %462 : (tensor<1x45x80x40xbf16>, tensor<4xi32>) -> tensor<1x40x45x80xbf16> loc(#loc270) xten_nn.output %467 : tensor<1x40x45x80xbf16> loc(#loc270) } -> tensor<1x40x45x80xbf16> loc(#loc270) xten_nn.output %461 : tensor<1x40x45x80xbf16> loc(#loc270) } -> tensor<1x40x45x80xbf16> loc(#loc270) %420 = xten_nn.subgraph (%arg5 = %419: tensor<1x40x45x80xbf16>) attributes { LayerName = "Sigmoid_404", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "856", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> } ], OutputName = "Sigmoid_404", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "857", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = 
"flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x40x45x80xbf16>) attributes { LayerName = "Sigmoid_404", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "856", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> } ], OutputName = "Sigmoid_404", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "857", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> } ], Specializes = "SigmoidTemplatedBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.ENABLE_FP16_AS_BF16 = 0 : ui8, config.aie_arch = "aie2p", config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.sigmoid %arg6 {LayerName = "Sigmoid_404", OutputName = "Sigmoid_404"} : (tensor<1x40x45x80xbf16>) -> tensor<1x40x45x80xbf16> loc(#loc271) xten_nn.output %462 : tensor<1x40x45x80xbf16> loc(#loc271) } -> tensor<1x40x45x80xbf16> loc(#loc271) xten_nn.output %461 : tensor<1x40x45x80xbf16> loc(#loc271) } -> tensor<1x40x45x80xbf16> loc(#loc271) %421 = xten_nn.subgraph (%arg5 = %420: tensor<1x40x45x80xbf16>) attributes { LayerName = "Split_405_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "857", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> } ], OutputName = "Split_405_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "858", Port = "data_io.ofm", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, 
l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], Specializes = "SliceHCWC8Adf", With = { config.aie_arch = "aie2p", config.axis_letter = "C", config.dim_c = 40 : ui32, config.dim_h = 45 : ui32, config.dim_w = 80 : ui32, config.dtype = "bfloat16", config.end = 20 : ui32, config.start = 0 : ui32, config.step = 1 : ui32 }} { %461 = tosa.slice %arg5 { PartOfLayerName = "Split_405", PartOfOutputName = "Split_405", size = array, start = array} : (tensor<1x40x45x80xbf16>) -> tensor<1x20x45x80xbf16> loc(#loc272) xten_nn.output %461 : tensor<1x20x45x80xbf16> loc(#loc272) } -> tensor<1x20x45x80xbf16> loc(#loc272) %422 = xten_nn.subgraph (%arg5 = %420: tensor<1x40x45x80xbf16>) attributes { LayerName = "Split_405_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "857", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> } ], OutputName = "Split_405_Duplicated#1", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "858", Port = "data_io.ofm", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], Specializes = "SliceHCWC8Adf", With = { config.aie_arch = "aie2p", config.axis_letter = "C", config.dim_c = 40 : ui32, config.dim_h = 45 : ui32, config.dim_w = 80 : ui32, config.dtype = "bfloat16", config.end = 40 : ui32, config.start = 20 : ui32, config.step = 1 : ui32 }} { %461 = tosa.slice %arg5 { PartOfLayerName = "Split_405", PartOfOutputName = "Split_405", size = array, start = array} : (tensor<1x40x45x80xbf16>) -> tensor<1x20x45x80xbf16> loc(#loc272) xten_nn.output %461 : tensor<1x20x45x80xbf16> 
loc(#loc272) } -> tensor<1x20x45x80xbf16> loc(#loc272) %423 = xten_nn.subgraph (%arg5 = %16: tensor<1x20x45x80xbf16>, %arg6 = %422: tensor<1x20x45x80xbf16>) attributes { LayerName = "Sub_411", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "890", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "859", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], OutputName = "Sub_411", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "865", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x20x45x80xbf16>, %arg8 = %arg6: tensor<1x20x45x80xbf16>) attributes { LayerName = "Sub_411", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "890", Port = "data_io.ifm1", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "859", Port = "data_io.ifm2", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], OutputName = "Sub_411", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "865", Port = "data_io.ofm", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], Specializes = "SubBf16", 
Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.sub %arg7, %arg8 {LayerName = "Sub_411", OutputName = "Sub_411"} : (tensor<1x20x45x80xbf16>, tensor<1x20x45x80xbf16>) -> tensor<1x20x45x80xbf16> loc(#loc3) xten_nn.output %462 : tensor<1x20x45x80xbf16> loc(#loc3) } -> tensor<1x20x45x80xbf16> loc(#loc3) xten_nn.output %461 : tensor<1x20x45x80xbf16> loc(#loc3) } -> tensor<1x20x45x80xbf16> loc(#loc3) %424 = xten_nn.subgraph (%arg5 = %423: tensor<1x20x45x80xbf16>, %arg6 = %arg2: tensor<1x20x45x80xbf16>) attributes { LayerName = "Mul_412", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], OutputName = "Mul_412", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x20x45x80xbf16>, %arg8 = %arg6: tensor<1x20x45x80xbf16>) attributes { LayerName = "Mul_412", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ifm1", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", 
Port = "data_io.ifm2", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], OutputName = "Mul_412", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ofm", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_412", OutputName = "Mul_412", shift = 0 : i8} : (tensor<1x20x45x80xbf16>, tensor<1x20x45x80xbf16>) -> tensor<1x20x45x80xbf16> loc(#loc273) xten_nn.output %462 : tensor<1x20x45x80xbf16> loc(#loc273) } -> tensor<1x20x45x80xbf16> loc(#loc273) xten_nn.output %461 : tensor<1x20x45x80xbf16> loc(#loc273) } -> tensor<1x20x45x80xbf16> loc(#loc273) %425 = xten_nn.subgraph (%arg5 = %421: tensor<1x20x45x80xbf16>, %arg6 = %arg2: tensor<1x20x45x80xbf16>) attributes { LayerName = "Mul_406", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "858", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "857", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], OutputName = "Mul_406", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "860", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", 
layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x20x45x80xbf16>, %arg8 = %arg6: tensor<1x20x45x80xbf16>) attributes { LayerName = "Mul_406", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "858", Port = "data_io.ifm1", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "857", Port = "data_io.ifm2", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], OutputName = "Mul_406", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "860", Port = "data_io.ofm", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_406", OutputName = "Mul_406", shift = 0 : i8} : (tensor<1x20x45x80xbf16>, tensor<1x20x45x80xbf16>) -> tensor<1x20x45x80xbf16> loc(#loc274) xten_nn.output %462 : tensor<1x20x45x80xbf16> loc(#loc274) } -> tensor<1x20x45x80xbf16> loc(#loc274) xten_nn.output %461 : tensor<1x20x45x80xbf16> loc(#loc274) } -> tensor<1x20x45x80xbf16> loc(#loc274) %426 = xten_nn.subgraph (%arg5 = %417: tensor<1x20x45x80xbf16>, %arg6 = %425: tensor<1x20x45x80xbf16>) attributes { LayerName = "Concat_407", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "854", Port = "data_io.ifm1", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : 
vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "860", Port = "data_io.ifm2", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], OutputName = "Concat_407", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "861", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> } ], Specializes = "ConcatC8Adf", With = { config.aie_arch = "aie2p", config.dtype = "bfloat16", config.in1_dim_c = 24 : ui32, config.in1_dim_h = 45 : ui32, config.in1_dim_w = 80 : ui32, config.in2_dim_c = 24 : ui32, config.in2_dim_h = 45 : ui32, config.in2_dim_w = 80 : ui32, config.num_eff_concat_input0_size = 20 : ui32, config.num_eff_concat_input0_start = 0 : ui32, config.num_eff_concat_input1_size = 20 : ui32, config.num_eff_concat_input1_start = 0 : ui32 }} { %461 = tosa.concat %arg5, %arg6 { LayerName = "Concat_407", OutputName = "Concat_407", axis = 1 : i32} : (tensor<1x20x45x80xbf16>, tensor<1x20x45x80xbf16>) -> tensor<1x40x45x80xbf16> loc(#loc275) xten_nn.output %461 : tensor<1x40x45x80xbf16> loc(#loc275) } -> tensor<1x40x45x80xbf16> loc(#loc275) %427 = xten_nn.subgraph (%arg5 = %426: tensor<1x40x45x80xbf16>, %arg6 = %15: tensor<20x40x3x3xbf16>, %arg7 = %14: tensor<20xbf16>) attributes { LayerName = "Conv_408", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "861", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> }, { Name = "860", UnknownDataFormat = true, l3_extend_end = dense<[4, 0, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[20, 40, 3, 3]> : vector<4xindex> }, { Name = 
"decoder.decode2.gru.hh.0.weight", UnknownDataFormat = true } ], OutputName = "Conv_408", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "862", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x40x45x80xbf16>, %arg9 = %arg6: tensor<20x40x3x3xbf16>, %arg10 = %arg7: tensor<20xbf16>) attributes { Dilations = array, HWPadding = [[1, 1], [1, 1]], LayerName = "Conv_408", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "861", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> }, { Name = "860", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<[4, 0, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[20, 40, 3, 3]> : vector<4xindex> }, { Name = "decoder.decode2.gru.hh.0.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_408", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "862", Port = "data_io.ofm", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", 
config.ksize.height = 3 : ui8, config.ksize.width = 3 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = tosa.transpose %arg9, %463 : (tensor<20x40x3x3xbf16>, tensor<4xi32>) -> tensor<20x3x3x40xbf16> loc(#loc276) %465 = tosa.transpose %arg8, %463 : (tensor<1x40x45x80xbf16>, tensor<4xi32>) -> tensor<1x45x80x40xbf16> loc(#loc276) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_408", PartOfOutputName = "Conv_408", dilation = array, pad = array, stride = array} : (tensor<1x45x80x40xbf16>, tensor<20x3x3x40xbf16>, tensor<20xbf16>) -> tensor<1x45x80x20xbf16> loc(#loc276) %467 = tosa.transpose %466, %462 : (tensor<1x45x80x20xbf16>, tensor<4xi32>) -> tensor<1x20x45x80xbf16> loc(#loc276) xten_nn.output %467 : tensor<1x20x45x80xbf16> loc(#loc276) } -> tensor<1x20x45x80xbf16> loc(#loc276) xten_nn.output %461 : tensor<1x20x45x80xbf16> loc(#loc276) } -> tensor<1x20x45x80xbf16> loc(#loc276) %428 = xten_nn.subgraph (%arg5 = %427: tensor<1x20x45x80xbf16>) attributes { LayerName = "Tanh_409", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "862", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], OutputName = "Tanh_409", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "863", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = 
xten_nn.subgraph (%arg6 = %arg5: tensor<1x20x45x80xbf16>) attributes { LayerName = "Tanh_409", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "862", Port = "data_io.ifm", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], OutputName = "Tanh_409", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "863", Port = "data_io.ofm", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], Specializes = "TanhTemplatedBf16", Traits = { Elementwise = true, Unary = true }, With = { config.ENABLE_FP16_AS_BF16 = 0 : ui8, config.aie_arch = "aie2p", config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.tanh %arg6 {LayerName = "Tanh_409", OutputName = "Tanh_409"} : (tensor<1x20x45x80xbf16>) -> tensor<1x20x45x80xbf16> loc(#loc277) xten_nn.output %462 : tensor<1x20x45x80xbf16> loc(#loc277) } -> tensor<1x20x45x80xbf16> loc(#loc277) xten_nn.output %461 : tensor<1x20x45x80xbf16> loc(#loc277) } -> tensor<1x20x45x80xbf16> loc(#loc277) %429 = xten_nn.subgraph (%arg5 = %422: tensor<1x20x45x80xbf16>, %arg6 = %428: tensor<1x20x45x80xbf16>) attributes { LayerName = "Mul_413", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "859", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "863", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], OutputName = "Mul_413", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", 
L3Vectorization = "C:8", Name = "867", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x20x45x80xbf16>, %arg8 = %arg6: tensor<1x20x45x80xbf16>) attributes { LayerName = "Mul_413", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "859", Port = "data_io.ifm1", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "863", Port = "data_io.ifm2", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], OutputName = "Mul_413", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "867", Port = "data_io.ofm", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_413", OutputName = "Mul_413", shift = 0 : i8} : (tensor<1x20x45x80xbf16>, tensor<1x20x45x80xbf16>) -> tensor<1x20x45x80xbf16> loc(#loc278) xten_nn.output %462 : tensor<1x20x45x80xbf16> loc(#loc278) } -> tensor<1x20x45x80xbf16> loc(#loc278) xten_nn.output %461 : tensor<1x20x45x80xbf16> loc(#loc278) } -> tensor<1x20x45x80xbf16> loc(#loc278) %430 = xten_nn.subgraph (%arg5 = %424: tensor<1x20x45x80xbf16>, %arg6 = %429: tensor<1x20x45x80xbf16>) attributes { LayerName = "Add_414", Operands = [ { CurrentDataFormat = "NCHW", 
L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "866", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "865", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], OutputName = "Add_414", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "868", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x20x45x80xbf16>, %arg8 = %arg6: tensor<1x20x45x80xbf16>) attributes { LayerName = "Add_414", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "866", Port = "data_io.ifm1", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "865", Port = "data_io.ifm2", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], OutputName = "Add_414", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "868", Port = "data_io.ofm", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], Specializes = "AddBf16", Traits = { Binary = true, Elementwise = true }, With = { config.act = 0 : ui8, config.act_type = "LINEAR", config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", 
config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.add %arg7, %arg8 {LayerName = "Add_414", OutputName = "Add_414"} : (tensor<1x20x45x80xbf16>, tensor<1x20x45x80xbf16>) -> tensor<1x20x45x80xbf16> loc(#loc279) xten_nn.output %462 : tensor<1x20x45x80xbf16> loc(#loc279) } -> tensor<1x20x45x80xbf16> loc(#loc279) xten_nn.output %461 : tensor<1x20x45x80xbf16> loc(#loc279) } -> tensor<1x20x45x80xbf16> loc(#loc279) %431 = xten_nn.subgraph (%arg5 = %416: tensor<1x20x45x80xbf16>, %arg6 = %430: tensor<1x20x45x80xbf16>) attributes { LayerName = "Concat_415", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "853", Port = "data_io.ifm1", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "852", Port = "data_io.ifm2", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], OutputName = "Concat_415", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "869", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> } ], Specializes = "ConcatC8Adf", With = { config.aie_arch = "aie2p", config.dtype = "bfloat16", config.in1_dim_c = 24 : ui32, config.in1_dim_h = 45 : ui32, config.in1_dim_w = 80 : ui32, config.in2_dim_c = 24 : ui32, config.in2_dim_h = 45 : ui32, config.in2_dim_w = 80 : ui32, config.num_eff_concat_input0_size = 20 : ui32, config.num_eff_concat_input0_start = 0 : ui32, config.num_eff_concat_input1_size = 20 : ui32, config.num_eff_concat_input1_start = 0 : ui32 }} { 
%461 = tosa.concat %arg5, %arg6 { LayerName = "Concat_415", OutputName = "Concat_415", axis = 1 : i32} : (tensor<1x20x45x80xbf16>, tensor<1x20x45x80xbf16>) -> tensor<1x40x45x80xbf16> loc(#loc280) xten_nn.output %461 : tensor<1x40x45x80xbf16> loc(#loc280) } -> tensor<1x40x45x80xbf16> loc(#loc280) %432 = xten_nn.subgraph (%arg5 = %431: tensor<1x40x45x80xbf16>) attributes { LayerName = "Resize_417", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "869", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> } ], OutputName = "Resize_417", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "874", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 90, 160]> : vector<4xindex> } ], Specializes = "ResizeAdf", With = { config.co_trans_mode = 1 : ui32, config.dim_0 = 1 : ui32, config.dim_1 = 40 : ui32, config.dim_2 = 45 : ui32, config.dim_3 = 80 : ui32, config.dtype = "bfloat16", config.mode = 1 : ui32, config.nearest_mode = 0 : ui32, config.output_H = 90 : ui32, config.output_W = 160 : ui32 }} { %461 = xten_nn.resize %arg5 { LayerName = "Resize_417", OutputName = "Resize_417", coordinate_transformation_mode = 1 : i64, mode = 1 : i64, nearest_mode = 0 : i64, scales = array} : (tensor<1x40x45x80xbf16>) -> tensor<1x40x90x160xbf16> loc(#loc281) xten_nn.output %461 : tensor<1x40x90x160xbf16> loc(#loc281) } -> tensor<1x40x90x160xbf16> loc(#loc281) %433 = xten_nn.subgraph (%arg5 = %432: tensor<1x40x90x160xbf16>, %arg6 = %178: tensor<1x16x90x160xbf16>, %arg7 = %167: tensor<1x3x90x160xbf16>) attributes { Axis = 1 : i32, LayerName = "Concat_418", Op = "Concat", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "874", Port = "data_io.ifm1", l3_extend_end = 
dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 90, 160]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "869", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "417", Port = "data_io.ifm3", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 3, 90, 160]> : vector<4xindex> } ], OutputName = "Concat_418", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "PseudoOp", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "875", Port = "data_io.ofm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 59, 90, 160]> : vector<4xindex> } ], current_data_format = "NCHW", data_format = "HCWN"} { %461 = tosa.concat %arg5, %arg6, %arg7 { LayerName = "Concat_418", OutputName = "Concat_418", axis = 1 : i32} : (tensor<1x40x90x160xbf16>, tensor<1x16x90x160xbf16>, tensor<1x3x90x160xbf16>) -> tensor<1x59x90x160xbf16> loc(#loc282) xten_nn.output %461 : tensor<1x59x90x160xbf16> loc(#loc282) } -> tensor<1x59x90x160xbf16> loc(#loc282) %434 = xten_nn.subgraph (%arg5 = %433: tensor<1x59x90x160xbf16>, %arg6 = %13: tensor<32x59x3x3xbf16>, %arg7 = %12: tensor<32xbf16>) attributes { LayerName = "Conv_419", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "875", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 59, 90, 160]> : vector<4xindex> }, { Name = "874", UnknownDataFormat = true, l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[32, 59, 3, 3]> : 
vector<4xindex> }, { Name = "875", UnknownDataFormat = true } ], OutputName = "Relu_420", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "878", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x59x90x160xbf16>, %arg9 = %arg6: tensor<32x59x3x3xbf16>, %arg10 = %arg7: tensor<32xbf16>) attributes { Dilations = array, HWPadding = [[1, 1], [1, 1]], LayerName = "Conv_419", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "875", Port = "data_io.ifm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 59, 90, 160]> : vector<4xindex> }, { Name = "874", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[32, 59, 3, 3]> : vector<4xindex> }, { Name = "875", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_420", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "878", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true, NonNegativeOut = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 1 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 3 : 
ui8, config.ksize.width = 3 : ui8, config.lrelu_alpha = 0.000000e+00 : bf16, config.lrelu_alpha_kernel = 0.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = tosa.transpose %arg9, %463 : (tensor<32x59x3x3xbf16>, tensor<4xi32>) -> tensor<32x3x3x59xbf16> loc(#loc362) %465 = tosa.transpose %arg8, %463 : (tensor<1x59x90x160xbf16>, tensor<4xi32>) -> tensor<1x90x160x59xbf16> loc(#loc362) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_419", PartOfOutputName = "Conv_419", dilation = array, pad = array, stride = array} : (tensor<1x90x160x59xbf16>, tensor<32x3x3x59xbf16>, tensor<32xbf16>) -> tensor<1x90x160x32xbf16> loc(#loc283) %467 = tosa.clamp %466 { LayerName = "Relu_420", OutputName = "Relu_420", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x90x160x32xbf16>) -> tensor<1x90x160x32xbf16> loc(#loc284) %468 = tosa.transpose %467, %462 : (tensor<1x90x160x32xbf16>, tensor<4xi32>) -> tensor<1x32x90x160xbf16> loc(#loc362) xten_nn.output %468 : tensor<1x32x90x160xbf16> loc(#loc284) } -> tensor<1x32x90x160xbf16> loc(#loc362) xten_nn.output %461 : tensor<1x32x90x160xbf16> loc(#loc362) } -> tensor<1x32x90x160xbf16> loc(#loc362) %435 = xten_nn.subgraph (%arg5 = %434: tensor<1x32x90x160xbf16>) attributes { LayerName = "Split_421_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "878", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> } ], OutputName = "Split_421_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { 
CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "879", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], Specializes = "SliceHCWC8Adf", With = { config.aie_arch = "aie2p", config.axis_letter = "C", config.dim_c = 32 : ui32, config.dim_h = 90 : ui32, config.dim_w = 160 : ui32, config.dtype = "bfloat16", config.end = 16 : ui32, config.start = 0 : ui32, config.step = 1 : ui32 }} { %461 = tosa.slice %arg5 { PartOfLayerName = "Split_421", PartOfOutputName = "Split_421", size = array, start = array} : (tensor<1x32x90x160xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc285) xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc285) } -> tensor<1x16x90x160xbf16> loc(#loc285) %436 = xten_nn.subgraph (%arg5 = %434: tensor<1x32x90x160xbf16>) attributes { LayerName = "Split_421_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "878", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> } ], OutputName = "Split_421_Duplicated#1", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "879", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], Specializes = "SliceHCWC8Adf", With = { config.aie_arch = "aie2p", config.axis_letter = "C", config.dim_c = 32 : ui32, config.dim_h = 90 : ui32, config.dim_w = 160 : ui32, config.dtype = "bfloat16", config.end = 32 : ui32, config.start = 16 : ui32, config.step = 1 : ui32 }} { %461 = tosa.slice %arg5 { PartOfLayerName = "Split_421", PartOfOutputName = 
"Split_421", size = array, start = array} : (tensor<1x32x90x160xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc285) xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc285) } -> tensor<1x16x90x160xbf16> loc(#loc285) %437 = xten_nn.subgraph (%arg5 = %436: tensor<1x16x90x160xbf16>, %arg6 = %arg1: tensor<1x16x90x160xbf16>) attributes { Axis = 1 : i32, LayerName = "Concat_422", Op = "Concat", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Concat_422", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "PseudoOp", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "881", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> } ], current_data_format = "NCHW", data_format = "HCWN"} { %461 = tosa.concat %arg5, %arg6 { LayerName = "Concat_422", OutputName = "Concat_422", axis = 1 : i32} : (tensor<1x16x90x160xbf16>, tensor<1x16x90x160xbf16>) -> tensor<1x32x90x160xbf16> loc(#loc286) xten_nn.output %461 : tensor<1x32x90x160xbf16> loc(#loc286) } -> tensor<1x32x90x160xbf16> loc(#loc286) %438 = xten_nn.subgraph (%arg5 = %437: tensor<1x32x90x160xbf16>, %arg6 = %11: tensor<32x32x3x3xbf16>, %arg7 = %10: tensor<32xbf16>) attributes { LayerName = "Conv_423", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "881", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 
90, 160]> : vector<4xindex> }, { Name = "394", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[32, 32, 3, 3]> : vector<4xindex> }, { Name = "decoder.decode1.gru.ih.0.weight", UnknownDataFormat = true } ], OutputName = "Conv_423", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "882", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x32x90x160xbf16>, %arg9 = %arg6: tensor<32x32x3x3xbf16>, %arg10 = %arg7: tensor<32xbf16>) attributes { Dilations = array, HWPadding = [[1, 1], [1, 1]], LayerName = "Conv_423", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "881", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> }, { Name = "394", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[32, 32, 3, 3]> : vector<4xindex> }, { Name = "decoder.decode1.gru.ih.0.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_423", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "882", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", 
config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 3 : ui8, config.ksize.width = 3 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = tosa.transpose %arg9, %463 : (tensor<32x32x3x3xbf16>, tensor<4xi32>) -> tensor<32x3x3x32xbf16> loc(#loc287) %465 = tosa.transpose %arg8, %463 : (tensor<1x32x90x160xbf16>, tensor<4xi32>) -> tensor<1x90x160x32xbf16> loc(#loc287) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_423", PartOfOutputName = "Conv_423", dilation = array, pad = array, stride = array} : (tensor<1x90x160x32xbf16>, tensor<32x3x3x32xbf16>, tensor<32xbf16>) -> tensor<1x90x160x32xbf16> loc(#loc287) %467 = tosa.transpose %466, %462 : (tensor<1x90x160x32xbf16>, tensor<4xi32>) -> tensor<1x32x90x160xbf16> loc(#loc287) xten_nn.output %467 : tensor<1x32x90x160xbf16> loc(#loc287) } -> tensor<1x32x90x160xbf16> loc(#loc287) xten_nn.output %461 : tensor<1x32x90x160xbf16> loc(#loc287) } -> tensor<1x32x90x160xbf16> loc(#loc287) %439 = xten_nn.subgraph (%arg5 = %438: tensor<1x32x90x160xbf16>) attributes { LayerName = "Sigmoid_424", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "882", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> } ], OutputName = "Sigmoid_424", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "883", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 90, 160]> : 
vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x32x90x160xbf16>) attributes { LayerName = "Sigmoid_424", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "882", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> } ], OutputName = "Sigmoid_424", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "883", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> } ], Specializes = "SigmoidTemplatedBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.ENABLE_FP16_AS_BF16 = 0 : ui8, config.aie_arch = "aie2p", config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.sigmoid %arg6 {LayerName = "Sigmoid_424", OutputName = "Sigmoid_424"} : (tensor<1x32x90x160xbf16>) -> tensor<1x32x90x160xbf16> loc(#loc288) xten_nn.output %462 : tensor<1x32x90x160xbf16> loc(#loc288) } -> tensor<1x32x90x160xbf16> loc(#loc288) xten_nn.output %461 : tensor<1x32x90x160xbf16> loc(#loc288) } -> tensor<1x32x90x160xbf16> loc(#loc288) %440 = xten_nn.subgraph (%arg5 = %439: tensor<1x32x90x160xbf16>) attributes { LayerName = "Split_425_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "883", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> } ], OutputName = "Split_425_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", 
L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "884", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], Specializes = "SliceHCWC8Adf", With = { config.aie_arch = "aie2p", config.axis_letter = "C", config.dim_c = 32 : ui32, config.dim_h = 90 : ui32, config.dim_w = 160 : ui32, config.dtype = "bfloat16", config.end = 16 : ui32, config.start = 0 : ui32, config.step = 1 : ui32 }} { %461 = tosa.slice %arg5 { PartOfLayerName = "Split_425", PartOfOutputName = "Split_425", size = array, start = array} : (tensor<1x32x90x160xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc289) xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc289) } -> tensor<1x16x90x160xbf16> loc(#loc289) %441 = xten_nn.subgraph (%arg5 = %439: tensor<1x32x90x160xbf16>) attributes { LayerName = "Split_425_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "883", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> } ], OutputName = "Split_425_Duplicated#1", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "884", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], Specializes = "SliceHCWC8Adf", With = { config.aie_arch = "aie2p", config.axis_letter = "C", config.dim_c = 32 : ui32, config.dim_h = 90 : ui32, config.dim_w = 160 : ui32, config.dtype = "bfloat16", config.end = 32 : ui32, config.start = 16 : ui32, config.step = 1 : ui32 }} { %461 = tosa.slice %arg5 { PartOfLayerName = "Split_425", PartOfOutputName = "Split_425", size = array, start = 
array} : (tensor<1x32x90x160xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc289) xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc289) } -> tensor<1x16x90x160xbf16> loc(#loc289) %442 = xten_nn.subgraph (%arg5 = %9: tensor<1x16x90x160xbf16>, %arg6 = %441: tensor<1x16x90x160xbf16>) attributes { LayerName = "Sub_431", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "890", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "885", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Sub_431", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "891", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x16x90x160xbf16>, %arg8 = %arg6: tensor<1x16x90x160xbf16>) attributes { LayerName = "Sub_431", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "890", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "885", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Sub_431", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "891", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = 
dense<[1, 16, 90, 160]> : vector<4xindex> } ], Specializes = "SubBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.sub %arg7, %arg8 {LayerName = "Sub_431", OutputName = "Sub_431"} : (tensor<1x16x90x160xbf16>, tensor<1x16x90x160xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc2) xten_nn.output %462 : tensor<1x16x90x160xbf16> loc(#loc2) } -> tensor<1x16x90x160xbf16> loc(#loc2) xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc2) } -> tensor<1x16x90x160xbf16> loc(#loc2) %443 = xten_nn.subgraph (%arg5 = %442: tensor<1x16x90x160xbf16>, %arg6 = %arg1: tensor<1x16x90x160xbf16>) attributes { LayerName = "Mul_432", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Mul_432", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x16x90x160xbf16>, %arg8 = %arg6: tensor<1x16x90x160xbf16>) attributes { LayerName = "Mul_432", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, 
L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Mul_432", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_432", OutputName = "Mul_432", shift = 0 : i8} : (tensor<1x16x90x160xbf16>, tensor<1x16x90x160xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc290) xten_nn.output %462 : tensor<1x16x90x160xbf16> loc(#loc290) } -> tensor<1x16x90x160xbf16> loc(#loc290) xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc290) } -> tensor<1x16x90x160xbf16> loc(#loc290) %444 = xten_nn.subgraph (%arg5 = %440: tensor<1x16x90x160xbf16>, %arg6 = %arg1: tensor<1x16x90x160xbf16>) attributes { LayerName = "Mul_426", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "884", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "883", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Mul_426", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "886", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = 
"double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x16x90x160xbf16>, %arg8 = %arg6: tensor<1x16x90x160xbf16>) attributes { LayerName = "Mul_426", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "884", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "883", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Mul_426", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "886", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_426", OutputName = "Mul_426", shift = 0 : i8} : (tensor<1x16x90x160xbf16>, tensor<1x16x90x160xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc291) xten_nn.output %462 : tensor<1x16x90x160xbf16> loc(#loc291) } -> tensor<1x16x90x160xbf16> loc(#loc291) xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc291) } -> tensor<1x16x90x160xbf16> loc(#loc291) %445 = xten_nn.subgraph (%arg5 = %436: tensor<1x16x90x160xbf16>, %arg6 = %444: tensor<1x16x90x160xbf16>) attributes { Axis = 1 : i32, LayerName = "Concat_427", Op = "Concat", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "880", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : 
vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "886", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Concat_427", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "PseudoOp", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "887", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> } ], current_data_format = "NCHW", data_format = "HCWN"} { %461 = tosa.concat %arg5, %arg6 { LayerName = "Concat_427", OutputName = "Concat_427", axis = 1 : i32} : (tensor<1x16x90x160xbf16>, tensor<1x16x90x160xbf16>) -> tensor<1x32x90x160xbf16> loc(#loc292) xten_nn.output %461 : tensor<1x32x90x160xbf16> loc(#loc292) } -> tensor<1x32x90x160xbf16> loc(#loc292) %446 = xten_nn.subgraph (%arg5 = %445: tensor<1x32x90x160xbf16>, %arg6 = %8: tensor<16x32x3x3xbf16>, %arg7 = %7: tensor<16xbf16>) attributes { LayerName = "Conv_428", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "887", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> }, { Name = "886", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[16, 32, 3, 3]> : vector<4xindex> }, { Name = "decoder.decode1.gru.hh.0.weight", UnknownDataFormat = true } ], OutputName = "Conv_428", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "888", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = 
{feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x32x90x160xbf16>, %arg9 = %arg6: tensor<16x32x3x3xbf16>, %arg10 = %arg7: tensor<16xbf16>) attributes { Dilations = array, HWPadding = [[1, 1], [1, 1]], LayerName = "Conv_428", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "887", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> }, { Name = "886", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[16, 32, 3, 3]> : vector<4xindex> }, { Name = "decoder.decode1.gru.hh.0.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_428", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "888", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 3 : ui8, config.ksize.width = 3 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = tosa.transpose %arg9, %463 : (tensor<16x32x3x3xbf16>, 
tensor<4xi32>) -> tensor<16x3x3x32xbf16> loc(#loc293) %465 = tosa.transpose %arg8, %463 : (tensor<1x32x90x160xbf16>, tensor<4xi32>) -> tensor<1x90x160x32xbf16> loc(#loc293) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_428", PartOfOutputName = "Conv_428", dilation = array, pad = array, stride = array} : (tensor<1x90x160x32xbf16>, tensor<16x3x3x32xbf16>, tensor<16xbf16>) -> tensor<1x90x160x16xbf16> loc(#loc293) %467 = tosa.transpose %466, %462 : (tensor<1x90x160x16xbf16>, tensor<4xi32>) -> tensor<1x16x90x160xbf16> loc(#loc293) xten_nn.output %467 : tensor<1x16x90x160xbf16> loc(#loc293) } -> tensor<1x16x90x160xbf16> loc(#loc293) xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc293) } -> tensor<1x16x90x160xbf16> loc(#loc293) %447 = xten_nn.subgraph (%arg5 = %446: tensor<1x16x90x160xbf16>) attributes { LayerName = "Tanh_429", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "888", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Tanh_429", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "889", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x16x90x160xbf16>) attributes { LayerName = "Tanh_429", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "888", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Tanh_429", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "889", 
Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], Specializes = "TanhTemplatedBf16", Traits = { Elementwise = true, Unary = true }, With = { config.ENABLE_FP16_AS_BF16 = 0 : ui8, config.aie_arch = "aie2p", config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.tanh %arg6 {LayerName = "Tanh_429", OutputName = "Tanh_429"} : (tensor<1x16x90x160xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc294) xten_nn.output %462 : tensor<1x16x90x160xbf16> loc(#loc294) } -> tensor<1x16x90x160xbf16> loc(#loc294) xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc294) } -> tensor<1x16x90x160xbf16> loc(#loc294) %448 = xten_nn.subgraph (%arg5 = %441: tensor<1x16x90x160xbf16>, %arg6 = %447: tensor<1x16x90x160xbf16>) attributes { LayerName = "Mul_433", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "885", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "889", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Mul_433", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "893", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x16x90x160xbf16>, %arg8 = %arg6: tensor<1x16x90x160xbf16>) attributes { LayerName = "Mul_433", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name 
= "885", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "889", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Mul_433", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "893", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_433", OutputName = "Mul_433", shift = 0 : i8} : (tensor<1x16x90x160xbf16>, tensor<1x16x90x160xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc295) xten_nn.output %462 : tensor<1x16x90x160xbf16> loc(#loc295) } -> tensor<1x16x90x160xbf16> loc(#loc295) xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc295) } -> tensor<1x16x90x160xbf16> loc(#loc295) %449 = xten_nn.subgraph (%arg5 = %443: tensor<1x16x90x160xbf16>, %arg6 = %448: tensor<1x16x90x160xbf16>) attributes { LayerName = "Add_434", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "892", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "891", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Add_434", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = 
"C:8", Name = "894", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x16x90x160xbf16>, %arg8 = %arg6: tensor<1x16x90x160xbf16>) attributes { LayerName = "Add_434", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "892", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "891", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Add_434", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "894", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], Specializes = "AddBf16", Traits = { Binary = true, Elementwise = true }, With = { config.act = 0 : ui8, config.act_type = "LINEAR", config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.add %arg7, %arg8 {LayerName = "Add_434", OutputName = "Add_434"} : (tensor<1x16x90x160xbf16>, tensor<1x16x90x160xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc296) xten_nn.output %462 : tensor<1x16x90x160xbf16> loc(#loc296) } -> tensor<1x16x90x160xbf16> loc(#loc296) xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc296) } -> tensor<1x16x90x160xbf16> loc(#loc296) %450 = xten_nn.subgraph (%arg5 = %435: tensor<1x16x90x160xbf16>, %arg6 = %449: tensor<1x16x90x160xbf16>) attributes { Axis = 1 : i32, LayerName = "Concat_435", Op = "Concat", Operands = [ { 
CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "879", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "878", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Concat_435", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "PseudoOp", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "895", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> } ], current_data_format = "NCHW", data_format = "HCWN"} { %461 = tosa.concat %arg5, %arg6 { LayerName = "Concat_435", OutputName = "Concat_435", axis = 1 : i32} : (tensor<1x16x90x160xbf16>, tensor<1x16x90x160xbf16>) -> tensor<1x32x90x160xbf16> loc(#loc297) xten_nn.output %461 : tensor<1x32x90x160xbf16> loc(#loc297) } -> tensor<1x32x90x160xbf16> loc(#loc297) %451 = xten_nn.subgraph (%arg5 = %450: tensor<1x32x90x160xbf16>) attributes { LayerName = "Resize_437", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "895", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> } ], OutputName = "Resize_437", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "900", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 180, 320]> : vector<4xindex> } ], Specializes = "ResizeAdf", With = { 
config.co_trans_mode = 1 : ui32, config.dim_0 = 1 : ui32, config.dim_1 = 32 : ui32, config.dim_2 = 90 : ui32, config.dim_3 = 160 : ui32, config.dtype = "bfloat16", config.mode = 1 : ui32, config.nearest_mode = 0 : ui32, config.output_H = 180 : ui32, config.output_W = 320 : ui32 }} { %461 = xten_nn.resize %arg5 { LayerName = "Resize_437", OutputName = "Resize_437", coordinate_transformation_mode = 1 : i64, mode = 1 : i64, nearest_mode = 0 : i64, scales = array} : (tensor<1x32x90x160xbf16>) -> tensor<1x32x180x320xbf16> loc(#loc298) xten_nn.output %461 : tensor<1x32x180x320xbf16> loc(#loc298) } -> tensor<1x32x180x320xbf16> loc(#loc298) %452 = xten_nn.subgraph (%arg5 = %451: tensor<1x32x180x320xbf16>, %arg6 = %166: tensor<1x3x180x320xbf16>) attributes { Axis = 1 : i32, LayerName = "Concat_438", Op = "Concat", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "900", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 180, 320]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "895", Port = "data_io.ifm2", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], OutputName = "Concat_438", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "PseudoOp", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "901", Port = "data_io.ofm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 35, 180, 320]> : vector<4xindex> } ], current_data_format = "NCHW", data_format = "HCWN"} { %461 = tosa.concat %arg5, %arg6 { LayerName = "Concat_438", OutputName = "Concat_438", axis = 1 : i32} : (tensor<1x32x180x320xbf16>, tensor<1x3x180x320xbf16>) -> 
tensor<1x35x180x320xbf16> loc(#loc299) xten_nn.output %461 : tensor<1x35x180x320xbf16> loc(#loc299) } -> tensor<1x35x180x320xbf16> loc(#loc299) %453 = xten_nn.subgraph (%arg5 = %452: tensor<1x35x180x320xbf16>, %arg6 = %6: tensor<16x35x3x3xbf16>, %arg7 = %5: tensor<16xbf16>) attributes { LayerName = "Conv_439", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "901", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 35, 180, 320]> : vector<4xindex> }, { Name = "900", UnknownDataFormat = true, l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[16, 35, 3, 3]> : vector<4xindex> }, { Name = "901", UnknownDataFormat = true } ], OutputName = "Relu_440", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "904", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 180, 320]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x35x180x320xbf16>, %arg9 = %arg6: tensor<16x35x3x3xbf16>, %arg10 = %arg7: tensor<16xbf16>) attributes { Dilations = array, HWPadding = [[1, 1], [1, 1]], LayerName = "Conv_439", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "901", Port = "data_io.ifm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 35, 180, 320]> : vector<4xindex> }, { Name = "900", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[16, 35, 3, 3]> : vector<4xindex> }, { Name = "901", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_440", Reason = "MllibKernel", Results = [ { 
CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "904", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 180, 320]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true, NonNegativeOut = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 1 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 3 : ui8, config.ksize.width = 3 : ui8, config.lrelu_alpha = 0.000000e+00 : bf16, config.lrelu_alpha_kernel = 0.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = tosa.transpose %arg9, %463 : (tensor<16x35x3x3xbf16>, tensor<4xi32>) -> tensor<16x3x3x35xbf16> loc(#loc363) %465 = tosa.transpose %arg8, %463 : (tensor<1x35x180x320xbf16>, tensor<4xi32>) -> tensor<1x180x320x35xbf16> loc(#loc363) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_439", PartOfOutputName = "Conv_439", dilation = array, pad = array, stride = array} : (tensor<1x180x320x35xbf16>, tensor<16x3x3x35xbf16>, tensor<16xbf16>) -> tensor<1x180x320x16xbf16> loc(#loc300) %467 = tosa.clamp %466 { LayerName = "Relu_440", OutputName = "Relu_440", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x180x320x16xbf16>) -> tensor<1x180x320x16xbf16> loc(#loc301) %468 = tosa.transpose %467, %462 : (tensor<1x180x320x16xbf16>, tensor<4xi32>) -> tensor<1x16x180x320xbf16> loc(#loc363) xten_nn.output %468 : tensor<1x16x180x320xbf16> loc(#loc301) 
} -> tensor<1x16x180x320xbf16> loc(#loc363) xten_nn.output %461 : tensor<1x16x180x320xbf16> loc(#loc363) } -> tensor<1x16x180x320xbf16> loc(#loc363) %454 = xten_nn.subgraph (%arg5 = %453: tensor<1x16x180x320xbf16>, %arg6 = %4: tensor<16x16x3x3xbf16>, %arg7 = %3: tensor<16xbf16>) attributes { LayerName = "Conv_441", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "904", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 180, 320]> : vector<4xindex> }, { Name = "1078", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[16, 16, 3, 3]> : vector<4xindex> }, { Name = "1082", UnknownDataFormat = true } ], OutputName = "Relu_442", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "907", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 180, 320]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x16x180x320xbf16>, %arg9 = %arg6: tensor<16x16x3x3xbf16>, %arg10 = %arg7: tensor<16xbf16>) attributes { Dilations = array, HWPadding = [[1, 1], [1, 1]], LayerName = "Conv_441", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "904", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 180, 320]> : vector<4xindex> }, { Name = "1078", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[16, 16, 3, 3]> : vector<4xindex> }, { Name = "1082", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_442", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = 
"HCWN", L3Vectorization = "C:8", Name = "907", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 180, 320]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true, NonNegativeOut = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 1 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 3 : ui8, config.ksize.width = 3 : ui8, config.lrelu_alpha = 0.000000e+00 : bf16, config.lrelu_alpha_kernel = 0.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = tosa.transpose %arg9, %463 : (tensor<16x16x3x3xbf16>, tensor<4xi32>) -> tensor<16x3x3x16xbf16> loc(#loc364) %465 = tosa.transpose %arg8, %463 : (tensor<1x16x180x320xbf16>, tensor<4xi32>) -> tensor<1x180x320x16xbf16> loc(#loc364) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_441", PartOfOutputName = "Conv_441", dilation = array, pad = array, stride = array} : (tensor<1x180x320x16xbf16>, tensor<16x3x3x16xbf16>, tensor<16xbf16>) -> tensor<1x180x320x16xbf16> loc(#loc302) %467 = tosa.clamp %466 { LayerName = "Relu_442", OutputName = "Relu_442", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x180x320x16xbf16>) -> tensor<1x180x320x16xbf16> loc(#loc303) %468 = tosa.transpose %467, %462 : (tensor<1x180x320x16xbf16>, tensor<4xi32>) -> tensor<1x16x180x320xbf16> loc(#loc364) xten_nn.output %468 : tensor<1x16x180x320xbf16> loc(#loc303) } -> tensor<1x16x180x320xbf16> 
loc(#loc364) xten_nn.output %461 : tensor<1x16x180x320xbf16> loc(#loc364) } -> tensor<1x16x180x320xbf16> loc(#loc364) %455 = xten_nn.subgraph (%arg5 = %454: tensor<1x16x180x320xbf16>, %arg6 = %2: tensor<4x16x1x1xbf16>, %arg7 = %1: tensor<4xbf16>) attributes { LayerName = "Conv_443", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "907", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 180, 320]> : vector<4xindex> }, { Name = "1081", UnknownDataFormat = true, l3_extend_end = dense<[4, 0, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[4, 16, 1, 1]> : vector<4xindex> }, { Name = "project_mat.conv.weight", UnknownDataFormat = true } ], OutputName = "Conv_443", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "908", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 4, 180, 320]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x16x180x320xbf16>, %arg9 = %arg6: tensor<4x16x1x1xbf16>, %arg10 = %arg7: tensor<4xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_443", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "907", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 180, 320]> : vector<4xindex> }, { Name = "1081", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<[4, 0, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[4, 16, 1, 1]> : vector<4xindex> }, { Name = "project_mat.conv.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_443", Reason = "MllibKernel", Results = [ { 
CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "908", Port = "data_io.ofm", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 4, 180, 320]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc304) %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<4x16x1x1xbf16>) -> tensor<4x1x1x16xbf16> loc(#loc304) %465 = tosa.transpose %arg8, %463 : (tensor<1x16x180x320xbf16>, tensor<4xi32>) -> tensor<1x180x320x16xbf16> loc(#loc304) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_443", PartOfOutputName = "Conv_443", dilation = array, pad = array, stride = array} : (tensor<1x180x320x16xbf16>, tensor<4x1x1x16xbf16>, tensor<4xbf16>) -> tensor<1x180x320x4xbf16> loc(#loc304) %467 = tosa.transpose %466, %462 : (tensor<1x180x320x4xbf16>, tensor<4xi32>) -> tensor<1x4x180x320xbf16> loc(#loc304) xten_nn.output %467 : tensor<1x4x180x320xbf16> loc(#loc304) } -> tensor<1x4x180x320xbf16> loc(#loc304) xten_nn.output %461 : tensor<1x4x180x320xbf16> loc(#loc304) } -> tensor<1x4x180x320xbf16> loc(#loc304) %456 = xten_nn.subgraph (%arg5 = %455: tensor<1x4x180x320xbf16>) attributes { LayerName = "Split_444_Duplicated#0", Operands = 
[ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "908", Port = "data_io.ifm", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 4, 180, 320]> : vector<4xindex> } ], OutputName = "Split_444_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "909", Port = "data_io.ofm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], Specializes = "SliceHCWC8Adf", With = { config.aie_arch = "aie2p", config.axis_letter = "C", config.dim_c = 8 : ui32, config.dim_h = 180 : ui32, config.dim_w = 320 : ui32, config.dtype = "bfloat16", config.end = 3 : ui32, config.start = 0 : ui32, config.step = 1 : ui32 }} { %461 = tosa.slice %arg5 { PartOfLayerName = "Split_444", PartOfOutputName = "Split_444", size = array, start = array} : (tensor<1x4x180x320xbf16>) -> tensor<1x3x180x320xbf16> loc(#loc305) xten_nn.output %461 : tensor<1x3x180x320xbf16> loc(#loc305) } -> tensor<1x3x180x320xbf16> loc(#loc305) %457 = xten_nn.subgraph (%arg5 = %456: tensor<1x3x180x320xbf16>, %arg6 = %166: tensor<1x3x180x320xbf16>) attributes { LayerName = "Add_445", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "909", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "908", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], OutputName = "Add_445_Duplicated#1", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", 
L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "911", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x3x180x320xbf16>, %arg8 = %arg6: tensor<1x3x180x320xbf16>) attributes { LayerName = "Add_445", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "909", Port = "data_io.ifm1", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "908", Port = "data_io.ifm2", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], OutputName = "Add_445_Duplicated#1", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "911", Port = "data_io.ofm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], Specializes = "AddBf16", Traits = { Binary = true, Elementwise = true }, With = { config.act = 0 : ui8, config.act_type = "LINEAR", config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.add %arg7, %arg8 {LayerName = "Add_445", OutputName = "Add_445"} : (tensor<1x3x180x320xbf16>, tensor<1x3x180x320xbf16>) -> tensor<1x3x180x320xbf16> loc(#loc11) xten_nn.output %462 : tensor<1x3x180x320xbf16> loc(#loc11) } -> tensor<1x3x180x320xbf16> loc(#loc11) xten_nn.output %461 : tensor<1x3x180x320xbf16> loc(#loc11) } -> tensor<1x3x180x320xbf16> loc(#loc11) %458 = xten_nn.subgraph (%arg5 = %457: tensor<1x3x180x320xbf16>) attributes { LayerName = "Clip_446", Operands = 
[ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "911", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], OutputName = "Clip_446", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "916", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x3x180x320xbf16>) attributes { LayerName = "Clip_446", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "911", Port = "data_io.ifm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], OutputName = "Clip_446", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "916", Port = "data_io.ofm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 1.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_446", OutputName = "Clip_446", max_fp = 1.000000e+00 : f32, max_int = 1 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x3x180x320xbf16>) -> tensor<1x3x180x320xbf16> loc(#loc306) xten_nn.output %462 : tensor<1x3x180x320xbf16> loc(#loc306) } -> tensor<1x3x180x320xbf16> loc(#loc306) xten_nn.output %461 
: tensor<1x3x180x320xbf16> loc(#loc306) } -> tensor<1x3x180x320xbf16> loc(#loc306) %459 = xten_nn.subgraph (%arg5 = %455: tensor<1x4x180x320xbf16>) attributes { LayerName = "Split_444_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "908", Port = "data_io.ifm", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 4, 180, 320]> : vector<4xindex> } ], OutputName = "Split_444_Duplicated#1", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "909", Port = "data_io.ofm", l3_extend_end = dense<[0, 7, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 1, 180, 320]> : vector<4xindex> } ], Specializes = "SliceHCWC8Adf", With = { config.aie_arch = "aie2p", config.axis_letter = "C", config.dim_c = 8 : ui32, config.dim_h = 180 : ui32, config.dim_w = 320 : ui32, config.dtype = "bfloat16", config.end = 4 : ui32, config.start = 3 : ui32, config.step = 1 : ui32 }} { %461 = tosa.slice %arg5 { PartOfLayerName = "Split_444", PartOfOutputName = "Split_444", size = array, start = array} : (tensor<1x4x180x320xbf16>) -> tensor<1x1x180x320xbf16> loc(#loc305) xten_nn.output %461 : tensor<1x1x180x320xbf16> loc(#loc305) } -> tensor<1x1x180x320xbf16> loc(#loc305) %460 = xten_nn.subgraph (%arg5 = %459: tensor<1x1x180x320xbf16>) attributes { LayerName = "Clip_447", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "910", l3_extend_end = dense<[0, 7, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 1, 180, 320]> : vector<4xindex> } ], OutputName = "Clip_447", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", 
Name = "921", l3_extend_end = dense<[0, 7, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 1, 180, 320]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x1x180x320xbf16>) attributes { LayerName = "Clip_447", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "910", Port = "data_io.ifm", l3_extend_end = dense<[0, 7, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 1, 180, 320]> : vector<4xindex> } ], OutputName = "Clip_447", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "921", Port = "data_io.ofm", l3_extend_end = dense<[0, 7, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 1, 180, 320]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 1.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_447", OutputName = "Clip_447", max_fp = 1.000000e+00 : f32, max_int = 1 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x1x180x320xbf16>) -> tensor<1x1x180x320xbf16> loc(#loc307) xten_nn.output %462 : tensor<1x1x180x320xbf16> loc(#loc307) } -> tensor<1x1x180x320xbf16> loc(#loc307) xten_nn.output %461 : tensor<1x1x180x320xbf16> loc(#loc307) } -> tensor<1x1x180x320xbf16> loc(#loc307) return %449, %430, %410, %390, %458, %460 : tensor<1x16x90x160xbf16>, tensor<1x20x45x80xbf16>, tensor<1x40x23x40xbf16>, tensor<1x64x12x20xbf16>, tensor<1x3x180x320xbf16>, tensor<1x1x180x320xbf16> loc(#loc319) } loc(#loc319) func.func @forward(%arg0: tensor<1x180x320x4xui8> {onnx.name = "src"} loc(unknown), %arg1: 
tensor<1x90x160x16xbf16> {onnx.name = "r1i"} loc(unknown), %arg2: tensor<1x45x80x20xbf16> {onnx.name = "r2i"} loc(unknown), %arg3: tensor<1x23x40x40xbf16> {onnx.name = "r3i"} loc(unknown), %arg4: tensor<1x12x20x64xbf16> {onnx.name = "r4i"} loc(unknown)) -> (tensor<1x180x320x3xbf16> {onnx.name = "fgr"}, tensor<1x180x320x1xbf16> {onnx.name = "pha"}, tensor<1x90x160x16xbf16> {onnx.name = "r1o"}, tensor<1x45x80x20xbf16> {onnx.name = "r2o"}, tensor<1x23x40x40xbf16> {onnx.name = "r3o"}, tensor<1x12x20x64xbf16> {onnx.name = "r4o"}) { %0 = xten_nn.subgraph (%arg5 = %arg1: tensor<1x90x160x16xbf16>) attributes {Message = "onnx.Transpose in function edge.", Reason = "CpuBecause"} { %12 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc309) %13 = tosa.transpose %arg5, %12 : (tensor<1x90x160x16xbf16>, tensor<4xi32>) -> tensor<1x16x90x160xbf16> loc(#loc309) xten_nn.output %13 : tensor<1x16x90x160xbf16> loc(#loc309) } -> tensor<1x16x90x160xbf16> loc(#loc309) %1 = xten_nn.subgraph (%arg5 = %arg2: tensor<1x45x80x20xbf16>) attributes {Message = "onnx.Transpose in function edge.", Reason = "CpuBecause"} { %12 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc310) %13 = tosa.transpose %arg5, %12 : (tensor<1x45x80x20xbf16>, tensor<4xi32>) -> tensor<1x20x45x80xbf16> loc(#loc310) xten_nn.output %13 : tensor<1x20x45x80xbf16> loc(#loc310) } -> tensor<1x20x45x80xbf16> loc(#loc310) %2 = xten_nn.subgraph (%arg5 = %arg3: tensor<1x23x40x40xbf16>) attributes {Message = "onnx.Transpose in function edge.", Reason = "CpuBecause"} { %12 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc311) %13 = tosa.transpose %arg5, %12 : (tensor<1x23x40x40xbf16>, tensor<4xi32>) -> tensor<1x40x23x40xbf16> loc(#loc311) xten_nn.output %13 : tensor<1x40x23x40xbf16> loc(#loc311) } -> tensor<1x40x23x40xbf16> loc(#loc311) %3 = tosa.cast %arg0 {LayerName = "Cast_0", OutputName = "Cast_0"} 
: (tensor<1x180x320x4xui8>) -> tensor<1x180x320x4xbf16> loc(#loc308) %4 = xten_nn.subgraph (%arg5 = %arg4: tensor<1x12x20x64xbf16>) attributes {Message = "onnx.Transpose in function edge.", Reason = "CpuBecause"} { %12 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc312) %13 = tosa.transpose %arg5, %12 : (tensor<1x12x20x64xbf16>, tensor<4xi32>) -> tensor<1x64x12x20xbf16> loc(#loc312) xten_nn.output %13 : tensor<1x64x12x20xbf16> loc(#loc312) } -> tensor<1x64x12x20xbf16> loc(#loc312) %5:6 = call @forward_outlined_part_0(%3, %0, %1, %2, %4) : (tensor<1x180x320x4xbf16>, tensor<1x16x90x160xbf16>, tensor<1x20x45x80xbf16>, tensor<1x40x23x40xbf16>, tensor<1x64x12x20xbf16>) -> (tensor<1x16x90x160xbf16>, tensor<1x20x45x80xbf16>, tensor<1x40x23x40xbf16>, tensor<1x64x12x20xbf16>, tensor<1x3x180x320xbf16>, tensor<1x1x180x320xbf16>) loc(#loc319) %6 = xten_nn.subgraph (%arg5 = %5#4: tensor<1x3x180x320xbf16>) attributes {Message = "onnx.Transpose in function edge.", Reason = "CpuBecause"} { %12 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc313) %13 = tosa.transpose %arg5, %12 : (tensor<1x3x180x320xbf16>, tensor<4xi32>) -> tensor<1x180x320x3xbf16> loc(#loc313) xten_nn.output %13 : tensor<1x180x320x3xbf16> loc(#loc313) } -> tensor<1x180x320x3xbf16> loc(#loc313) %7 = xten_nn.subgraph (%arg5 = %5#3: tensor<1x64x12x20xbf16>) attributes {Message = "onnx.Transpose in function edge.", Reason = "CpuBecause"} { %12 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc314) %13 = tosa.transpose %arg5, %12 : (tensor<1x64x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x64xbf16> loc(#loc314) xten_nn.output %13 : tensor<1x12x20x64xbf16> loc(#loc314) } -> tensor<1x12x20x64xbf16> loc(#loc314) %8 = xten_nn.subgraph (%arg5 = %5#2: tensor<1x40x23x40xbf16>) attributes {Message = "onnx.Transpose in function edge.", Reason = "CpuBecause"} { %12 = "tosa.const"() <{value = 
dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc315) %13 = tosa.transpose %arg5, %12 : (tensor<1x40x23x40xbf16>, tensor<4xi32>) -> tensor<1x23x40x40xbf16> loc(#loc315) xten_nn.output %13 : tensor<1x23x40x40xbf16> loc(#loc315) } -> tensor<1x23x40x40xbf16> loc(#loc315) %9 = xten_nn.subgraph (%arg5 = %5#1: tensor<1x20x45x80xbf16>) attributes {Message = "onnx.Transpose in function edge.", Reason = "CpuBecause"} { %12 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc316) %13 = tosa.transpose %arg5, %12 : (tensor<1x20x45x80xbf16>, tensor<4xi32>) -> tensor<1x45x80x20xbf16> loc(#loc316) xten_nn.output %13 : tensor<1x45x80x20xbf16> loc(#loc316) } -> tensor<1x45x80x20xbf16> loc(#loc316) %10 = xten_nn.subgraph (%arg5 = %5#0: tensor<1x16x90x160xbf16>) attributes {Message = "onnx.Transpose in function edge.", Reason = "CpuBecause"} { %12 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc317) %13 = tosa.transpose %arg5, %12 : (tensor<1x16x90x160xbf16>, tensor<4xi32>) -> tensor<1x90x160x16xbf16> loc(#loc317) xten_nn.output %13 : tensor<1x90x160x16xbf16> loc(#loc317) } -> tensor<1x90x160x16xbf16> loc(#loc317) %11 = xten_nn.subgraph (%arg5 = %5#5: tensor<1x1x180x320xbf16>) attributes {Message = "onnx.Transpose in function edge.", Reason = "CpuBecause"} { %12 = tosa.reshape %arg5 {new_shape = array} : (tensor<1x1x180x320xbf16>) -> tensor<1x180x320x1xbf16> loc(#loc318) xten_nn.output %12 : tensor<1x180x320x1xbf16> loc(#loc318) } -> tensor<1x180x320x1xbf16> loc(#loc318) return %6, %11, %10, %9, %8, %7 : tensor<1x180x320x3xbf16>, tensor<1x180x320x1xbf16>, tensor<1x90x160x16xbf16>, tensor<1x45x80x20xbf16>, tensor<1x23x40x40xbf16>, tensor<1x12x20x64xbf16> loc(#loc) } loc(#loc) } loc(#loc) #loc1 = loc("Div_2") #loc2 = loc("Sub_431") #loc3 = loc("Sub_411") #loc4 = loc("Sub_385") #loc5 = loc("Sub_359") #loc6 = loc("Div_16") #loc7 = loc("Sub_14") #loc8 = loc("Initializer_398") #loc9 
= loc("Slice_7") #loc10 = loc("CompilerGeneratedLoc") #loc11 = loc("Add_445") #loc12 = loc("AveragePool_346") #loc13 = loc("AveragePool_347") #loc14 = loc("AveragePool_348") #loc15 = loc("Conv_17") #loc16 = loc("Add_19") #loc17 = loc("Clip_22") #loc18 = loc("Div_24") #loc19 = loc("Mul_25") #loc20 = loc("Conv_26") #loc21 = loc("Relu_27") #loc22 = loc("Conv_28") #loc23 = loc("Add_29") #loc24 = loc("Conv_30") #loc25 = loc("Relu_31") #loc26 = loc("Conv_32") #loc27 = loc("Relu_33") #loc28 = loc("Conv_34") #loc29 = loc("Conv_35") #loc30 = loc("Relu_36") #loc31 = loc("Conv_37") #loc32 = loc("Relu_38") #loc33 = loc("Conv_39") #loc34 = loc("Add_40") #loc35 = loc("Conv_41") #loc36 = loc("Relu_42") #loc37 = loc("Conv_43") #loc38 = loc("Relu_44") #loc39 = loc("GlobalAveragePool_45") #loc40 = loc("Conv_46") #loc41 = loc("Relu_47") #loc42 = loc("Conv_48") #loc43 = loc("Add_50") #loc44 = loc("Clip_53") #loc45 = loc("Div_55") #loc46 = loc("Mul_56") #loc47 = loc("Conv_57") #loc48 = loc("Conv_58") #loc49 = loc("Relu_59") #loc50 = loc("Conv_60") #loc51 = loc("Relu_61") #loc52 = loc("GlobalAveragePool_62") #loc53 = loc("Conv_63") #loc54 = loc("Relu_64") #loc55 = loc("Conv_65") #loc56 = loc("Add_67") #loc57 = loc("Clip_70") #loc58 = loc("Div_72") #loc59 = loc("Mul_73") #loc60 = loc("Conv_74") #loc61 = loc("Add_75") #loc62 = loc("Conv_76") #loc63 = loc("Relu_77") #loc64 = loc("Conv_78") #loc65 = loc("Relu_79") #loc66 = loc("GlobalAveragePool_80") #loc67 = loc("Conv_81") #loc68 = loc("Relu_82") #loc69 = loc("Conv_83") #loc70 = loc("Add_85") #loc71 = loc("Clip_88") #loc72 = loc("Div_90") #loc73 = loc("Mul_91") #loc74 = loc("Conv_92") #loc75 = loc("Add_93") #loc76 = loc("Conv_94") #loc77 = loc("Add_96") #loc78 = loc("Clip_99") #loc79 = loc("Div_101") #loc80 = loc("Mul_102") #loc81 = loc("Conv_103") #loc82 = loc("Add_105") #loc83 = loc("Clip_108") #loc84 = loc("Div_110") #loc85 = loc("Mul_111") #loc86 = loc("Conv_112") #loc87 = loc("Conv_113") #loc88 = loc("Add_115") #loc89 = 
loc("Clip_118") #loc90 = loc("Div_120") #loc91 = loc("Mul_121") #loc92 = loc("Conv_122") #loc93 = loc("Add_124") #loc94 = loc("Clip_127") #loc95 = loc("Div_129") #loc96 = loc("Mul_130") #loc97 = loc("Conv_131") #loc98 = loc("Add_132") #loc99 = loc("Conv_133") #loc100 = loc("Add_135") #loc101 = loc("Clip_138") #loc102 = loc("Div_140") #loc103 = loc("Mul_141") #loc104 = loc("Conv_142") #loc105 = loc("Add_144") #loc106 = loc("Clip_147") #loc107 = loc("Div_149") #loc108 = loc("Mul_150") #loc109 = loc("Conv_151") #loc110 = loc("Add_152") #loc111 = loc("Conv_153") #loc112 = loc("Add_155") #loc113 = loc("Clip_158") #loc114 = loc("Div_160") #loc115 = loc("Mul_161") #loc116 = loc("Conv_162") #loc117 = loc("Add_164") #loc118 = loc("Clip_167") #loc119 = loc("Div_169") #loc120 = loc("Mul_170") #loc121 = loc("Conv_171") #loc122 = loc("Add_172") #loc123 = loc("Conv_173") #loc124 = loc("Add_175") #loc125 = loc("Clip_178") #loc126 = loc("Div_180") #loc127 = loc("Mul_181") #loc128 = loc("Conv_182") #loc129 = loc("Add_184") #loc130 = loc("Clip_187") #loc131 = loc("Div_189") #loc132 = loc("Mul_190") #loc133 = loc("GlobalAveragePool_191") #loc134 = loc("Conv_192") #loc135 = loc("Relu_193") #loc136 = loc("Conv_194") #loc137 = loc("Add_196") #loc138 = loc("Clip_199") #loc139 = loc("Div_201") #loc140 = loc("Mul_202") #loc141 = loc("Conv_203") #loc142 = loc("Conv_204") #loc143 = loc("Add_206") #loc144 = loc("Clip_209") #loc145 = loc("Div_211") #loc146 = loc("Mul_212") #loc147 = loc("Conv_213") #loc148 = loc("Add_215") #loc149 = loc("Clip_218") #loc150 = loc("Div_220") #loc151 = loc("Mul_221") #loc152 = loc("GlobalAveragePool_222") #loc153 = loc("Conv_223") #loc154 = loc("Relu_224") #loc155 = loc("Conv_225") #loc156 = loc("Add_227") #loc157 = loc("Clip_230") #loc158 = loc("Div_232") #loc159 = loc("Mul_233") #loc160 = loc("Conv_234") #loc161 = loc("Add_235") #loc162 = loc("Conv_236") #loc163 = loc("Add_238") #loc164 = loc("Clip_241") #loc165 = loc("Div_243") #loc166 = loc("Mul_244") #loc167 
= loc("Conv_245") #loc168 = loc("Add_247") #loc169 = loc("Clip_250") #loc170 = loc("Div_252") #loc171 = loc("Mul_253") #loc172 = loc("GlobalAveragePool_254") #loc173 = loc("Conv_255") #loc174 = loc("Relu_256") #loc175 = loc("Conv_257") #loc176 = loc("Add_259") #loc177 = loc("Clip_262") #loc178 = loc("Div_264") #loc179 = loc("Mul_265") #loc180 = loc("Conv_266") #loc181 = loc("Conv_267") #loc182 = loc("Add_269") #loc183 = loc("Clip_272") #loc184 = loc("Div_274") #loc185 = loc("Mul_275") #loc186 = loc("Conv_276") #loc187 = loc("Add_278") #loc188 = loc("Clip_281") #loc189 = loc("Div_283") #loc190 = loc("Mul_284") #loc191 = loc("GlobalAveragePool_285") #loc192 = loc("Conv_286") #loc193 = loc("Relu_287") #loc194 = loc("Conv_288") #loc195 = loc("Add_290") #loc196 = loc("Clip_293") #loc197 = loc("Div_295") #loc198 = loc("Mul_296") #loc199 = loc("Conv_297") #loc200 = loc("Add_298") #loc201 = loc("Conv_299") #loc202 = loc("Add_301") #loc203 = loc("Clip_304") #loc204 = loc("Div_306") #loc205 = loc("Mul_307") #loc206 = loc("Conv_308") #loc207 = loc("Add_310") #loc208 = loc("Clip_313") #loc209 = loc("Div_315") #loc210 = loc("Mul_316") #loc211 = loc("GlobalAveragePool_317") #loc212 = loc("Conv_318") #loc213 = loc("Relu_319") #loc214 = loc("Conv_320") #loc215 = loc("Add_322") #loc216 = loc("Clip_325") #loc217 = loc("Div_327") #loc218 = loc("Mul_328") #loc219 = loc("Conv_329") #loc220 = loc("Add_330") #loc221 = loc("Conv_331") #loc222 = loc("Add_333") #loc223 = loc("Clip_336") #loc224 = loc("Div_338") #loc225 = loc("Mul_339") #loc226 = loc("GlobalAveragePool_342") #loc227 = loc("Conv_343") #loc228 = loc("Sigmoid_344") #loc229 = loc("Mul_345") #loc230 = loc("Conv_340") #loc231 = loc("Relu_341") #loc232 = loc("Split_349") #loc233 = loc("Concat_350") #loc234 = loc("Conv_351") #loc235 = loc("Sigmoid_352") #loc236 = loc("Split_353") #loc237 = loc("Mul_360") #loc238 = loc("Mul_354") #loc239 = loc("Concat_355") #loc240 = loc("Conv_356") #loc241 = loc("Tanh_357") #loc242 = loc("Mul_361") 
#loc243 = loc("Add_362") #loc244 = loc("Concat_363") #loc245 = loc("Resize_365") #loc246 = loc("Slice_371") #loc247 = loc("Concat_372") #loc248 = loc("Conv_373") #loc249 = loc("Relu_374") #loc250 = loc("Split_375") #loc251 = loc("Concat_376") #loc252 = loc("Conv_377") #loc253 = loc("Sigmoid_378") #loc254 = loc("Split_379") #loc255 = loc("Mul_386") #loc256 = loc("Mul_380") #loc257 = loc("Concat_381") #loc258 = loc("Conv_382") #loc259 = loc("Tanh_383") #loc260 = loc("Mul_387") #loc261 = loc("Add_388") #loc262 = loc("Concat_389") #loc263 = loc("Resize_391") #loc264 = loc("Slice_397") #loc265 = loc("Concat_398") #loc266 = loc("Conv_399") #loc267 = loc("Relu_400") #loc268 = loc("Split_401") #loc269 = loc("Concat_402") #loc270 = loc("Conv_403") #loc271 = loc("Sigmoid_404") #loc272 = loc("Split_405") #loc273 = loc("Mul_412") #loc274 = loc("Mul_406") #loc275 = loc("Concat_407") #loc276 = loc("Conv_408") #loc277 = loc("Tanh_409") #loc278 = loc("Mul_413") #loc279 = loc("Add_414") #loc280 = loc("Concat_415") #loc281 = loc("Resize_417") #loc282 = loc("Concat_418") #loc283 = loc("Conv_419") #loc284 = loc("Relu_420") #loc285 = loc("Split_421") #loc286 = loc("Concat_422") #loc287 = loc("Conv_423") #loc288 = loc("Sigmoid_424") #loc289 = loc("Split_425") #loc290 = loc("Mul_432") #loc291 = loc("Mul_426") #loc292 = loc("Concat_427") #loc293 = loc("Conv_428") #loc294 = loc("Tanh_429") #loc295 = loc("Mul_433") #loc296 = loc("Add_434") #loc297 = loc("Concat_435") #loc298 = loc("Resize_437") #loc299 = loc("Concat_438") #loc300 = loc("Conv_439") #loc301 = loc("Relu_440") #loc302 = loc("Conv_441") #loc303 = loc("Relu_442") #loc304 = loc("Conv_443") #loc305 = loc("Split_444") #loc306 = loc("Clip_446") #loc307 = loc("Clip_447") #loc313 = loc("Transpose_452") #loc314 = loc("Transpose_451") #loc315 = loc("Transpose_450") #loc316 = loc("Transpose_449") #loc317 = loc("Transpose_448") #loc318 = loc("Transpose_453") #loc319 = loc(fused[#loc1, #loc2, #loc3, #loc4, #loc5, #loc6, #loc7, #loc8, #loc9, 
#loc10, #loc11, #loc12, #loc13, #loc14, #loc15, #loc16, #loc17, #loc18, #loc19, #loc20, #loc21, #loc22, #loc23, #loc24, #loc25, #loc26, #loc27, #loc28, #loc29, #loc30, #loc31, #loc32, #loc33, #loc34, #loc35, #loc36, #loc37, #loc38, #loc39, #loc40, #loc41, #loc42, #loc43, #loc44, #loc45, #loc46, #loc47, #loc48, #loc49, #loc50, #loc51, #loc52, #loc53, #loc54, #loc55, #loc56, #loc57, #loc58, #loc59, #loc60, #loc61, #loc62, #loc63, #loc64, #loc65, #loc66, #loc67, #loc68, #loc69, #loc70, #loc71, #loc72, #loc73, #loc74, #loc75, #loc76, #loc77, #loc78, #loc79, #loc80, #loc81, #loc82, #loc83, #loc84, #loc85, #loc86, #loc87, #loc88, #loc89, #loc90, #loc91, #loc92, #loc93, #loc94, #loc95, #loc96, #loc97, #loc98, #loc99, #loc100, #loc101, #loc102, #loc103, #loc104, #loc105, #loc106, #loc107, #loc108, #loc109, #loc110, #loc111, #loc112, #loc113, #loc114, #loc115, #loc116, #loc117, #loc118, #loc119, #loc120, #loc121, #loc122, #loc123, #loc124, #loc125, #loc126, #loc127, #loc128, #loc129, #loc130, #loc131, #loc132, #loc133, #loc134, #loc135, #loc136, #loc137, #loc138, #loc139, #loc140, #loc141, #loc142, #loc143, #loc144, #loc145, #loc146, #loc147, #loc148, #loc149, #loc150, #loc151, #loc152, #loc153, #loc154, #loc155, #loc156, #loc157, #loc158, #loc159, #loc160, #loc161, #loc162, #loc163, #loc164, #loc165, #loc166, #loc167, #loc168, #loc169, #loc170, #loc171, #loc172, #loc173, #loc174, #loc175, #loc176, #loc177, #loc178, #loc179, #loc180, #loc181, #loc182, #loc183, #loc184, #loc185, #loc186, #loc187, #loc188, #loc189, #loc190, #loc191, #loc192, #loc193, #loc194, #loc195, #loc196, #loc197, #loc198, #loc199, #loc200, #loc201, #loc202, #loc203, #loc204, #loc205, #loc206, #loc207, #loc208, #loc209, #loc210, #loc211, #loc212, #loc213, #loc214, #loc215, #loc216, #loc217, #loc218, #loc219, #loc220, #loc221, #loc222, #loc223, #loc224, #loc225, #loc226, #loc227, #loc228, #loc229, #loc230, #loc231, #loc232, #loc233, #loc234, #loc235, #loc236, #loc237, #loc238, #loc239, #loc240, #loc241, 
#loc242, #loc243, #loc244, #loc245, #loc246, #loc247, #loc248, #loc249, #loc250, #loc251, #loc252, #loc253, #loc254, #loc255, #loc256, #loc257, #loc258, #loc259, #loc260, #loc261, #loc262, #loc263, #loc264, #loc265, #loc266, #loc267, #loc268, #loc269, #loc270, #loc271, #loc272, #loc273, #loc274, #loc275, #loc276, #loc277, #loc278, #loc279, #loc280, #loc281, #loc282, #loc283, #loc284, #loc285, #loc286, #loc287, #loc288, #loc289, #loc290, #loc291, #loc292, #loc293, #loc294, #loc295, #loc296, #loc297, #loc298, #loc299, #loc300, #loc301, #loc302, #loc303, #loc304, #loc305, #loc306, #loc307]) #loc320 = loc(fused[#loc7, #loc8]) #loc321 = loc(fused[#loc11, #loc9, #loc12]) #loc322 = loc(fused[#loc9, #loc12, #loc11]) #loc323 = loc(fused[#loc20, #loc21]) #loc324 = loc(fused[#loc22, #loc23]) #loc325 = loc(fused[#loc24, #loc25]) #loc326 = loc(fused[#loc26, #loc27]) #loc327 = loc(fused[#loc29, #loc30]) #loc328 = loc(fused[#loc31, #loc32]) #loc329 = loc(fused[#loc33, #loc34]) #loc330 = loc(fused[#loc35, #loc36]) #loc331 = loc(fused[#loc37, #loc38]) #loc332 = loc(fused[#loc40, #loc41]) #loc333 = loc(fused[#loc48, #loc49]) #loc334 = loc(fused[#loc50, #loc51]) #loc335 = loc(fused[#loc53, #loc54]) #loc336 = loc(fused[#loc60, #loc61]) #loc337 = loc(fused[#loc62, #loc63]) #loc338 = loc(fused[#loc64, #loc65]) #loc339 = loc(fused[#loc67, #loc68]) #loc340 = loc(fused[#loc74, #loc75]) #loc341 = loc(fused[#loc97, #loc98]) #loc342 = loc(fused[#loc109, #loc110]) #loc343 = loc(fused[#loc121, #loc122]) #loc344 = loc(fused[#loc132, #loc133]) #loc345 = loc(fused[#loc134, #loc135]) #loc346 = loc(fused[#loc151, #loc152]) #loc347 = loc(fused[#loc153, #loc154]) #loc348 = loc(fused[#loc160, #loc161]) #loc349 = loc(fused[#loc171, #loc172]) #loc350 = loc(fused[#loc173, #loc174]) #loc351 = loc(fused[#loc190, #loc191]) #loc352 = loc(fused[#loc192, #loc193]) #loc353 = loc(fused[#loc199, #loc200]) #loc354 = loc(fused[#loc210, #loc211]) #loc355 = loc(fused[#loc212, #loc213]) #loc356 = loc(fused[#loc219, 
#loc220]) #loc357 = loc(fused[#loc225, #loc226]) #loc358 = loc(fused[#loc230, #loc231, #loc229]) #loc359 = loc(fused[#loc230, #loc231]) #loc360 = loc(fused[#loc248, #loc249]) #loc361 = loc(fused[#loc266, #loc267]) #loc362 = loc(fused[#loc283, #loc284]) #loc363 = loc(fused[#loc300, #loc301]) #loc364 = loc(fused[#loc302, #loc303])