#loc = loc(unknown) #loc308 = loc("Cast_0") #loc309 = loc("Transpose_9") #loc310 = loc("Transpose_10") #loc311 = loc("Transpose_11") #loc312 = loc("Transpose_12") module attributes { llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-linux-gnu", "onnx-mlir.symbol-postfix" = "onnxmodel.onnx.mlir", vaimlconf.device = "stx", vaimlconf.device_models = "${vaimlconf.install_dir}/data/deviceModels", vaimlconf.install_dir = "/usr/local/lib/python3.10/dist-packages/flexml/flexml_extras", vaimlconf.library_metadata = ["${vaimlconf.install_dir}/data/libraryMetadata/L1", "${vaimlconf.install_dir}/data/libraryMetadata/L2", "${vaimlconf.install_dir}/../../vitis_mllib/L1/metadata", "${vaimlconf.install_dir}/../../vitis_mllib/L2/metadata", "${vaimlconf.install_dir}/share/microkernel-tiling/tiling-recipe-specs"], vaimlconf.single_core_compiler = "chess"} { func.func private @forward_outlined_part_0(%arg0: tensor<1x180x320x4xbf16> loc("Cast_0"), %arg1: tensor<1x16x90x160xbf16> loc("Transpose_9"), %arg2: tensor<1x20x45x80xbf16> loc("Transpose_10"), %arg3: tensor<1x40x23x40xbf16> loc("Transpose_11"), %arg4: tensor<1x64x12x20xbf16> loc("Transpose_12")) -> (tensor<1x16x90x160xbf16>, tensor<1x20x45x80xbf16>, tensor<1x40x23x40xbf16>, tensor<1x64x12x20xbf16>, tensor<1x3x180x320xbf16>, tensor<1x1x180x320xbf16>) attributes {aie_partition = 0 : i32, kernel} { %0 = xten_nn.subgraph (%arg5 = %arg0: tensor<1x180x320x4xbf16>) attributes { LayerName = "Div_2", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "386", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 180, 320, 4]> : vector<4xindex> } ], OutputName = "Div_2", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "387", l3_extend_end = dense<[0, 
4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 180, 320, 4]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x180x320x4xbf16>) attributes { LayerName = "Div_2", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "386", Port = "data_io.ifm", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 180, 320, 4]> : vector<4xindex> } ], OutputName = "Div_2", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "387", Port = "data_io.ofm", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 180, 320, 4]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.906250e-03 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.906250e-03> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_2", OutputName = "Div_2", shift = 0 : i8} : (tensor<1x180x320x4xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x180x320x4xbf16> loc(#loc1) xten_nn.output %463 : tensor<1x180x320x4xbf16> loc(#loc1) } -> tensor<1x180x320x4xbf16> loc(#loc1) xten_nn.output %461 : tensor<1x180x320x4xbf16> loc(#loc1) } -> tensor<1x180x320x4xbf16> loc(#loc1) %1 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_443/biases"} -> tensor<4xbf16> loc(#loc) %2 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_443/weights"} -> tensor<4x16x1x1xbf16> loc(#loc) %3 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_441/biases"} -> tensor<16xbf16> loc(#loc) %4 = xten_nn.load_external_const {file = 
"constants.h5", key = "Conv_441/weights"} -> tensor<16x16x3x3xbf16> loc(#loc) %5 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_439/biases"} -> tensor<16xbf16> loc(#loc) %6 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_439/weights"} -> tensor<16x35x3x3xbf16> loc(#loc) %7 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_428/biases"} -> tensor<16xbf16> loc(#loc) %8 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_428/weights"} -> tensor<16x32x3x3xbf16> loc(#loc) %9 = xten_nn.load_external_const {file = "constants.h5", key = "Sub_431/Constant_0_0"} -> tensor<1x16x90x160xbf16> loc(#loc2) %10 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_423/biases"} -> tensor<32xbf16> loc(#loc) %11 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_423/weights"} -> tensor<32x32x3x3xbf16> loc(#loc) %12 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_419/biases"} -> tensor<32xbf16> loc(#loc) %13 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_419/weights"} -> tensor<32x59x3x3xbf16> loc(#loc) %14 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_408/biases"} -> tensor<20xbf16> loc(#loc) %15 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_408/weights"} -> tensor<20x40x3x3xbf16> loc(#loc) %16 = xten_nn.load_external_const {file = "constants.h5", key = "Sub_411/Constant_0_0"} -> tensor<1x20x45x80xbf16> loc(#loc3) %17 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_403/biases"} -> tensor<40xbf16> loc(#loc) %18 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_403/weights"} -> tensor<40x40x3x3xbf16> loc(#loc) %19 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_399/biases"} -> tensor<40xbf16> loc(#loc) %20 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_399/weights"} -> tensor<40x107x3x3xbf16> loc(#loc) %21 = xten_nn.load_external_const {file = 
"constants.h5", key = "Conv_382/biases"} -> tensor<40xbf16> loc(#loc) %22 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_382/weights"} -> tensor<40x80x3x3xbf16> loc(#loc) %23 = xten_nn.load_external_const {file = "constants.h5", key = "Sub_385/Constant_0_0"} -> tensor<1x40x23x40xbf16> loc(#loc4) %24 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_377/biases"} -> tensor<80xbf16> loc(#loc) %25 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_377/weights"} -> tensor<80x80x3x3xbf16> loc(#loc) %26 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_373/biases"} -> tensor<80xbf16> loc(#loc) %27 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_373/weights"} -> tensor<80x171x3x3xbf16> loc(#loc) %28 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_356/biases"} -> tensor<64xbf16> loc(#loc) %29 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_356/weights"} -> tensor<64x128x3x3xbf16> loc(#loc) %30 = xten_nn.load_external_const {file = "constants.h5", key = "Sub_359/Constant_0_0"} -> tensor<1x64x12x20xbf16> loc(#loc5) %31 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_351/biases"} -> tensor<128xbf16> loc(#loc) %32 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_351/weights"} -> tensor<128x128x3x3xbf16> loc(#loc) %33 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_340/biases"} -> tensor<128xbf16> loc(#loc) %34 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_340/weights"} -> tensor<128x960x1x1xbf16> loc(#loc) %35 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_343/biases"} -> tensor<128xbf16> loc(#loc) %36 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_343/weights"} -> tensor<128x960x1x1xbf16> loc(#loc) %37 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_331/biases"} -> tensor<960xbf16> loc(#loc) %38 = xten_nn.load_external_const {file = 
"constants.h5", key = "Conv_331/weights"} -> tensor<960x160x1x1xbf16> loc(#loc) %39 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_329/biases"} -> tensor<160xbf16> loc(#loc) %40 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_329/weights"} -> tensor<160x960x1x1xbf16> loc(#loc) %41 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_320/biases"} -> tensor<960xbf16> loc(#loc) %42 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_320/weights"} -> tensor<960x240x1x1xbf16> loc(#loc) %43 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_318/biases"} -> tensor<240xbf16> loc(#loc) %44 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_318/weights"} -> tensor<240x960x1x1xbf16> loc(#loc) %45 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_308/biases"} -> tensor<960xbf16> loc(#loc) %46 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_308/weights"} -> tensor<960x1x9x9xbf16> loc(#loc) %47 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_299/biases"} -> tensor<960xbf16> loc(#loc) %48 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_299/weights"} -> tensor<960x160x1x1xbf16> loc(#loc) %49 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_297/biases"} -> tensor<160xbf16> loc(#loc) %50 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_297/weights"} -> tensor<160x960x1x1xbf16> loc(#loc) %51 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_288/biases"} -> tensor<960xbf16> loc(#loc) %52 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_288/weights"} -> tensor<960x240x1x1xbf16> loc(#loc) %53 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_286/biases"} -> tensor<240xbf16> loc(#loc) %54 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_286/weights"} -> tensor<240x960x1x1xbf16> loc(#loc) %55 = xten_nn.load_external_const {file = 
"constants.h5", key = "Conv_276/biases"} -> tensor<960xbf16> loc(#loc) %56 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_276/weights"} -> tensor<960x1x9x9xbf16> loc(#loc) %57 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_267/biases"} -> tensor<960xbf16> loc(#loc) %58 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_267/weights"} -> tensor<960x160x1x1xbf16> loc(#loc) %59 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_266/biases"} -> tensor<160xbf16> loc(#loc) %60 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_266/weights"} -> tensor<160x672x1x1xbf16> loc(#loc) %61 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_257/biases"} -> tensor<672xbf16> loc(#loc) %62 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_257/weights"} -> tensor<672x168x1x1xbf16> loc(#loc) %63 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_255/biases"} -> tensor<168xbf16> loc(#loc) %64 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_255/weights"} -> tensor<168x672x1x1xbf16> loc(#loc) %65 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_245/biases"} -> tensor<672xbf16> loc(#loc) %66 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_245/weights"} -> tensor<672x1x9x9xbf16> loc(#loc) %67 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_236/biases"} -> tensor<672xbf16> loc(#loc) %68 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_236/weights"} -> tensor<672x112x1x1xbf16> loc(#loc) %69 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_234/biases"} -> tensor<112xbf16> loc(#loc) %70 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_234/weights"} -> tensor<112x672x1x1xbf16> loc(#loc) %71 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_225/biases"} -> tensor<672xbf16> loc(#loc) %72 = xten_nn.load_external_const {file = 
"constants.h5", key = "Conv_225/weights"} -> tensor<672x168x1x1xbf16> loc(#loc) %73 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_223/biases"} -> tensor<168xbf16> loc(#loc) %74 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_223/weights"} -> tensor<168x672x1x1xbf16> loc(#loc) %75 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_213/biases"} -> tensor<672xbf16> loc(#loc) %76 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_213/weights"} -> tensor<672x1x3x3xbf16> loc(#loc) %77 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_204/biases"} -> tensor<672xbf16> loc(#loc) %78 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_204/weights"} -> tensor<672x112x1x1xbf16> loc(#loc) %79 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_203/biases"} -> tensor<112xbf16> loc(#loc) %80 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_203/weights"} -> tensor<112x480x1x1xbf16> loc(#loc) %81 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_194/biases"} -> tensor<480xbf16> loc(#loc) %82 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_194/weights"} -> tensor<480x120x1x1xbf16> loc(#loc) %83 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_192/biases"} -> tensor<120xbf16> loc(#loc) %84 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_192/weights"} -> tensor<120x480x1x1xbf16> loc(#loc) %85 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_182/biases"} -> tensor<480xbf16> loc(#loc) %86 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_182/weights"} -> tensor<480x1x3x3xbf16> loc(#loc) %87 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_173/biases"} -> tensor<480xbf16> loc(#loc) %88 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_173/weights"} -> tensor<480x80x1x1xbf16> loc(#loc) %89 = xten_nn.load_external_const {file = 
"constants.h5", key = "Conv_171/biases"} -> tensor<80xbf16> loc(#loc) %90 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_171/weights"} -> tensor<80x184x1x1xbf16> loc(#loc) %91 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_162/biases"} -> tensor<184xbf16> loc(#loc) %92 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_162/weights"} -> tensor<184x1x3x3xbf16> loc(#loc) %93 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_153/biases"} -> tensor<184xbf16> loc(#loc) %94 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_153/weights"} -> tensor<184x80x1x1xbf16> loc(#loc) %95 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_151/biases"} -> tensor<80xbf16> loc(#loc) %96 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_151/weights"} -> tensor<80x184x1x1xbf16> loc(#loc) %97 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_142/biases"} -> tensor<184xbf16> loc(#loc) %98 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_142/weights"} -> tensor<184x1x3x3xbf16> loc(#loc) %99 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_133/biases"} -> tensor<184xbf16> loc(#loc) %100 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_133/weights"} -> tensor<184x80x1x1xbf16> loc(#loc) %101 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_131/biases"} -> tensor<80xbf16> loc(#loc) %102 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_131/weights"} -> tensor<80x200x1x1xbf16> loc(#loc) %103 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_122/biases"} -> tensor<200xbf16> loc(#loc) %104 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_122/weights"} -> tensor<200x1x3x3xbf16> loc(#loc) %105 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_113/biases"} -> tensor<200xbf16> loc(#loc) %106 = xten_nn.load_external_const {file = 
"constants.h5", key = "Conv_113/weights"} -> tensor<200x80x1x1xbf16> loc(#loc) %107 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_112/biases"} -> tensor<80xbf16> loc(#loc) %108 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_112/weights"} -> tensor<80x240x1x1xbf16> loc(#loc) %109 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_103/biases"} -> tensor<240xbf16> loc(#loc) %110 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_103/weights"} -> tensor<240x1x3x3xbf16> loc(#loc) %111 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_94/biases"} -> tensor<240xbf16> loc(#loc) %112 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_94/weights"} -> tensor<240x40x1x1xbf16> loc(#loc) %113 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_92/biases"} -> tensor<40xbf16> loc(#loc) %114 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_92/weights"} -> tensor<40x120x1x1xbf16> loc(#loc) %115 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_83/biases"} -> tensor<120xbf16> loc(#loc) %116 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_83/weights"} -> tensor<120x32x1x1xbf16> loc(#loc) %117 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_81/biases"} -> tensor<32xbf16> loc(#loc) %118 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_81/weights"} -> tensor<32x120x1x1xbf16> loc(#loc) %119 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_78/biases"} -> tensor<120xbf16> loc(#loc) %120 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_78/weights"} -> tensor<120x1x5x5xbf16> loc(#loc) %121 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_76/biases"} -> tensor<120xbf16> loc(#loc) %122 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_76/weights"} -> tensor<120x40x1x1xbf16> loc(#loc) %123 = xten_nn.load_external_const {file = 
"constants.h5", key = "Conv_74/biases"} -> tensor<40xbf16> loc(#loc) %124 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_74/weights"} -> tensor<40x120x1x1xbf16> loc(#loc) %125 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_65/biases"} -> tensor<120xbf16> loc(#loc) %126 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_65/weights"} -> tensor<120x32x1x1xbf16> loc(#loc) %127 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_63/biases"} -> tensor<32xbf16> loc(#loc) %128 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_63/weights"} -> tensor<32x120x1x1xbf16> loc(#loc) %129 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_60/biases"} -> tensor<120xbf16> loc(#loc) %130 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_60/weights"} -> tensor<120x1x5x5xbf16> loc(#loc) %131 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_58/biases"} -> tensor<120xbf16> loc(#loc) %132 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_58/weights"} -> tensor<120x40x1x1xbf16> loc(#loc) %133 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_57/biases"} -> tensor<40xbf16> loc(#loc) %134 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_57/weights"} -> tensor<40x72x1x1xbf16> loc(#loc) %135 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_48/biases"} -> tensor<72xbf16> loc(#loc) %136 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_48/weights"} -> tensor<72x24x1x1xbf16> loc(#loc) %137 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_46/biases"} -> tensor<24xbf16> loc(#loc) %138 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_46/weights"} -> tensor<24x72x1x1xbf16> loc(#loc) %139 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_43/biases"} -> tensor<72xbf16> loc(#loc) %140 = xten_nn.load_external_const {file = "constants.h5", key = 
"Conv_43/weights"} -> tensor<72x1x5x5xbf16> loc(#loc) %141 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_41/biases"} -> tensor<72xbf16> loc(#loc) %142 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_41/weights"} -> tensor<72x24x1x1xbf16> loc(#loc) %143 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_39/biases"} -> tensor<24xbf16> loc(#loc) %144 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_39/weights"} -> tensor<24x72x1x1xbf16> loc(#loc) %145 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_37/biases"} -> tensor<72xbf16> loc(#loc) %146 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_37/weights"} -> tensor<72x1x3x3xbf16> loc(#loc) %147 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_35/biases"} -> tensor<72xbf16> loc(#loc) %148 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_35/weights"} -> tensor<72x24x1x1xbf16> loc(#loc) %149 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_34/biases"} -> tensor<24xbf16> loc(#loc) %150 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_34/weights"} -> tensor<24x64x1x1xbf16> loc(#loc) %151 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_32/biases"} -> tensor<64xbf16> loc(#loc) %152 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_32/weights"} -> tensor<64x1x3x3xbf16> loc(#loc) %153 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_30/biases"} -> tensor<64xbf16> loc(#loc) %154 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_30/weights"} -> tensor<64x16x1x1xbf16> loc(#loc) %155 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_28/biases"} -> tensor<16xbf16> loc(#loc) %156 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_28/weights"} -> tensor<16x16x1x1xbf16> loc(#loc) %157 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_26/biases"} -> 
tensor<16xbf16> loc(#loc) %158 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_26/weights"} -> tensor<16x1x3x3xbf16> loc(#loc) %159 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_17/biases"} -> tensor<16xbf16> loc(#loc) %160 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_17/weights"} -> tensor<16x3x3x3xbf16> loc(#loc) %161 = xten_nn.load_external_const {file = "constants.h5", key = "Div_16/Constant_1_0"} -> tensor<1x3x180x320xbf16> loc(#loc6) %162 = xten_nn.load_external_const {file = "constants.h5", key = "Sub_14/Constant_1_0"} -> tensor<1x3x180x320xbf16> loc(#loc320) %163 = xten_nn.subgraph (%arg5 = %0: tensor<1x180x320x4xbf16>) attributes { LayerName = "Slice_7_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "387", Port = "data_io.ifm", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 180, 320, 4]> : vector<4xindex> } ], OutputName = "Slice_7", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "392", Port = "data_io.ofm", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 180, 320, 3]> : vector<4xindex> } ], Specializes = "SliceHCWC8Adf", With = { config.aie_arch = "aie2p", config.axis_letter = "W", config.dim_c = 184 : ui32, config.dim_h = 320 : ui32, config.dim_w = 4 : ui32, config.dtype = "bfloat16", config.end = 3 : ui32, config.start = 0 : ui32, config.step = 1 : ui32 }} { %461 = tosa.slice %arg5 { LayerName = "Slice_7", OutputName = "Slice_7", size = array, start = array} : (tensor<1x180x320x4xbf16>) -> tensor<1x180x320x3xbf16> loc(#loc9) xten_nn.output %461 : tensor<1x180x320x3xbf16> loc(#loc9) } -> tensor<1x180x320x3xbf16> loc(#loc9) %164 = 
xten_nn.subgraph (%arg5 = %163: tensor<1x180x320x3xbf16>) attributes { LayerName = "CompilerGenerated_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ifm", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 180, 320, 3]> : vector<4xindex> } ], OutputName = "CompilerGenerated_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ofm", l3_extend_end = dense<[0, 4, 0, 5]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 180, 320, 3]> : vector<4xindex> } ], Specializes = "BufferPadAdf", With = { config.aie_arch = "aie2p", config.dim_0 = 320 : ui32, config.dim_0_padded = 320 : ui32, config.dim_1 = 23 : ui32, config.dim_1_padded = 23 : ui32, config.dim_2 = 3 : ui32, config.dim_2_padded = 8 : ui32, config.dim_3 = 8 : ui32, config.dim_3_padded = 8 : ui32, config.dtype = "bfloat16" }} { xten_nn.output %arg5 : tensor<1x180x320x3xbf16> loc(#loc10) } -> tensor<1x180x320x3xbf16> loc(#loc10) %165 = xten_nn.subgraph (%arg5 = %164: tensor<1x180x320x3xbf16>) attributes { LayerName = "Slice_7_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "387", Port = "data_io.ifm", l3_extend_end = dense<[0, 4, 0, 5]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 180, 320, 3]> : vector<4xindex> } ], OutputName = "Add_445_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "911", Port = "data_io.ofm", l3_extend_end = dense<[0, 5, 4, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, 
l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], Specializes = "Transpose4dAdf", With = { config.aie_arch = "aie2p", config.dim_0 = 320 : ui32, config.dim_1 = 23 : ui32, config.dim_2 = 8 : ui32, config.dim_3 = 8 : ui32, config.dtype = "bfloat16", config.perm = 10 : ui32 }} { %461 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc11) %462 = tosa.transpose %arg5, %461 : (tensor<1x180x320x3xbf16>, tensor<4xi32>) -> tensor<1x3x180x320xbf16> loc(#loc322) xten_nn.output %462 : tensor<1x3x180x320xbf16> loc(#loc322) } -> tensor<1x3x180x320xbf16> loc(#loc321) %166 = xten_nn.subgraph (%arg5 = %165: tensor<1x3x180x320xbf16>) attributes { LayerName = "CompilerGenerated_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ifm", l3_extend_end = dense<[0, 5, 4, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], OutputName = "CompilerGenerated_Duplicated#1", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ofm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], Specializes = "BufferUnpadAdf", With = { config.aie_arch = "aie2p", config.dim_0 = 184 : ui32, config.dim_0_unpadded = 180 : ui32, config.dim_1 = 1 : ui32, config.dim_1_unpadded = 1 : ui32, config.dim_2 = 320 : ui32, config.dim_2_unpadded = 320 : ui32, config.dim_3 = 8 : ui32, config.dim_3_unpadded = 8 : ui32, config.dtype = "bfloat16" }} { xten_nn.output %arg5 : tensor<1x3x180x320xbf16> loc(#loc10) } -> tensor<1x3x180x320xbf16> loc(#loc10) %167 = xten_nn.subgraph (%arg5 = %166: tensor<1x3x180x320xbf16>) attributes { LayerName = "AveragePool_346", Operands = [ { 
CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "393", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], OutputName = "AveragePool_346", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "778", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 90, 160]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x3x180x320xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], HWPaddingNotCounted = [[0, 0], [0, 0]], LayerName = "AveragePool_346", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "393", Port = "data_io.ifm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], OutputName = "AveragePool_346", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "778", Port = "data_io.ofm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 90, 160]> : vector<4xindex> } ], Specializes = "AvgPool2dBf16", With = { config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.dtype = "bfloat16", config.ksize = 2 : ui8, config.stride_log2 = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc12) %464 = tosa.transpose %arg6, %463 : (tensor<1x3x180x320xbf16>, tensor<4xi32>) -> tensor<1x180x320x3xbf16> loc(#loc12) %465 = tosa.avg_pool2d %464 { PartOfLayerName = 
"AveragePool_346", PartOfOutputName = "AveragePool_346", acc_type = f32, kernel = array, pad = array, stride = array} : (tensor<1x180x320x3xbf16>) -> tensor<1x90x160x3xbf16> loc(#loc12) %466 = tosa.transpose %465, %462 : (tensor<1x90x160x3xbf16>, tensor<4xi32>) -> tensor<1x3x90x160xbf16> loc(#loc12) xten_nn.output %466 : tensor<1x3x90x160xbf16> loc(#loc12) } -> tensor<1x3x90x160xbf16> loc(#loc12) xten_nn.output %461 : tensor<1x3x90x160xbf16> loc(#loc12) } -> tensor<1x3x90x160xbf16> loc(#loc12) %168 = xten_nn.subgraph (%arg5 = %167: tensor<1x3x90x160xbf16>) attributes { LayerName = "AveragePool_347", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "778", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 90, 160]> : vector<4xindex> } ], OutputName = "AveragePool_347", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "779", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 45, 80]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x3x90x160xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], HWPaddingNotCounted = [[0, 0], [0, 0]], LayerName = "AveragePool_347", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "778", Port = "data_io.ifm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 90, 160]> : vector<4xindex> } ], OutputName = "AveragePool_347", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "779", Port = "data_io.ofm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 45, 80]> 
: vector<4xindex> } ], Specializes = "AvgPool2dBf16", With = { config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.dtype = "bfloat16", config.ksize = 2 : ui8, config.stride_log2 = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc13) %464 = tosa.transpose %arg6, %463 : (tensor<1x3x90x160xbf16>, tensor<4xi32>) -> tensor<1x90x160x3xbf16> loc(#loc13) %465 = tosa.avg_pool2d %464 { PartOfLayerName = "AveragePool_347", PartOfOutputName = "AveragePool_347", acc_type = f32, kernel = array, pad = array, stride = array} : (tensor<1x90x160x3xbf16>) -> tensor<1x45x80x3xbf16> loc(#loc13) %466 = tosa.transpose %465, %462 : (tensor<1x45x80x3xbf16>, tensor<4xi32>) -> tensor<1x3x45x80xbf16> loc(#loc13) xten_nn.output %466 : tensor<1x3x45x80xbf16> loc(#loc13) } -> tensor<1x3x45x80xbf16> loc(#loc13) xten_nn.output %461 : tensor<1x3x45x80xbf16> loc(#loc13) } -> tensor<1x3x45x80xbf16> loc(#loc13) %169 = xten_nn.subgraph (%arg5 = %168: tensor<1x3x45x80xbf16>) attributes { LayerName = "AveragePool_348", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "779", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 45, 80]> : vector<4xindex> } ], OutputName = "AveragePool_348", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "780", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x3x45x80xbf16>) attributes { Dilations = array, HWPadding = [[0, 1], [0, 0]], 
HWPaddingNotCounted = [[0, 1], [0, 0]], LayerName = "AveragePool_348", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "779", Port = "data_io.ifm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 45, 80]> : vector<4xindex> } ], OutputName = "AveragePool_348", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "780", Port = "data_io.ofm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 23, 40]> : vector<4xindex> } ], Specializes = "AvgPool2dBf16", With = { config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.dtype = "bfloat16", config.ksize = 2 : ui8, config.stride_log2 = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc14) %464 = tosa.transpose %arg6, %463 : (tensor<1x3x45x80xbf16>, tensor<4xi32>) -> tensor<1x45x80x3xbf16> loc(#loc14) %465 = tosa.avg_pool2d %464 { PartOfLayerName = "AveragePool_348", PartOfOutputName = "AveragePool_348", acc_type = f32, kernel = array, pad = array, stride = array} : (tensor<1x45x80x3xbf16>) -> tensor<1x23x40x3xbf16> loc(#loc14) %466 = tosa.transpose %465, %462 : (tensor<1x23x40x3xbf16>, tensor<4xi32>) -> tensor<1x3x23x40xbf16> loc(#loc14) xten_nn.output %466 : tensor<1x3x23x40xbf16> loc(#loc14) } -> tensor<1x3x23x40xbf16> loc(#loc14) xten_nn.output %461 : tensor<1x3x23x40xbf16> loc(#loc14) } -> tensor<1x3x23x40xbf16> loc(#loc14) %170 = xten_nn.subgraph (%arg5 = %166: tensor<1x3x180x320xbf16>, %arg6 = %162: tensor<1x3x180x320xbf16>) attributes { LayerName = "Sub_14", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "393", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = 
dense<[1, 3, 180, 320]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "392", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], OutputName = "Sub_14", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "399", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x3x180x320xbf16>, %arg8 = %arg6: tensor<1x3x180x320xbf16>) attributes { LayerName = "Sub_14", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "393", Port = "data_io.ifm1", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "392", Port = "data_io.ifm2", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], OutputName = "Sub_14", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "399", Port = "data_io.ofm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], Specializes = "AddBf16", Traits = { Binary = true, Elementwise = true }, With = { config.act = 0 : ui8, config.act_type = "LINEAR", config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.add %arg7, %arg8 {LayerName = "Sub_14", OutputName = "Initializer_398"} : (tensor<1x3x180x320xbf16>, 
tensor<1x3x180x320xbf16>) -> tensor<1x3x180x320xbf16> loc(#loc320) xten_nn.output %462 : tensor<1x3x180x320xbf16> loc(#loc320) } -> tensor<1x3x180x320xbf16> loc(#loc320) xten_nn.output %461 : tensor<1x3x180x320xbf16> loc(#loc320) } -> tensor<1x3x180x320xbf16> loc(#loc320) %171 = xten_nn.subgraph (%arg5 = %170: tensor<1x3x180x320xbf16>, %arg6 = %161: tensor<1x3x180x320xbf16>) attributes { LayerName = "Div_16", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "399", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "393", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], OutputName = "Div_16", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "401", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x3x180x320xbf16>, %arg8 = %arg6: tensor<1x3x180x320xbf16>) attributes { LayerName = "Div_16", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "399", Port = "data_io.ifm1", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "393", Port = "data_io.ifm2", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], OutputName = "Div_16", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", 
L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "401", Port = "data_io.ofm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { OutputName = "Div_16", PartOfLayerName = "Div_16", shift = 0 : i8} : (tensor<1x3x180x320xbf16>, tensor<1x3x180x320xbf16>) -> tensor<1x3x180x320xbf16> loc(#loc6) xten_nn.output %462 : tensor<1x3x180x320xbf16> loc(#loc6) } -> tensor<1x3x180x320xbf16> loc(#loc6) xten_nn.output %461 : tensor<1x3x180x320xbf16> loc(#loc6) } -> tensor<1x3x180x320xbf16> loc(#loc6) %172 = xten_nn.subgraph (%arg5 = %171: tensor<1x3x180x320xbf16>, %arg6 = %160: tensor<16x3x3x3xbf16>, %arg7 = %159: tensor<16xbf16>) attributes { LayerName = "Conv_17", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "401", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> }, { Name = "399", UnknownDataFormat = true, l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[16, 3, 3, 3]> : vector<4xindex> }, { Name = "929", UnknownDataFormat = true } ], OutputName = "Conv_17", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "928", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x3x180x320xbf16>, %arg9 = %arg6: tensor<16x3x3x3xbf16>, %arg10 = %arg7: tensor<16xbf16>) attributes { Dilations = array, 
HWPadding = [[1, 0], [1, 0]], LayerName = "Conv_17", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "401", Port = "data_io.ifm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> }, { Name = "399", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[16, 3, 3, 3]> : vector<4xindex> }, { Name = "929", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_17", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "928", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 3 : ui8, config.ksize.width = 3 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 2 : ui8, config.stride_w = 2 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = tosa.transpose %arg9, %463 : (tensor<16x3x3x3xbf16>, tensor<4xi32>) -> tensor<16x3x3x3xbf16> loc(#loc15) %465 = tosa.transpose %arg8, %463 : (tensor<1x3x180x320xbf16>, tensor<4xi32>) -> tensor<1x180x320x3xbf16> loc(#loc15) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_17", 
PartOfOutputName = "Conv_17", dilation = array, pad = array, stride = array} : (tensor<1x180x320x3xbf16>, tensor<16x3x3x3xbf16>, tensor<16xbf16>) -> tensor<1x90x160x16xbf16> loc(#loc15) %467 = tosa.transpose %466, %462 : (tensor<1x90x160x16xbf16>, tensor<4xi32>) -> tensor<1x16x90x160xbf16> loc(#loc15) xten_nn.output %467 : tensor<1x16x90x160xbf16> loc(#loc15) } -> tensor<1x16x90x160xbf16> loc(#loc15) xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc15) } -> tensor<1x16x90x160xbf16> loc(#loc15) %173 = xten_nn.subgraph (%arg5 = %172: tensor<1x16x90x160xbf16>) attributes { LayerName = "Add_19", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "928", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Add_19", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "405", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x16x90x160xbf16>) attributes { LayerName = "Add_19", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "928", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Add_19", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "405", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", 
config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_19", OutputName = "Add_19"} : (tensor<1x16x90x160xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc16) xten_nn.output %463 : tensor<1x16x90x160xbf16> loc(#loc16) } -> tensor<1x16x90x160xbf16> loc(#loc16) xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc16) } -> tensor<1x16x90x160xbf16> loc(#loc16) %174 = xten_nn.subgraph (%arg5 = %173: tensor<1x16x90x160xbf16>) attributes { LayerName = "Clip_22", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "405", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Clip_22", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "408", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x16x90x160xbf16>) attributes { LayerName = "Clip_22", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "405", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Clip_22", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "408", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], Specializes 
= "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_22", OutputName = "Clip_22", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x16x90x160xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc17) xten_nn.output %462 : tensor<1x16x90x160xbf16> loc(#loc17) } -> tensor<1x16x90x160xbf16> loc(#loc17) xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc17) } -> tensor<1x16x90x160xbf16> loc(#loc17) %175 = xten_nn.subgraph (%arg5 = %174: tensor<1x16x90x160xbf16>) attributes { LayerName = "Div_24", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "408", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Div_24", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "410", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x16x90x160xbf16>) attributes { LayerName = "Div_24", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "408", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Div_24", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "410", 
Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_24", OutputName = "Div_24", shift = 0 : i8} : (tensor<1x16x90x160xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc18) xten_nn.output %463 : tensor<1x16x90x160xbf16> loc(#loc18) } -> tensor<1x16x90x160xbf16> loc(#loc18) xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc18) } -> tensor<1x16x90x160xbf16> loc(#loc18) %176 = xten_nn.subgraph (%arg5 = %172: tensor<1x16x90x160xbf16>, %arg6 = %175: tensor<1x16x90x160xbf16>) attributes { LayerName = "Mul_25", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "928", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "401", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Mul_25", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "411", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x16x90x160xbf16>, %arg8 = %arg6: tensor<1x16x90x160xbf16>) attributes { 
LayerName = "Mul_25", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "928", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "401", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Mul_25", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "411", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_25", OutputName = "Mul_25", shift = 0 : i8} : (tensor<1x16x90x160xbf16>, tensor<1x16x90x160xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc19) xten_nn.output %462 : tensor<1x16x90x160xbf16> loc(#loc19) } -> tensor<1x16x90x160xbf16> loc(#loc19) xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc19) } -> tensor<1x16x90x160xbf16> loc(#loc19) %177 = xten_nn.subgraph (%arg5 = %176: tensor<1x16x90x160xbf16>, %arg6 = %158: tensor<16x1x3x3xbf16>, %arg7 = %157: tensor<16xbf16>) attributes { LayerName = "Conv_26", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "411", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "928", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[16, 1, 3, 3]> : vector<4xindex> }, { Name = "411", UnknownDataFormat = true 
} ], OutputName = "Relu_27", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "414", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x16x90x160xbf16>, %arg9 = %arg6: tensor<16x1x3x3xbf16>, %arg10 = %arg7: tensor<16xbf16>) attributes { Dilations = array, HWPadding = [[1, 1], [1, 1]], LayerName = "Conv_26", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "411", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "928", Port = "data_io.wts", SubPort = "wts_data", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[16, 1, 3, 3]> : vector<4xindex> }, { Name = "411", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_27", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "414", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], Specializes = "DepthwiseConv2dBf16", Traits = { NonNegativeOut = true }, With = { config.act = 1 : ui8, config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.kernel_height = 3 : ui8, config.kernel_width = 3 : ui8, config.stride = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = 
"tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc323) %465 = tosa.transpose %arg9, %464 : (tensor<16x1x3x3xbf16>, tensor<4xi32>) -> tensor<3x3x16x1xbf16> loc(#loc323) %466 = tosa.transpose %arg8, %463 : (tensor<1x16x90x160xbf16>, tensor<4xi32>) -> tensor<1x90x160x16xbf16> loc(#loc323) %467 = tosa.depthwise_conv2d %466, %465, %arg10 { PartOfLayerName = "Conv_26", PartOfOutputName = "Conv_26", dilation = array, pad = array, stride = array} : (tensor<1x90x160x16xbf16>, tensor<3x3x16x1xbf16>, tensor<16xbf16>) -> tensor<1x90x160x16xbf16> loc(#loc20) %468 = tosa.clamp %467 { LayerName = "Relu_27", OutputName = "Relu_27", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x90x160x16xbf16>) -> tensor<1x90x160x16xbf16> loc(#loc21) %469 = tosa.transpose %468, %462 : (tensor<1x90x160x16xbf16>, tensor<4xi32>) -> tensor<1x16x90x160xbf16> loc(#loc323) xten_nn.output %469 : tensor<1x16x90x160xbf16> loc(#loc21) } -> tensor<1x16x90x160xbf16> loc(#loc323) xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc323) } -> tensor<1x16x90x160xbf16> loc(#loc323) %178 = xten_nn.subgraph (%arg5 = %177: tensor<1x16x90x160xbf16>, %arg6 = %156: tensor<16x16x1x1xbf16>, %arg7 = %155: tensor<16xbf16>, %arg8 = %176: tensor<1x16x90x160xbf16>) attributes { LayerName = "Conv_28", OfmShare = 3 : index, Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "414", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { Name = "931", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[16, 16, 1, 1]> : vector<4xindex> }, { Name = "935", UnknownDataFormat = true }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "414", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], 
OutputName = "Add_29", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "417", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg9 = %arg5: tensor<1x16x90x160xbf16>, %arg10 = %arg6: tensor<16x16x1x1xbf16>, %arg11 = %arg7: tensor<16xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_28", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "414", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { Name = "931", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[16, 16, 1, 1]> : vector<4xindex> }, { Name = "935", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_28", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "934", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 
1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %463 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc22) %465 = tosa.reshape %arg10 {new_shape = array} : (tensor<16x16x1x1xbf16>) -> tensor<16x1x1x16xbf16> loc(#loc22) %466 = tosa.transpose %arg9, %464 : (tensor<1x16x90x160xbf16>, tensor<4xi32>) -> tensor<1x90x160x16xbf16> loc(#loc22) %467 = tosa.conv2d %466, %465, %arg11 { PartOfLayerName = "Conv_28", PartOfOutputName = "Conv_28", dilation = array, pad = array, stride = array} : (tensor<1x90x160x16xbf16>, tensor<16x1x1x16xbf16>, tensor<16xbf16>) -> tensor<1x90x160x16xbf16> loc(#loc22) %468 = tosa.transpose %467, %463 : (tensor<1x90x160x16xbf16>, tensor<4xi32>) -> tensor<1x16x90x160xbf16> loc(#loc22) xten_nn.output %468 : tensor<1x16x90x160xbf16> loc(#loc22) } -> tensor<1x16x90x160xbf16> loc(#loc22) %462 = xten_nn.subgraph (%arg9 = %461: tensor<1x16x90x160xbf16>, %arg10 = %arg8: tensor<1x16x90x160xbf16>) attributes { LayerName = "Add_29", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "934", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "414", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Add_29", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "417", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], Specializes = "AddBf16", Traits = { Binary = true, Elementwise = true }, With = { config.act = 0 
: ui8, config.act_type = "LINEAR", config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %463 = tosa.add %arg9, %arg10 {LayerName = "Add_29", OutputName = "Add_29"} : (tensor<1x16x90x160xbf16>, tensor<1x16x90x160xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc23) xten_nn.output %463 : tensor<1x16x90x160xbf16> loc(#loc23) } -> tensor<1x16x90x160xbf16> loc(#loc23) xten_nn.output %462 : tensor<1x16x90x160xbf16> loc(#loc23) } -> tensor<1x16x90x160xbf16> loc(#loc324) %179 = xten_nn.subgraph (%arg5 = %178: tensor<1x16x90x160xbf16>, %arg6 = %154: tensor<64x16x1x1xbf16>, %arg7 = %153: tensor<64xbf16>) attributes { LayerName = "Conv_30", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "417", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { Name = "934", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[64, 16, 1, 1]> : vector<4xindex> }, { Name = "417", UnknownDataFormat = true } ], OutputName = "Relu_31", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "420", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 90, 160]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x16x90x160xbf16>, %arg9 = %arg6: tensor<64x16x1x1xbf16>, %arg10 = %arg7: tensor<64xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_30", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "417", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { Name 
= "934", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[64, 16, 1, 1]> : vector<4xindex> }, { Name = "417", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_31", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "420", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 90, 160]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true, NonNegativeOut = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 1 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 0.000000e+00 : bf16, config.lrelu_alpha_kernel = 0.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc325) %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<64x16x1x1xbf16>) -> tensor<64x1x1x16xbf16> loc(#loc325) %465 = tosa.transpose %arg8, %463 : (tensor<1x16x90x160xbf16>, tensor<4xi32>) -> tensor<1x90x160x16xbf16> loc(#loc325) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_30", PartOfOutputName = "Conv_30", dilation = array, pad = array, stride = array} : (tensor<1x90x160x16xbf16>, tensor<64x1x1x16xbf16>, tensor<64xbf16>) -> tensor<1x90x160x64xbf16> loc(#loc24) %467 = tosa.clamp %466 { LayerName = "Relu_31", OutputName = "Relu_31", max_fp = 3.40282347E+38 : f32, max_int 
= 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x90x160x64xbf16>) -> tensor<1x90x160x64xbf16> loc(#loc25) %468 = tosa.transpose %467, %462 : (tensor<1x90x160x64xbf16>, tensor<4xi32>) -> tensor<1x64x90x160xbf16> loc(#loc325) xten_nn.output %468 : tensor<1x64x90x160xbf16> loc(#loc25) } -> tensor<1x64x90x160xbf16> loc(#loc325) xten_nn.output %461 : tensor<1x64x90x160xbf16> loc(#loc325) } -> tensor<1x64x90x160xbf16> loc(#loc325) %180 = xten_nn.subgraph (%arg5 = %179: tensor<1x64x90x160xbf16>, %arg6 = %152: tensor<64x1x3x3xbf16>, %arg7 = %151: tensor<64xbf16>) attributes { LayerName = "Conv_32", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "420", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 90, 160]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "937", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[64, 1, 3, 3]> : vector<4xindex> }, { Name = "941", UnknownDataFormat = true } ], OutputName = "Relu_33", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "423", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 45, 80]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x64x90x160xbf16>, %arg9 = %arg6: tensor<64x1x3x3xbf16>, %arg10 = %arg7: tensor<64xbf16>) attributes { Dilations = array, HWPadding = [[1, 0], [1, 0]], LayerName = "Conv_32", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "420", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 90, 160]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", 
L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "937", Port = "data_io.wts", SubPort = "wts_data", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[64, 1, 3, 3]> : vector<4xindex> }, { Name = "941", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_33", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "423", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 45, 80]> : vector<4xindex> } ], Specializes = "DepthwiseConv2dBf16", Traits = { NonNegativeOut = true }, With = { config.act = 1 : ui8, config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.kernel_height = 3 : ui8, config.kernel_width = 3 : ui8, config.stride = 2 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc326) %465 = tosa.transpose %arg9, %464 : (tensor<64x1x3x3xbf16>, tensor<4xi32>) -> tensor<3x3x64x1xbf16> loc(#loc326) %466 = tosa.transpose %arg8, %463 : (tensor<1x64x90x160xbf16>, tensor<4xi32>) -> tensor<1x90x160x64xbf16> loc(#loc326) %467 = tosa.depthwise_conv2d %466, %465, %arg10 { PartOfLayerName = "Conv_32", PartOfOutputName = "Conv_32", dilation = array, pad = array, stride = array} : (tensor<1x90x160x64xbf16>, tensor<3x3x64x1xbf16>, tensor<64xbf16>) -> tensor<1x45x80x64xbf16> loc(#loc26) %468 = tosa.clamp %467 { LayerName = "Relu_33", OutputName = "Relu_33", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x45x80x64xbf16>) -> tensor<1x45x80x64xbf16> loc(#loc27) %469 = tosa.transpose %468, %462 : (tensor<1x45x80x64xbf16>, tensor<4xi32>) -> 
tensor<1x64x45x80xbf16> loc(#loc326) xten_nn.output %469 : tensor<1x64x45x80xbf16> loc(#loc27) } -> tensor<1x64x45x80xbf16> loc(#loc326) xten_nn.output %461 : tensor<1x64x45x80xbf16> loc(#loc326) } -> tensor<1x64x45x80xbf16> loc(#loc326) %181 = xten_nn.subgraph (%arg5 = %180: tensor<1x64x45x80xbf16>, %arg6 = %150: tensor<24x64x1x1xbf16>, %arg7 = %149: tensor<24xbf16>) attributes { LayerName = "Conv_34", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "423", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 45, 80]> : vector<4xindex> }, { Name = "940", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[24, 64, 1, 1]> : vector<4xindex> }, { Name = "944", UnknownDataFormat = true } ], OutputName = "Conv_34", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "943", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 24, 45, 80]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x64x45x80xbf16>, %arg9 = %arg6: tensor<24x64x1x1xbf16>, %arg10 = %arg7: tensor<24xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_34", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "423", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 45, 80]> : vector<4xindex> }, { Name = "940", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[24, 64, 1, 1]> : vector<4xindex> }, { Name = "944", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_34", 
Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "943", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 24, 45, 80]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc28) %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<24x64x1x1xbf16>) -> tensor<24x1x1x64xbf16> loc(#loc28) %465 = tosa.transpose %arg8, %463 : (tensor<1x64x45x80xbf16>, tensor<4xi32>) -> tensor<1x45x80x64xbf16> loc(#loc28) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_34", PartOfOutputName = "Conv_34", dilation = array, pad = array, stride = array} : (tensor<1x45x80x64xbf16>, tensor<24x1x1x64xbf16>, tensor<24xbf16>) -> tensor<1x45x80x24xbf16> loc(#loc28) %467 = tosa.transpose %466, %462 : (tensor<1x45x80x24xbf16>, tensor<4xi32>) -> tensor<1x24x45x80xbf16> loc(#loc28) xten_nn.output %467 : tensor<1x24x45x80xbf16> loc(#loc28) } -> tensor<1x24x45x80xbf16> loc(#loc28) xten_nn.output %461 : tensor<1x24x45x80xbf16> loc(#loc28) } -> tensor<1x24x45x80xbf16> loc(#loc28) %182 = xten_nn.subgraph (%arg5 = %181: tensor<1x24x45x80xbf16>, %arg6 = %148: tensor<72x24x1x1xbf16>, %arg7 = %147: 
tensor<72xbf16>) attributes { LayerName = "Conv_35", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "943", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 24, 45, 80]> : vector<4xindex> }, { Name = "423", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[72, 24, 1, 1]> : vector<4xindex> }, { Name = "947", UnknownDataFormat = true } ], OutputName = "Relu_36", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "428", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 45, 80]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x24x45x80xbf16>, %arg9 = %arg6: tensor<72x24x1x1xbf16>, %arg10 = %arg7: tensor<72xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_35", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "943", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 24, 45, 80]> : vector<4xindex> }, { Name = "423", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[72, 24, 1, 1]> : vector<4xindex> }, { Name = "947", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_36", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "428", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 45, 80]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true, NonNegativeOut = true }, With = { 
config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 1 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 0.000000e+00 : bf16, config.lrelu_alpha_kernel = 0.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc327) %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<72x24x1x1xbf16>) -> tensor<72x1x1x24xbf16> loc(#loc327) %465 = tosa.transpose %arg8, %463 : (tensor<1x24x45x80xbf16>, tensor<4xi32>) -> tensor<1x45x80x24xbf16> loc(#loc327) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_35", PartOfOutputName = "Conv_35", dilation = array, pad = array, stride = array} : (tensor<1x45x80x24xbf16>, tensor<72x1x1x24xbf16>, tensor<72xbf16>) -> tensor<1x45x80x72xbf16> loc(#loc29) %467 = tosa.clamp %466 { LayerName = "Relu_36", OutputName = "Relu_36", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x45x80x72xbf16>) -> tensor<1x45x80x72xbf16> loc(#loc30) %468 = tosa.transpose %467, %462 : (tensor<1x45x80x72xbf16>, tensor<4xi32>) -> tensor<1x72x45x80xbf16> loc(#loc327) xten_nn.output %468 : tensor<1x72x45x80xbf16> loc(#loc30) } -> tensor<1x72x45x80xbf16> loc(#loc327) xten_nn.output %461 : tensor<1x72x45x80xbf16> loc(#loc327) } -> tensor<1x72x45x80xbf16> loc(#loc327) %183 = xten_nn.subgraph (%arg5 = %182: tensor<1x72x45x80xbf16>, %arg6 = %146: tensor<72x1x3x3xbf16>, %arg7 = %145: tensor<72xbf16>) attributes { LayerName = "Conv_37", Operands = [ { CurrentDataFormat = 
"NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "428", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 45, 80]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "946", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[72, 1, 3, 3]> : vector<4xindex> }, { Name = "950", UnknownDataFormat = true } ], OutputName = "Relu_38", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "431", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 45, 80]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x72x45x80xbf16>, %arg9 = %arg6: tensor<72x1x3x3xbf16>, %arg10 = %arg7: tensor<72xbf16>) attributes { Dilations = array, HWPadding = [[1, 1], [1, 1]], LayerName = "Conv_37", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "428", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 45, 80]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "946", Port = "data_io.wts", SubPort = "wts_data", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[72, 1, 3, 3]> : vector<4xindex> }, { Name = "950", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_38", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "431", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 45, 80]> : vector<4xindex> } ], Specializes = "DepthwiseConv2dBf16", Traits = { NonNegativeOut = true }, With = { config.act = 
1 : ui8, config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.kernel_height = 3 : ui8, config.kernel_width = 3 : ui8, config.stride = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc328) %465 = tosa.transpose %arg9, %464 : (tensor<72x1x3x3xbf16>, tensor<4xi32>) -> tensor<3x3x72x1xbf16> loc(#loc328) %466 = tosa.transpose %arg8, %463 : (tensor<1x72x45x80xbf16>, tensor<4xi32>) -> tensor<1x45x80x72xbf16> loc(#loc328) %467 = tosa.depthwise_conv2d %466, %465, %arg10 { PartOfLayerName = "Conv_37", PartOfOutputName = "Conv_37", dilation = array, pad = array, stride = array} : (tensor<1x45x80x72xbf16>, tensor<3x3x72x1xbf16>, tensor<72xbf16>) -> tensor<1x45x80x72xbf16> loc(#loc31) %468 = tosa.clamp %467 { LayerName = "Relu_38", OutputName = "Relu_38", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x45x80x72xbf16>) -> tensor<1x45x80x72xbf16> loc(#loc32) %469 = tosa.transpose %468, %462 : (tensor<1x45x80x72xbf16>, tensor<4xi32>) -> tensor<1x72x45x80xbf16> loc(#loc328) xten_nn.output %469 : tensor<1x72x45x80xbf16> loc(#loc32) } -> tensor<1x72x45x80xbf16> loc(#loc328) xten_nn.output %461 : tensor<1x72x45x80xbf16> loc(#loc328) } -> tensor<1x72x45x80xbf16> loc(#loc328) %184 = xten_nn.subgraph (%arg5 = %183: tensor<1x72x45x80xbf16>, %arg6 = %144: tensor<24x72x1x1xbf16>, %arg7 = %143: tensor<24xbf16>, %arg8 = %181: tensor<1x24x45x80xbf16>) attributes { LayerName = "Conv_39", OfmShare = 3 : index, Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "431", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 45, 80]> : vector<4xindex> }, { Name = 
"949", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[24, 72, 1, 1]> : vector<4xindex> }, { Name = "953", UnknownDataFormat = true }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "431", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 24, 45, 80]> : vector<4xindex> } ], OutputName = "Add_40", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "434", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 24, 45, 80]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg9 = %arg5: tensor<1x72x45x80xbf16>, %arg10 = %arg6: tensor<24x72x1x1xbf16>, %arg11 = %arg7: tensor<24xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_39", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "431", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 45, 80]> : vector<4xindex> }, { Name = "949", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[24, 72, 1, 1]> : vector<4xindex> }, { Name = "953", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_39", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "952", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 24, 45, 80]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = 
"RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %463 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc33) %465 = tosa.reshape %arg10 {new_shape = array} : (tensor<24x72x1x1xbf16>) -> tensor<24x1x1x72xbf16> loc(#loc33) %466 = tosa.transpose %arg9, %464 : (tensor<1x72x45x80xbf16>, tensor<4xi32>) -> tensor<1x45x80x72xbf16> loc(#loc33) %467 = tosa.conv2d %466, %465, %arg11 { PartOfLayerName = "Conv_39", PartOfOutputName = "Conv_39", dilation = array, pad = array, stride = array} : (tensor<1x45x80x72xbf16>, tensor<24x1x1x72xbf16>, tensor<24xbf16>) -> tensor<1x45x80x24xbf16> loc(#loc33) %468 = tosa.transpose %467, %463 : (tensor<1x45x80x24xbf16>, tensor<4xi32>) -> tensor<1x24x45x80xbf16> loc(#loc33) xten_nn.output %468 : tensor<1x24x45x80xbf16> loc(#loc33) } -> tensor<1x24x45x80xbf16> loc(#loc33) %462 = xten_nn.subgraph (%arg9 = %461: tensor<1x24x45x80xbf16>, %arg10 = %arg8: tensor<1x24x45x80xbf16>) attributes { LayerName = "Add_40", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "952", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 24, 45, 80]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "431", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 24, 45, 80]> : vector<4xindex> } ], OutputName = "Add_40", 
Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "434", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 24, 45, 80]> : vector<4xindex> } ], Specializes = "AddBf16", Traits = { Binary = true, Elementwise = true }, With = { config.act = 0 : ui8, config.act_type = "LINEAR", config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %463 = tosa.add %arg9, %arg10 {LayerName = "Add_40", OutputName = "Add_40"} : (tensor<1x24x45x80xbf16>, tensor<1x24x45x80xbf16>) -> tensor<1x24x45x80xbf16> loc(#loc34) xten_nn.output %463 : tensor<1x24x45x80xbf16> loc(#loc34) } -> tensor<1x24x45x80xbf16> loc(#loc34) xten_nn.output %462 : tensor<1x24x45x80xbf16> loc(#loc34) } -> tensor<1x24x45x80xbf16> loc(#loc329) %185 = xten_nn.subgraph (%arg5 = %184: tensor<1x24x45x80xbf16>, %arg6 = %142: tensor<72x24x1x1xbf16>, %arg7 = %141: tensor<72xbf16>) attributes { LayerName = "Conv_41", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "434", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 24, 45, 80]> : vector<4xindex> }, { Name = "952", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[72, 24, 1, 1]> : vector<4xindex> }, { Name = "434", UnknownDataFormat = true } ], OutputName = "Relu_42", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "437", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 45, 80]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x24x45x80xbf16>, %arg9 = %arg6: tensor<72x24x1x1xbf16>, %arg10 = %arg7: 
tensor<72xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_41", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "434", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 24, 45, 80]> : vector<4xindex> }, { Name = "952", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[72, 24, 1, 1]> : vector<4xindex> }, { Name = "434", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_42", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "437", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 45, 80]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true, NonNegativeOut = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 1 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 0.000000e+00 : bf16, config.lrelu_alpha_kernel = 0.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc330) %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<72x24x1x1xbf16>) -> tensor<72x1x1x24xbf16> loc(#loc330) %465 = tosa.transpose %arg8, %463 : (tensor<1x24x45x80xbf16>, tensor<4xi32>) -> tensor<1x45x80x24xbf16> loc(#loc330) %466 = tosa.conv2d %465, %464, 
%arg10 { PartOfLayerName = "Conv_41", PartOfOutputName = "Conv_41", dilation = array, pad = array, stride = array} : (tensor<1x45x80x24xbf16>, tensor<72x1x1x24xbf16>, tensor<72xbf16>) -> tensor<1x45x80x72xbf16> loc(#loc35) %467 = tosa.clamp %466 { LayerName = "Relu_42", OutputName = "Relu_42", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x45x80x72xbf16>) -> tensor<1x45x80x72xbf16> loc(#loc36) %468 = tosa.transpose %467, %462 : (tensor<1x45x80x72xbf16>, tensor<4xi32>) -> tensor<1x72x45x80xbf16> loc(#loc330) xten_nn.output %468 : tensor<1x72x45x80xbf16> loc(#loc36) } -> tensor<1x72x45x80xbf16> loc(#loc330) xten_nn.output %461 : tensor<1x72x45x80xbf16> loc(#loc330) } -> tensor<1x72x45x80xbf16> loc(#loc330) %186 = xten_nn.subgraph (%arg5 = %185: tensor<1x72x45x80xbf16>, %arg6 = %140: tensor<72x1x5x5xbf16>, %arg7 = %139: tensor<72xbf16>) attributes { LayerName = "Conv_43", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "437", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 45, 80]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "955", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[72, 1, 5, 5]> : vector<4xindex> }, { Name = "959", UnknownDataFormat = true } ], OutputName = "Relu_44", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "440", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x72x45x80xbf16>, %arg9 = %arg6: tensor<72x1x5x5xbf16>, %arg10 = %arg7: tensor<72xbf16>) attributes { Dilations = 
array, HWPadding = [[2, 2], [2, 1]], LayerName = "Conv_43", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "437", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 45, 80]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "955", Port = "data_io.wts", SubPort = "wts_data", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[72, 1, 5, 5]> : vector<4xindex> }, { Name = "959", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_44", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "440", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 23, 40]> : vector<4xindex> } ], Specializes = "DepthwiseConv2dBf16", Traits = { NonNegativeOut = true }, With = { config.act = 1 : ui8, config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.kernel_height = 5 : ui8, config.kernel_width = 5 : ui8, config.stride = 2 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc331) %465 = tosa.transpose %arg9, %464 : (tensor<72x1x5x5xbf16>, tensor<4xi32>) -> tensor<5x5x72x1xbf16> loc(#loc331) %466 = tosa.transpose %arg8, %463 : (tensor<1x72x45x80xbf16>, tensor<4xi32>) -> tensor<1x45x80x72xbf16> loc(#loc331) %467 = tosa.depthwise_conv2d %466, %465, %arg10 { PartOfLayerName = "Conv_43", PartOfOutputName = "Conv_43", dilation = array, pad = array, stride = array} : (tensor<1x45x80x72xbf16>, tensor<5x5x72x1xbf16>, tensor<72xbf16>) -> tensor<1x23x40x72xbf16> loc(#loc37) %468 = tosa.clamp %467 
{ LayerName = "Relu_44", OutputName = "Relu_44", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x23x40x72xbf16>) -> tensor<1x23x40x72xbf16> loc(#loc38) %469 = tosa.transpose %468, %462 : (tensor<1x23x40x72xbf16>, tensor<4xi32>) -> tensor<1x72x23x40xbf16> loc(#loc331) xten_nn.output %469 : tensor<1x72x23x40xbf16> loc(#loc38) } -> tensor<1x72x23x40xbf16> loc(#loc331) xten_nn.output %461 : tensor<1x72x23x40xbf16> loc(#loc331) } -> tensor<1x72x23x40xbf16> loc(#loc331) %187 = xten_nn.subgraph (%arg5 = %186: tensor<1x72x23x40xbf16>) attributes { LayerName = "GlobalAveragePool_45_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "440", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 23, 40]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_45_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "441", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 920]> : vector<4xindex> } ], Specializes = "Transpose4dAdf", With = { config.aie_arch = "aie2p", config.dim_0 = 23 : ui32, config.dim_1 = 9 : ui32, config.dim_2 = 40 : ui32, config.dim_3 = 8 : ui32, config.dtype = "bfloat16", config.perm = 6 : ui32 }} { %461 = tosa.reshape %arg5 {new_shape = array} : (tensor<1x72x23x40xbf16>) -> tensor<1x72x1x920xbf16> loc(#loc39) xten_nn.output %461 : tensor<1x72x1x920xbf16> loc(#loc39) } -> tensor<1x72x1x920xbf16> loc(#loc39) %188 = xten_nn.subgraph (%arg5 = %187: tensor<1x72x1x920xbf16>) attributes { LayerName = "GlobalAveragePool_45_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", 
L3Vectorization = "C:8", Name = "440", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 920]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_45_Duplicated#1", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "441", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x72x1x920xbf16>) attributes { LayerName = "GlobalAveragePool_45_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "440", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 920]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_45_Duplicated#1", PadValue = 0.000000e+00 : bf16, Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "441", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> } ], Specializes = "ReduceMeanC8Bf16", Traits = { Reduce = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.full_channel = 72 : ui32, config.full_height = 1 : ui32, config.full_width = 920 : ui32, config.reduce_dim = "W" }} { %462 = xten_nn.reduce_mean %arg6 {axes = array, keepdims = 1 : i64} : (tensor<1x72x1x920xbf16>) -> tensor<1x72x1x1xbf16> loc(#loc39) xten_nn.output %462 : tensor<1x72x1x1xbf16> loc(#loc39) } -> tensor<1x72x1x1xbf16> loc(#loc39) xten_nn.output %461 : tensor<1x72x1x1xbf16> loc(#loc39) } -> tensor<1x72x1x1xbf16> loc(#loc39) %189 = xten_nn.subgraph (%arg5 = %188: tensor<1x72x1x1xbf16>, %arg6 = %138: tensor<24x72x1x1xbf16>, %arg7 = %137: 
tensor<24xbf16>) attributes { LayerName = "Conv_46", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "441", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> }, { Name = "440", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[24, 72, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.4.block.2.fc1.weight", UnknownDataFormat = true } ], OutputName = "Relu_47", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "443", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 24, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x72x1x1xbf16>, %arg9 = %arg6: tensor<24x72x1x1xbf16>, %arg10 = %arg7: tensor<24xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_46", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "441", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> }, { Name = "440", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[24, 72, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.4.block.2.fc1.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_47", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "443", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 24, 1, 1]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { 
AllowDMAOptimization = true, NonNegativeOut = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 1 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 0.000000e+00 : bf16, config.lrelu_alpha_kernel = 0.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<24x72x1x1xbf16>) -> tensor<24x1x1x72xbf16> loc(#loc332) %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x72x1x1xbf16>) -> tensor<1x1x1x72xbf16> loc(#loc332) %464 = tosa.conv2d %463, %462, %arg10 { PartOfLayerName = "Conv_46", PartOfOutputName = "Conv_46", dilation = array, pad = array, stride = array} : (tensor<1x1x1x72xbf16>, tensor<24x1x1x72xbf16>, tensor<24xbf16>) -> tensor<1x1x1x24xbf16> loc(#loc40) %465 = tosa.clamp %464 { LayerName = "Relu_47", OutputName = "Relu_47", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x1x1x24xbf16>) -> tensor<1x1x1x24xbf16> loc(#loc41) %466 = tosa.reshape %465 {new_shape = array} : (tensor<1x1x1x24xbf16>) -> tensor<1x24x1x1xbf16> loc(#loc332) xten_nn.output %466 : tensor<1x24x1x1xbf16> loc(#loc41) } -> tensor<1x24x1x1xbf16> loc(#loc332) xten_nn.output %461 : tensor<1x24x1x1xbf16> loc(#loc332) } -> tensor<1x24x1x1xbf16> loc(#loc332) %190 = xten_nn.subgraph (%arg5 = %189: tensor<1x24x1x1xbf16>, %arg6 = %136: tensor<72x24x1x1xbf16>, %arg7 = %135: tensor<72xbf16>) attributes { LayerName = "Conv_48", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "443", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 24, 1, 1]> : vector<4xindex> }, { 
Name = "442", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[72, 24, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.4.block.2.fc2.weight", UnknownDataFormat = true } ], OutputName = "Conv_48", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "444", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x24x1x1xbf16>, %arg9 = %arg6: tensor<72x24x1x1xbf16>, %arg10 = %arg7: tensor<72xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_48", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "443", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 24, 1, 1]> : vector<4xindex> }, { Name = "442", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[72, 24, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.4.block.2.fc2.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_48", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "444", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : 
ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<72x24x1x1xbf16>) -> tensor<72x1x1x24xbf16> loc(#loc42) %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x24x1x1xbf16>) -> tensor<1x1x1x24xbf16> loc(#loc42) %464 = tosa.conv2d %463, %462, %arg10 { PartOfLayerName = "Conv_48", PartOfOutputName = "Conv_48", dilation = array, pad = array, stride = array} : (tensor<1x1x1x24xbf16>, tensor<72x1x1x24xbf16>, tensor<72xbf16>) -> tensor<1x1x1x72xbf16> loc(#loc42) %465 = tosa.reshape %464 {new_shape = array} : (tensor<1x1x1x72xbf16>) -> tensor<1x72x1x1xbf16> loc(#loc42) xten_nn.output %465 : tensor<1x72x1x1xbf16> loc(#loc42) } -> tensor<1x72x1x1xbf16> loc(#loc42) xten_nn.output %461 : tensor<1x72x1x1xbf16> loc(#loc42) } -> tensor<1x72x1x1xbf16> loc(#loc42) %191 = xten_nn.subgraph (%arg5 = %190: tensor<1x72x1x1xbf16>) attributes { LayerName = "Add_50", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "444", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> } ], OutputName = "Add_50", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "446", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x72x1x1xbf16>) attributes { LayerName = "Add_50", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", 
L3Vectorization = "C:8", Name = "444", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> } ], OutputName = "Add_50", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "446", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_50", OutputName = "Add_50"} : (tensor<1x72x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x72x1x1xbf16> loc(#loc43) xten_nn.output %463 : tensor<1x72x1x1xbf16> loc(#loc43) } -> tensor<1x72x1x1xbf16> loc(#loc43) xten_nn.output %461 : tensor<1x72x1x1xbf16> loc(#loc43) } -> tensor<1x72x1x1xbf16> loc(#loc43) %192 = xten_nn.subgraph (%arg5 = %191: tensor<1x72x1x1xbf16>) attributes { LayerName = "Clip_53", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "446", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> } ], OutputName = "Clip_53", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "449", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x72x1x1xbf16>) attributes { LayerName 
= "Clip_53", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "446", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> } ], OutputName = "Clip_53", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "449", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_53", OutputName = "Clip_53", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x72x1x1xbf16>) -> tensor<1x72x1x1xbf16> loc(#loc44) xten_nn.output %462 : tensor<1x72x1x1xbf16> loc(#loc44) } -> tensor<1x72x1x1xbf16> loc(#loc44) xten_nn.output %461 : tensor<1x72x1x1xbf16> loc(#loc44) } -> tensor<1x72x1x1xbf16> loc(#loc44) %193 = xten_nn.subgraph (%arg5 = %192: tensor<1x72x1x1xbf16>) attributes { LayerName = "Div_55", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "449", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> } ], OutputName = "Div_55", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "451", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = 
"flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x72x1x1xbf16>) attributes { LayerName = "Div_55", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "449", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> } ], OutputName = "Div_55", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "451", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_55", OutputName = "Div_55", shift = 0 : i8} : (tensor<1x72x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x72x1x1xbf16> loc(#loc45) xten_nn.output %463 : tensor<1x72x1x1xbf16> loc(#loc45) } -> tensor<1x72x1x1xbf16> loc(#loc45) xten_nn.output %461 : tensor<1x72x1x1xbf16> loc(#loc45) } -> tensor<1x72x1x1xbf16> loc(#loc45) %194 = xten_nn.subgraph (%arg5 = %193: tensor<1x72x1x1xbf16>) attributes { LayerName = "Mul_56_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "451", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> } ], OutputName = "Mul_56_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "452", Port = 
"data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 23, 40]> : vector<4xindex> } ], Specializes = "TileAdf", With = { config.aie_arch = "aie2p", config.dtype = "bfloat16", config.i_dim_c = 72 : ui32, config.i_dim_h = 1 : ui32, config.i_dim_n = 1 : ui32, config.i_dim_w = 1 : ui32, config.rep_dim_c = 1 : ui32, config.rep_dim_h = 23 : ui32, config.rep_dim_w = 40 : ui32 }} { %461 = tosa.tile %arg5 {multiples = array} : (tensor<1x72x1x1xbf16>) -> tensor<1x72x23x40xbf16> loc(#loc46) xten_nn.output %461 : tensor<1x72x23x40xbf16> loc(#loc46) } -> tensor<1x72x23x40xbf16> loc(#loc46) %195 = xten_nn.subgraph (%arg5 = %194: tensor<1x72x23x40xbf16>, %arg6 = %186: tensor<1x72x23x40xbf16>) attributes { LayerName = "Mul_56_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "451", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "449", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 23, 40]> : vector<4xindex> } ], OutputName = "Mul_56_Duplicated#1", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "452", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x72x23x40xbf16>, %arg8 = %arg6: tensor<1x72x23x40xbf16>) attributes { LayerName = "Mul_56_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "451", Port = "data_io.ifm1", l3_extend_end = dense<0> : 
vector<4xindex>, l3_tile_count = dense<[1, 72, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "449", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 23, 40]> : vector<4xindex> } ], OutputName = "Mul_56_Duplicated#1", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "452", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 23, 40]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_56", OutputName = "Mul_56", shift = 0 : i8} : (tensor<1x72x23x40xbf16>, tensor<1x72x23x40xbf16>) -> tensor<1x72x23x40xbf16> loc(#loc46) xten_nn.output %462 : tensor<1x72x23x40xbf16> loc(#loc46) } -> tensor<1x72x23x40xbf16> loc(#loc46) xten_nn.output %461 : tensor<1x72x23x40xbf16> loc(#loc46) } -> tensor<1x72x23x40xbf16> loc(#loc46) %196 = xten_nn.subgraph (%arg5 = %195: tensor<1x72x23x40xbf16>, %arg6 = %134: tensor<40x72x1x1xbf16>, %arg7 = %133: tensor<40xbf16>) attributes { LayerName = "Conv_57", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "452", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 23, 40]> : vector<4xindex> }, { Name = "451", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[40, 72, 1, 1]> : vector<4xindex> }, { Name = "452", UnknownDataFormat = true } ], OutputName = "Conv_57", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "961", l3_extend_end = dense<0> : 
vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x72x23x40xbf16>, %arg9 = %arg6: tensor<40x72x1x1xbf16>, %arg10 = %arg7: tensor<40xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_57", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "452", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 72, 23, 40]> : vector<4xindex> }, { Name = "451", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[40, 72, 1, 1]> : vector<4xindex> }, { Name = "452", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_57", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "961", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> 
: () -> tensor<4xi32> loc(#loc47) %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<40x72x1x1xbf16>) -> tensor<40x1x1x72xbf16> loc(#loc47) %465 = tosa.transpose %arg8, %463 : (tensor<1x72x23x40xbf16>, tensor<4xi32>) -> tensor<1x23x40x72xbf16> loc(#loc47) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_57", PartOfOutputName = "Conv_57", dilation = array, pad = array, stride = array} : (tensor<1x23x40x72xbf16>, tensor<40x1x1x72xbf16>, tensor<40xbf16>) -> tensor<1x23x40x40xbf16> loc(#loc47) %467 = tosa.transpose %466, %462 : (tensor<1x23x40x40xbf16>, tensor<4xi32>) -> tensor<1x40x23x40xbf16> loc(#loc47) xten_nn.output %467 : tensor<1x40x23x40xbf16> loc(#loc47) } -> tensor<1x40x23x40xbf16> loc(#loc47) xten_nn.output %461 : tensor<1x40x23x40xbf16> loc(#loc47) } -> tensor<1x40x23x40xbf16> loc(#loc47) %197 = xten_nn.subgraph (%arg5 = %196: tensor<1x40x23x40xbf16>, %arg6 = %132: tensor<120x40x1x1xbf16>, %arg7 = %131: tensor<120xbf16>) attributes { LayerName = "Conv_58", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "961", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { Name = "452", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[120, 40, 1, 1]> : vector<4xindex> }, { Name = "965", UnknownDataFormat = true } ], OutputName = "Relu_59", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "457", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x40x23x40xbf16>, %arg9 = %arg6: tensor<120x40x1x1xbf16>, %arg10 = %arg7: tensor<120xbf16>) attributes { Dilations = 
array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_58", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "961", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { Name = "452", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[120, 40, 1, 1]> : vector<4xindex> }, { Name = "965", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_59", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "457", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true, NonNegativeOut = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 1 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 0.000000e+00 : bf16, config.lrelu_alpha_kernel = 0.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc333) %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<120x40x1x1xbf16>) -> tensor<120x1x1x40xbf16> loc(#loc333) %465 = tosa.transpose %arg8, %463 : (tensor<1x40x23x40xbf16>, tensor<4xi32>) -> tensor<1x23x40x40xbf16> loc(#loc333) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_58", 
PartOfOutputName = "Conv_58", dilation = array, pad = array, stride = array} : (tensor<1x23x40x40xbf16>, tensor<120x1x1x40xbf16>, tensor<120xbf16>) -> tensor<1x23x40x120xbf16> loc(#loc48) %467 = tosa.clamp %466 { LayerName = "Relu_59", OutputName = "Relu_59", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x23x40x120xbf16>) -> tensor<1x23x40x120xbf16> loc(#loc49) %468 = tosa.transpose %467, %462 : (tensor<1x23x40x120xbf16>, tensor<4xi32>) -> tensor<1x120x23x40xbf16> loc(#loc333) xten_nn.output %468 : tensor<1x120x23x40xbf16> loc(#loc49) } -> tensor<1x120x23x40xbf16> loc(#loc333) xten_nn.output %461 : tensor<1x120x23x40xbf16> loc(#loc333) } -> tensor<1x120x23x40xbf16> loc(#loc333) %198 = xten_nn.subgraph (%arg5 = %197: tensor<1x120x23x40xbf16>, %arg6 = %130: tensor<120x1x5x5xbf16>, %arg7 = %129: tensor<120xbf16>) attributes { LayerName = "Conv_60", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "457", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "964", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[120, 1, 5, 5]> : vector<4xindex> }, { Name = "968", UnknownDataFormat = true } ], OutputName = "Relu_61", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "460", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x120x23x40xbf16>, %arg9 = %arg6: tensor<120x1x5x5xbf16>, %arg10 = %arg7: tensor<120xbf16>) attributes { Dilations = array, HWPadding = 
[[2, 2], [2, 2]], LayerName = "Conv_60", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "457", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "964", Port = "data_io.wts", SubPort = "wts_data", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[120, 1, 5, 5]> : vector<4xindex> }, { Name = "968", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_61", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "460", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> } ], Specializes = "DepthwiseConv2dBf16", Traits = { NonNegativeOut = true }, With = { config.act = 1 : ui8, config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.kernel_height = 5 : ui8, config.kernel_width = 5 : ui8, config.stride = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc334) %465 = tosa.transpose %arg9, %464 : (tensor<120x1x5x5xbf16>, tensor<4xi32>) -> tensor<5x5x120x1xbf16> loc(#loc334) %466 = tosa.transpose %arg8, %463 : (tensor<1x120x23x40xbf16>, tensor<4xi32>) -> tensor<1x23x40x120xbf16> loc(#loc334) %467 = tosa.depthwise_conv2d %466, %465, %arg10 { PartOfLayerName = "Conv_60", PartOfOutputName = "Conv_60", dilation = array, pad = array, stride = array} : (tensor<1x23x40x120xbf16>, tensor<5x5x120x1xbf16>, tensor<120xbf16>) -> tensor<1x23x40x120xbf16> loc(#loc50) %468 = tosa.clamp %467 { 
LayerName = "Relu_61", OutputName = "Relu_61", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x23x40x120xbf16>) -> tensor<1x23x40x120xbf16> loc(#loc51) %469 = tosa.transpose %468, %462 : (tensor<1x23x40x120xbf16>, tensor<4xi32>) -> tensor<1x120x23x40xbf16> loc(#loc334) xten_nn.output %469 : tensor<1x120x23x40xbf16> loc(#loc51) } -> tensor<1x120x23x40xbf16> loc(#loc334) xten_nn.output %461 : tensor<1x120x23x40xbf16> loc(#loc334) } -> tensor<1x120x23x40xbf16> loc(#loc334) %199 = xten_nn.subgraph (%arg5 = %198: tensor<1x120x23x40xbf16>) attributes { LayerName = "GlobalAveragePool_62_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "460", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_62_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "461", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 920]> : vector<4xindex> } ], Specializes = "Transpose4dAdf", With = { config.aie_arch = "aie2p", config.dim_0 = 23 : ui32, config.dim_1 = 15 : ui32, config.dim_2 = 40 : ui32, config.dim_3 = 8 : ui32, config.dtype = "bfloat16", config.perm = 6 : ui32 }} { %461 = tosa.reshape %arg5 {new_shape = array} : (tensor<1x120x23x40xbf16>) -> tensor<1x120x1x920xbf16> loc(#loc52) xten_nn.output %461 : tensor<1x120x1x920xbf16> loc(#loc52) } -> tensor<1x120x1x920xbf16> loc(#loc52) %200 = xten_nn.subgraph (%arg5 = %199: tensor<1x120x1x920xbf16>) attributes { LayerName = "GlobalAveragePool_62_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = 
"HCWN", L3Vectorization = "C:8", Name = "460", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 920]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_62_Duplicated#1", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "461", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x120x1x920xbf16>) attributes { LayerName = "GlobalAveragePool_62_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "460", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 920]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_62_Duplicated#1", PadValue = 0.000000e+00 : bf16, Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "461", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], Specializes = "ReduceMeanC8Bf16", Traits = { Reduce = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.full_channel = 120 : ui32, config.full_height = 1 : ui32, config.full_width = 920 : ui32, config.reduce_dim = "W" }} { %462 = xten_nn.reduce_mean %arg6 {axes = array, keepdims = 1 : i64} : (tensor<1x120x1x920xbf16>) -> tensor<1x120x1x1xbf16> loc(#loc52) xten_nn.output %462 : tensor<1x120x1x1xbf16> loc(#loc52) } -> tensor<1x120x1x1xbf16> loc(#loc52) xten_nn.output %461 : tensor<1x120x1x1xbf16> loc(#loc52) } -> tensor<1x120x1x1xbf16> loc(#loc52) %201 = xten_nn.subgraph (%arg5 = %200: tensor<1x120x1x1xbf16>, %arg6 = %128: tensor<32x120x1x1xbf16>, 
%arg7 = %127: tensor<32xbf16>) attributes { LayerName = "Conv_63", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "461", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> }, { Name = "460", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[32, 120, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.5.block.2.fc1.weight", UnknownDataFormat = true } ], OutputName = "Relu_64", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "463", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x120x1x1xbf16>, %arg9 = %arg6: tensor<32x120x1x1xbf16>, %arg10 = %arg7: tensor<32xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_63", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "461", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> }, { Name = "460", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[32, 120, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.5.block.2.fc1.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_64", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "463", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 1, 1]> : vector<4xindex> } ], Specializes = 
"Conv2DBf16", Traits = { AllowDMAOptimization = true, NonNegativeOut = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 1 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 0.000000e+00 : bf16, config.lrelu_alpha_kernel = 0.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<32x120x1x1xbf16>) -> tensor<32x1x1x120xbf16> loc(#loc335) %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x120x1x1xbf16>) -> tensor<1x1x1x120xbf16> loc(#loc335) %464 = tosa.conv2d %463, %462, %arg10 { PartOfLayerName = "Conv_63", PartOfOutputName = "Conv_63", dilation = array, pad = array, stride = array} : (tensor<1x1x1x120xbf16>, tensor<32x1x1x120xbf16>, tensor<32xbf16>) -> tensor<1x1x1x32xbf16> loc(#loc53) %465 = tosa.clamp %464 { LayerName = "Relu_64", OutputName = "Relu_64", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x1x1x32xbf16>) -> tensor<1x1x1x32xbf16> loc(#loc54) %466 = tosa.reshape %465 {new_shape = array} : (tensor<1x1x1x32xbf16>) -> tensor<1x32x1x1xbf16> loc(#loc335) xten_nn.output %466 : tensor<1x32x1x1xbf16> loc(#loc54) } -> tensor<1x32x1x1xbf16> loc(#loc335) xten_nn.output %461 : tensor<1x32x1x1xbf16> loc(#loc335) } -> tensor<1x32x1x1xbf16> loc(#loc335) %202 = xten_nn.subgraph (%arg5 = %201: tensor<1x32x1x1xbf16>, %arg6 = %126: tensor<120x32x1x1xbf16>, %arg7 = %125: tensor<120xbf16>) attributes { LayerName = "Conv_65", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "463", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 
32, 1, 1]> : vector<4xindex> }, { Name = "462", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[120, 32, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.5.block.2.fc2.weight", UnknownDataFormat = true } ], OutputName = "Conv_65", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "464", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x32x1x1xbf16>, %arg9 = %arg6: tensor<120x32x1x1xbf16>, %arg10 = %arg7: tensor<120xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_65", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "463", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 1, 1]> : vector<4xindex> }, { Name = "462", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[120, 32, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.5.block.2.fc2.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_65", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "464", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = 
"chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<120x32x1x1xbf16>) -> tensor<120x1x1x32xbf16> loc(#loc55) %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x32x1x1xbf16>) -> tensor<1x1x1x32xbf16> loc(#loc55) %464 = tosa.conv2d %463, %462, %arg10 { PartOfLayerName = "Conv_65", PartOfOutputName = "Conv_65", dilation = array, pad = array, stride = array} : (tensor<1x1x1x32xbf16>, tensor<120x1x1x32xbf16>, tensor<120xbf16>) -> tensor<1x1x1x120xbf16> loc(#loc55) %465 = tosa.reshape %464 {new_shape = array} : (tensor<1x1x1x120xbf16>) -> tensor<1x120x1x1xbf16> loc(#loc55) xten_nn.output %465 : tensor<1x120x1x1xbf16> loc(#loc55) } -> tensor<1x120x1x1xbf16> loc(#loc55) xten_nn.output %461 : tensor<1x120x1x1xbf16> loc(#loc55) } -> tensor<1x120x1x1xbf16> loc(#loc55) %203 = xten_nn.subgraph (%arg5 = %202: tensor<1x120x1x1xbf16>) attributes { LayerName = "Add_67", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "464", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], OutputName = "Add_67", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "466", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x120x1x1xbf16>) attributes { LayerName = "Add_67", Operands = [ { 
CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "464", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], OutputName = "Add_67", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "466", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_67", OutputName = "Add_67"} : (tensor<1x120x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x120x1x1xbf16> loc(#loc56) xten_nn.output %463 : tensor<1x120x1x1xbf16> loc(#loc56) } -> tensor<1x120x1x1xbf16> loc(#loc56) xten_nn.output %461 : tensor<1x120x1x1xbf16> loc(#loc56) } -> tensor<1x120x1x1xbf16> loc(#loc56) %204 = xten_nn.subgraph (%arg5 = %203: tensor<1x120x1x1xbf16>) attributes { LayerName = "Clip_70", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "466", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], OutputName = "Clip_70", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "469", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph 
(%arg6 = %arg5: tensor<1x120x1x1xbf16>) attributes { LayerName = "Clip_70", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "466", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], OutputName = "Clip_70", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "469", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_70", OutputName = "Clip_70", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x120x1x1xbf16>) -> tensor<1x120x1x1xbf16> loc(#loc57) xten_nn.output %462 : tensor<1x120x1x1xbf16> loc(#loc57) } -> tensor<1x120x1x1xbf16> loc(#loc57) xten_nn.output %461 : tensor<1x120x1x1xbf16> loc(#loc57) } -> tensor<1x120x1x1xbf16> loc(#loc57) %205 = xten_nn.subgraph (%arg5 = %204: tensor<1x120x1x1xbf16>) attributes { LayerName = "Div_72", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "469", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], OutputName = "Div_72", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "471", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], memory_configuration = { 
L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x120x1x1xbf16>) attributes { LayerName = "Div_72", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "469", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], OutputName = "Div_72", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "471", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_72", OutputName = "Div_72", shift = 0 : i8} : (tensor<1x120x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x120x1x1xbf16> loc(#loc58) xten_nn.output %463 : tensor<1x120x1x1xbf16> loc(#loc58) } -> tensor<1x120x1x1xbf16> loc(#loc58) xten_nn.output %461 : tensor<1x120x1x1xbf16> loc(#loc58) } -> tensor<1x120x1x1xbf16> loc(#loc58) %206 = xten_nn.subgraph (%arg5 = %205: tensor<1x120x1x1xbf16>) attributes { LayerName = "Mul_73_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "471", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], OutputName = "Mul_73_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { 
CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "472", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> } ], Specializes = "TileAdf", With = { config.aie_arch = "aie2p", config.dtype = "bfloat16", config.i_dim_c = 120 : ui32, config.i_dim_h = 1 : ui32, config.i_dim_n = 1 : ui32, config.i_dim_w = 1 : ui32, config.rep_dim_c = 1 : ui32, config.rep_dim_h = 23 : ui32, config.rep_dim_w = 40 : ui32 }} { %461 = tosa.tile %arg5 {multiples = array} : (tensor<1x120x1x1xbf16>) -> tensor<1x120x23x40xbf16> loc(#loc59) xten_nn.output %461 : tensor<1x120x23x40xbf16> loc(#loc59) } -> tensor<1x120x23x40xbf16> loc(#loc59) %207 = xten_nn.subgraph (%arg5 = %206: tensor<1x120x23x40xbf16>, %arg6 = %198: tensor<1x120x23x40xbf16>) attributes { LayerName = "Mul_73_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "471", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "469", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> } ], OutputName = "Mul_73_Duplicated#1", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "472", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x120x23x40xbf16>, %arg8 = %arg6: tensor<1x120x23x40xbf16>) attributes { LayerName = "Mul_73_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, 
L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "471", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "469", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> } ], OutputName = "Mul_73_Duplicated#1", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "472", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_73", OutputName = "Mul_73", shift = 0 : i8} : (tensor<1x120x23x40xbf16>, tensor<1x120x23x40xbf16>) -> tensor<1x120x23x40xbf16> loc(#loc59) xten_nn.output %462 : tensor<1x120x23x40xbf16> loc(#loc59) } -> tensor<1x120x23x40xbf16> loc(#loc59) xten_nn.output %461 : tensor<1x120x23x40xbf16> loc(#loc59) } -> tensor<1x120x23x40xbf16> loc(#loc59) %208 = xten_nn.subgraph (%arg5 = %207: tensor<1x120x23x40xbf16>, %arg6 = %124: tensor<40x120x1x1xbf16>, %arg7 = %123: tensor<40xbf16>, %arg8 = %196: tensor<1x40x23x40xbf16>) attributes { LayerName = "Conv_74", OfmShare = 3 : index, Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "472", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> }, { Name = "471", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[40, 120, 1, 1]> : vector<4xindex> }, { Name = "472", UnknownDataFormat = true }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", 
L3Vectorization = "C:8", Name = "472", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], OutputName = "Add_75", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "475", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg9 = %arg5: tensor<1x120x23x40xbf16>, %arg10 = %arg6: tensor<40x120x1x1xbf16>, %arg11 = %arg7: tensor<40xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_74", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "472", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> }, { Name = "471", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[40, 120, 1, 1]> : vector<4xindex> }, { Name = "472", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_74", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "970", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = 
"bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %463 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc60) %465 = tosa.reshape %arg10 {new_shape = array} : (tensor<40x120x1x1xbf16>) -> tensor<40x1x1x120xbf16> loc(#loc60) %466 = tosa.transpose %arg9, %464 : (tensor<1x120x23x40xbf16>, tensor<4xi32>) -> tensor<1x23x40x120xbf16> loc(#loc60) %467 = tosa.conv2d %466, %465, %arg11 { PartOfLayerName = "Conv_74", PartOfOutputName = "Conv_74", dilation = array, pad = array, stride = array} : (tensor<1x23x40x120xbf16>, tensor<40x1x1x120xbf16>, tensor<40xbf16>) -> tensor<1x23x40x40xbf16> loc(#loc60) %468 = tosa.transpose %467, %463 : (tensor<1x23x40x40xbf16>, tensor<4xi32>) -> tensor<1x40x23x40xbf16> loc(#loc60) xten_nn.output %468 : tensor<1x40x23x40xbf16> loc(#loc60) } -> tensor<1x40x23x40xbf16> loc(#loc60) %462 = xten_nn.subgraph (%arg9 = %461: tensor<1x40x23x40xbf16>, %arg10 = %arg8: tensor<1x40x23x40xbf16>) attributes { LayerName = "Add_75", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "970", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "472", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], OutputName = "Add_75", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "475", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = 
dense<[1, 40, 23, 40]> : vector<4xindex> } ], Specializes = "AddBf16", Traits = { Binary = true, Elementwise = true }, With = { config.act = 0 : ui8, config.act_type = "LINEAR", config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %463 = tosa.add %arg9, %arg10 {LayerName = "Add_75", OutputName = "Add_75"} : (tensor<1x40x23x40xbf16>, tensor<1x40x23x40xbf16>) -> tensor<1x40x23x40xbf16> loc(#loc61) xten_nn.output %463 : tensor<1x40x23x40xbf16> loc(#loc61) } -> tensor<1x40x23x40xbf16> loc(#loc61) xten_nn.output %462 : tensor<1x40x23x40xbf16> loc(#loc61) } -> tensor<1x40x23x40xbf16> loc(#loc336) %209 = xten_nn.subgraph (%arg5 = %208: tensor<1x40x23x40xbf16>, %arg6 = %122: tensor<120x40x1x1xbf16>, %arg7 = %121: tensor<120xbf16>) attributes { LayerName = "Conv_76", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "475", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { Name = "970", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[120, 40, 1, 1]> : vector<4xindex> }, { Name = "475", UnknownDataFormat = true } ], OutputName = "Relu_77", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "478", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x40x23x40xbf16>, %arg9 = %arg6: tensor<120x40x1x1xbf16>, %arg10 = %arg7: tensor<120xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_76", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = 
"475", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { Name = "970", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[120, 40, 1, 1]> : vector<4xindex> }, { Name = "475", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_77", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "478", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true, NonNegativeOut = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 1 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 0.000000e+00 : bf16, config.lrelu_alpha_kernel = 0.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc337) %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<120x40x1x1xbf16>) -> tensor<120x1x1x40xbf16> loc(#loc337) %465 = tosa.transpose %arg8, %463 : (tensor<1x40x23x40xbf16>, tensor<4xi32>) -> tensor<1x23x40x40xbf16> loc(#loc337) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_76", PartOfOutputName = "Conv_76", dilation = array, pad = array, stride = array} : (tensor<1x23x40x40xbf16>, tensor<120x1x1x40xbf16>, tensor<120xbf16>) -> 
tensor<1x23x40x120xbf16> loc(#loc62) %467 = tosa.clamp %466 { LayerName = "Relu_77", OutputName = "Relu_77", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x23x40x120xbf16>) -> tensor<1x23x40x120xbf16> loc(#loc63) %468 = tosa.transpose %467, %462 : (tensor<1x23x40x120xbf16>, tensor<4xi32>) -> tensor<1x120x23x40xbf16> loc(#loc337) xten_nn.output %468 : tensor<1x120x23x40xbf16> loc(#loc63) } -> tensor<1x120x23x40xbf16> loc(#loc337) xten_nn.output %461 : tensor<1x120x23x40xbf16> loc(#loc337) } -> tensor<1x120x23x40xbf16> loc(#loc337) %210 = xten_nn.subgraph (%arg5 = %209: tensor<1x120x23x40xbf16>, %arg6 = %120: tensor<120x1x5x5xbf16>, %arg7 = %119: tensor<120xbf16>) attributes { LayerName = "Conv_78", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "478", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "973", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[120, 1, 5, 5]> : vector<4xindex> }, { Name = "977", UnknownDataFormat = true } ], OutputName = "Relu_79", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "481", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x120x23x40xbf16>, %arg9 = %arg6: tensor<120x1x5x5xbf16>, %arg10 = %arg7: tensor<120xbf16>) attributes { Dilations = array, HWPadding = [[2, 2], [2, 2]], LayerName = "Conv_78", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "478", Port 
= "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "973", Port = "data_io.wts", SubPort = "wts_data", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[120, 1, 5, 5]> : vector<4xindex> }, { Name = "977", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_79", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "481", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> } ], Specializes = "DepthwiseConv2dBf16", Traits = { NonNegativeOut = true }, With = { config.act = 1 : ui8, config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.kernel_height = 5 : ui8, config.kernel_width = 5 : ui8, config.stride = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc338) %465 = tosa.transpose %arg9, %464 : (tensor<120x1x5x5xbf16>, tensor<4xi32>) -> tensor<5x5x120x1xbf16> loc(#loc338) %466 = tosa.transpose %arg8, %463 : (tensor<1x120x23x40xbf16>, tensor<4xi32>) -> tensor<1x23x40x120xbf16> loc(#loc338) %467 = tosa.depthwise_conv2d %466, %465, %arg10 { PartOfLayerName = "Conv_78", PartOfOutputName = "Conv_78", dilation = array, pad = array, stride = array} : (tensor<1x23x40x120xbf16>, tensor<5x5x120x1xbf16>, tensor<120xbf16>) -> tensor<1x23x40x120xbf16> loc(#loc64) %468 = tosa.clamp %467 { LayerName = "Relu_79", OutputName = "Relu_79", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : 
(tensor<1x23x40x120xbf16>) -> tensor<1x23x40x120xbf16> loc(#loc65) %469 = tosa.transpose %468, %462 : (tensor<1x23x40x120xbf16>, tensor<4xi32>) -> tensor<1x120x23x40xbf16> loc(#loc338) xten_nn.output %469 : tensor<1x120x23x40xbf16> loc(#loc65) } -> tensor<1x120x23x40xbf16> loc(#loc338) xten_nn.output %461 : tensor<1x120x23x40xbf16> loc(#loc338) } -> tensor<1x120x23x40xbf16> loc(#loc338) %211 = xten_nn.subgraph (%arg5 = %210: tensor<1x120x23x40xbf16>) attributes { LayerName = "GlobalAveragePool_80_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "481", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_80_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "482", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 920]> : vector<4xindex> } ], Specializes = "Transpose4dAdf", With = { config.aie_arch = "aie2p", config.dim_0 = 23 : ui32, config.dim_1 = 15 : ui32, config.dim_2 = 40 : ui32, config.dim_3 = 8 : ui32, config.dtype = "bfloat16", config.perm = 6 : ui32 }} { %461 = tosa.reshape %arg5 {new_shape = array} : (tensor<1x120x23x40xbf16>) -> tensor<1x120x1x920xbf16> loc(#loc66) xten_nn.output %461 : tensor<1x120x1x920xbf16> loc(#loc66) } -> tensor<1x120x1x920xbf16> loc(#loc66) %212 = xten_nn.subgraph (%arg5 = %211: tensor<1x120x1x920xbf16>) attributes { LayerName = "GlobalAveragePool_80_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "481", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 920]> : vector<4xindex> } ], 
OutputName = "GlobalAveragePool_80_Duplicated#1", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "482", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x120x1x920xbf16>) attributes { LayerName = "GlobalAveragePool_80_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "481", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 920]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_80_Duplicated#1", PadValue = 0.000000e+00 : bf16, Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "482", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], Specializes = "ReduceMeanC8Bf16", Traits = { Reduce = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.full_channel = 120 : ui32, config.full_height = 1 : ui32, config.full_width = 920 : ui32, config.reduce_dim = "W" }} { %462 = xten_nn.reduce_mean %arg6 {axes = array, keepdims = 1 : i64} : (tensor<1x120x1x920xbf16>) -> tensor<1x120x1x1xbf16> loc(#loc66) xten_nn.output %462 : tensor<1x120x1x1xbf16> loc(#loc66) } -> tensor<1x120x1x1xbf16> loc(#loc66) xten_nn.output %461 : tensor<1x120x1x1xbf16> loc(#loc66) } -> tensor<1x120x1x1xbf16> loc(#loc66) %213 = xten_nn.subgraph (%arg5 = %212: tensor<1x120x1x1xbf16>, %arg6 = %118: tensor<32x120x1x1xbf16>, %arg7 = %117: tensor<32xbf16>) attributes { LayerName = "Conv_81", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", 
Name = "482", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> }, { Name = "481", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[32, 120, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.6.block.2.fc1.weight", UnknownDataFormat = true } ], OutputName = "Relu_82", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "484", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x120x1x1xbf16>, %arg9 = %arg6: tensor<32x120x1x1xbf16>, %arg10 = %arg7: tensor<32xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_81", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "482", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> }, { Name = "481", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[32, 120, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.6.block.2.fc1.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_82", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "484", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 1, 1]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true, NonNegativeOut = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 1 : 
ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 0.000000e+00 : bf16, config.lrelu_alpha_kernel = 0.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<32x120x1x1xbf16>) -> tensor<32x1x1x120xbf16> loc(#loc339) %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x120x1x1xbf16>) -> tensor<1x1x1x120xbf16> loc(#loc339) %464 = tosa.conv2d %463, %462, %arg10 { PartOfLayerName = "Conv_81", PartOfOutputName = "Conv_81", dilation = array, pad = array, stride = array} : (tensor<1x1x1x120xbf16>, tensor<32x1x1x120xbf16>, tensor<32xbf16>) -> tensor<1x1x1x32xbf16> loc(#loc67) %465 = tosa.clamp %464 { LayerName = "Relu_82", OutputName = "Relu_82", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x1x1x32xbf16>) -> tensor<1x1x1x32xbf16> loc(#loc68) %466 = tosa.reshape %465 {new_shape = array} : (tensor<1x1x1x32xbf16>) -> tensor<1x32x1x1xbf16> loc(#loc339) xten_nn.output %466 : tensor<1x32x1x1xbf16> loc(#loc68) } -> tensor<1x32x1x1xbf16> loc(#loc339) xten_nn.output %461 : tensor<1x32x1x1xbf16> loc(#loc339) } -> tensor<1x32x1x1xbf16> loc(#loc339) %214 = xten_nn.subgraph (%arg5 = %213: tensor<1x32x1x1xbf16>, %arg6 = %116: tensor<120x32x1x1xbf16>, %arg7 = %115: tensor<120xbf16>) attributes { LayerName = "Conv_83", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "484", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 1, 1]> : vector<4xindex> }, { Name = "483", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[120, 32, 1, 1]> : 
vector<4xindex> }, { Name = "backbone.features.6.block.2.fc2.weight", UnknownDataFormat = true } ], OutputName = "Conv_83", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "485", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x32x1x1xbf16>, %arg9 = %arg6: tensor<120x32x1x1xbf16>, %arg10 = %arg7: tensor<120xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_83", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "484", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 1, 1]> : vector<4xindex> }, { Name = "483", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[120, 32, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.6.block.2.fc2.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_83", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "485", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", 
config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<120x32x1x1xbf16>) -> tensor<120x1x1x32xbf16> loc(#loc69) %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x32x1x1xbf16>) -> tensor<1x1x1x32xbf16> loc(#loc69) %464 = tosa.conv2d %463, %462, %arg10 { PartOfLayerName = "Conv_83", PartOfOutputName = "Conv_83", dilation = array, pad = array, stride = array} : (tensor<1x1x1x32xbf16>, tensor<120x1x1x32xbf16>, tensor<120xbf16>) -> tensor<1x1x1x120xbf16> loc(#loc69) %465 = tosa.reshape %464 {new_shape = array} : (tensor<1x1x1x120xbf16>) -> tensor<1x120x1x1xbf16> loc(#loc69) xten_nn.output %465 : tensor<1x120x1x1xbf16> loc(#loc69) } -> tensor<1x120x1x1xbf16> loc(#loc69) xten_nn.output %461 : tensor<1x120x1x1xbf16> loc(#loc69) } -> tensor<1x120x1x1xbf16> loc(#loc69) %215 = xten_nn.subgraph (%arg5 = %214: tensor<1x120x1x1xbf16>) attributes { LayerName = "Add_85", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "485", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], OutputName = "Add_85", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "487", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x120x1x1xbf16>) attributes { LayerName = "Add_85", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "485", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, 
l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], OutputName = "Add_85", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "487", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_85", OutputName = "Add_85"} : (tensor<1x120x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x120x1x1xbf16> loc(#loc70) xten_nn.output %463 : tensor<1x120x1x1xbf16> loc(#loc70) } -> tensor<1x120x1x1xbf16> loc(#loc70) xten_nn.output %461 : tensor<1x120x1x1xbf16> loc(#loc70) } -> tensor<1x120x1x1xbf16> loc(#loc70) %216 = xten_nn.subgraph (%arg5 = %215: tensor<1x120x1x1xbf16>) attributes { LayerName = "Clip_88", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "487", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], OutputName = "Clip_88", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "490", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x120x1x1xbf16>) attributes { LayerName = "Clip_88", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", 
L3Vectorization = "C:8", Name = "487", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], OutputName = "Clip_88", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "490", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_88", OutputName = "Clip_88", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x120x1x1xbf16>) -> tensor<1x120x1x1xbf16> loc(#loc71) xten_nn.output %462 : tensor<1x120x1x1xbf16> loc(#loc71) } -> tensor<1x120x1x1xbf16> loc(#loc71) xten_nn.output %461 : tensor<1x120x1x1xbf16> loc(#loc71) } -> tensor<1x120x1x1xbf16> loc(#loc71) %217 = xten_nn.subgraph (%arg5 = %216: tensor<1x120x1x1xbf16>) attributes { LayerName = "Div_90", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "490", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], OutputName = "Div_90", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "492", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: 
tensor<1x120x1x1xbf16>) attributes { LayerName = "Div_90", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "490", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], OutputName = "Div_90", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "492", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_90", OutputName = "Div_90", shift = 0 : i8} : (tensor<1x120x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x120x1x1xbf16> loc(#loc72) xten_nn.output %463 : tensor<1x120x1x1xbf16> loc(#loc72) } -> tensor<1x120x1x1xbf16> loc(#loc72) xten_nn.output %461 : tensor<1x120x1x1xbf16> loc(#loc72) } -> tensor<1x120x1x1xbf16> loc(#loc72) %218 = xten_nn.subgraph (%arg5 = %217: tensor<1x120x1x1xbf16>) attributes { LayerName = "Mul_91_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "492", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], OutputName = "Mul_91_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "493", Port = "data_io.ofm", l3_extend_end = dense<0> : 
vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> } ], Specializes = "TileAdf", With = { config.aie_arch = "aie2p", config.dtype = "bfloat16", config.i_dim_c = 120 : ui32, config.i_dim_h = 1 : ui32, config.i_dim_n = 1 : ui32, config.i_dim_w = 1 : ui32, config.rep_dim_c = 1 : ui32, config.rep_dim_h = 23 : ui32, config.rep_dim_w = 40 : ui32 }} { %461 = tosa.tile %arg5 {multiples = array} : (tensor<1x120x1x1xbf16>) -> tensor<1x120x23x40xbf16> loc(#loc73) xten_nn.output %461 : tensor<1x120x23x40xbf16> loc(#loc73) } -> tensor<1x120x23x40xbf16> loc(#loc73) %219 = xten_nn.subgraph (%arg5 = %218: tensor<1x120x23x40xbf16>, %arg6 = %210: tensor<1x120x23x40xbf16>) attributes { LayerName = "Mul_91_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "492", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "490", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> } ], OutputName = "Mul_91_Duplicated#1", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "493", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x120x23x40xbf16>, %arg8 = %arg6: tensor<1x120x23x40xbf16>) attributes { LayerName = "Mul_91_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "492", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = 
dense<[1, 120, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "490", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> } ], OutputName = "Mul_91_Duplicated#1", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "493", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_91", OutputName = "Mul_91", shift = 0 : i8} : (tensor<1x120x23x40xbf16>, tensor<1x120x23x40xbf16>) -> tensor<1x120x23x40xbf16> loc(#loc73) xten_nn.output %462 : tensor<1x120x23x40xbf16> loc(#loc73) } -> tensor<1x120x23x40xbf16> loc(#loc73) xten_nn.output %461 : tensor<1x120x23x40xbf16> loc(#loc73) } -> tensor<1x120x23x40xbf16> loc(#loc73) %220 = xten_nn.subgraph (%arg5 = %219: tensor<1x120x23x40xbf16>, %arg6 = %114: tensor<40x120x1x1xbf16>, %arg7 = %113: tensor<40xbf16>, %arg8 = %208: tensor<1x40x23x40xbf16>) attributes { LayerName = "Conv_92", OfmShare = 3 : index, Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "493", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> }, { Name = "492", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[40, 120, 1, 1]> : vector<4xindex> }, { Name = "493", UnknownDataFormat = true }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "493", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], 
OutputName = "Add_93", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "496", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg9 = %arg5: tensor<1x120x23x40xbf16>, %arg10 = %arg6: tensor<40x120x1x1xbf16>, %arg11 = %arg7: tensor<40xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_92", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "493", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> }, { Name = "492", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[40, 120, 1, 1]> : vector<4xindex> }, { Name = "493", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_92", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "979", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 
1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %463 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc74) %465 = tosa.reshape %arg10 {new_shape = array} : (tensor<40x120x1x1xbf16>) -> tensor<40x1x1x120xbf16> loc(#loc74) %466 = tosa.transpose %arg9, %464 : (tensor<1x120x23x40xbf16>, tensor<4xi32>) -> tensor<1x23x40x120xbf16> loc(#loc74) %467 = tosa.conv2d %466, %465, %arg11 { PartOfLayerName = "Conv_92", PartOfOutputName = "Conv_92", dilation = array, pad = array, stride = array} : (tensor<1x23x40x120xbf16>, tensor<40x1x1x120xbf16>, tensor<40xbf16>) -> tensor<1x23x40x40xbf16> loc(#loc74) %468 = tosa.transpose %467, %463 : (tensor<1x23x40x40xbf16>, tensor<4xi32>) -> tensor<1x40x23x40xbf16> loc(#loc74) xten_nn.output %468 : tensor<1x40x23x40xbf16> loc(#loc74) } -> tensor<1x40x23x40xbf16> loc(#loc74) %462 = xten_nn.subgraph (%arg9 = %461: tensor<1x40x23x40xbf16>, %arg10 = %arg8: tensor<1x40x23x40xbf16>) attributes { LayerName = "Add_93", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "979", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "493", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], OutputName = "Add_93", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "496", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], Specializes = "AddBf16", Traits = { Binary = true, Elementwise = true }, With = { config.act = 0 : ui8, 
config.act_type = "LINEAR", config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %463 = tosa.add %arg9, %arg10 {LayerName = "Add_93", OutputName = "Add_93"} : (tensor<1x40x23x40xbf16>, tensor<1x40x23x40xbf16>) -> tensor<1x40x23x40xbf16> loc(#loc75) xten_nn.output %463 : tensor<1x40x23x40xbf16> loc(#loc75) } -> tensor<1x40x23x40xbf16> loc(#loc75) xten_nn.output %462 : tensor<1x40x23x40xbf16> loc(#loc75) } -> tensor<1x40x23x40xbf16> loc(#loc340) %221 = xten_nn.subgraph (%arg5 = %220: tensor<1x40x23x40xbf16>, %arg6 = %112: tensor<240x40x1x1xbf16>, %arg7 = %111: tensor<240xbf16>) attributes { LayerName = "Conv_94", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "496", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { Name = "979", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[240, 40, 1, 1]> : vector<4xindex> }, { Name = "496", UnknownDataFormat = true } ], OutputName = "Conv_94", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "982", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x40x23x40xbf16>, %arg9 = %arg6: tensor<240x40x1x1xbf16>, %arg10 = %arg7: tensor<240xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_94", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "496", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { Name = "979", Port 
= "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[240, 40, 1, 1]> : vector<4xindex> }, { Name = "496", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_94", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "982", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc76) %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<240x40x1x1xbf16>) -> tensor<240x1x1x40xbf16> loc(#loc76) %465 = tosa.transpose %arg8, %463 : (tensor<1x40x23x40xbf16>, tensor<4xi32>) -> tensor<1x23x40x40xbf16> loc(#loc76) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_94", PartOfOutputName = "Conv_94", dilation = array, pad = array, stride = array} : (tensor<1x23x40x40xbf16>, tensor<240x1x1x40xbf16>, tensor<240xbf16>) -> tensor<1x23x40x240xbf16> loc(#loc76) %467 = tosa.transpose %466, %462 : (tensor<1x23x40x240xbf16>, tensor<4xi32>) -> tensor<1x240x23x40xbf16> loc(#loc76) xten_nn.output %467 : 
tensor<1x240x23x40xbf16> loc(#loc76) } -> tensor<1x240x23x40xbf16> loc(#loc76) xten_nn.output %461 : tensor<1x240x23x40xbf16> loc(#loc76) } -> tensor<1x240x23x40xbf16> loc(#loc76) %222 = xten_nn.subgraph (%arg5 = %221: tensor<1x240x23x40xbf16>) attributes { LayerName = "Add_96", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "982", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> } ], OutputName = "Add_96", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "500", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x240x23x40xbf16>) attributes { LayerName = "Add_96", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "982", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> } ], OutputName = "Add_96", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "500", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_96", OutputName = "Add_96"} 
: (tensor<1x240x23x40xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x240x23x40xbf16> loc(#loc77) xten_nn.output %463 : tensor<1x240x23x40xbf16> loc(#loc77) } -> tensor<1x240x23x40xbf16> loc(#loc77) xten_nn.output %461 : tensor<1x240x23x40xbf16> loc(#loc77) } -> tensor<1x240x23x40xbf16> loc(#loc77) %223 = xten_nn.subgraph (%arg5 = %222: tensor<1x240x23x40xbf16>) attributes { LayerName = "Clip_99", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "500", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> } ], OutputName = "Clip_99", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "503", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x240x23x40xbf16>) attributes { LayerName = "Clip_99", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "500", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> } ], OutputName = "Clip_99", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "503", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 
= tosa.clamp %arg6 { LayerName = "Clip_99", OutputName = "Clip_99", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x240x23x40xbf16>) -> tensor<1x240x23x40xbf16> loc(#loc78) xten_nn.output %462 : tensor<1x240x23x40xbf16> loc(#loc78) } -> tensor<1x240x23x40xbf16> loc(#loc78) xten_nn.output %461 : tensor<1x240x23x40xbf16> loc(#loc78) } -> tensor<1x240x23x40xbf16> loc(#loc78) %224 = xten_nn.subgraph (%arg5 = %223: tensor<1x240x23x40xbf16>) attributes { LayerName = "Div_101", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "503", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> } ], OutputName = "Div_101", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "505", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x240x23x40xbf16>) attributes { LayerName = "Div_101", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "503", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> } ], OutputName = "Div_101", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "505", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, 
config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_101", OutputName = "Div_101", shift = 0 : i8} : (tensor<1x240x23x40xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x240x23x40xbf16> loc(#loc79) xten_nn.output %463 : tensor<1x240x23x40xbf16> loc(#loc79) } -> tensor<1x240x23x40xbf16> loc(#loc79) xten_nn.output %461 : tensor<1x240x23x40xbf16> loc(#loc79) } -> tensor<1x240x23x40xbf16> loc(#loc79) %225 = xten_nn.subgraph (%arg5 = %221: tensor<1x240x23x40xbf16>, %arg6 = %224: tensor<1x240x23x40xbf16>) attributes { LayerName = "Mul_102", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "982", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "496", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> } ], OutputName = "Mul_102", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "506", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x240x23x40xbf16>, %arg8 = %arg6: tensor<1x240x23x40xbf16>) attributes { LayerName = "Mul_102", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "982", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = 
false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "496", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> } ], OutputName = "Mul_102", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "506", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_102", OutputName = "Mul_102", shift = 0 : i8} : (tensor<1x240x23x40xbf16>, tensor<1x240x23x40xbf16>) -> tensor<1x240x23x40xbf16> loc(#loc80) xten_nn.output %462 : tensor<1x240x23x40xbf16> loc(#loc80) } -> tensor<1x240x23x40xbf16> loc(#loc80) xten_nn.output %461 : tensor<1x240x23x40xbf16> loc(#loc80) } -> tensor<1x240x23x40xbf16> loc(#loc80) %226 = xten_nn.subgraph (%arg5 = %225: tensor<1x240x23x40xbf16>, %arg6 = %110: tensor<240x1x3x3xbf16>, %arg7 = %109: tensor<240xbf16>) attributes { LayerName = "Conv_103", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "506", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "982", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[240, 1, 3, 3]> : vector<4xindex> }, { Name = "506", UnknownDataFormat = true } ], OutputName = "Conv_103", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "985", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : 
vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x240x23x40xbf16>, %arg9 = %arg6: tensor<240x1x3x3xbf16>, %arg10 = %arg7: tensor<240xbf16>) attributes { Dilations = array, HWPadding = [[1, 1], [1, 0]], LayerName = "Conv_103", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "506", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "982", Port = "data_io.wts", SubPort = "wts_data", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[240, 1, 3, 3]> : vector<4xindex> }, { Name = "506", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_103", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "985", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> } ], Specializes = "DepthwiseConv2dBf16", With = { config.act = 0 : ui8, config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.kernel_height = 3 : ui8, config.kernel_width = 3 : ui8, config.stride = 2 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc81) %465 = tosa.transpose %arg9, %464 : (tensor<240x1x3x3xbf16>, tensor<4xi32>) -> tensor<3x3x240x1xbf16> loc(#loc81) %466 = tosa.transpose %arg8, %463 : (tensor<1x240x23x40xbf16>, tensor<4xi32>) -> tensor<1x23x40x240xbf16> loc(#loc81) %467 = 
tosa.depthwise_conv2d %466, %465, %arg10 { PartOfLayerName = "Conv_103", PartOfOutputName = "Conv_103", dilation = array, pad = array, stride = array} : (tensor<1x23x40x240xbf16>, tensor<3x3x240x1xbf16>, tensor<240xbf16>) -> tensor<1x12x20x240xbf16> loc(#loc81) %468 = tosa.transpose %467, %462 : (tensor<1x12x20x240xbf16>, tensor<4xi32>) -> tensor<1x240x12x20xbf16> loc(#loc81) xten_nn.output %468 : tensor<1x240x12x20xbf16> loc(#loc81) } -> tensor<1x240x12x20xbf16> loc(#loc81) xten_nn.output %461 : tensor<1x240x12x20xbf16> loc(#loc81) } -> tensor<1x240x12x20xbf16> loc(#loc81) %227 = xten_nn.subgraph (%arg5 = %226: tensor<1x240x12x20xbf16>) attributes { LayerName = "Add_105", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "985", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> } ], OutputName = "Add_105", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "510", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x240x12x20xbf16>) attributes { LayerName = "Add_105", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "985", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> } ], OutputName = "Add_105", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "510", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits 
= { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_105", OutputName = "Add_105"} : (tensor<1x240x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x240x12x20xbf16> loc(#loc82) xten_nn.output %463 : tensor<1x240x12x20xbf16> loc(#loc82) } -> tensor<1x240x12x20xbf16> loc(#loc82) xten_nn.output %461 : tensor<1x240x12x20xbf16> loc(#loc82) } -> tensor<1x240x12x20xbf16> loc(#loc82) %228 = xten_nn.subgraph (%arg5 = %227: tensor<1x240x12x20xbf16>) attributes { LayerName = "Clip_108", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "510", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_108", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "513", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x240x12x20xbf16>) attributes { LayerName = "Clip_108", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "510", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_108", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "513", Port = "data_io.ofm", l3_extend_end = dense<0> : 
vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_108", OutputName = "Clip_108", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x240x12x20xbf16>) -> tensor<1x240x12x20xbf16> loc(#loc83) xten_nn.output %462 : tensor<1x240x12x20xbf16> loc(#loc83) } -> tensor<1x240x12x20xbf16> loc(#loc83) xten_nn.output %461 : tensor<1x240x12x20xbf16> loc(#loc83) } -> tensor<1x240x12x20xbf16> loc(#loc83) %229 = xten_nn.subgraph (%arg5 = %228: tensor<1x240x12x20xbf16>) attributes { LayerName = "Div_110", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "513", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> } ], OutputName = "Div_110", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "515", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x240x12x20xbf16>) attributes { LayerName = "Div_110", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "513", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> } ], OutputName = "Div_110", Reason = "MllibKernel", Results = 
[ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "515", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_110", OutputName = "Div_110", shift = 0 : i8} : (tensor<1x240x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x240x12x20xbf16> loc(#loc84) xten_nn.output %463 : tensor<1x240x12x20xbf16> loc(#loc84) } -> tensor<1x240x12x20xbf16> loc(#loc84) xten_nn.output %461 : tensor<1x240x12x20xbf16> loc(#loc84) } -> tensor<1x240x12x20xbf16> loc(#loc84) %230 = xten_nn.subgraph (%arg5 = %226: tensor<1x240x12x20xbf16>, %arg6 = %229: tensor<1x240x12x20xbf16>) attributes { LayerName = "Mul_111", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "985", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "506", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_111", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "516", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = 
xten_nn.subgraph (%arg7 = %arg5: tensor<1x240x12x20xbf16>, %arg8 = %arg6: tensor<1x240x12x20xbf16>) attributes { LayerName = "Mul_111", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "985", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "506", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_111", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "516", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_111", OutputName = "Mul_111", shift = 0 : i8} : (tensor<1x240x12x20xbf16>, tensor<1x240x12x20xbf16>) -> tensor<1x240x12x20xbf16> loc(#loc85) xten_nn.output %462 : tensor<1x240x12x20xbf16> loc(#loc85) } -> tensor<1x240x12x20xbf16> loc(#loc85) xten_nn.output %461 : tensor<1x240x12x20xbf16> loc(#loc85) } -> tensor<1x240x12x20xbf16> loc(#loc85) %231 = xten_nn.subgraph (%arg5 = %230: tensor<1x240x12x20xbf16>, %arg6 = %108: tensor<80x240x1x1xbf16>, %arg7 = %107: tensor<80xbf16>) attributes { LayerName = "Conv_112", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "516", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> }, { Name = "985", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[80, 
240, 1, 1]> : vector<4xindex> }, { Name = "516", UnknownDataFormat = true } ], OutputName = "Conv_112", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "988", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x240x12x20xbf16>, %arg9 = %arg6: tensor<80x240x1x1xbf16>, %arg10 = %arg7: tensor<80xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_112", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "516", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> }, { Name = "985", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[80, 240, 1, 1]> : vector<4xindex> }, { Name = "516", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_112", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "988", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : 
ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc86) %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<80x240x1x1xbf16>) -> tensor<80x1x1x240xbf16> loc(#loc86) %465 = tosa.transpose %arg8, %463 : (tensor<1x240x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x240xbf16> loc(#loc86) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_112", PartOfOutputName = "Conv_112", dilation = array, pad = array, stride = array} : (tensor<1x12x20x240xbf16>, tensor<80x1x1x240xbf16>, tensor<80xbf16>) -> tensor<1x12x20x80xbf16> loc(#loc86) %467 = tosa.transpose %466, %462 : (tensor<1x12x20x80xbf16>, tensor<4xi32>) -> tensor<1x80x12x20xbf16> loc(#loc86) xten_nn.output %467 : tensor<1x80x12x20xbf16> loc(#loc86) } -> tensor<1x80x12x20xbf16> loc(#loc86) xten_nn.output %461 : tensor<1x80x12x20xbf16> loc(#loc86) } -> tensor<1x80x12x20xbf16> loc(#loc86) %232 = xten_nn.subgraph (%arg5 = %231: tensor<1x80x12x20xbf16>, %arg6 = %106: tensor<200x80x1x1xbf16>, %arg7 = %105: tensor<200xbf16>) attributes { LayerName = "Conv_113", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "988", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> }, { Name = "516", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[200, 80, 1, 1]> : vector<4xindex> }, { Name = "992", UnknownDataFormat = true } ], OutputName = "Conv_113", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "991", l3_extend_end = dense<0> : vector<4xindex>, 
l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x80x12x20xbf16>, %arg9 = %arg6: tensor<200x80x1x1xbf16>, %arg10 = %arg7: tensor<200xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_113", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "988", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> }, { Name = "516", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[200, 80, 1, 1]> : vector<4xindex> }, { Name = "992", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_113", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "991", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> 
tensor<4xi32> loc(#loc87) %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<200x80x1x1xbf16>) -> tensor<200x1x1x80xbf16> loc(#loc87) %465 = tosa.transpose %arg8, %463 : (tensor<1x80x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x80xbf16> loc(#loc87) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_113", PartOfOutputName = "Conv_113", dilation = array, pad = array, stride = array} : (tensor<1x12x20x80xbf16>, tensor<200x1x1x80xbf16>, tensor<200xbf16>) -> tensor<1x12x20x200xbf16> loc(#loc87) %467 = tosa.transpose %466, %462 : (tensor<1x12x20x200xbf16>, tensor<4xi32>) -> tensor<1x200x12x20xbf16> loc(#loc87) xten_nn.output %467 : tensor<1x200x12x20xbf16> loc(#loc87) } -> tensor<1x200x12x20xbf16> loc(#loc87) xten_nn.output %461 : tensor<1x200x12x20xbf16> loc(#loc87) } -> tensor<1x200x12x20xbf16> loc(#loc87) %233 = xten_nn.subgraph (%arg5 = %232: tensor<1x200x12x20xbf16>) attributes { LayerName = "Add_115", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "991", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], OutputName = "Add_115", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "522", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x200x12x20xbf16>) attributes { LayerName = "Add_115", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "991", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], OutputName = "Add_115", Reason = "MllibKernel", Results = [ { CurrentDataFormat = 
"NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "522", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_115", OutputName = "Add_115"} : (tensor<1x200x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x200x12x20xbf16> loc(#loc88) xten_nn.output %463 : tensor<1x200x12x20xbf16> loc(#loc88) } -> tensor<1x200x12x20xbf16> loc(#loc88) xten_nn.output %461 : tensor<1x200x12x20xbf16> loc(#loc88) } -> tensor<1x200x12x20xbf16> loc(#loc88) %234 = xten_nn.subgraph (%arg5 = %233: tensor<1x200x12x20xbf16>) attributes { LayerName = "Clip_118", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "522", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_118", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "525", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x200x12x20xbf16>) attributes { LayerName = "Clip_118", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "522", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = 
dense<[1, 200, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_118", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "525", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_118", OutputName = "Clip_118", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x200x12x20xbf16>) -> tensor<1x200x12x20xbf16> loc(#loc89) xten_nn.output %462 : tensor<1x200x12x20xbf16> loc(#loc89) } -> tensor<1x200x12x20xbf16> loc(#loc89) xten_nn.output %461 : tensor<1x200x12x20xbf16> loc(#loc89) } -> tensor<1x200x12x20xbf16> loc(#loc89) %235 = xten_nn.subgraph (%arg5 = %234: tensor<1x200x12x20xbf16>) attributes { LayerName = "Div_120", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "525", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], OutputName = "Div_120", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "527", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x200x12x20xbf16>) attributes { LayerName = "Div_120", Operands = [ { CurrentDataFormat = "NCHW", 
L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "525", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], OutputName = "Div_120", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "527", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_120", OutputName = "Div_120", shift = 0 : i8} : (tensor<1x200x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x200x12x20xbf16> loc(#loc90) xten_nn.output %463 : tensor<1x200x12x20xbf16> loc(#loc90) } -> tensor<1x200x12x20xbf16> loc(#loc90) xten_nn.output %461 : tensor<1x200x12x20xbf16> loc(#loc90) } -> tensor<1x200x12x20xbf16> loc(#loc90) %236 = xten_nn.subgraph (%arg5 = %232: tensor<1x200x12x20xbf16>, %arg6 = %235: tensor<1x200x12x20xbf16>) attributes { LayerName = "Mul_121", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "991", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "988", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_121", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = 
"528", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x200x12x20xbf16>, %arg8 = %arg6: tensor<1x200x12x20xbf16>) attributes { LayerName = "Mul_121", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "991", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "988", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_121", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "528", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_121", OutputName = "Mul_121", shift = 0 : i8} : (tensor<1x200x12x20xbf16>, tensor<1x200x12x20xbf16>) -> tensor<1x200x12x20xbf16> loc(#loc91) xten_nn.output %462 : tensor<1x200x12x20xbf16> loc(#loc91) } -> tensor<1x200x12x20xbf16> loc(#loc91) xten_nn.output %461 : tensor<1x200x12x20xbf16> loc(#loc91) } -> tensor<1x200x12x20xbf16> loc(#loc91) %237 = xten_nn.subgraph (%arg5 = %236: tensor<1x200x12x20xbf16>, %arg6 = %104: tensor<200x1x3x3xbf16>, %arg7 = %103: tensor<200xbf16>) attributes { LayerName = "Conv_122", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = 
"C:8", Name = "528", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "991", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[200, 1, 3, 3]> : vector<4xindex> }, { Name = "528", UnknownDataFormat = true } ], OutputName = "Conv_122", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "994", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x200x12x20xbf16>, %arg9 = %arg6: tensor<200x1x3x3xbf16>, %arg10 = %arg7: tensor<200xbf16>) attributes { Dilations = array, HWPadding = [[1, 1], [1, 1]], LayerName = "Conv_122", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "528", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "991", Port = "data_io.wts", SubPort = "wts_data", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[200, 1, 3, 3]> : vector<4xindex> }, { Name = "528", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_122", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "994", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], Specializes = "DepthwiseConv2dBf16", With = { config.act = 0 : ui8, config.aie_arch = "aie2p", config.batch_size = 1 : ui8, 
config.compiler = "chess", config.kernel_height = 3 : ui8, config.kernel_width = 3 : ui8, config.stride = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc92) %465 = tosa.transpose %arg9, %464 : (tensor<200x1x3x3xbf16>, tensor<4xi32>) -> tensor<3x3x200x1xbf16> loc(#loc92) %466 = tosa.transpose %arg8, %463 : (tensor<1x200x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x200xbf16> loc(#loc92) %467 = tosa.depthwise_conv2d %466, %465, %arg10 { PartOfLayerName = "Conv_122", PartOfOutputName = "Conv_122", dilation = array, pad = array, stride = array} : (tensor<1x12x20x200xbf16>, tensor<3x3x200x1xbf16>, tensor<200xbf16>) -> tensor<1x12x20x200xbf16> loc(#loc92) %468 = tosa.transpose %467, %462 : (tensor<1x12x20x200xbf16>, tensor<4xi32>) -> tensor<1x200x12x20xbf16> loc(#loc92) xten_nn.output %468 : tensor<1x200x12x20xbf16> loc(#loc92) } -> tensor<1x200x12x20xbf16> loc(#loc92) xten_nn.output %461 : tensor<1x200x12x20xbf16> loc(#loc92) } -> tensor<1x200x12x20xbf16> loc(#loc92) %238 = xten_nn.subgraph (%arg5 = %237: tensor<1x200x12x20xbf16>) attributes { LayerName = "Add_124", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "994", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], OutputName = "Add_124", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "532", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { 
%461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x200x12x20xbf16>) attributes { LayerName = "Add_124", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "994", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], OutputName = "Add_124", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "532", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_124", OutputName = "Add_124"} : (tensor<1x200x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x200x12x20xbf16> loc(#loc93) xten_nn.output %463 : tensor<1x200x12x20xbf16> loc(#loc93) } -> tensor<1x200x12x20xbf16> loc(#loc93) xten_nn.output %461 : tensor<1x200x12x20xbf16> loc(#loc93) } -> tensor<1x200x12x20xbf16> loc(#loc93) %239 = xten_nn.subgraph (%arg5 = %238: tensor<1x200x12x20xbf16>) attributes { LayerName = "Clip_127", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "532", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_127", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "535", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } 
], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x200x12x20xbf16>) attributes { LayerName = "Clip_127", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "532", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_127", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "535", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_127", OutputName = "Clip_127", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x200x12x20xbf16>) -> tensor<1x200x12x20xbf16> loc(#loc94) xten_nn.output %462 : tensor<1x200x12x20xbf16> loc(#loc94) } -> tensor<1x200x12x20xbf16> loc(#loc94) xten_nn.output %461 : tensor<1x200x12x20xbf16> loc(#loc94) } -> tensor<1x200x12x20xbf16> loc(#loc94) %240 = xten_nn.subgraph (%arg5 = %239: tensor<1x200x12x20xbf16>) attributes { LayerName = "Div_129", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "535", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], OutputName = "Div_129", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = 
"HCWN", L3Vectorization = "C:8", Name = "537", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x200x12x20xbf16>) attributes { LayerName = "Div_129", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "535", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], OutputName = "Div_129", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "537", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_129", OutputName = "Div_129", shift = 0 : i8} : (tensor<1x200x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x200x12x20xbf16> loc(#loc95) xten_nn.output %463 : tensor<1x200x12x20xbf16> loc(#loc95) } -> tensor<1x200x12x20xbf16> loc(#loc95) xten_nn.output %461 : tensor<1x200x12x20xbf16> loc(#loc95) } -> tensor<1x200x12x20xbf16> loc(#loc95) %241 = xten_nn.subgraph (%arg5 = %237: tensor<1x200x12x20xbf16>, %arg6 = %240: tensor<1x200x12x20xbf16>) attributes { LayerName = "Mul_130", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "994", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : 
vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "528", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_130", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "538", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x200x12x20xbf16>, %arg8 = %arg6: tensor<1x200x12x20xbf16>) attributes { LayerName = "Mul_130", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "994", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "528", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_130", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "538", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_130", OutputName = "Mul_130", shift = 0 : i8} : (tensor<1x200x12x20xbf16>, tensor<1x200x12x20xbf16>) -> tensor<1x200x12x20xbf16> loc(#loc96) xten_nn.output %462 : 
tensor<1x200x12x20xbf16> loc(#loc96) } -> tensor<1x200x12x20xbf16> loc(#loc96) xten_nn.output %461 : tensor<1x200x12x20xbf16> loc(#loc96) } -> tensor<1x200x12x20xbf16> loc(#loc96) %242 = xten_nn.subgraph (%arg5 = %241: tensor<1x200x12x20xbf16>, %arg6 = %102: tensor<80x200x1x1xbf16>, %arg7 = %101: tensor<80xbf16>, %arg8 = %231: tensor<1x80x12x20xbf16>) attributes { LayerName = "Conv_131", OfmShare = 3 : index, Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "538", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> }, { Name = "994", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[80, 200, 1, 1]> : vector<4xindex> }, { Name = "538", UnknownDataFormat = true }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "538", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> } ], OutputName = "Add_132", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "541", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg9 = %arg5: tensor<1x200x12x20xbf16>, %arg10 = %arg6: tensor<80x200x1x1xbf16>, %arg11 = %arg7: tensor<80xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_131", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "538", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> }, { Name = "994", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, 
l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[80, 200, 1, 1]> : vector<4xindex> }, { Name = "538", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_131", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "997", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %463 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc97) %465 = tosa.reshape %arg10 {new_shape = array} : (tensor<80x200x1x1xbf16>) -> tensor<80x1x1x200xbf16> loc(#loc97) %466 = tosa.transpose %arg9, %464 : (tensor<1x200x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x200xbf16> loc(#loc97) %467 = tosa.conv2d %466, %465, %arg11 { PartOfLayerName = "Conv_131", PartOfOutputName = "Conv_131", dilation = array, pad = array, stride = array} : (tensor<1x12x20x200xbf16>, tensor<80x1x1x200xbf16>, tensor<80xbf16>) -> tensor<1x12x20x80xbf16> loc(#loc97) %468 = tosa.transpose %467, %463 : (tensor<1x12x20x80xbf16>, tensor<4xi32>) -> tensor<1x80x12x20xbf16> loc(#loc97) xten_nn.output %468 : tensor<1x80x12x20xbf16> loc(#loc97) } -> tensor<1x80x12x20xbf16> 
loc(#loc97) %462 = xten_nn.subgraph (%arg9 = %461: tensor<1x80x12x20xbf16>, %arg10 = %arg8: tensor<1x80x12x20xbf16>) attributes { LayerName = "Add_132", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "997", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "538", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> } ], OutputName = "Add_132", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "541", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> } ], Specializes = "AddBf16", Traits = { Binary = true, Elementwise = true }, With = { config.act = 0 : ui8, config.act_type = "LINEAR", config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %463 = tosa.add %arg9, %arg10 {LayerName = "Add_132", OutputName = "Add_132"} : (tensor<1x80x12x20xbf16>, tensor<1x80x12x20xbf16>) -> tensor<1x80x12x20xbf16> loc(#loc98) xten_nn.output %463 : tensor<1x80x12x20xbf16> loc(#loc98) } -> tensor<1x80x12x20xbf16> loc(#loc98) xten_nn.output %462 : tensor<1x80x12x20xbf16> loc(#loc98) } -> tensor<1x80x12x20xbf16> loc(#loc341) %243 = xten_nn.subgraph (%arg5 = %242: tensor<1x80x12x20xbf16>, %arg6 = %100: tensor<184x80x1x1xbf16>, %arg7 = %99: tensor<184xbf16>) attributes { LayerName = "Conv_133", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "541", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> }, { Name = "997", UnknownDataFormat = true, l3_extend_end = dense<0> : 
vector<4xindex>, l3_tile_count = dense<[184, 80, 1, 1]> : vector<4xindex> }, { Name = "541", UnknownDataFormat = true } ], OutputName = "Conv_133", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1000", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x80x12x20xbf16>, %arg9 = %arg6: tensor<184x80x1x1xbf16>, %arg10 = %arg7: tensor<184xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_133", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "541", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> }, { Name = "997", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[184, 80, 1, 1]> : vector<4xindex> }, { Name = "541", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_133", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1000", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", 
config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc99) %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<184x80x1x1xbf16>) -> tensor<184x1x1x80xbf16> loc(#loc99) %465 = tosa.transpose %arg8, %463 : (tensor<1x80x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x80xbf16> loc(#loc99) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_133", PartOfOutputName = "Conv_133", dilation = array, pad = array, stride = array} : (tensor<1x12x20x80xbf16>, tensor<184x1x1x80xbf16>, tensor<184xbf16>) -> tensor<1x12x20x184xbf16> loc(#loc99) %467 = tosa.transpose %466, %462 : (tensor<1x12x20x184xbf16>, tensor<4xi32>) -> tensor<1x184x12x20xbf16> loc(#loc99) xten_nn.output %467 : tensor<1x184x12x20xbf16> loc(#loc99) } -> tensor<1x184x12x20xbf16> loc(#loc99) xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc99) } -> tensor<1x184x12x20xbf16> loc(#loc99) %244 = xten_nn.subgraph (%arg5 = %243: tensor<1x184x12x20xbf16>) attributes { LayerName = "Add_135", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1000", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Add_135", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "545", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph 
(%arg6 = %arg5: tensor<1x184x12x20xbf16>) attributes { LayerName = "Add_135", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1000", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Add_135", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "545", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_135", OutputName = "Add_135"} : (tensor<1x184x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc100) xten_nn.output %463 : tensor<1x184x12x20xbf16> loc(#loc100) } -> tensor<1x184x12x20xbf16> loc(#loc100) xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc100) } -> tensor<1x184x12x20xbf16> loc(#loc100) %245 = xten_nn.subgraph (%arg5 = %244: tensor<1x184x12x20xbf16>) attributes { LayerName = "Clip_138", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "545", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_138", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "548", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], 
memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x184x12x20xbf16>) attributes { LayerName = "Clip_138", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "545", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_138", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "548", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_138", OutputName = "Clip_138", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x184x12x20xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc101) xten_nn.output %462 : tensor<1x184x12x20xbf16> loc(#loc101) } -> tensor<1x184x12x20xbf16> loc(#loc101) xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc101) } -> tensor<1x184x12x20xbf16> loc(#loc101) %246 = xten_nn.subgraph (%arg5 = %245: tensor<1x184x12x20xbf16>) attributes { LayerName = "Div_140", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "548", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Div_140", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = 
"HCWN", L3Vectorization = "C:8", Name = "550", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x184x12x20xbf16>) attributes { LayerName = "Div_140", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "548", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Div_140", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "550", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_140", OutputName = "Div_140", shift = 0 : i8} : (tensor<1x184x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc102) xten_nn.output %463 : tensor<1x184x12x20xbf16> loc(#loc102) } -> tensor<1x184x12x20xbf16> loc(#loc102) xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc102) } -> tensor<1x184x12x20xbf16> loc(#loc102) %247 = xten_nn.subgraph (%arg5 = %243: tensor<1x184x12x20xbf16>, %arg6 = %246: tensor<1x184x12x20xbf16>) attributes { LayerName = "Mul_141", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1000", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : 
vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "541", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_141", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "551", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x184x12x20xbf16>, %arg8 = %arg6: tensor<1x184x12x20xbf16>) attributes { LayerName = "Mul_141", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1000", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "541", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_141", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "551", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_141", OutputName = "Mul_141", shift = 0 : i8} : (tensor<1x184x12x20xbf16>, tensor<1x184x12x20xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc103) xten_nn.output %462 : 
tensor<1x184x12x20xbf16> loc(#loc103) } -> tensor<1x184x12x20xbf16> loc(#loc103) xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc103) } -> tensor<1x184x12x20xbf16> loc(#loc103) %248 = xten_nn.subgraph (%arg5 = %247: tensor<1x184x12x20xbf16>, %arg6 = %98: tensor<184x1x3x3xbf16>, %arg7 = %97: tensor<184xbf16>) attributes { LayerName = "Conv_142", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "551", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1000", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[184, 1, 3, 3]> : vector<4xindex> }, { Name = "551", UnknownDataFormat = true } ], OutputName = "Conv_142", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1003", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x184x12x20xbf16>, %arg9 = %arg6: tensor<184x1x3x3xbf16>, %arg10 = %arg7: tensor<184xbf16>) attributes { Dilations = array, HWPadding = [[1, 1], [1, 1]], LayerName = "Conv_142", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "551", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1000", Port = "data_io.wts", SubPort = "wts_data", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[184, 1, 3, 3]> : vector<4xindex> }, { Name = "551", Port = "data_io.wts", SubPort = "bias", 
UnknownDataFormat = true } ], OutputName = "Conv_142", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1003", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], Specializes = "DepthwiseConv2dBf16", With = { config.act = 0 : ui8, config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.kernel_height = 3 : ui8, config.kernel_width = 3 : ui8, config.stride = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc104) %465 = tosa.transpose %arg9, %464 : (tensor<184x1x3x3xbf16>, tensor<4xi32>) -> tensor<3x3x184x1xbf16> loc(#loc104) %466 = tosa.transpose %arg8, %463 : (tensor<1x184x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x184xbf16> loc(#loc104) %467 = tosa.depthwise_conv2d %466, %465, %arg10 { PartOfLayerName = "Conv_142", PartOfOutputName = "Conv_142", dilation = array, pad = array, stride = array} : (tensor<1x12x20x184xbf16>, tensor<3x3x184x1xbf16>, tensor<184xbf16>) -> tensor<1x12x20x184xbf16> loc(#loc104) %468 = tosa.transpose %467, %462 : (tensor<1x12x20x184xbf16>, tensor<4xi32>) -> tensor<1x184x12x20xbf16> loc(#loc104) xten_nn.output %468 : tensor<1x184x12x20xbf16> loc(#loc104) } -> tensor<1x184x12x20xbf16> loc(#loc104) xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc104) } -> tensor<1x184x12x20xbf16> loc(#loc104) %249 = xten_nn.subgraph (%arg5 = %248: tensor<1x184x12x20xbf16>) attributes { LayerName = "Add_144", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1003", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : 
vector<4xindex> } ], OutputName = "Add_144", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "555", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x184x12x20xbf16>) attributes { LayerName = "Add_144", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1003", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Add_144", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "555", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_144", OutputName = "Add_144"} : (tensor<1x184x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc105) xten_nn.output %463 : tensor<1x184x12x20xbf16> loc(#loc105) } -> tensor<1x184x12x20xbf16> loc(#loc105) xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc105) } -> tensor<1x184x12x20xbf16> loc(#loc105) %250 = xten_nn.subgraph (%arg5 = %249: tensor<1x184x12x20xbf16>) attributes { LayerName = "Clip_147", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", 
L3Vectorization = "C:8", Name = "555", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_147", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "558", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x184x12x20xbf16>) attributes { LayerName = "Clip_147", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "555", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_147", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "558", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_147", OutputName = "Clip_147", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x184x12x20xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc106) xten_nn.output %462 : tensor<1x184x12x20xbf16> loc(#loc106) } -> tensor<1x184x12x20xbf16> loc(#loc106) xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc106) } -> tensor<1x184x12x20xbf16> loc(#loc106) %251 = 
xten_nn.subgraph (%arg5 = %250: tensor<1x184x12x20xbf16>) attributes { LayerName = "Div_149", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "558", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Div_149", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "560", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x184x12x20xbf16>) attributes { LayerName = "Div_149", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "558", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Div_149", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "560", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_149", OutputName = "Div_149", shift = 0 : i8} : (tensor<1x184x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc107) xten_nn.output %463 : tensor<1x184x12x20xbf16> loc(#loc107) } -> 
tensor<1x184x12x20xbf16> loc(#loc107) xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc107) } -> tensor<1x184x12x20xbf16> loc(#loc107) %252 = xten_nn.subgraph (%arg5 = %248: tensor<1x184x12x20xbf16>, %arg6 = %251: tensor<1x184x12x20xbf16>) attributes { LayerName = "Mul_150", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1003", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "551", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_150", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "561", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x184x12x20xbf16>, %arg8 = %arg6: tensor<1x184x12x20xbf16>) attributes { LayerName = "Mul_150", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1003", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "551", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_150", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "561", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = 
dense<[1, 184, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_150", OutputName = "Mul_150", shift = 0 : i8} : (tensor<1x184x12x20xbf16>, tensor<1x184x12x20xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc108) xten_nn.output %462 : tensor<1x184x12x20xbf16> loc(#loc108) } -> tensor<1x184x12x20xbf16> loc(#loc108) xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc108) } -> tensor<1x184x12x20xbf16> loc(#loc108) %253 = xten_nn.subgraph (%arg5 = %252: tensor<1x184x12x20xbf16>, %arg6 = %96: tensor<80x184x1x1xbf16>, %arg7 = %95: tensor<80xbf16>, %arg8 = %242: tensor<1x80x12x20xbf16>) attributes { LayerName = "Conv_151", OfmShare = 3 : index, Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "561", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> }, { Name = "1003", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[80, 184, 1, 1]> : vector<4xindex> }, { Name = "561", UnknownDataFormat = true }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "561", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> } ], OutputName = "Add_152", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "564", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg9 = %arg5: tensor<1x184x12x20xbf16>, %arg10 = %arg6: 
tensor<80x184x1x1xbf16>, %arg11 = %arg7: tensor<80xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_151", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "561", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> }, { Name = "1003", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[80, 184, 1, 1]> : vector<4xindex> }, { Name = "561", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_151", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1006", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %463 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc109) %465 = tosa.reshape %arg10 {new_shape = array} : (tensor<80x184x1x1xbf16>) -> tensor<80x1x1x184xbf16> loc(#loc109) %466 = tosa.transpose %arg9, %464 : (tensor<1x184x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x184xbf16> loc(#loc109) 
%467 = tosa.conv2d %466, %465, %arg11 { PartOfLayerName = "Conv_151", PartOfOutputName = "Conv_151", dilation = array, pad = array, stride = array} : (tensor<1x12x20x184xbf16>, tensor<80x1x1x184xbf16>, tensor<80xbf16>) -> tensor<1x12x20x80xbf16> loc(#loc109) %468 = tosa.transpose %467, %463 : (tensor<1x12x20x80xbf16>, tensor<4xi32>) -> tensor<1x80x12x20xbf16> loc(#loc109) xten_nn.output %468 : tensor<1x80x12x20xbf16> loc(#loc109) } -> tensor<1x80x12x20xbf16> loc(#loc109) %462 = xten_nn.subgraph (%arg9 = %461: tensor<1x80x12x20xbf16>, %arg10 = %arg8: tensor<1x80x12x20xbf16>) attributes { LayerName = "Add_152", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1006", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "561", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> } ], OutputName = "Add_152", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "564", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> } ], Specializes = "AddBf16", Traits = { Binary = true, Elementwise = true }, With = { config.act = 0 : ui8, config.act_type = "LINEAR", config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %463 = tosa.add %arg9, %arg10 {LayerName = "Add_152", OutputName = "Add_152"} : (tensor<1x80x12x20xbf16>, tensor<1x80x12x20xbf16>) -> tensor<1x80x12x20xbf16> loc(#loc110) xten_nn.output %463 : tensor<1x80x12x20xbf16> loc(#loc110) } -> tensor<1x80x12x20xbf16> loc(#loc110) xten_nn.output %462 : tensor<1x80x12x20xbf16> loc(#loc110) } -> tensor<1x80x12x20xbf16> 
loc(#loc342) %254 = xten_nn.subgraph (%arg5 = %253: tensor<1x80x12x20xbf16>, %arg6 = %94: tensor<184x80x1x1xbf16>, %arg7 = %93: tensor<184xbf16>) attributes { LayerName = "Conv_153", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "564", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> }, { Name = "1006", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[184, 80, 1, 1]> : vector<4xindex> }, { Name = "564", UnknownDataFormat = true } ], OutputName = "Conv_153", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1009", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x80x12x20xbf16>, %arg9 = %arg6: tensor<184x80x1x1xbf16>, %arg10 = %arg7: tensor<184xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_153", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "564", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> }, { Name = "1006", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[184, 80, 1, 1]> : vector<4xindex> }, { Name = "564", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_153", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1009", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = 
dense<[1, 184, 12, 20]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc111) %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<184x80x1x1xbf16>) -> tensor<184x1x1x80xbf16> loc(#loc111) %465 = tosa.transpose %arg8, %463 : (tensor<1x80x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x80xbf16> loc(#loc111) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_153", PartOfOutputName = "Conv_153", dilation = array, pad = array, stride = array} : (tensor<1x12x20x80xbf16>, tensor<184x1x1x80xbf16>, tensor<184xbf16>) -> tensor<1x12x20x184xbf16> loc(#loc111) %467 = tosa.transpose %466, %462 : (tensor<1x12x20x184xbf16>, tensor<4xi32>) -> tensor<1x184x12x20xbf16> loc(#loc111) xten_nn.output %467 : tensor<1x184x12x20xbf16> loc(#loc111) } -> tensor<1x184x12x20xbf16> loc(#loc111) xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc111) } -> tensor<1x184x12x20xbf16> loc(#loc111) %255 = xten_nn.subgraph (%arg5 = %254: tensor<1x184x12x20xbf16>) attributes { LayerName = "Add_155", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1009", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> 
} ], OutputName = "Add_155", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "568", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x184x12x20xbf16>) attributes { LayerName = "Add_155", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1009", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Add_155", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "568", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_155", OutputName = "Add_155"} : (tensor<1x184x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc112) xten_nn.output %463 : tensor<1x184x12x20xbf16> loc(#loc112) } -> tensor<1x184x12x20xbf16> loc(#loc112) xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc112) } -> tensor<1x184x12x20xbf16> loc(#loc112) %256 = xten_nn.subgraph (%arg5 = %255: tensor<1x184x12x20xbf16>) attributes { LayerName = "Clip_158", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization 
= "C:8", Name = "568", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_158", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "571", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x184x12x20xbf16>) attributes { LayerName = "Clip_158", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "568", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_158", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "571", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_158", OutputName = "Clip_158", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x184x12x20xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc113) xten_nn.output %462 : tensor<1x184x12x20xbf16> loc(#loc113) } -> tensor<1x184x12x20xbf16> loc(#loc113) xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc113) } -> tensor<1x184x12x20xbf16> loc(#loc113) %257 = xten_nn.subgraph (%arg5 
= %256: tensor<1x184x12x20xbf16>) attributes { LayerName = "Div_160", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "571", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Div_160", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "573", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x184x12x20xbf16>) attributes { LayerName = "Div_160", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "571", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Div_160", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "573", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_160", OutputName = "Div_160", shift = 0 : i8} : (tensor<1x184x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc114) xten_nn.output %463 : tensor<1x184x12x20xbf16> loc(#loc114) } -> tensor<1x184x12x20xbf16> 
loc(#loc114) xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc114) } -> tensor<1x184x12x20xbf16> loc(#loc114) %258 = xten_nn.subgraph (%arg5 = %254: tensor<1x184x12x20xbf16>, %arg6 = %257: tensor<1x184x12x20xbf16>) attributes { LayerName = "Mul_161", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1009", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "564", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_161", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "574", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x184x12x20xbf16>, %arg8 = %arg6: tensor<1x184x12x20xbf16>) attributes { LayerName = "Mul_161", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1009", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "564", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_161", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "574", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : 
vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_161", OutputName = "Mul_161", shift = 0 : i8} : (tensor<1x184x12x20xbf16>, tensor<1x184x12x20xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc115) xten_nn.output %462 : tensor<1x184x12x20xbf16> loc(#loc115) } -> tensor<1x184x12x20xbf16> loc(#loc115) xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc115) } -> tensor<1x184x12x20xbf16> loc(#loc115) %259 = xten_nn.subgraph (%arg5 = %258: tensor<1x184x12x20xbf16>, %arg6 = %92: tensor<184x1x3x3xbf16>, %arg7 = %91: tensor<184xbf16>) attributes { LayerName = "Conv_162", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "574", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1009", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[184, 1, 3, 3]> : vector<4xindex> }, { Name = "574", UnknownDataFormat = true } ], OutputName = "Conv_162", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1012", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x184x12x20xbf16>, %arg9 = %arg6: tensor<184x1x3x3xbf16>, %arg10 = %arg7: tensor<184xbf16>) attributes { Dilations = array, HWPadding = [[1, 1], [1, 1]], LayerName = "Conv_162", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", 
Name = "574", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1009", Port = "data_io.wts", SubPort = "wts_data", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[184, 1, 3, 3]> : vector<4xindex> }, { Name = "574", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_162", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1012", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], Specializes = "DepthwiseConv2dBf16", With = { config.act = 0 : ui8, config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.kernel_height = 3 : ui8, config.kernel_width = 3 : ui8, config.stride = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc116) %465 = tosa.transpose %arg9, %464 : (tensor<184x1x3x3xbf16>, tensor<4xi32>) -> tensor<3x3x184x1xbf16> loc(#loc116) %466 = tosa.transpose %arg8, %463 : (tensor<1x184x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x184xbf16> loc(#loc116) %467 = tosa.depthwise_conv2d %466, %465, %arg10 { PartOfLayerName = "Conv_162", PartOfOutputName = "Conv_162", dilation = array, pad = array, stride = array} : (tensor<1x12x20x184xbf16>, tensor<3x3x184x1xbf16>, tensor<184xbf16>) -> tensor<1x12x20x184xbf16> loc(#loc116) %468 = tosa.transpose %467, %462 : (tensor<1x12x20x184xbf16>, tensor<4xi32>) -> tensor<1x184x12x20xbf16> loc(#loc116) xten_nn.output %468 : tensor<1x184x12x20xbf16> loc(#loc116) } -> 
tensor<1x184x12x20xbf16> loc(#loc116) xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc116) } -> tensor<1x184x12x20xbf16> loc(#loc116) %260 = xten_nn.subgraph (%arg5 = %259: tensor<1x184x12x20xbf16>) attributes { LayerName = "Add_164", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1012", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Add_164", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "578", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x184x12x20xbf16>) attributes { LayerName = "Add_164", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1012", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Add_164", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "578", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_164", OutputName = "Add_164"} : (tensor<1x184x12x20xbf16>, 
tensor<1x1x1x1xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc117) xten_nn.output %463 : tensor<1x184x12x20xbf16> loc(#loc117) } -> tensor<1x184x12x20xbf16> loc(#loc117) xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc117) } -> tensor<1x184x12x20xbf16> loc(#loc117) %261 = xten_nn.subgraph (%arg5 = %260: tensor<1x184x12x20xbf16>) attributes { LayerName = "Clip_167", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "578", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_167", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "581", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x184x12x20xbf16>) attributes { LayerName = "Clip_167", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "578", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_167", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "581", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { 
LayerName = "Clip_167", OutputName = "Clip_167", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x184x12x20xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc118) xten_nn.output %462 : tensor<1x184x12x20xbf16> loc(#loc118) } -> tensor<1x184x12x20xbf16> loc(#loc118) xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc118) } -> tensor<1x184x12x20xbf16> loc(#loc118) %262 = xten_nn.subgraph (%arg5 = %261: tensor<1x184x12x20xbf16>) attributes { LayerName = "Div_169", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "581", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Div_169", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "583", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x184x12x20xbf16>) attributes { LayerName = "Div_169", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "581", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Div_169", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "583", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 
1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_169", OutputName = "Div_169", shift = 0 : i8} : (tensor<1x184x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc119) xten_nn.output %463 : tensor<1x184x12x20xbf16> loc(#loc119) } -> tensor<1x184x12x20xbf16> loc(#loc119) xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc119) } -> tensor<1x184x12x20xbf16> loc(#loc119) %263 = xten_nn.subgraph (%arg5 = %259: tensor<1x184x12x20xbf16>, %arg6 = %262: tensor<1x184x12x20xbf16>) attributes { LayerName = "Mul_170", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1012", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "574", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_170", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "584", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x184x12x20xbf16>, %arg8 = %arg6: tensor<1x184x12x20xbf16>) attributes { LayerName = "Mul_170", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1012", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, 
L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "574", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_170", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "584", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_170", OutputName = "Mul_170", shift = 0 : i8} : (tensor<1x184x12x20xbf16>, tensor<1x184x12x20xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc120) xten_nn.output %462 : tensor<1x184x12x20xbf16> loc(#loc120) } -> tensor<1x184x12x20xbf16> loc(#loc120) xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc120) } -> tensor<1x184x12x20xbf16> loc(#loc120) %264 = xten_nn.subgraph (%arg5 = %263: tensor<1x184x12x20xbf16>, %arg6 = %90: tensor<80x184x1x1xbf16>, %arg7 = %89: tensor<80xbf16>, %arg8 = %253: tensor<1x80x12x20xbf16>) attributes { LayerName = "Conv_171", OfmShare = 3 : index, Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "584", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> }, { Name = "1012", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[80, 184, 1, 1]> : vector<4xindex> }, { Name = "584", UnknownDataFormat = true }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "584", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> } ], OutputName = "Add_172", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = 
"InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "587", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg9 = %arg5: tensor<1x184x12x20xbf16>, %arg10 = %arg6: tensor<80x184x1x1xbf16>, %arg11 = %arg7: tensor<80xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_171", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "584", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> }, { Name = "1012", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[80, 184, 1, 1]> : vector<4xindex> }, { Name = "584", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_171", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1015", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %463 = 
"tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc121) %465 = tosa.reshape %arg10 {new_shape = array} : (tensor<80x184x1x1xbf16>) -> tensor<80x1x1x184xbf16> loc(#loc121) %466 = tosa.transpose %arg9, %464 : (tensor<1x184x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x184xbf16> loc(#loc121) %467 = tosa.conv2d %466, %465, %arg11 { PartOfLayerName = "Conv_171", PartOfOutputName = "Conv_171", dilation = array, pad = array, stride = array} : (tensor<1x12x20x184xbf16>, tensor<80x1x1x184xbf16>, tensor<80xbf16>) -> tensor<1x12x20x80xbf16> loc(#loc121) %468 = tosa.transpose %467, %463 : (tensor<1x12x20x80xbf16>, tensor<4xi32>) -> tensor<1x80x12x20xbf16> loc(#loc121) xten_nn.output %468 : tensor<1x80x12x20xbf16> loc(#loc121) } -> tensor<1x80x12x20xbf16> loc(#loc121) %462 = xten_nn.subgraph (%arg9 = %461: tensor<1x80x12x20xbf16>, %arg10 = %arg8: tensor<1x80x12x20xbf16>) attributes { LayerName = "Add_172", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1015", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "584", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> } ], OutputName = "Add_172", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "587", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> } ], Specializes = "AddBf16", Traits = { Binary = true, Elementwise = true }, With = { config.act = 0 : ui8, config.act_type = "LINEAR", config.aie_arch = "aie2p", config.compiler = 
"chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %463 = tosa.add %arg9, %arg10 {LayerName = "Add_172", OutputName = "Add_172"} : (tensor<1x80x12x20xbf16>, tensor<1x80x12x20xbf16>) -> tensor<1x80x12x20xbf16> loc(#loc122) xten_nn.output %463 : tensor<1x80x12x20xbf16> loc(#loc122) } -> tensor<1x80x12x20xbf16> loc(#loc122) xten_nn.output %462 : tensor<1x80x12x20xbf16> loc(#loc122) } -> tensor<1x80x12x20xbf16> loc(#loc343) %265 = xten_nn.subgraph (%arg5 = %264: tensor<1x80x12x20xbf16>, %arg6 = %88: tensor<480x80x1x1xbf16>, %arg7 = %87: tensor<480xbf16>) attributes { LayerName = "Conv_173", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "587", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> }, { Name = "1015", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[480, 80, 1, 1]> : vector<4xindex> }, { Name = "587", UnknownDataFormat = true } ], OutputName = "Conv_173", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1018", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x80x12x20xbf16>, %arg9 = %arg6: tensor<480x80x1x1xbf16>, %arg10 = %arg7: tensor<480xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_173", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "587", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> }, { Name = "1015", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = 
true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[480, 80, 1, 1]> : vector<4xindex> }, { Name = "587", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_173", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1018", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc123) %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<480x80x1x1xbf16>) -> tensor<480x1x1x80xbf16> loc(#loc123) %465 = tosa.transpose %arg8, %463 : (tensor<1x80x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x80xbf16> loc(#loc123) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_173", PartOfOutputName = "Conv_173", dilation = array, pad = array, stride = array} : (tensor<1x12x20x80xbf16>, tensor<480x1x1x80xbf16>, tensor<480xbf16>) -> tensor<1x12x20x480xbf16> loc(#loc123) %467 = tosa.transpose %466, %462 : (tensor<1x12x20x480xbf16>, tensor<4xi32>) -> tensor<1x480x12x20xbf16> loc(#loc123) xten_nn.output %467 : tensor<1x480x12x20xbf16> loc(#loc123) } -> 
tensor<1x480x12x20xbf16> loc(#loc123) xten_nn.output %461 : tensor<1x480x12x20xbf16> loc(#loc123) } -> tensor<1x480x12x20xbf16> loc(#loc123) %266 = xten_nn.subgraph (%arg5 = %265: tensor<1x480x12x20xbf16>) attributes { LayerName = "Add_175", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1018", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], OutputName = "Add_175", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "591", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x480x12x20xbf16>) attributes { LayerName = "Add_175", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1018", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], OutputName = "Add_175", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "591", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_175", OutputName = "Add_175"} : (tensor<1x480x12x20xbf16>, 
tensor<1x1x1x1xbf16>) -> tensor<1x480x12x20xbf16> loc(#loc124) xten_nn.output %463 : tensor<1x480x12x20xbf16> loc(#loc124) } -> tensor<1x480x12x20xbf16> loc(#loc124) xten_nn.output %461 : tensor<1x480x12x20xbf16> loc(#loc124) } -> tensor<1x480x12x20xbf16> loc(#loc124) %267 = xten_nn.subgraph (%arg5 = %266: tensor<1x480x12x20xbf16>) attributes { LayerName = "Clip_178", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "591", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_178", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "594", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x480x12x20xbf16>) attributes { LayerName = "Clip_178", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "591", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_178", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "594", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { 
LayerName = "Clip_178", OutputName = "Clip_178", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x480x12x20xbf16>) -> tensor<1x480x12x20xbf16> loc(#loc125) xten_nn.output %462 : tensor<1x480x12x20xbf16> loc(#loc125) } -> tensor<1x480x12x20xbf16> loc(#loc125) xten_nn.output %461 : tensor<1x480x12x20xbf16> loc(#loc125) } -> tensor<1x480x12x20xbf16> loc(#loc125) %268 = xten_nn.subgraph (%arg5 = %267: tensor<1x480x12x20xbf16>) attributes { LayerName = "Div_180", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "594", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], OutputName = "Div_180", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "596", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x480x12x20xbf16>) attributes { LayerName = "Div_180", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "594", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], OutputName = "Div_180", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "596", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 
1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_180", OutputName = "Div_180", shift = 0 : i8} : (tensor<1x480x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x480x12x20xbf16> loc(#loc126) xten_nn.output %463 : tensor<1x480x12x20xbf16> loc(#loc126) } -> tensor<1x480x12x20xbf16> loc(#loc126) xten_nn.output %461 : tensor<1x480x12x20xbf16> loc(#loc126) } -> tensor<1x480x12x20xbf16> loc(#loc126) %269 = xten_nn.subgraph (%arg5 = %265: tensor<1x480x12x20xbf16>, %arg6 = %268: tensor<1x480x12x20xbf16>) attributes { LayerName = "Mul_181", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1018", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "587", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_181", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "597", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x480x12x20xbf16>, %arg8 = %arg6: tensor<1x480x12x20xbf16>) attributes { LayerName = "Mul_181", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1018", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, 
L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "587", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_181", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "597", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_181", OutputName = "Mul_181", shift = 0 : i8} : (tensor<1x480x12x20xbf16>, tensor<1x480x12x20xbf16>) -> tensor<1x480x12x20xbf16> loc(#loc127) xten_nn.output %462 : tensor<1x480x12x20xbf16> loc(#loc127) } -> tensor<1x480x12x20xbf16> loc(#loc127) xten_nn.output %461 : tensor<1x480x12x20xbf16> loc(#loc127) } -> tensor<1x480x12x20xbf16> loc(#loc127) %270 = xten_nn.subgraph (%arg5 = %269: tensor<1x480x12x20xbf16>, %arg6 = %86: tensor<480x1x3x3xbf16>, %arg7 = %85: tensor<480xbf16>) attributes { LayerName = "Conv_182", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "597", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1018", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[480, 1, 3, 3]> : vector<4xindex> }, { Name = "597", UnknownDataFormat = true } ], OutputName = "Conv_182", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1021", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : 
vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x480x12x20xbf16>, %arg9 = %arg6: tensor<480x1x3x3xbf16>, %arg10 = %arg7: tensor<480xbf16>) attributes { Dilations = array, HWPadding = [[1, 1], [1, 1]], LayerName = "Conv_182", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "597", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1018", Port = "data_io.wts", SubPort = "wts_data", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[480, 1, 3, 3]> : vector<4xindex> }, { Name = "597", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_182", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1021", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], Specializes = "DepthwiseConv2dBf16", With = { config.act = 0 : ui8, config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.kernel_height = 3 : ui8, config.kernel_width = 3 : ui8, config.stride = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc128) %465 = tosa.transpose %arg9, %464 : (tensor<480x1x3x3xbf16>, tensor<4xi32>) -> tensor<3x3x480x1xbf16> loc(#loc128) %466 = tosa.transpose %arg8, %463 : (tensor<1x480x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x480xbf16> loc(#loc128) %467 = 
tosa.depthwise_conv2d %466, %465, %arg10 { PartOfLayerName = "Conv_182", PartOfOutputName = "Conv_182", dilation = array, pad = array, stride = array} : (tensor<1x12x20x480xbf16>, tensor<3x3x480x1xbf16>, tensor<480xbf16>) -> tensor<1x12x20x480xbf16> loc(#loc128) %468 = tosa.transpose %467, %462 : (tensor<1x12x20x480xbf16>, tensor<4xi32>) -> tensor<1x480x12x20xbf16> loc(#loc128) xten_nn.output %468 : tensor<1x480x12x20xbf16> loc(#loc128) } -> tensor<1x480x12x20xbf16> loc(#loc128) xten_nn.output %461 : tensor<1x480x12x20xbf16> loc(#loc128) } -> tensor<1x480x12x20xbf16> loc(#loc128) %271 = xten_nn.subgraph (%arg5 = %270: tensor<1x480x12x20xbf16>) attributes { LayerName = "Add_184", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1021", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], OutputName = "Add_184", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "601", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x480x12x20xbf16>) attributes { LayerName = "Add_184", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1021", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], OutputName = "Add_184", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "601", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], Specializes = 
"AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_184", OutputName = "Add_184"} : (tensor<1x480x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x480x12x20xbf16> loc(#loc129) xten_nn.output %463 : tensor<1x480x12x20xbf16> loc(#loc129) } -> tensor<1x480x12x20xbf16> loc(#loc129) xten_nn.output %461 : tensor<1x480x12x20xbf16> loc(#loc129) } -> tensor<1x480x12x20xbf16> loc(#loc129) %272 = xten_nn.subgraph (%arg5 = %271: tensor<1x480x12x20xbf16>) attributes { LayerName = "Clip_187", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "601", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_187", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "604", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x480x12x20xbf16>) attributes { LayerName = "Clip_187", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "601", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_187", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "604", Port = 
"data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_187", OutputName = "Clip_187", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x480x12x20xbf16>) -> tensor<1x480x12x20xbf16> loc(#loc130) xten_nn.output %462 : tensor<1x480x12x20xbf16> loc(#loc130) } -> tensor<1x480x12x20xbf16> loc(#loc130) xten_nn.output %461 : tensor<1x480x12x20xbf16> loc(#loc130) } -> tensor<1x480x12x20xbf16> loc(#loc130) %273 = xten_nn.subgraph (%arg5 = %272: tensor<1x480x12x20xbf16>) attributes { LayerName = "Div_189", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "604", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], OutputName = "Div_189", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "606", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x480x12x20xbf16>) attributes { LayerName = "Div_189", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "604", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], OutputName 
= "Div_189", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "606", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_189", OutputName = "Div_189", shift = 0 : i8} : (tensor<1x480x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x480x12x20xbf16> loc(#loc131) xten_nn.output %463 : tensor<1x480x12x20xbf16> loc(#loc131) } -> tensor<1x480x12x20xbf16> loc(#loc131) xten_nn.output %461 : tensor<1x480x12x20xbf16> loc(#loc131) } -> tensor<1x480x12x20xbf16> loc(#loc131) %274 = xten_nn.subgraph (%arg5 = %270: tensor<1x480x12x20xbf16>, %arg6 = %273: tensor<1x480x12x20xbf16>) attributes { LayerName = "Mul_190_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1021", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "597", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_190", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "607", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = 
{feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x480x12x20xbf16>, %arg8 = %arg6: tensor<1x480x12x20xbf16>) attributes { LayerName = "Mul_190_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1021", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "597", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_190", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "607", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_190", OutputName = "Mul_190", shift = 0 : i8} : (tensor<1x480x12x20xbf16>, tensor<1x480x12x20xbf16>) -> tensor<1x480x12x20xbf16> loc(#loc132) xten_nn.output %462 : tensor<1x480x12x20xbf16> loc(#loc132) } -> tensor<1x480x12x20xbf16> loc(#loc132) xten_nn.output %461 : tensor<1x480x12x20xbf16> loc(#loc132) } -> tensor<1x480x12x20xbf16> loc(#loc132) %275 = xten_nn.subgraph (%arg5 = %274: tensor<1x480x12x20xbf16>) attributes { LayerName = "Mul_190_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1021", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } 
], OutputName = "GlobalAveragePool_191_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "608", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 240]> : vector<4xindex> } ], Specializes = "Transpose4dAdf", With = { config.aie_arch = "aie2p", config.dim_0 = 12 : ui32, config.dim_1 = 60 : ui32, config.dim_2 = 20 : ui32, config.dim_3 = 8 : ui32, config.dtype = "bfloat16", config.perm = 6 : ui32 }} { %461 = tosa.reshape %arg5 {new_shape = array} : (tensor<1x480x12x20xbf16>) -> tensor<1x480x1x240xbf16> loc(#loc344) xten_nn.output %461 : tensor<1x480x1x240xbf16> loc(#loc344) } -> tensor<1x480x1x240xbf16> loc(#loc344) %276 = xten_nn.subgraph (%arg5 = %275: tensor<1x480x1x240xbf16>) attributes { LayerName = "GlobalAveragePool_191", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "607", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 240]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_191_Duplicated#1", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "608", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x480x1x240xbf16>) attributes { LayerName = "GlobalAveragePool_191", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "607", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 240]> : vector<4xindex> } ], OutputName 
= "GlobalAveragePool_191_Duplicated#1", PadValue = 0.000000e+00 : bf16, Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "608", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> } ], Specializes = "ReduceMeanC8Bf16", Traits = { Reduce = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.full_channel = 480 : ui32, config.full_height = 1 : ui32, config.full_width = 240 : ui32, config.reduce_dim = "W" }} { %462 = xten_nn.reduce_mean %arg6 {axes = array, keepdims = 1 : i64} : (tensor<1x480x1x240xbf16>) -> tensor<1x480x1x1xbf16> loc(#loc133) xten_nn.output %462 : tensor<1x480x1x1xbf16> loc(#loc133) } -> tensor<1x480x1x1xbf16> loc(#loc133) xten_nn.output %461 : tensor<1x480x1x1xbf16> loc(#loc133) } -> tensor<1x480x1x1xbf16> loc(#loc133) %277 = xten_nn.subgraph (%arg5 = %276: tensor<1x480x1x1xbf16>, %arg6 = %84: tensor<120x480x1x1xbf16>, %arg7 = %83: tensor<120xbf16>) attributes { LayerName = "Conv_192", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "608", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> }, { Name = "607", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[120, 480, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.11.block.2.fc1.weight", UnknownDataFormat = true } ], OutputName = "Relu_193", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "610", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: 
tensor<1x480x1x1xbf16>, %arg9 = %arg6: tensor<120x480x1x1xbf16>, %arg10 = %arg7: tensor<120xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_192", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "608", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> }, { Name = "607", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[120, 480, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.11.block.2.fc1.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_193", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "610", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true, NonNegativeOut = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 1 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 0.000000e+00 : bf16, config.lrelu_alpha_kernel = 0.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<120x480x1x1xbf16>) -> tensor<120x1x1x480xbf16> loc(#loc345) %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x480x1x1xbf16>) -> tensor<1x1x1x480xbf16> loc(#loc345) %464 = tosa.conv2d %463, %462, %arg10 { PartOfLayerName = "Conv_192", PartOfOutputName = "Conv_192", dilation = array, 
pad = array, stride = array} : (tensor<1x1x1x480xbf16>, tensor<120x1x1x480xbf16>, tensor<120xbf16>) -> tensor<1x1x1x120xbf16> loc(#loc134) %465 = tosa.clamp %464 { LayerName = "Relu_193", OutputName = "Relu_193", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x1x1x120xbf16>) -> tensor<1x1x1x120xbf16> loc(#loc135) %466 = tosa.reshape %465 {new_shape = array} : (tensor<1x1x1x120xbf16>) -> tensor<1x120x1x1xbf16> loc(#loc345) xten_nn.output %466 : tensor<1x120x1x1xbf16> loc(#loc135) } -> tensor<1x120x1x1xbf16> loc(#loc345) xten_nn.output %461 : tensor<1x120x1x1xbf16> loc(#loc345) } -> tensor<1x120x1x1xbf16> loc(#loc345) %278 = xten_nn.subgraph (%arg5 = %277: tensor<1x120x1x1xbf16>, %arg6 = %82: tensor<480x120x1x1xbf16>, %arg7 = %81: tensor<480xbf16>) attributes { LayerName = "Conv_194", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "610", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> }, { Name = "609", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[480, 120, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.11.block.2.fc2.weight", UnknownDataFormat = true } ], OutputName = "Conv_194", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "611", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x120x1x1xbf16>, %arg9 = %arg6: tensor<480x120x1x1xbf16>, %arg10 = %arg7: tensor<480xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_194", Operands = [ { CurrentDataFormat = 
"NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "610", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> }, { Name = "609", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[480, 120, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.11.block.2.fc2.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_194", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "611", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<480x120x1x1xbf16>) -> tensor<480x1x1x120xbf16> loc(#loc136) %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x120x1x1xbf16>) -> tensor<1x1x1x120xbf16> loc(#loc136) %464 = tosa.conv2d %463, %462, %arg10 { PartOfLayerName = "Conv_194", PartOfOutputName = "Conv_194", dilation = array, pad = array, stride = array} : (tensor<1x1x1x120xbf16>, tensor<480x1x1x120xbf16>, tensor<480xbf16>) -> tensor<1x1x1x480xbf16> loc(#loc136) %465 = tosa.reshape %464 {new_shape = array} : (tensor<1x1x1x480xbf16>) -> tensor<1x480x1x1xbf16> 
loc(#loc136) xten_nn.output %465 : tensor<1x480x1x1xbf16> loc(#loc136) } -> tensor<1x480x1x1xbf16> loc(#loc136) xten_nn.output %461 : tensor<1x480x1x1xbf16> loc(#loc136) } -> tensor<1x480x1x1xbf16> loc(#loc136) %279 = xten_nn.subgraph (%arg5 = %278: tensor<1x480x1x1xbf16>) attributes { LayerName = "Add_196", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "611", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> } ], OutputName = "Add_196", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "613", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x480x1x1xbf16>) attributes { LayerName = "Add_196", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "611", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> } ], OutputName = "Add_196", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "613", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_196", 
OutputName = "Add_196"} : (tensor<1x480x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x480x1x1xbf16> loc(#loc137) xten_nn.output %463 : tensor<1x480x1x1xbf16> loc(#loc137) } -> tensor<1x480x1x1xbf16> loc(#loc137) xten_nn.output %461 : tensor<1x480x1x1xbf16> loc(#loc137) } -> tensor<1x480x1x1xbf16> loc(#loc137) %280 = xten_nn.subgraph (%arg5 = %279: tensor<1x480x1x1xbf16>) attributes { LayerName = "Clip_199", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "613", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> } ], OutputName = "Clip_199", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "616", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x480x1x1xbf16>) attributes { LayerName = "Clip_199", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "613", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> } ], OutputName = "Clip_199", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "616", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 
}} { %462 = tosa.clamp %arg6 { LayerName = "Clip_199", OutputName = "Clip_199", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x480x1x1xbf16>) -> tensor<1x480x1x1xbf16> loc(#loc138) xten_nn.output %462 : tensor<1x480x1x1xbf16> loc(#loc138) } -> tensor<1x480x1x1xbf16> loc(#loc138) xten_nn.output %461 : tensor<1x480x1x1xbf16> loc(#loc138) } -> tensor<1x480x1x1xbf16> loc(#loc138) %281 = xten_nn.subgraph (%arg5 = %280: tensor<1x480x1x1xbf16>) attributes { LayerName = "Div_201", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "616", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> } ], OutputName = "Div_201", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "618", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x480x1x1xbf16>) attributes { LayerName = "Div_201", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "616", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> } ], OutputName = "Div_201", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "618", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar 
= 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_201", OutputName = "Div_201", shift = 0 : i8} : (tensor<1x480x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x480x1x1xbf16> loc(#loc139) xten_nn.output %463 : tensor<1x480x1x1xbf16> loc(#loc139) } -> tensor<1x480x1x1xbf16> loc(#loc139) xten_nn.output %461 : tensor<1x480x1x1xbf16> loc(#loc139) } -> tensor<1x480x1x1xbf16> loc(#loc139) %282 = xten_nn.subgraph (%arg5 = %281: tensor<1x480x1x1xbf16>) attributes { LayerName = "Mul_202_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "618", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> } ], OutputName = "Mul_202_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "619", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], Specializes = "TileAdf", With = { config.aie_arch = "aie2p", config.dtype = "bfloat16", config.i_dim_c = 480 : ui32, config.i_dim_h = 1 : ui32, config.i_dim_n = 1 : ui32, config.i_dim_w = 1 : ui32, config.rep_dim_c = 1 : ui32, config.rep_dim_h = 12 : ui32, config.rep_dim_w = 20 : ui32 }} { %461 = tosa.tile %arg5 {multiples = array} : (tensor<1x480x1x1xbf16>) -> tensor<1x480x12x20xbf16> loc(#loc140) xten_nn.output %461 : tensor<1x480x12x20xbf16> loc(#loc140) } -> tensor<1x480x12x20xbf16> loc(#loc140) %283 = xten_nn.subgraph (%arg5 = %282: tensor<1x480x12x20xbf16>, %arg6 = %274: tensor<1x480x12x20xbf16>) attributes { LayerName = 
"Mul_202_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "618", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "616", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_202_Duplicated#1", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "619", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x480x12x20xbf16>, %arg8 = %arg6: tensor<1x480x12x20xbf16>) attributes { LayerName = "Mul_202_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "618", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "616", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_202_Duplicated#1", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "619", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", 
config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_202", OutputName = "Mul_202", shift = 0 : i8} : (tensor<1x480x12x20xbf16>, tensor<1x480x12x20xbf16>) -> tensor<1x480x12x20xbf16> loc(#loc140) xten_nn.output %462 : tensor<1x480x12x20xbf16> loc(#loc140) } -> tensor<1x480x12x20xbf16> loc(#loc140) xten_nn.output %461 : tensor<1x480x12x20xbf16> loc(#loc140) } -> tensor<1x480x12x20xbf16> loc(#loc140) %284 = xten_nn.subgraph (%arg5 = %283: tensor<1x480x12x20xbf16>, %arg6 = %80: tensor<112x480x1x1xbf16>, %arg7 = %79: tensor<112xbf16>) attributes { LayerName = "Conv_203", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "619", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> }, { Name = "618", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[112, 480, 1, 1]> : vector<4xindex> }, { Name = "619", UnknownDataFormat = true } ], OutputName = "Conv_203", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1024", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 112, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x480x12x20xbf16>, %arg9 = %arg6: tensor<112x480x1x1xbf16>, %arg10 = %arg7: tensor<112xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_203", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "619", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> }, { Name = "618", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, 
l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[112, 480, 1, 1]> : vector<4xindex> }, { Name = "619", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_203", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1024", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 112, 12, 20]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc141) %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<112x480x1x1xbf16>) -> tensor<112x1x1x480xbf16> loc(#loc141) %465 = tosa.transpose %arg8, %463 : (tensor<1x480x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x480xbf16> loc(#loc141) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_203", PartOfOutputName = "Conv_203", dilation = array, pad = array, stride = array} : (tensor<1x12x20x480xbf16>, tensor<112x1x1x480xbf16>, tensor<112xbf16>) -> tensor<1x12x20x112xbf16> loc(#loc141) %467 = tosa.transpose %466, %462 : (tensor<1x12x20x112xbf16>, tensor<4xi32>) -> tensor<1x112x12x20xbf16> loc(#loc141) xten_nn.output %467 : tensor<1x112x12x20xbf16> loc(#loc141) } -> 
tensor<1x112x12x20xbf16> loc(#loc141) xten_nn.output %461 : tensor<1x112x12x20xbf16> loc(#loc141) } -> tensor<1x112x12x20xbf16> loc(#loc141) %285 = xten_nn.subgraph (%arg5 = %284: tensor<1x112x12x20xbf16>, %arg6 = %78: tensor<672x112x1x1xbf16>, %arg7 = %77: tensor<672xbf16>) attributes { LayerName = "Conv_204", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1024", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 112, 12, 20]> : vector<4xindex> }, { Name = "619", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[672, 112, 1, 1]> : vector<4xindex> }, { Name = "1028", UnknownDataFormat = true } ], OutputName = "Conv_204", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1027", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x112x12x20xbf16>, %arg9 = %arg6: tensor<672x112x1x1xbf16>, %arg10 = %arg7: tensor<672xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_204", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1024", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 112, 12, 20]> : vector<4xindex> }, { Name = "619", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[672, 112, 1, 1]> : vector<4xindex> }, { Name = "1028", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_204", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = 
"HCWN", L3Vectorization = "C:8", Name = "1027", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc142) %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<672x112x1x1xbf16>) -> tensor<672x1x1x112xbf16> loc(#loc142) %465 = tosa.transpose %arg8, %463 : (tensor<1x112x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x112xbf16> loc(#loc142) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_204", PartOfOutputName = "Conv_204", dilation = array, pad = array, stride = array} : (tensor<1x12x20x112xbf16>, tensor<672x1x1x112xbf16>, tensor<672xbf16>) -> tensor<1x12x20x672xbf16> loc(#loc142) %467 = tosa.transpose %466, %462 : (tensor<1x12x20x672xbf16>, tensor<4xi32>) -> tensor<1x672x12x20xbf16> loc(#loc142) xten_nn.output %467 : tensor<1x672x12x20xbf16> loc(#loc142) } -> tensor<1x672x12x20xbf16> loc(#loc142) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc142) } -> tensor<1x672x12x20xbf16> loc(#loc142) %286 = xten_nn.subgraph (%arg5 = %285: tensor<1x672x12x20xbf16>) attributes { LayerName = "Add_206", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", 
L3Vectorization = "C:8", Name = "1027", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Add_206", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "625", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x12x20xbf16>) attributes { LayerName = "Add_206", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1027", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Add_206", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "625", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_206", OutputName = "Add_206"} : (tensor<1x672x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc143) xten_nn.output %463 : tensor<1x672x12x20xbf16> loc(#loc143) } -> tensor<1x672x12x20xbf16> loc(#loc143) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc143) } -> tensor<1x672x12x20xbf16> loc(#loc143) %287 = xten_nn.subgraph (%arg5 = %286: 
tensor<1x672x12x20xbf16>) attributes { LayerName = "Clip_209", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "625", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_209", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "628", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x12x20xbf16>) attributes { LayerName = "Clip_209", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "625", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_209", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "628", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_209", OutputName = "Clip_209", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x672x12x20xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc144) xten_nn.output %462 : tensor<1x672x12x20xbf16> loc(#loc144) } -> tensor<1x672x12x20xbf16> 
loc(#loc144) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc144) } -> tensor<1x672x12x20xbf16> loc(#loc144) %288 = xten_nn.subgraph (%arg5 = %287: tensor<1x672x12x20xbf16>) attributes { LayerName = "Div_211", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "628", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Div_211", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "630", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x12x20xbf16>) attributes { LayerName = "Div_211", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "628", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Div_211", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "630", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_211", OutputName = "Div_211", shift = 0 : i8} : (tensor<1x672x12x20xbf16>, 
tensor<1x1x1x1xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc145) xten_nn.output %463 : tensor<1x672x12x20xbf16> loc(#loc145) } -> tensor<1x672x12x20xbf16> loc(#loc145) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc145) } -> tensor<1x672x12x20xbf16> loc(#loc145) %289 = xten_nn.subgraph (%arg5 = %285: tensor<1x672x12x20xbf16>, %arg6 = %288: tensor<1x672x12x20xbf16>) attributes { LayerName = "Mul_212", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1027", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1024", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_212", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "631", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x672x12x20xbf16>, %arg8 = %arg6: tensor<1x672x12x20xbf16>) attributes { LayerName = "Mul_212", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1027", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1024", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_212", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = 
"HCWN", L3Vectorization = "C:8", Name = "631", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_212", OutputName = "Mul_212", shift = 0 : i8} : (tensor<1x672x12x20xbf16>, tensor<1x672x12x20xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc146) xten_nn.output %462 : tensor<1x672x12x20xbf16> loc(#loc146) } -> tensor<1x672x12x20xbf16> loc(#loc146) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc146) } -> tensor<1x672x12x20xbf16> loc(#loc146) %290 = xten_nn.subgraph (%arg5 = %289: tensor<1x672x12x20xbf16>, %arg6 = %76: tensor<672x1x3x3xbf16>, %arg7 = %75: tensor<672xbf16>) attributes { LayerName = "Conv_213", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "631", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1027", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[672, 1, 3, 3]> : vector<4xindex> }, { Name = "631", UnknownDataFormat = true } ], OutputName = "Conv_213", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1030", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x672x12x20xbf16>, %arg9 = %arg6: tensor<672x1x3x3xbf16>, %arg10 = %arg7: tensor<672xbf16>) attributes { Dilations = 
array, HWPadding = [[1, 1], [1, 1]], LayerName = "Conv_213", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "631", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1027", Port = "data_io.wts", SubPort = "wts_data", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[672, 1, 3, 3]> : vector<4xindex> }, { Name = "631", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_213", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1030", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "DepthwiseConv2dBf16", With = { config.act = 0 : ui8, config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.kernel_height = 3 : ui8, config.kernel_width = 3 : ui8, config.stride = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc147) %465 = tosa.transpose %arg9, %464 : (tensor<672x1x3x3xbf16>, tensor<4xi32>) -> tensor<3x3x672x1xbf16> loc(#loc147) %466 = tosa.transpose %arg8, %463 : (tensor<1x672x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x672xbf16> loc(#loc147) %467 = tosa.depthwise_conv2d %466, %465, %arg10 { PartOfLayerName = "Conv_213", PartOfOutputName = "Conv_213", dilation = array, pad = array, stride = array} : (tensor<1x12x20x672xbf16>, tensor<3x3x672x1xbf16>, tensor<672xbf16>) -> tensor<1x12x20x672xbf16> loc(#loc147) %468 = tosa.transpose %467, %462 : 
(tensor<1x12x20x672xbf16>, tensor<4xi32>) -> tensor<1x672x12x20xbf16> loc(#loc147) xten_nn.output %468 : tensor<1x672x12x20xbf16> loc(#loc147) } -> tensor<1x672x12x20xbf16> loc(#loc147) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc147) } -> tensor<1x672x12x20xbf16> loc(#loc147) %291 = xten_nn.subgraph (%arg5 = %290: tensor<1x672x12x20xbf16>) attributes { LayerName = "Add_215", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1030", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Add_215", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "635", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x12x20xbf16>) attributes { LayerName = "Add_215", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1030", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Add_215", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "635", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : 
tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_215", OutputName = "Add_215"} : (tensor<1x672x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc148) xten_nn.output %463 : tensor<1x672x12x20xbf16> loc(#loc148) } -> tensor<1x672x12x20xbf16> loc(#loc148) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc148) } -> tensor<1x672x12x20xbf16> loc(#loc148) %292 = xten_nn.subgraph (%arg5 = %291: tensor<1x672x12x20xbf16>) attributes { LayerName = "Clip_218", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "635", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_218", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "638", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x12x20xbf16>) attributes { LayerName = "Clip_218", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "635", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_218", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "638", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 
0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_218", OutputName = "Clip_218", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x672x12x20xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc149) xten_nn.output %462 : tensor<1x672x12x20xbf16> loc(#loc149) } -> tensor<1x672x12x20xbf16> loc(#loc149) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc149) } -> tensor<1x672x12x20xbf16> loc(#loc149) %293 = xten_nn.subgraph (%arg5 = %292: tensor<1x672x12x20xbf16>) attributes { LayerName = "Div_220", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "638", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Div_220", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "640", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x12x20xbf16>) attributes { LayerName = "Div_220", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "638", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Div_220", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "640", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = 
"MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_220", OutputName = "Div_220", shift = 0 : i8} : (tensor<1x672x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc150) xten_nn.output %463 : tensor<1x672x12x20xbf16> loc(#loc150) } -> tensor<1x672x12x20xbf16> loc(#loc150) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc150) } -> tensor<1x672x12x20xbf16> loc(#loc150) %294 = xten_nn.subgraph (%arg5 = %290: tensor<1x672x12x20xbf16>, %arg6 = %293: tensor<1x672x12x20xbf16>) attributes { LayerName = "Mul_221_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1030", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "631", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_221", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "641", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x672x12x20xbf16>, %arg8 = %arg6: tensor<1x672x12x20xbf16>) attributes { LayerName = "Mul_221_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", 
L3Vectorization = "C:8", Name = "1030", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "631", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_221", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "641", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_221", OutputName = "Mul_221", shift = 0 : i8} : (tensor<1x672x12x20xbf16>, tensor<1x672x12x20xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc151) xten_nn.output %462 : tensor<1x672x12x20xbf16> loc(#loc151) } -> tensor<1x672x12x20xbf16> loc(#loc151) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc151) } -> tensor<1x672x12x20xbf16> loc(#loc151) %295 = xten_nn.subgraph (%arg5 = %294: tensor<1x672x12x20xbf16>) attributes { LayerName = "Mul_221_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1030", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_222_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "642", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, 
l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 240]> : vector<4xindex> } ], Specializes = "Transpose4dAdf", With = { config.aie_arch = "aie2p", config.dim_0 = 12 : ui32, config.dim_1 = 84 : ui32, config.dim_2 = 20 : ui32, config.dim_3 = 8 : ui32, config.dtype = "bfloat16", config.perm = 6 : ui32 }} { %461 = tosa.reshape %arg5 {new_shape = array} : (tensor<1x672x12x20xbf16>) -> tensor<1x672x1x240xbf16> loc(#loc346) xten_nn.output %461 : tensor<1x672x1x240xbf16> loc(#loc346) } -> tensor<1x672x1x240xbf16> loc(#loc346) %296 = xten_nn.subgraph (%arg5 = %295: tensor<1x672x1x240xbf16>) attributes { LayerName = "GlobalAveragePool_222", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "641", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 240]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_222_Duplicated#1", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "642", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x1x240xbf16>) attributes { LayerName = "GlobalAveragePool_222", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "641", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 240]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_222_Duplicated#1", PadValue = 0.000000e+00 : bf16, Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "642", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 
1, 1]> : vector<4xindex> } ], Specializes = "ReduceMeanC8Bf16", Traits = { Reduce = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.full_channel = 672 : ui32, config.full_height = 1 : ui32, config.full_width = 240 : ui32, config.reduce_dim = "W" }} { %462 = xten_nn.reduce_mean %arg6 {axes = array, keepdims = 1 : i64} : (tensor<1x672x1x240xbf16>) -> tensor<1x672x1x1xbf16> loc(#loc152) xten_nn.output %462 : tensor<1x672x1x1xbf16> loc(#loc152) } -> tensor<1x672x1x1xbf16> loc(#loc152) xten_nn.output %461 : tensor<1x672x1x1xbf16> loc(#loc152) } -> tensor<1x672x1x1xbf16> loc(#loc152) %297 = xten_nn.subgraph (%arg5 = %296: tensor<1x672x1x1xbf16>, %arg6 = %74: tensor<168x672x1x1xbf16>, %arg7 = %73: tensor<168xbf16>) attributes { LayerName = "Conv_223", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "642", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> }, { Name = "641", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[168, 672, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.12.block.2.fc1.weight", UnknownDataFormat = true } ], OutputName = "Relu_224", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "644", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 168, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x672x1x1xbf16>, %arg9 = %arg6: tensor<168x672x1x1xbf16>, %arg10 = %arg7: tensor<168xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_223", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "642", Port = 
"data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> }, { Name = "641", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[168, 672, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.12.block.2.fc1.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_224", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "644", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 168, 1, 1]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true, NonNegativeOut = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 1 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 0.000000e+00 : bf16, config.lrelu_alpha_kernel = 0.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<168x672x1x1xbf16>) -> tensor<168x1x1x672xbf16> loc(#loc347) %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x672x1x1xbf16>) -> tensor<1x1x1x672xbf16> loc(#loc347) %464 = tosa.conv2d %463, %462, %arg10 { PartOfLayerName = "Conv_223", PartOfOutputName = "Conv_223", dilation = array, pad = array, stride = array} : (tensor<1x1x1x672xbf16>, tensor<168x1x1x672xbf16>, tensor<168xbf16>) -> tensor<1x1x1x168xbf16> loc(#loc153) %465 = tosa.clamp %464 { LayerName = "Relu_224", OutputName = "Relu_224", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : 
f32, min_int = 0 : i64} : (tensor<1x1x1x168xbf16>) -> tensor<1x1x1x168xbf16> loc(#loc154) %466 = tosa.reshape %465 {new_shape = array} : (tensor<1x1x1x168xbf16>) -> tensor<1x168x1x1xbf16> loc(#loc347) xten_nn.output %466 : tensor<1x168x1x1xbf16> loc(#loc154) } -> tensor<1x168x1x1xbf16> loc(#loc347) xten_nn.output %461 : tensor<1x168x1x1xbf16> loc(#loc347) } -> tensor<1x168x1x1xbf16> loc(#loc347) %298 = xten_nn.subgraph (%arg5 = %297: tensor<1x168x1x1xbf16>, %arg6 = %72: tensor<672x168x1x1xbf16>, %arg7 = %71: tensor<672xbf16>) attributes { LayerName = "Conv_225", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "644", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 168, 1, 1]> : vector<4xindex> }, { Name = "643", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[672, 168, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.12.block.2.fc2.weight", UnknownDataFormat = true } ], OutputName = "Conv_225", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "645", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x168x1x1xbf16>, %arg9 = %arg6: tensor<672x168x1x1xbf16>, %arg10 = %arg7: tensor<672xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_225", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "644", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 168, 1, 1]> : vector<4xindex> }, { Name = "643", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = 
dense<0> : vector<4xindex>, l3_tile_count = dense<[672, 168, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.12.block.2.fc2.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_225", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "645", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<672x168x1x1xbf16>) -> tensor<672x1x1x168xbf16> loc(#loc155) %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x168x1x1xbf16>) -> tensor<1x1x1x168xbf16> loc(#loc155) %464 = tosa.conv2d %463, %462, %arg10 { PartOfLayerName = "Conv_225", PartOfOutputName = "Conv_225", dilation = array, pad = array, stride = array} : (tensor<1x1x1x168xbf16>, tensor<672x1x1x168xbf16>, tensor<672xbf16>) -> tensor<1x1x1x672xbf16> loc(#loc155) %465 = tosa.reshape %464 {new_shape = array} : (tensor<1x1x1x672xbf16>) -> tensor<1x672x1x1xbf16> loc(#loc155) xten_nn.output %465 : tensor<1x672x1x1xbf16> loc(#loc155) } -> tensor<1x672x1x1xbf16> loc(#loc155) xten_nn.output %461 : tensor<1x672x1x1xbf16> loc(#loc155) } -> tensor<1x672x1x1xbf16> loc(#loc155) %299 = xten_nn.subgraph (%arg5 = %298: tensor<1x672x1x1xbf16>) attributes { LayerName = 
"Add_227", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "645", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], OutputName = "Add_227", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "647", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x1x1xbf16>) attributes { LayerName = "Add_227", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "645", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], OutputName = "Add_227", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "647", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_227", OutputName = "Add_227"} : (tensor<1x672x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x672x1x1xbf16> loc(#loc156) xten_nn.output %463 : tensor<1x672x1x1xbf16> loc(#loc156) } -> tensor<1x672x1x1xbf16> loc(#loc156) xten_nn.output %461 : tensor<1x672x1x1xbf16> loc(#loc156) } -> tensor<1x672x1x1xbf16> 
loc(#loc156) %300 = xten_nn.subgraph (%arg5 = %299: tensor<1x672x1x1xbf16>) attributes { LayerName = "Clip_230", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "647", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], OutputName = "Clip_230", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "650", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x1x1xbf16>) attributes { LayerName = "Clip_230", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "647", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], OutputName = "Clip_230", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "650", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_230", OutputName = "Clip_230", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x672x1x1xbf16>) -> tensor<1x672x1x1xbf16> loc(#loc157) xten_nn.output %462 : tensor<1x672x1x1xbf16> 
loc(#loc157) } -> tensor<1x672x1x1xbf16> loc(#loc157) xten_nn.output %461 : tensor<1x672x1x1xbf16> loc(#loc157) } -> tensor<1x672x1x1xbf16> loc(#loc157) %301 = xten_nn.subgraph (%arg5 = %300: tensor<1x672x1x1xbf16>) attributes { LayerName = "Div_232", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "650", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], OutputName = "Div_232", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "652", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x1x1xbf16>) attributes { LayerName = "Div_232", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "650", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], OutputName = "Div_232", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "652", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_232", OutputName = "Div_232", shift = 0 : i8} : 
(tensor<1x672x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x672x1x1xbf16> loc(#loc158) xten_nn.output %463 : tensor<1x672x1x1xbf16> loc(#loc158) } -> tensor<1x672x1x1xbf16> loc(#loc158) xten_nn.output %461 : tensor<1x672x1x1xbf16> loc(#loc158) } -> tensor<1x672x1x1xbf16> loc(#loc158) %302 = xten_nn.subgraph (%arg5 = %301: tensor<1x672x1x1xbf16>) attributes { LayerName = "Mul_233_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "652", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], OutputName = "Mul_233_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "653", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "TileAdf", With = { config.aie_arch = "aie2p", config.dtype = "bfloat16", config.i_dim_c = 672 : ui32, config.i_dim_h = 1 : ui32, config.i_dim_n = 1 : ui32, config.i_dim_w = 1 : ui32, config.rep_dim_c = 1 : ui32, config.rep_dim_h = 12 : ui32, config.rep_dim_w = 20 : ui32 }} { %461 = tosa.tile %arg5 {multiples = array} : (tensor<1x672x1x1xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc159) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc159) } -> tensor<1x672x12x20xbf16> loc(#loc159) %303 = xten_nn.subgraph (%arg5 = %302: tensor<1x672x12x20xbf16>, %arg6 = %294: tensor<1x672x12x20xbf16>) attributes { LayerName = "Mul_233_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "652", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = 
"HCWN", L3Vectorization = "C:8", Name = "650", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_233_Duplicated#1", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "653", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x672x12x20xbf16>, %arg8 = %arg6: tensor<1x672x12x20xbf16>) attributes { LayerName = "Mul_233_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "652", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "650", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_233_Duplicated#1", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "653", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_233", OutputName = "Mul_233", shift = 0 : i8} : (tensor<1x672x12x20xbf16>, tensor<1x672x12x20xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc159) xten_nn.output %462 : tensor<1x672x12x20xbf16> loc(#loc159) } 
-> tensor<1x672x12x20xbf16> loc(#loc159) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc159) } -> tensor<1x672x12x20xbf16> loc(#loc159) %304 = xten_nn.subgraph (%arg5 = %303: tensor<1x672x12x20xbf16>, %arg6 = %70: tensor<112x672x1x1xbf16>, %arg7 = %69: tensor<112xbf16>, %arg8 = %284: tensor<1x112x12x20xbf16>) attributes { LayerName = "Conv_234", OfmShare = 3 : index, Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "653", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> }, { Name = "652", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[112, 672, 1, 1]> : vector<4xindex> }, { Name = "653", UnknownDataFormat = true }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "653", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 112, 12, 20]> : vector<4xindex> } ], OutputName = "Add_235", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "656", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 112, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg9 = %arg5: tensor<1x672x12x20xbf16>, %arg10 = %arg6: tensor<112x672x1x1xbf16>, %arg11 = %arg7: tensor<112xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_234", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "653", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> }, { Name = "652", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : 
vector<4xindex>, l3_tile_count = dense<[112, 672, 1, 1]> : vector<4xindex> }, { Name = "653", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_234", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1033", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 112, 12, 20]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %463 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc160) %465 = tosa.reshape %arg10 {new_shape = array} : (tensor<112x672x1x1xbf16>) -> tensor<112x1x1x672xbf16> loc(#loc160) %466 = tosa.transpose %arg9, %464 : (tensor<1x672x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x672xbf16> loc(#loc160) %467 = tosa.conv2d %466, %465, %arg11 { PartOfLayerName = "Conv_234", PartOfOutputName = "Conv_234", dilation = array, pad = array, stride = array} : (tensor<1x12x20x672xbf16>, tensor<112x1x1x672xbf16>, tensor<112xbf16>) -> tensor<1x12x20x112xbf16> loc(#loc160) %468 = tosa.transpose %467, %463 : (tensor<1x12x20x112xbf16>, tensor<4xi32>) -> tensor<1x112x12x20xbf16> loc(#loc160) xten_nn.output %468 : tensor<1x112x12x20xbf16> loc(#loc160) } -> tensor<1x112x12x20xbf16> loc(#loc160) %462 
= xten_nn.subgraph (%arg9 = %461: tensor<1x112x12x20xbf16>, %arg10 = %arg8: tensor<1x112x12x20xbf16>) attributes { LayerName = "Add_235", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1033", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 112, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "653", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 112, 12, 20]> : vector<4xindex> } ], OutputName = "Add_235", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "656", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 112, 12, 20]> : vector<4xindex> } ], Specializes = "AddBf16", Traits = { Binary = true, Elementwise = true }, With = { config.act = 0 : ui8, config.act_type = "LINEAR", config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %463 = tosa.add %arg9, %arg10 {LayerName = "Add_235", OutputName = "Add_235"} : (tensor<1x112x12x20xbf16>, tensor<1x112x12x20xbf16>) -> tensor<1x112x12x20xbf16> loc(#loc161) xten_nn.output %463 : tensor<1x112x12x20xbf16> loc(#loc161) } -> tensor<1x112x12x20xbf16> loc(#loc161) xten_nn.output %462 : tensor<1x112x12x20xbf16> loc(#loc161) } -> tensor<1x112x12x20xbf16> loc(#loc348) %305 = xten_nn.subgraph (%arg5 = %304: tensor<1x112x12x20xbf16>, %arg6 = %68: tensor<672x112x1x1xbf16>, %arg7 = %67: tensor<672xbf16>) attributes { LayerName = "Conv_236", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "656", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 112, 12, 20]> : vector<4xindex> }, { Name = "1033", UnknownDataFormat = true, l3_extend_end = dense<0> : 
vector<4xindex>, l3_tile_count = dense<[672, 112, 1, 1]> : vector<4xindex> }, { Name = "656", UnknownDataFormat = true } ], OutputName = "Conv_236", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1036", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x112x12x20xbf16>, %arg9 = %arg6: tensor<672x112x1x1xbf16>, %arg10 = %arg7: tensor<672xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_236", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "656", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 112, 12, 20]> : vector<4xindex> }, { Name = "1033", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[672, 112, 1, 1]> : vector<4xindex> }, { Name = "656", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_236", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1036", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", 
config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc162) %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<672x112x1x1xbf16>) -> tensor<672x1x1x112xbf16> loc(#loc162) %465 = tosa.transpose %arg8, %463 : (tensor<1x112x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x112xbf16> loc(#loc162) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_236", PartOfOutputName = "Conv_236", dilation = array, pad = array, stride = array} : (tensor<1x12x20x112xbf16>, tensor<672x1x1x112xbf16>, tensor<672xbf16>) -> tensor<1x12x20x672xbf16> loc(#loc162) %467 = tosa.transpose %466, %462 : (tensor<1x12x20x672xbf16>, tensor<4xi32>) -> tensor<1x672x12x20xbf16> loc(#loc162) xten_nn.output %467 : tensor<1x672x12x20xbf16> loc(#loc162) } -> tensor<1x672x12x20xbf16> loc(#loc162) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc162) } -> tensor<1x672x12x20xbf16> loc(#loc162) %306 = xten_nn.subgraph (%arg5 = %305: tensor<1x672x12x20xbf16>) attributes { LayerName = "Add_238", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1036", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Add_238", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "660", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = 
xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x12x20xbf16>) attributes { LayerName = "Add_238", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1036", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Add_238", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "660", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_238", OutputName = "Add_238"} : (tensor<1x672x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc163) xten_nn.output %463 : tensor<1x672x12x20xbf16> loc(#loc163) } -> tensor<1x672x12x20xbf16> loc(#loc163) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc163) } -> tensor<1x672x12x20xbf16> loc(#loc163) %307 = xten_nn.subgraph (%arg5 = %306: tensor<1x672x12x20xbf16>) attributes { LayerName = "Clip_241", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "660", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_241", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "663", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } 
], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x12x20xbf16>) attributes { LayerName = "Clip_241", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "660", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_241", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "663", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_241", OutputName = "Clip_241", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x672x12x20xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc164) xten_nn.output %462 : tensor<1x672x12x20xbf16> loc(#loc164) } -> tensor<1x672x12x20xbf16> loc(#loc164) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc164) } -> tensor<1x672x12x20xbf16> loc(#loc164) %308 = xten_nn.subgraph (%arg5 = %307: tensor<1x672x12x20xbf16>) attributes { LayerName = "Div_243", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "663", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Div_243", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = 
"HCWN", L3Vectorization = "C:8", Name = "665", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x12x20xbf16>) attributes { LayerName = "Div_243", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "663", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Div_243", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "665", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_243", OutputName = "Div_243", shift = 0 : i8} : (tensor<1x672x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc165) xten_nn.output %463 : tensor<1x672x12x20xbf16> loc(#loc165) } -> tensor<1x672x12x20xbf16> loc(#loc165) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc165) } -> tensor<1x672x12x20xbf16> loc(#loc165) %309 = xten_nn.subgraph (%arg5 = %305: tensor<1x672x12x20xbf16>, %arg6 = %308: tensor<1x672x12x20xbf16>) attributes { LayerName = "Mul_244", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1036", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : 
vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "656", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_244", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "666", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x672x12x20xbf16>, %arg8 = %arg6: tensor<1x672x12x20xbf16>) attributes { LayerName = "Mul_244", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1036", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "656", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_244", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "666", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_244", OutputName = "Mul_244", shift = 0 : i8} : (tensor<1x672x12x20xbf16>, tensor<1x672x12x20xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc166) xten_nn.output %462 : 
tensor<1x672x12x20xbf16> loc(#loc166) } -> tensor<1x672x12x20xbf16> loc(#loc166) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc166) } -> tensor<1x672x12x20xbf16> loc(#loc166) %310 = xten_nn.subgraph (%arg5 = %309: tensor<1x672x12x20xbf16>, %arg6 = %66: tensor<672x1x9x9xbf16>, %arg7 = %65: tensor<672xbf16>) attributes { LayerName = "Conv_245", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "666", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1036", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[672, 1, 9, 9]> : vector<4xindex> }, { Name = "666", UnknownDataFormat = true } ], OutputName = "Conv_245", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1039", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x672x12x20xbf16>, %arg9 = %arg6: tensor<672x1x9x9xbf16>, %arg10 = %arg7: tensor<672xbf16>) attributes { Dilations = array, HWPadding = [[4, 4], [4, 4]], LayerName = "Conv_245", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "666", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1036", Port = "data_io.wts", SubPort = "wts_data", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[672, 1, 9, 9]> : vector<4xindex> }, { Name = "666", Port = "data_io.wts", SubPort = "bias", 
UnknownDataFormat = true } ], OutputName = "Conv_245", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1039", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "DepthwiseConv2dBf16", With = { config.act = 0 : ui8, config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.kernel_height = 9 : ui8, config.kernel_width = 9 : ui8, config.stride = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc167) %465 = tosa.transpose %arg9, %464 : (tensor<672x1x9x9xbf16>, tensor<4xi32>) -> tensor<9x9x672x1xbf16> loc(#loc167) %466 = tosa.transpose %arg8, %463 : (tensor<1x672x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x672xbf16> loc(#loc167) %467 = tosa.depthwise_conv2d %466, %465, %arg10 { PartOfLayerName = "Conv_245", PartOfOutputName = "Conv_245", dilation = array, pad = array, stride = array} : (tensor<1x12x20x672xbf16>, tensor<9x9x672x1xbf16>, tensor<672xbf16>) -> tensor<1x12x20x672xbf16> loc(#loc167) %468 = tosa.transpose %467, %462 : (tensor<1x12x20x672xbf16>, tensor<4xi32>) -> tensor<1x672x12x20xbf16> loc(#loc167) xten_nn.output %468 : tensor<1x672x12x20xbf16> loc(#loc167) } -> tensor<1x672x12x20xbf16> loc(#loc167) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc167) } -> tensor<1x672x12x20xbf16> loc(#loc167) %311 = xten_nn.subgraph (%arg5 = %310: tensor<1x672x12x20xbf16>) attributes { LayerName = "Add_247", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1039", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : 
vector<4xindex> } ], OutputName = "Add_247", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "670", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x12x20xbf16>) attributes { LayerName = "Add_247", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1039", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Add_247", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "670", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_247", OutputName = "Add_247"} : (tensor<1x672x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc168) xten_nn.output %463 : tensor<1x672x12x20xbf16> loc(#loc168) } -> tensor<1x672x12x20xbf16> loc(#loc168) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc168) } -> tensor<1x672x12x20xbf16> loc(#loc168) %312 = xten_nn.subgraph (%arg5 = %311: tensor<1x672x12x20xbf16>) attributes { LayerName = "Clip_250", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", 
L3Vectorization = "C:8", Name = "670", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_250", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "673", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x12x20xbf16>) attributes { LayerName = "Clip_250", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "670", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_250", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "673", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_250", OutputName = "Clip_250", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x672x12x20xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc169) xten_nn.output %462 : tensor<1x672x12x20xbf16> loc(#loc169) } -> tensor<1x672x12x20xbf16> loc(#loc169) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc169) } -> tensor<1x672x12x20xbf16> loc(#loc169) %313 = 
xten_nn.subgraph (%arg5 = %312: tensor<1x672x12x20xbf16>) attributes { LayerName = "Div_252", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "673", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Div_252", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "675", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x12x20xbf16>) attributes { LayerName = "Div_252", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "673", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Div_252", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "675", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_252", OutputName = "Div_252", shift = 0 : i8} : (tensor<1x672x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc170) xten_nn.output %463 : tensor<1x672x12x20xbf16> loc(#loc170) } -> 
tensor<1x672x12x20xbf16> loc(#loc170) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc170) } -> tensor<1x672x12x20xbf16> loc(#loc170) %314 = xten_nn.subgraph (%arg5 = %310: tensor<1x672x12x20xbf16>, %arg6 = %313: tensor<1x672x12x20xbf16>) attributes { LayerName = "Mul_253_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1039", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "666", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_253", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "676", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x672x12x20xbf16>, %arg8 = %arg6: tensor<1x672x12x20xbf16>) attributes { LayerName = "Mul_253_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1039", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "666", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_253", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "676", Port = "data_io.ofm", l3_extend_end = dense<0> : 
vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_253", OutputName = "Mul_253", shift = 0 : i8} : (tensor<1x672x12x20xbf16>, tensor<1x672x12x20xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc171) xten_nn.output %462 : tensor<1x672x12x20xbf16> loc(#loc171) } -> tensor<1x672x12x20xbf16> loc(#loc171) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc171) } -> tensor<1x672x12x20xbf16> loc(#loc171) %315 = xten_nn.subgraph (%arg5 = %314: tensor<1x672x12x20xbf16>) attributes { LayerName = "Mul_253_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1039", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_254_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "677", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 240]> : vector<4xindex> } ], Specializes = "Transpose4dAdf", With = { config.aie_arch = "aie2p", config.dim_0 = 12 : ui32, config.dim_1 = 84 : ui32, config.dim_2 = 20 : ui32, config.dim_3 = 8 : ui32, config.dtype = "bfloat16", config.perm = 6 : ui32 }} { %461 = tosa.reshape %arg5 {new_shape = array} : (tensor<1x672x12x20xbf16>) -> tensor<1x672x1x240xbf16> loc(#loc349) xten_nn.output %461 : tensor<1x672x1x240xbf16> loc(#loc349) } -> tensor<1x672x1x240xbf16> loc(#loc349) %316 = xten_nn.subgraph (%arg5 = %315: 
tensor<1x672x1x240xbf16>) attributes { LayerName = "GlobalAveragePool_254", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "676", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 240]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_254_Duplicated#1", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "677", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x1x240xbf16>) attributes { LayerName = "GlobalAveragePool_254", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "676", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 240]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_254_Duplicated#1", PadValue = 0.000000e+00 : bf16, Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "677", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], Specializes = "ReduceMeanC8Bf16", Traits = { Reduce = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.full_channel = 672 : ui32, config.full_height = 1 : ui32, config.full_width = 240 : ui32, config.reduce_dim = "W" }} { %462 = xten_nn.reduce_mean %arg6 {axes = array, keepdims = 1 : i64} : (tensor<1x672x1x240xbf16>) -> tensor<1x672x1x1xbf16> loc(#loc172) xten_nn.output %462 : tensor<1x672x1x1xbf16> loc(#loc172) } -> tensor<1x672x1x1xbf16> loc(#loc172) xten_nn.output %461 : tensor<1x672x1x1xbf16> loc(#loc172) } -> 
tensor<1x672x1x1xbf16> loc(#loc172) %317 = xten_nn.subgraph (%arg5 = %316: tensor<1x672x1x1xbf16>, %arg6 = %64: tensor<168x672x1x1xbf16>, %arg7 = %63: tensor<168xbf16>) attributes { LayerName = "Conv_255", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "677", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> }, { Name = "676", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[168, 672, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.13.block.2.fc1.weight", UnknownDataFormat = true } ], OutputName = "Relu_256", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "679", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 168, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x672x1x1xbf16>, %arg9 = %arg6: tensor<168x672x1x1xbf16>, %arg10 = %arg7: tensor<168xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_255", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "677", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> }, { Name = "676", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[168, 672, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.13.block.2.fc1.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_256", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "679", 
Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 168, 1, 1]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true, NonNegativeOut = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 1 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 0.000000e+00 : bf16, config.lrelu_alpha_kernel = 0.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<168x672x1x1xbf16>) -> tensor<168x1x1x672xbf16> loc(#loc350) %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x672x1x1xbf16>) -> tensor<1x1x1x672xbf16> loc(#loc350) %464 = tosa.conv2d %463, %462, %arg10 { PartOfLayerName = "Conv_255", PartOfOutputName = "Conv_255", dilation = array, pad = array, stride = array} : (tensor<1x1x1x672xbf16>, tensor<168x1x1x672xbf16>, tensor<168xbf16>) -> tensor<1x1x1x168xbf16> loc(#loc173) %465 = tosa.clamp %464 { LayerName = "Relu_256", OutputName = "Relu_256", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x1x1x168xbf16>) -> tensor<1x1x1x168xbf16> loc(#loc174) %466 = tosa.reshape %465 {new_shape = array} : (tensor<1x1x1x168xbf16>) -> tensor<1x168x1x1xbf16> loc(#loc350) xten_nn.output %466 : tensor<1x168x1x1xbf16> loc(#loc174) } -> tensor<1x168x1x1xbf16> loc(#loc350) xten_nn.output %461 : tensor<1x168x1x1xbf16> loc(#loc350) } -> tensor<1x168x1x1xbf16> loc(#loc350) %318 = xten_nn.subgraph (%arg5 = %317: tensor<1x168x1x1xbf16>, %arg6 = %62: tensor<672x168x1x1xbf16>, %arg7 = %61: tensor<672xbf16>) attributes { LayerName = "Conv_257", Operands = [ { 
CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "679", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 168, 1, 1]> : vector<4xindex> }, { Name = "678", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[672, 168, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.13.block.2.fc2.weight", UnknownDataFormat = true } ], OutputName = "Conv_257", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "680", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x168x1x1xbf16>, %arg9 = %arg6: tensor<672x168x1x1xbf16>, %arg10 = %arg7: tensor<672xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_257", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "679", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 168, 1, 1]> : vector<4xindex> }, { Name = "678", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[672, 168, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.13.block.2.fc2.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_257", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "680", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { 
config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<672x168x1x1xbf16>) -> tensor<672x1x1x168xbf16> loc(#loc175) %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x168x1x1xbf16>) -> tensor<1x1x1x168xbf16> loc(#loc175) %464 = tosa.conv2d %463, %462, %arg10 { PartOfLayerName = "Conv_257", PartOfOutputName = "Conv_257", dilation = array, pad = array, stride = array} : (tensor<1x1x1x168xbf16>, tensor<672x1x1x168xbf16>, tensor<672xbf16>) -> tensor<1x1x1x672xbf16> loc(#loc175) %465 = tosa.reshape %464 {new_shape = array} : (tensor<1x1x1x672xbf16>) -> tensor<1x672x1x1xbf16> loc(#loc175) xten_nn.output %465 : tensor<1x672x1x1xbf16> loc(#loc175) } -> tensor<1x672x1x1xbf16> loc(#loc175) xten_nn.output %461 : tensor<1x672x1x1xbf16> loc(#loc175) } -> tensor<1x672x1x1xbf16> loc(#loc175) %319 = xten_nn.subgraph (%arg5 = %318: tensor<1x672x1x1xbf16>) attributes { LayerName = "Add_259", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "680", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], OutputName = "Add_259", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "682", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = 
{layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x1x1xbf16>) attributes { LayerName = "Add_259", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "680", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], OutputName = "Add_259", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "682", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_259", OutputName = "Add_259"} : (tensor<1x672x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x672x1x1xbf16> loc(#loc176) xten_nn.output %463 : tensor<1x672x1x1xbf16> loc(#loc176) } -> tensor<1x672x1x1xbf16> loc(#loc176) xten_nn.output %461 : tensor<1x672x1x1xbf16> loc(#loc176) } -> tensor<1x672x1x1xbf16> loc(#loc176) %320 = xten_nn.subgraph (%arg5 = %319: tensor<1x672x1x1xbf16>) attributes { LayerName = "Clip_262", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "682", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], OutputName = "Clip_262", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "685", l3_extend_end = dense<0> : 
vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x1x1xbf16>) attributes { LayerName = "Clip_262", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "682", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], OutputName = "Clip_262", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "685", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_262", OutputName = "Clip_262", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x672x1x1xbf16>) -> tensor<1x672x1x1xbf16> loc(#loc177) xten_nn.output %462 : tensor<1x672x1x1xbf16> loc(#loc177) } -> tensor<1x672x1x1xbf16> loc(#loc177) xten_nn.output %461 : tensor<1x672x1x1xbf16> loc(#loc177) } -> tensor<1x672x1x1xbf16> loc(#loc177) %321 = xten_nn.subgraph (%arg5 = %320: tensor<1x672x1x1xbf16>) attributes { LayerName = "Div_264", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "685", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], OutputName = "Div_264", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", 
Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "687", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x1x1xbf16>) attributes { LayerName = "Div_264", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "685", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], OutputName = "Div_264", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "687", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_264", OutputName = "Div_264", shift = 0 : i8} : (tensor<1x672x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x672x1x1xbf16> loc(#loc178) xten_nn.output %463 : tensor<1x672x1x1xbf16> loc(#loc178) } -> tensor<1x672x1x1xbf16> loc(#loc178) xten_nn.output %461 : tensor<1x672x1x1xbf16> loc(#loc178) } -> tensor<1x672x1x1xbf16> loc(#loc178) %322 = xten_nn.subgraph (%arg5 = %321: tensor<1x672x1x1xbf16>) attributes { LayerName = "Mul_265_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "687", Port = "data_io.ifm", l3_extend_end = dense<0> : 
vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> } ], OutputName = "Mul_265_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "688", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "TileAdf", With = { config.aie_arch = "aie2p", config.dtype = "bfloat16", config.i_dim_c = 672 : ui32, config.i_dim_h = 1 : ui32, config.i_dim_n = 1 : ui32, config.i_dim_w = 1 : ui32, config.rep_dim_c = 1 : ui32, config.rep_dim_h = 12 : ui32, config.rep_dim_w = 20 : ui32 }} { %461 = tosa.tile %arg5 {multiples = array} : (tensor<1x672x1x1xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc179) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc179) } -> tensor<1x672x12x20xbf16> loc(#loc179) %323 = xten_nn.subgraph (%arg5 = %322: tensor<1x672x12x20xbf16>, %arg6 = %314: tensor<1x672x12x20xbf16>) attributes { LayerName = "Mul_265_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "687", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "685", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_265_Duplicated#1", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "688", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = 
"single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x672x12x20xbf16>, %arg8 = %arg6: tensor<1x672x12x20xbf16>) attributes { LayerName = "Mul_265_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "687", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "685", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_265_Duplicated#1", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "688", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_265", OutputName = "Mul_265", shift = 0 : i8} : (tensor<1x672x12x20xbf16>, tensor<1x672x12x20xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc179) xten_nn.output %462 : tensor<1x672x12x20xbf16> loc(#loc179) } -> tensor<1x672x12x20xbf16> loc(#loc179) xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc179) } -> tensor<1x672x12x20xbf16> loc(#loc179) %324 = xten_nn.subgraph (%arg5 = %323: tensor<1x672x12x20xbf16>, %arg6 = %60: tensor<160x672x1x1xbf16>, %arg7 = %59: tensor<160xbf16>) attributes { LayerName = "Conv_266", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "688", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> }, { Name = "687", UnknownDataFormat = true, 
l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[160, 672, 1, 1]> : vector<4xindex> }, { Name = "688", UnknownDataFormat = true } ], OutputName = "Conv_266", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1042", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x672x12x20xbf16>, %arg9 = %arg6: tensor<160x672x1x1xbf16>, %arg10 = %arg7: tensor<160xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_266", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "688", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> }, { Name = "687", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[160, 672, 1, 1]> : vector<4xindex> }, { Name = "688", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_266", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1042", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", 
config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc180) %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<160x672x1x1xbf16>) -> tensor<160x1x1x672xbf16> loc(#loc180) %465 = tosa.transpose %arg8, %463 : (tensor<1x672x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x672xbf16> loc(#loc180) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_266", PartOfOutputName = "Conv_266", dilation = array, pad = array, stride = array} : (tensor<1x12x20x672xbf16>, tensor<160x1x1x672xbf16>, tensor<160xbf16>) -> tensor<1x12x20x160xbf16> loc(#loc180) %467 = tosa.transpose %466, %462 : (tensor<1x12x20x160xbf16>, tensor<4xi32>) -> tensor<1x160x12x20xbf16> loc(#loc180) xten_nn.output %467 : tensor<1x160x12x20xbf16> loc(#loc180) } -> tensor<1x160x12x20xbf16> loc(#loc180) xten_nn.output %461 : tensor<1x160x12x20xbf16> loc(#loc180) } -> tensor<1x160x12x20xbf16> loc(#loc180) %325 = xten_nn.subgraph (%arg5 = %324: tensor<1x160x12x20xbf16>, %arg6 = %58: tensor<960x160x1x1xbf16>, %arg7 = %57: tensor<960xbf16>) attributes { LayerName = "Conv_267", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1042", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> }, { Name = "688", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[960, 160, 1, 1]> : vector<4xindex> }, { Name = "1046", UnknownDataFormat = true } ], OutputName = "Conv_267", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", 
L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1045", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x160x12x20xbf16>, %arg9 = %arg6: tensor<960x160x1x1xbf16>, %arg10 = %arg7: tensor<960xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_267", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1042", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> }, { Name = "688", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[960, 160, 1, 1]> : vector<4xindex> }, { Name = "1046", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_267", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1045", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : 
tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc181) %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<960x160x1x1xbf16>) -> tensor<960x1x1x160xbf16> loc(#loc181) %465 = tosa.transpose %arg8, %463 : (tensor<1x160x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x160xbf16> loc(#loc181) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_267", PartOfOutputName = "Conv_267", dilation = array, pad = array, stride = array} : (tensor<1x12x20x160xbf16>, tensor<960x1x1x160xbf16>, tensor<960xbf16>) -> tensor<1x12x20x960xbf16> loc(#loc181) %467 = tosa.transpose %466, %462 : (tensor<1x12x20x960xbf16>, tensor<4xi32>) -> tensor<1x960x12x20xbf16> loc(#loc181) xten_nn.output %467 : tensor<1x960x12x20xbf16> loc(#loc181) } -> tensor<1x960x12x20xbf16> loc(#loc181) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc181) } -> tensor<1x960x12x20xbf16> loc(#loc181) %326 = xten_nn.subgraph (%arg5 = %325: tensor<1x960x12x20xbf16>) attributes { LayerName = "Add_269", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1045", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Add_269", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "694", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { LayerName = "Add_269", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1045", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, 
l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Add_269", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "694", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_269", OutputName = "Add_269"} : (tensor<1x960x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc182) xten_nn.output %463 : tensor<1x960x12x20xbf16> loc(#loc182) } -> tensor<1x960x12x20xbf16> loc(#loc182) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc182) } -> tensor<1x960x12x20xbf16> loc(#loc182) %327 = xten_nn.subgraph (%arg5 = %326: tensor<1x960x12x20xbf16>) attributes { LayerName = "Clip_272", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "694", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_272", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "697", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { LayerName = "Clip_272", Operands = [ { CurrentDataFormat = "NCHW", 
L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "694", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_272", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "697", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_272", OutputName = "Clip_272", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x960x12x20xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc183) xten_nn.output %462 : tensor<1x960x12x20xbf16> loc(#loc183) } -> tensor<1x960x12x20xbf16> loc(#loc183) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc183) } -> tensor<1x960x12x20xbf16> loc(#loc183) %328 = xten_nn.subgraph (%arg5 = %327: tensor<1x960x12x20xbf16>) attributes { LayerName = "Div_274", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "697", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Div_274", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "699", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} 
}} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { LayerName = "Div_274", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "697", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Div_274", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "699", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_274", OutputName = "Div_274", shift = 0 : i8} : (tensor<1x960x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc184) xten_nn.output %463 : tensor<1x960x12x20xbf16> loc(#loc184) } -> tensor<1x960x12x20xbf16> loc(#loc184) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc184) } -> tensor<1x960x12x20xbf16> loc(#loc184) %329 = xten_nn.subgraph (%arg5 = %325: tensor<1x960x12x20xbf16>, %arg6 = %328: tensor<1x960x12x20xbf16>) attributes { LayerName = "Mul_275", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1045", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1042", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_275", Overlay = 
"4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "700", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x960x12x20xbf16>, %arg8 = %arg6: tensor<1x960x12x20xbf16>) attributes { LayerName = "Mul_275", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1045", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1042", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_275", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "700", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_275", OutputName = "Mul_275", shift = 0 : i8} : (tensor<1x960x12x20xbf16>, tensor<1x960x12x20xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc185) xten_nn.output %462 : tensor<1x960x12x20xbf16> loc(#loc185) } -> tensor<1x960x12x20xbf16> loc(#loc185) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc185) } -> tensor<1x960x12x20xbf16> loc(#loc185) %330 = xten_nn.subgraph (%arg5 = %329: tensor<1x960x12x20xbf16>, %arg6 = %56: 
tensor<960x1x9x9xbf16>, %arg7 = %55: tensor<960xbf16>) attributes { LayerName = "Conv_276", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "700", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1045", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[960, 1, 9, 9]> : vector<4xindex> }, { Name = "700", UnknownDataFormat = true } ], OutputName = "Conv_276", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1048", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x960x12x20xbf16>, %arg9 = %arg6: tensor<960x1x9x9xbf16>, %arg10 = %arg7: tensor<960xbf16>) attributes { Dilations = array, HWPadding = [[4, 4], [4, 4]], LayerName = "Conv_276", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "700", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1045", Port = "data_io.wts", SubPort = "wts_data", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[960, 1, 9, 9]> : vector<4xindex> }, { Name = "700", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_276", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1048", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count 
// NOTE(review): auto-generated lowered IR (xten_nn/tosa, AIE backend). Annotations only — they
// will not survive regeneration of this dump.
// NOTE(review): the `array` attributes below (dilation/pad/stride/new_shape/axes) appear to have
// lost their `<i64: ...>` payload, possibly during export — confirm against the original .mlir.
// Tail of %330: DepthwiseConv2dBf16 "Conv_276" (9x9 kernel, stride 1) on a 1x960x12x20 feature
// map, with NCHW<->NHWC transposes wrapped around tosa.depthwise_conv2d.
// %331 begins on this line: "Add_278" adds scalar 3.0 — first step of a hard-swish-style
// activation chain: clip(x + 3, 0, 6) * ~1/6, then multiply by x (see %331..%334).
= dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "DepthwiseConv2dBf16", With = { config.act = 0 : ui8, config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.kernel_height = 9 : ui8, config.kernel_width = 9 : ui8, config.stride = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc186) %465 = tosa.transpose %arg9, %464 : (tensor<960x1x9x9xbf16>, tensor<4xi32>) -> tensor<9x9x960x1xbf16> loc(#loc186) %466 = tosa.transpose %arg8, %463 : (tensor<1x960x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x960xbf16> loc(#loc186) %467 = tosa.depthwise_conv2d %466, %465, %arg10 { PartOfLayerName = "Conv_276", PartOfOutputName = "Conv_276", dilation = array, pad = array, stride = array} : (tensor<1x12x20x960xbf16>, tensor<9x9x960x1xbf16>, tensor<960xbf16>) -> tensor<1x12x20x960xbf16> loc(#loc186) %468 = tosa.transpose %467, %462 : (tensor<1x12x20x960xbf16>, tensor<4xi32>) -> tensor<1x960x12x20xbf16> loc(#loc186) xten_nn.output %468 : tensor<1x960x12x20xbf16> loc(#loc186) } -> tensor<1x960x12x20xbf16> loc(#loc186) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc186) } -> tensor<1x960x12x20xbf16> loc(#loc186) %331 = xten_nn.subgraph (%arg5 = %330: tensor<1x960x12x20xbf16>) attributes { LayerName = "Add_278", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1048", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Add_278", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "704", l3_extend_end = dense<0> : vector<4xindex>, 
// Inner kernel of %331 (AddAttributeBroadcastingBf16, scalar 3.0). %332 "Clip_281" begins near
// the end of this line.
l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { LayerName = "Add_278", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1048", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Add_278", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "704", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_278", OutputName = "Add_278"} : (tensor<1x960x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc187) xten_nn.output %463 : tensor<1x960x12x20xbf16> loc(#loc187) } -> tensor<1x960x12x20xbf16> loc(#loc187) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc187) } -> tensor<1x960x12x20xbf16> loc(#loc187) %332 = xten_nn.subgraph (%arg5 = %331: tensor<1x960x12x20xbf16>) attributes { LayerName = "Clip_281", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "704", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_281", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { 
// %332 inner kernel: ClipBf16 clamps to [0, 6]. %333 "Div_283" begins at the end of this line.
CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "707", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { LayerName = "Clip_281", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "704", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_281", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "707", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_281", OutputName = "Clip_281", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x960x12x20xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc188) xten_nn.output %462 : tensor<1x960x12x20xbf16> loc(#loc188) } -> tensor<1x960x12x20xbf16> loc(#loc188) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc188) } -> tensor<1x960x12x20xbf16> loc(#loc188) %333 = xten_nn.subgraph (%arg5 = %332: tensor<1x960x12x20xbf16>) attributes { LayerName = "Div_283", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "707", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 
// %333 inner kernel: multiply by 1.660160e-01 (bf16 rounding of 1/6). %331..%333 together compute
// hard-sigmoid(x) = clip(x + 3, 0, 6) / 6 of the depthwise-conv output %330.
// %334 "Mul_284" begins at the end of this line: elementwise %330 * hard-sigmoid(%330), i.e. the
// hard-swish of %330.
12, 20]> : vector<4xindex> } ], OutputName = "Div_283", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "709", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { LayerName = "Div_283", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "707", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Div_283", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "709", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_283", OutputName = "Div_283", shift = 0 : i8} : (tensor<1x960x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc189) xten_nn.output %463 : tensor<1x960x12x20xbf16> loc(#loc189) } -> tensor<1x960x12x20xbf16> loc(#loc189) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc189) } -> tensor<1x960x12x20xbf16> loc(#loc189) %334 = xten_nn.subgraph (%arg5 = %330: tensor<1x960x12x20xbf16>, %arg6 = %333: tensor<1x960x12x20xbf16>) attributes { LayerName = 
// %334 continued: MulBf16 binary elementwise kernel metadata (ifm1 = %330, ifm2 = %333).
"Mul_284_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1048", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "700", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_284", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "710", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x960x12x20xbf16>, %arg8 = %arg6: tensor<1x960x12x20xbf16>) attributes { LayerName = "Mul_284_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1048", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "700", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_284", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "710", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} 
// %334 inner tosa.mul. %335 begins: Transpose4dAdf templated reshape 1x960x12x20 -> 1x960x1x240,
// flattening H*W (12*20 = 240) so the following reduce-mean can average over a single axis.
{ %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_284", OutputName = "Mul_284", shift = 0 : i8} : (tensor<1x960x12x20xbf16>, tensor<1x960x12x20xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc190) xten_nn.output %462 : tensor<1x960x12x20xbf16> loc(#loc190) } -> tensor<1x960x12x20xbf16> loc(#loc190) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc190) } -> tensor<1x960x12x20xbf16> loc(#loc190) %335 = xten_nn.subgraph (%arg5 = %334: tensor<1x960x12x20xbf16>) attributes { LayerName = "Mul_284_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1048", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_285_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "711", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 240]> : vector<4xindex> } ], Specializes = "Transpose4dAdf", With = { config.aie_arch = "aie2p", config.dim_0 = 12 : ui32, config.dim_1 = 120 : ui32, config.dim_2 = 20 : ui32, config.dim_3 = 8 : ui32, config.dtype = "bfloat16", config.perm = 6 : ui32 }} { %461 = tosa.reshape %arg5 {new_shape = array} : (tensor<1x960x12x20xbf16>) -> tensor<1x960x1x240xbf16> loc(#loc351) xten_nn.output %461 : tensor<1x960x1x240xbf16> loc(#loc351) } -> tensor<1x960x1x240xbf16> loc(#loc351) %336 = xten_nn.subgraph (%arg5 = %335: tensor<1x960x1x240xbf16>) attributes { LayerName = "GlobalAveragePool_285", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "710", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 240]> : vector<4xindex> } ], OutputName = 
// %336: ReduceMeanC8Bf16 over W (the flattened 240 elements) -> 1x960x1x1 global average pool.
"GlobalAveragePool_285_Duplicated#1", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "711", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x1x240xbf16>) attributes { LayerName = "GlobalAveragePool_285", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "710", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 240]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_285_Duplicated#1", PadValue = 0.000000e+00 : bf16, Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "711", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], Specializes = "ReduceMeanC8Bf16", Traits = { Reduce = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.full_channel = 960 : ui32, config.full_height = 1 : ui32, config.full_width = 240 : ui32, config.reduce_dim = "W" }} { %462 = xten_nn.reduce_mean %arg6 {axes = array, keepdims = 1 : i64} : (tensor<1x960x1x240xbf16>) -> tensor<1x960x1x1xbf16> loc(#loc191) xten_nn.output %462 : tensor<1x960x1x1xbf16> loc(#loc191) } -> tensor<1x960x1x1xbf16> loc(#loc191) xten_nn.output %461 : tensor<1x960x1x1xbf16> loc(#loc191) } -> tensor<1x960x1x1xbf16> loc(#loc191) %337 = xten_nn.subgraph (%arg5 = %336: tensor<1x960x1x1xbf16>, %arg6 = %54: tensor<240x960x1x1xbf16>, %arg7 = %53: tensor<240xbf16>) attributes { LayerName = "Conv_286", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "711", 
// %337: "Conv_286" 1x1 conv 960 -> 240 channels with fused ReLU ("Relu_287") on the pooled
// vector — consistent with the squeeze stage of a squeeze-and-excitation block (TODO confirm
// against the original model graph).
l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> }, { Name = "710", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[240, 960, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.14.block.2.fc1.weight", UnknownDataFormat = true } ], OutputName = "Relu_287", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "713", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x960x1x1xbf16>, %arg9 = %arg6: tensor<240x960x1x1xbf16>, %arg10 = %arg7: tensor<240xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_286", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "711", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> }, { Name = "710", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[240, 960, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.14.block.2.fc1.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_287", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "713", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 1, 1]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true, NonNegativeOut = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 1 : ui8, 
// %337 inner kernel body (reshapes + tosa.conv2d + ReLU clamp). %338 "Conv_288" begins at the
// end of this line: 1x1 conv 240 -> 960 channels (excitation expansion).
config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 0.000000e+00 : bf16, config.lrelu_alpha_kernel = 0.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<240x960x1x1xbf16>) -> tensor<240x1x1x960xbf16> loc(#loc352) %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x960x1x1xbf16>) -> tensor<1x1x1x960xbf16> loc(#loc352) %464 = tosa.conv2d %463, %462, %arg10 { PartOfLayerName = "Conv_286", PartOfOutputName = "Conv_286", dilation = array, pad = array, stride = array} : (tensor<1x1x1x960xbf16>, tensor<240x1x1x960xbf16>, tensor<240xbf16>) -> tensor<1x1x1x240xbf16> loc(#loc192) %465 = tosa.clamp %464 { LayerName = "Relu_287", OutputName = "Relu_287", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x1x1x240xbf16>) -> tensor<1x1x1x240xbf16> loc(#loc193) %466 = tosa.reshape %465 {new_shape = array} : (tensor<1x1x1x240xbf16>) -> tensor<1x240x1x1xbf16> loc(#loc352) xten_nn.output %466 : tensor<1x240x1x1xbf16> loc(#loc193) } -> tensor<1x240x1x1xbf16> loc(#loc352) xten_nn.output %461 : tensor<1x240x1x1xbf16> loc(#loc352) } -> tensor<1x240x1x1xbf16> loc(#loc352) %338 = xten_nn.subgraph (%arg5 = %337: tensor<1x240x1x1xbf16>, %arg6 = %52: tensor<960x240x1x1xbf16>, %arg7 = %51: tensor<960xbf16>) attributes { LayerName = "Conv_288", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "713", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 1, 1]> : vector<4xindex> }, { Name = "712", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[960, 
// %338 continued: Conv2DBf16 kernel metadata (config.act = 0; its tosa body below has no clamp,
// i.e. linear output despite act_type = "RELU").
240, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.14.block.2.fc2.weight", UnknownDataFormat = true } ], OutputName = "Conv_288", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "714", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x240x1x1xbf16>, %arg9 = %arg6: tensor<960x240x1x1xbf16>, %arg10 = %arg7: tensor<960xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_288", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "713", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 1, 1]> : vector<4xindex> }, { Name = "712", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[960, 240, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.14.block.2.fc2.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_288", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "714", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = 
// %338 inner body: reshapes + tosa.conv2d (no activation clamp). %339 "Add_290" begins: scalar
// +3.0, start of a hard-sigmoid gate on the 1x960x1x1 excitation vector (%339..%341).
"bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<960x240x1x1xbf16>) -> tensor<960x1x1x240xbf16> loc(#loc194) %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x240x1x1xbf16>) -> tensor<1x1x1x240xbf16> loc(#loc194) %464 = tosa.conv2d %463, %462, %arg10 { PartOfLayerName = "Conv_288", PartOfOutputName = "Conv_288", dilation = array, pad = array, stride = array} : (tensor<1x1x1x240xbf16>, tensor<960x1x1x240xbf16>, tensor<960xbf16>) -> tensor<1x1x1x960xbf16> loc(#loc194) %465 = tosa.reshape %464 {new_shape = array} : (tensor<1x1x1x960xbf16>) -> tensor<1x960x1x1xbf16> loc(#loc194) xten_nn.output %465 : tensor<1x960x1x1xbf16> loc(#loc194) } -> tensor<1x960x1x1xbf16> loc(#loc194) xten_nn.output %461 : tensor<1x960x1x1xbf16> loc(#loc194) } -> tensor<1x960x1x1xbf16> loc(#loc194) %339 = xten_nn.subgraph (%arg5 = %338: tensor<1x960x1x1xbf16>) attributes { LayerName = "Add_290", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "714", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], OutputName = "Add_290", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "716", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x1x1xbf16>) attributes { LayerName = "Add_290", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "714", Port = "data_io.ifm", 
// %339 inner kernel (add 3.0). %340 "Clip_293" begins near the end of this line.
l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], OutputName = "Add_290", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "716", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_290", OutputName = "Add_290"} : (tensor<1x960x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x960x1x1xbf16> loc(#loc195) xten_nn.output %463 : tensor<1x960x1x1xbf16> loc(#loc195) } -> tensor<1x960x1x1xbf16> loc(#loc195) xten_nn.output %461 : tensor<1x960x1x1xbf16> loc(#loc195) } -> tensor<1x960x1x1xbf16> loc(#loc195) %340 = xten_nn.subgraph (%arg5 = %339: tensor<1x960x1x1xbf16>) attributes { LayerName = "Clip_293", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "716", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], OutputName = "Clip_293", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "719", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x1x1xbf16>) attributes { LayerName = "Clip_293", Operands = [ { 
// %340 inner kernel: clamp to [0, 6]. %341 "Div_295" begins at the end of this line.
CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "716", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], OutputName = "Clip_293", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "719", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_293", OutputName = "Clip_293", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x960x1x1xbf16>) -> tensor<1x960x1x1xbf16> loc(#loc196) xten_nn.output %462 : tensor<1x960x1x1xbf16> loc(#loc196) } -> tensor<1x960x1x1xbf16> loc(#loc196) xten_nn.output %461 : tensor<1x960x1x1xbf16> loc(#loc196) } -> tensor<1x960x1x1xbf16> loc(#loc196) %341 = xten_nn.subgraph (%arg5 = %340: tensor<1x960x1x1xbf16>) attributes { LayerName = "Div_295", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "719", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], OutputName = "Div_295", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "721", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = 
// %341 inner kernel: multiply by 1.660160e-01 (bf16 ≈ 1/6), completing the gate hard-sigmoid.
// %342 begins at the end of this line: TileAdf broadcast of the 1x960x1x1 gate back to the
// 1x960x12x20 feature-map shape.
"flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x1x1xbf16>) attributes { LayerName = "Div_295", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "719", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], OutputName = "Div_295", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "721", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_295", OutputName = "Div_295", shift = 0 : i8} : (tensor<1x960x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x960x1x1xbf16> loc(#loc197) xten_nn.output %463 : tensor<1x960x1x1xbf16> loc(#loc197) } -> tensor<1x960x1x1xbf16> loc(#loc197) xten_nn.output %461 : tensor<1x960x1x1xbf16> loc(#loc197) } -> tensor<1x960x1x1xbf16> loc(#loc197) %342 = xten_nn.subgraph (%arg5 = %341: tensor<1x960x1x1xbf16>) attributes { LayerName = "Mul_296_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "721", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], OutputName = "Mul_296_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", 
// %342: tosa.tile replicates the gate to 1x960x12x20. %343 "Mul_296" begins: elementwise
// gate * features (%334) — channel-wise rescaling of the hard-swish output.
Name = "722", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "TileAdf", With = { config.aie_arch = "aie2p", config.dtype = "bfloat16", config.i_dim_c = 960 : ui32, config.i_dim_h = 1 : ui32, config.i_dim_n = 1 : ui32, config.i_dim_w = 1 : ui32, config.rep_dim_c = 1 : ui32, config.rep_dim_h = 12 : ui32, config.rep_dim_w = 20 : ui32 }} { %461 = tosa.tile %arg5 {multiples = array} : (tensor<1x960x1x1xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc198) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc198) } -> tensor<1x960x12x20xbf16> loc(#loc198) %343 = xten_nn.subgraph (%arg5 = %342: tensor<1x960x12x20xbf16>, %arg6 = %334: tensor<1x960x12x20xbf16>) attributes { LayerName = "Mul_296_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "721", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "719", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_296_Duplicated#1", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "722", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x960x12x20xbf16>, %arg8 = %arg6: tensor<1x960x12x20xbf16>) attributes { LayerName = "Mul_296_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "721", Port = 
// %343 inner tosa.mul. %344 "Conv_297"/"Add_298" begins at the end of this line: 1x1 projection
// conv 960 -> 160 channels followed by a fused residual add with %324 (note OfmShare = 3).
"data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "719", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_296_Duplicated#1", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "722", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_296", OutputName = "Mul_296", shift = 0 : i8} : (tensor<1x960x12x20xbf16>, tensor<1x960x12x20xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc198) xten_nn.output %462 : tensor<1x960x12x20xbf16> loc(#loc198) } -> tensor<1x960x12x20xbf16> loc(#loc198) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc198) } -> tensor<1x960x12x20xbf16> loc(#loc198) %344 = xten_nn.subgraph (%arg5 = %343: tensor<1x960x12x20xbf16>, %arg6 = %50: tensor<160x960x1x1xbf16>, %arg7 = %49: tensor<160xbf16>, %arg8 = %324: tensor<1x160x12x20xbf16>) attributes { LayerName = "Conv_297", OfmShare = 3 : index, Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "722", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { Name = "721", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[160, 960, 1, 1]> : vector<4xindex> }, { Name = "722", UnknownDataFormat = true }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "722", l3_extend_end = 
// %344 continued: Conv2DBf16 projection kernel metadata ("Conv_297", output name "Add_298").
dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> } ], OutputName = "Add_298", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "725", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg9 = %arg5: tensor<1x960x12x20xbf16>, %arg10 = %arg6: tensor<160x960x1x1xbf16>, %arg11 = %arg7: tensor<160xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_297", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "722", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { Name = "721", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[160, 960, 1, 1]> : vector<4xindex> }, { Name = "722", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_297", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1051", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, 
// %344 inner conv body (transposes + tosa.conv2d), then its second nested subgraph: the fused
// "Add_298" residual add (%461 conv output + %arg8, which is %324 from an earlier block).
config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %463 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc199) %465 = tosa.reshape %arg10 {new_shape = array} : (tensor<160x960x1x1xbf16>) -> tensor<160x1x1x960xbf16> loc(#loc199) %466 = tosa.transpose %arg9, %464 : (tensor<1x960x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x960xbf16> loc(#loc199) %467 = tosa.conv2d %466, %465, %arg11 { PartOfLayerName = "Conv_297", PartOfOutputName = "Conv_297", dilation = array, pad = array, stride = array} : (tensor<1x12x20x960xbf16>, tensor<160x1x1x960xbf16>, tensor<160xbf16>) -> tensor<1x12x20x160xbf16> loc(#loc199) %468 = tosa.transpose %467, %463 : (tensor<1x12x20x160xbf16>, tensor<4xi32>) -> tensor<1x160x12x20xbf16> loc(#loc199) xten_nn.output %468 : tensor<1x160x12x20xbf16> loc(#loc199) } -> tensor<1x160x12x20xbf16> loc(#loc199) %462 = xten_nn.subgraph (%arg9 = %461: tensor<1x160x12x20xbf16>, %arg10 = %arg8: tensor<1x160x12x20xbf16>) attributes { LayerName = "Add_298", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1051", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "722", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> } ], OutputName = "Add_298", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "725", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 160, 12, 20]> : 
// "Add_298" inner tosa.add. %345 "Conv_299" begins: 1x1 expansion conv 160 -> 960 channels.
vector<4xindex> } ], Specializes = "AddBf16", Traits = { Binary = true, Elementwise = true }, With = { config.act = 0 : ui8, config.act_type = "LINEAR", config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %463 = tosa.add %arg9, %arg10 {LayerName = "Add_298", OutputName = "Add_298"} : (tensor<1x160x12x20xbf16>, tensor<1x160x12x20xbf16>) -> tensor<1x160x12x20xbf16> loc(#loc200) xten_nn.output %463 : tensor<1x160x12x20xbf16> loc(#loc200) } -> tensor<1x160x12x20xbf16> loc(#loc200) xten_nn.output %462 : tensor<1x160x12x20xbf16> loc(#loc200) } -> tensor<1x160x12x20xbf16> loc(#loc353) %345 = xten_nn.subgraph (%arg5 = %344: tensor<1x160x12x20xbf16>, %arg6 = %48: tensor<960x160x1x1xbf16>, %arg7 = %47: tensor<960xbf16>) attributes { LayerName = "Conv_299", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "725", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> }, { Name = "1051", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[960, 160, 1, 1]> : vector<4xindex> }, { Name = "725", UnknownDataFormat = true } ], OutputName = "Conv_299", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1054", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x160x12x20xbf16>, %arg9 = %arg6: tensor<960x160x1x1xbf16>, %arg10 = %arg7: tensor<960xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_299", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "725", 
// %345 continued: Conv2DBf16 kernel body begins (reshape weights, transpose ifm to NHWC,
// tosa.conv2d).
Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> }, { Name = "1051", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[960, 160, 1, 1]> : vector<4xindex> }, { Name = "725", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_299", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1054", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc201) %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<960x160x1x1xbf16>) -> tensor<960x1x1x160xbf16> loc(#loc201) %465 = tosa.transpose %arg8, %463 : (tensor<1x160x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x160xbf16> loc(#loc201) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_299", PartOfOutputName = "Conv_299", dilation = array, pad = array, stride = array} : (tensor<1x12x20x160xbf16>, tensor<960x1x1x160xbf16>, tensor<960xbf16>) -> 
// %345 inner body ends (transpose back to NCHW). %346 "Add_301" begins: scalar +3.0, the start of
// another hard-sigmoid/hard-swish chain (continues past this excerpt).
tensor<1x12x20x960xbf16> loc(#loc201) %467 = tosa.transpose %466, %462 : (tensor<1x12x20x960xbf16>, tensor<4xi32>) -> tensor<1x960x12x20xbf16> loc(#loc201) xten_nn.output %467 : tensor<1x960x12x20xbf16> loc(#loc201) } -> tensor<1x960x12x20xbf16> loc(#loc201) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc201) } -> tensor<1x960x12x20xbf16> loc(#loc201) %346 = xten_nn.subgraph (%arg5 = %345: tensor<1x960x12x20xbf16>) attributes { LayerName = "Add_301", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1054", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Add_301", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "729", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { LayerName = "Add_301", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1054", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Add_301", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "729", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { 
%462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_301", OutputName = "Add_301"} : (tensor<1x960x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc202) xten_nn.output %463 : tensor<1x960x12x20xbf16> loc(#loc202) } -> tensor<1x960x12x20xbf16> loc(#loc202) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc202) } -> tensor<1x960x12x20xbf16> loc(#loc202) %347 = xten_nn.subgraph (%arg5 = %346: tensor<1x960x12x20xbf16>) attributes { LayerName = "Clip_304", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "729", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_304", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "732", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { LayerName = "Clip_304", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "729", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_304", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "732", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", 
config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_304", OutputName = "Clip_304", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x960x12x20xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc203) xten_nn.output %462 : tensor<1x960x12x20xbf16> loc(#loc203) } -> tensor<1x960x12x20xbf16> loc(#loc203) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc203) } -> tensor<1x960x12x20xbf16> loc(#loc203) %348 = xten_nn.subgraph (%arg5 = %347: tensor<1x960x12x20xbf16>) attributes { LayerName = "Div_306", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "732", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Div_306", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "734", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { LayerName = "Div_306", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "732", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Div_306", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "734", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : 
vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_306", OutputName = "Div_306", shift = 0 : i8} : (tensor<1x960x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc204) xten_nn.output %463 : tensor<1x960x12x20xbf16> loc(#loc204) } -> tensor<1x960x12x20xbf16> loc(#loc204) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc204) } -> tensor<1x960x12x20xbf16> loc(#loc204) %349 = xten_nn.subgraph (%arg5 = %345: tensor<1x960x12x20xbf16>, %arg6 = %348: tensor<1x960x12x20xbf16>) attributes { LayerName = "Mul_307", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1054", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "725", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_307", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "735", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x960x12x20xbf16>, %arg8 = %arg6: tensor<1x960x12x20xbf16>) attributes { LayerName = "Mul_307", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat 
= "HCWN", L3Vectorization = "C:8", Name = "1054", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "725", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_307", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "735", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_307", OutputName = "Mul_307", shift = 0 : i8} : (tensor<1x960x12x20xbf16>, tensor<1x960x12x20xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc205) xten_nn.output %462 : tensor<1x960x12x20xbf16> loc(#loc205) } -> tensor<1x960x12x20xbf16> loc(#loc205) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc205) } -> tensor<1x960x12x20xbf16> loc(#loc205) %350 = xten_nn.subgraph (%arg5 = %349: tensor<1x960x12x20xbf16>, %arg6 = %46: tensor<960x1x9x9xbf16>, %arg7 = %45: tensor<960xbf16>) attributes { LayerName = "Conv_308", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "735", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1054", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[960, 1, 9, 9]> : vector<4xindex> }, { Name = "735", UnknownDataFormat = true } ], OutputName = "Conv_308", Overlay = 
"4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1057", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x960x12x20xbf16>, %arg9 = %arg6: tensor<960x1x9x9xbf16>, %arg10 = %arg7: tensor<960xbf16>) attributes { Dilations = array, HWPadding = [[4, 4], [4, 4]], LayerName = "Conv_308", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "735", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "CMHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1054", Port = "data_io.wts", SubPort = "wts_data", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[960, 1, 9, 9]> : vector<4xindex> }, { Name = "735", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_308", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1057", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "DepthwiseConv2dBf16", With = { config.act = 0 : ui8, config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.kernel_height = 9 : ui8, config.kernel_width = 9 : ui8, config.stride = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> 
tensor<4xi32> loc(#loc206) %465 = tosa.transpose %arg9, %464 : (tensor<960x1x9x9xbf16>, tensor<4xi32>) -> tensor<9x9x960x1xbf16> loc(#loc206) %466 = tosa.transpose %arg8, %463 : (tensor<1x960x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x960xbf16> loc(#loc206) %467 = tosa.depthwise_conv2d %466, %465, %arg10 { PartOfLayerName = "Conv_308", PartOfOutputName = "Conv_308", dilation = array, pad = array, stride = array} : (tensor<1x12x20x960xbf16>, tensor<9x9x960x1xbf16>, tensor<960xbf16>) -> tensor<1x12x20x960xbf16> loc(#loc206) %468 = tosa.transpose %467, %462 : (tensor<1x12x20x960xbf16>, tensor<4xi32>) -> tensor<1x960x12x20xbf16> loc(#loc206) xten_nn.output %468 : tensor<1x960x12x20xbf16> loc(#loc206) } -> tensor<1x960x12x20xbf16> loc(#loc206) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc206) } -> tensor<1x960x12x20xbf16> loc(#loc206) %351 = xten_nn.subgraph (%arg5 = %350: tensor<1x960x12x20xbf16>) attributes { LayerName = "Add_310", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1057", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Add_310", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "739", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { LayerName = "Add_310", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1057", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Add_310", Reason = "MllibKernel", Results = 
[ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "739", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_310", OutputName = "Add_310"} : (tensor<1x960x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc207) xten_nn.output %463 : tensor<1x960x12x20xbf16> loc(#loc207) } -> tensor<1x960x12x20xbf16> loc(#loc207) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc207) } -> tensor<1x960x12x20xbf16> loc(#loc207) %352 = xten_nn.subgraph (%arg5 = %351: tensor<1x960x12x20xbf16>) attributes { LayerName = "Clip_313", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "739", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_313", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "742", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { LayerName = "Clip_313", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "739", Port = "data_io.ifm", l3_extend_end = dense<0> : 
vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_313", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "742", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_313", OutputName = "Clip_313", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x960x12x20xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc208) xten_nn.output %462 : tensor<1x960x12x20xbf16> loc(#loc208) } -> tensor<1x960x12x20xbf16> loc(#loc208) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc208) } -> tensor<1x960x12x20xbf16> loc(#loc208) %353 = xten_nn.subgraph (%arg5 = %352: tensor<1x960x12x20xbf16>) attributes { LayerName = "Div_315", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "742", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Div_315", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "744", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { LayerName = "Div_315", 
Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "742", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Div_315", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "744", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_315", OutputName = "Div_315", shift = 0 : i8} : (tensor<1x960x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc209) xten_nn.output %463 : tensor<1x960x12x20xbf16> loc(#loc209) } -> tensor<1x960x12x20xbf16> loc(#loc209) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc209) } -> tensor<1x960x12x20xbf16> loc(#loc209) %354 = xten_nn.subgraph (%arg5 = %350: tensor<1x960x12x20xbf16>, %arg6 = %353: tensor<1x960x12x20xbf16>) attributes { LayerName = "Mul_316_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1057", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "735", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_316", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = 
"NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "745", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x960x12x20xbf16>, %arg8 = %arg6: tensor<1x960x12x20xbf16>) attributes { LayerName = "Mul_316_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1057", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "735", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_316", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "745", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_316", OutputName = "Mul_316", shift = 0 : i8} : (tensor<1x960x12x20xbf16>, tensor<1x960x12x20xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc210) xten_nn.output %462 : tensor<1x960x12x20xbf16> loc(#loc210) } -> tensor<1x960x12x20xbf16> loc(#loc210) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc210) } -> tensor<1x960x12x20xbf16> loc(#loc210) %355 = xten_nn.subgraph (%arg5 = %354: tensor<1x960x12x20xbf16>) attributes { LayerName = "Mul_316_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, 
L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1057", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_317_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "746", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 240]> : vector<4xindex> } ], Specializes = "Transpose4dAdf", With = { config.aie_arch = "aie2p", config.dim_0 = 12 : ui32, config.dim_1 = 120 : ui32, config.dim_2 = 20 : ui32, config.dim_3 = 8 : ui32, config.dtype = "bfloat16", config.perm = 6 : ui32 }} { %461 = tosa.reshape %arg5 {new_shape = array} : (tensor<1x960x12x20xbf16>) -> tensor<1x960x1x240xbf16> loc(#loc354) xten_nn.output %461 : tensor<1x960x1x240xbf16> loc(#loc354) } -> tensor<1x960x1x240xbf16> loc(#loc354) %356 = xten_nn.subgraph (%arg5 = %355: tensor<1x960x1x240xbf16>) attributes { LayerName = "GlobalAveragePool_317", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "745", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 240]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_317_Duplicated#1", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "746", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x1x240xbf16>) attributes { LayerName = "GlobalAveragePool_317", Operands 
= [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "745", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 240]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_317_Duplicated#1", PadValue = 0.000000e+00 : bf16, Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "746", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], Specializes = "ReduceMeanC8Bf16", Traits = { Reduce = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.full_channel = 960 : ui32, config.full_height = 1 : ui32, config.full_width = 240 : ui32, config.reduce_dim = "W" }} { %462 = xten_nn.reduce_mean %arg6 {axes = array, keepdims = 1 : i64} : (tensor<1x960x1x240xbf16>) -> tensor<1x960x1x1xbf16> loc(#loc211) xten_nn.output %462 : tensor<1x960x1x1xbf16> loc(#loc211) } -> tensor<1x960x1x1xbf16> loc(#loc211) xten_nn.output %461 : tensor<1x960x1x1xbf16> loc(#loc211) } -> tensor<1x960x1x1xbf16> loc(#loc211) %357 = xten_nn.subgraph (%arg5 = %356: tensor<1x960x1x1xbf16>, %arg6 = %44: tensor<240x960x1x1xbf16>, %arg7 = %43: tensor<240xbf16>) attributes { LayerName = "Conv_318", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "746", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> }, { Name = "745", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[240, 960, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.15.block.2.fc1.weight", UnknownDataFormat = true } ], OutputName = "Relu_319", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "748", l3_extend_end = dense<0> : 
vector<4xindex>, l3_tile_count = dense<[1, 240, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x960x1x1xbf16>, %arg9 = %arg6: tensor<240x960x1x1xbf16>, %arg10 = %arg7: tensor<240xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_318", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "746", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> }, { Name = "745", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[240, 960, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.15.block.2.fc1.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_319", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "748", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 1, 1]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true, NonNegativeOut = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 1 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 0.000000e+00 : bf16, config.lrelu_alpha_kernel = 0.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<240x960x1x1xbf16>) -> tensor<240x1x1x960xbf16> 
loc(#loc355) %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x960x1x1xbf16>) -> tensor<1x1x1x960xbf16> loc(#loc355) %464 = tosa.conv2d %463, %462, %arg10 { PartOfLayerName = "Conv_318", PartOfOutputName = "Conv_318", dilation = array, pad = array, stride = array} : (tensor<1x1x1x960xbf16>, tensor<240x1x1x960xbf16>, tensor<240xbf16>) -> tensor<1x1x1x240xbf16> loc(#loc212) %465 = tosa.clamp %464 { LayerName = "Relu_319", OutputName = "Relu_319", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x1x1x240xbf16>) -> tensor<1x1x1x240xbf16> loc(#loc213) %466 = tosa.reshape %465 {new_shape = array} : (tensor<1x1x1x240xbf16>) -> tensor<1x240x1x1xbf16> loc(#loc355) xten_nn.output %466 : tensor<1x240x1x1xbf16> loc(#loc213) } -> tensor<1x240x1x1xbf16> loc(#loc355) xten_nn.output %461 : tensor<1x240x1x1xbf16> loc(#loc355) } -> tensor<1x240x1x1xbf16> loc(#loc355) %358 = xten_nn.subgraph (%arg5 = %357: tensor<1x240x1x1xbf16>, %arg6 = %42: tensor<960x240x1x1xbf16>, %arg7 = %41: tensor<960xbf16>) attributes { LayerName = "Conv_320", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "748", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 1, 1]> : vector<4xindex> }, { Name = "747", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[960, 240, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.15.block.2.fc2.weight", UnknownDataFormat = true } ], OutputName = "Conv_320", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "749", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = 
xten_nn.subgraph (%arg8 = %arg5: tensor<1x240x1x1xbf16>, %arg9 = %arg6: tensor<960x240x1x1xbf16>, %arg10 = %arg7: tensor<960xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_320", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "748", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 240, 1, 1]> : vector<4xindex> }, { Name = "747", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[960, 240, 1, 1]> : vector<4xindex> }, { Name = "backbone.features.15.block.2.fc2.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_320", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "749", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<960x240x1x1xbf16>) -> tensor<960x1x1x240xbf16> loc(#loc214) %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x240x1x1xbf16>) -> tensor<1x1x1x240xbf16> loc(#loc214) %464 = tosa.conv2d %463, %462, %arg10 { PartOfLayerName = "Conv_320", PartOfOutputName = "Conv_320", 
dilation = array, pad = array, stride = array} : (tensor<1x1x1x240xbf16>, tensor<960x1x1x240xbf16>, tensor<960xbf16>) -> tensor<1x1x1x960xbf16> loc(#loc214) %465 = tosa.reshape %464 {new_shape = array} : (tensor<1x1x1x960xbf16>) -> tensor<1x960x1x1xbf16> loc(#loc214) xten_nn.output %465 : tensor<1x960x1x1xbf16> loc(#loc214) } -> tensor<1x960x1x1xbf16> loc(#loc214) xten_nn.output %461 : tensor<1x960x1x1xbf16> loc(#loc214) } -> tensor<1x960x1x1xbf16> loc(#loc214) %359 = xten_nn.subgraph (%arg5 = %358: tensor<1x960x1x1xbf16>) attributes { LayerName = "Add_322", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "749", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], OutputName = "Add_322", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "751", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x1x1xbf16>) attributes { LayerName = "Add_322", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "749", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], OutputName = "Add_322", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "751", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters 
= 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_322", OutputName = "Add_322"} : (tensor<1x960x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x960x1x1xbf16> loc(#loc215) xten_nn.output %463 : tensor<1x960x1x1xbf16> loc(#loc215) } -> tensor<1x960x1x1xbf16> loc(#loc215) xten_nn.output %461 : tensor<1x960x1x1xbf16> loc(#loc215) } -> tensor<1x960x1x1xbf16> loc(#loc215) %360 = xten_nn.subgraph (%arg5 = %359: tensor<1x960x1x1xbf16>) attributes { LayerName = "Clip_325", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "751", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], OutputName = "Clip_325", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "754", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x1x1xbf16>) attributes { LayerName = "Clip_325", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "751", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], OutputName = "Clip_325", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "754", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = 
true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_325", OutputName = "Clip_325", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x960x1x1xbf16>) -> tensor<1x960x1x1xbf16> loc(#loc216) xten_nn.output %462 : tensor<1x960x1x1xbf16> loc(#loc216) } -> tensor<1x960x1x1xbf16> loc(#loc216) xten_nn.output %461 : tensor<1x960x1x1xbf16> loc(#loc216) } -> tensor<1x960x1x1xbf16> loc(#loc216) %361 = xten_nn.subgraph (%arg5 = %360: tensor<1x960x1x1xbf16>) attributes { LayerName = "Div_327", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "754", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], OutputName = "Div_327", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "756", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x1x1xbf16>) attributes { LayerName = "Div_327", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "754", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], OutputName = "Div_327", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "756", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, 
l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_327", OutputName = "Div_327", shift = 0 : i8} : (tensor<1x960x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x960x1x1xbf16> loc(#loc217) xten_nn.output %463 : tensor<1x960x1x1xbf16> loc(#loc217) } -> tensor<1x960x1x1xbf16> loc(#loc217) xten_nn.output %461 : tensor<1x960x1x1xbf16> loc(#loc217) } -> tensor<1x960x1x1xbf16> loc(#loc217) %362 = xten_nn.subgraph (%arg5 = %361: tensor<1x960x1x1xbf16>) attributes { LayerName = "Mul_328_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "756", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], OutputName = "Mul_328_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "757", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "TileAdf", With = { config.aie_arch = "aie2p", config.dtype = "bfloat16", config.i_dim_c = 960 : ui32, config.i_dim_h = 1 : ui32, config.i_dim_n = 1 : ui32, config.i_dim_w = 1 : ui32, config.rep_dim_c = 1 : ui32, config.rep_dim_h = 12 : ui32, config.rep_dim_w = 20 : ui32 }} { %461 = tosa.tile %arg5 {multiples = array} : (tensor<1x960x1x1xbf16>) -> 
tensor<1x960x12x20xbf16> loc(#loc218) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc218) } -> tensor<1x960x12x20xbf16> loc(#loc218) %363 = xten_nn.subgraph (%arg5 = %362: tensor<1x960x12x20xbf16>, %arg6 = %354: tensor<1x960x12x20xbf16>) attributes { LayerName = "Mul_328_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "756", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "754", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_328_Duplicated#1", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "757", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x960x12x20xbf16>, %arg8 = %arg6: tensor<1x960x12x20xbf16>) attributes { LayerName = "Mul_328_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "756", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "754", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_328_Duplicated#1", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "757", Port = "data_io.ofm", 
l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_328", OutputName = "Mul_328", shift = 0 : i8} : (tensor<1x960x12x20xbf16>, tensor<1x960x12x20xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc218) xten_nn.output %462 : tensor<1x960x12x20xbf16> loc(#loc218) } -> tensor<1x960x12x20xbf16> loc(#loc218) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc218) } -> tensor<1x960x12x20xbf16> loc(#loc218) %364 = xten_nn.subgraph (%arg5 = %363: tensor<1x960x12x20xbf16>, %arg6 = %40: tensor<160x960x1x1xbf16>, %arg7 = %39: tensor<160xbf16>, %arg8 = %344: tensor<1x160x12x20xbf16>) attributes { LayerName = "Conv_329", OfmShare = 3 : index, Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "757", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { Name = "756", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[160, 960, 1, 1]> : vector<4xindex> }, { Name = "757", UnknownDataFormat = true }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "757", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> } ], OutputName = "Add_330", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "760", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph 
(%arg9 = %arg5: tensor<1x960x12x20xbf16>, %arg10 = %arg6: tensor<160x960x1x1xbf16>, %arg11 = %arg7: tensor<160xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_329", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "757", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { Name = "756", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[160, 960, 1, 1]> : vector<4xindex> }, { Name = "757", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_329", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1060", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %463 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc219) %465 = tosa.reshape %arg10 {new_shape = array} : (tensor<160x960x1x1xbf16>) -> tensor<160x1x1x960xbf16> loc(#loc219) %466 = tosa.transpose %arg9, %464 : 
(tensor<1x960x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x960xbf16> loc(#loc219) %467 = tosa.conv2d %466, %465, %arg11 { PartOfLayerName = "Conv_329", PartOfOutputName = "Conv_329", dilation = array, pad = array, stride = array} : (tensor<1x12x20x960xbf16>, tensor<160x1x1x960xbf16>, tensor<160xbf16>) -> tensor<1x12x20x160xbf16> loc(#loc219) %468 = tosa.transpose %467, %463 : (tensor<1x12x20x160xbf16>, tensor<4xi32>) -> tensor<1x160x12x20xbf16> loc(#loc219) xten_nn.output %468 : tensor<1x160x12x20xbf16> loc(#loc219) } -> tensor<1x160x12x20xbf16> loc(#loc219) %462 = xten_nn.subgraph (%arg9 = %461: tensor<1x160x12x20xbf16>, %arg10 = %arg8: tensor<1x160x12x20xbf16>) attributes { LayerName = "Add_330", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1060", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "757", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> } ], OutputName = "Add_330", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "760", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> } ], Specializes = "AddBf16", Traits = { Binary = true, Elementwise = true }, With = { config.act = 0 : ui8, config.act_type = "LINEAR", config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %463 = tosa.add %arg9, %arg10 {LayerName = "Add_330", OutputName = "Add_330"} : (tensor<1x160x12x20xbf16>, tensor<1x160x12x20xbf16>) -> tensor<1x160x12x20xbf16> loc(#loc220) xten_nn.output %463 : tensor<1x160x12x20xbf16> loc(#loc220) } -> tensor<1x160x12x20xbf16> 
loc(#loc220) xten_nn.output %462 : tensor<1x160x12x20xbf16> loc(#loc220) } -> tensor<1x160x12x20xbf16> loc(#loc356) %365 = xten_nn.subgraph (%arg5 = %364: tensor<1x160x12x20xbf16>, %arg6 = %38: tensor<960x160x1x1xbf16>, %arg7 = %37: tensor<960xbf16>) attributes { LayerName = "Conv_331", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "760", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> }, { Name = "1060", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[960, 160, 1, 1]> : vector<4xindex> }, { Name = "760", UnknownDataFormat = true } ], OutputName = "Conv_331", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1063", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x160x12x20xbf16>, %arg9 = %arg6: tensor<960x160x1x1xbf16>, %arg10 = %arg7: tensor<960xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_331", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "760", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> }, { Name = "1060", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[960, 160, 1, 1]> : vector<4xindex> }, { Name = "760", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_331", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = 
"C:8", Name = "1063", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc221) %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<960x160x1x1xbf16>) -> tensor<960x1x1x160xbf16> loc(#loc221) %465 = tosa.transpose %arg8, %463 : (tensor<1x160x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x160xbf16> loc(#loc221) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_331", PartOfOutputName = "Conv_331", dilation = array, pad = array, stride = array} : (tensor<1x12x20x160xbf16>, tensor<960x1x1x160xbf16>, tensor<960xbf16>) -> tensor<1x12x20x960xbf16> loc(#loc221) %467 = tosa.transpose %466, %462 : (tensor<1x12x20x960xbf16>, tensor<4xi32>) -> tensor<1x960x12x20xbf16> loc(#loc221) xten_nn.output %467 : tensor<1x960x12x20xbf16> loc(#loc221) } -> tensor<1x960x12x20xbf16> loc(#loc221) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc221) } -> tensor<1x960x12x20xbf16> loc(#loc221) %366 = xten_nn.subgraph (%arg5 = %365: tensor<1x960x12x20xbf16>) attributes { LayerName = "Add_333", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = 
"1063", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Add_333", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "764", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { LayerName = "Add_333", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1063", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Add_333", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "764", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "AddAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 3.000000e+00 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.add %arg6, %462 {LayerName = "Add_333", OutputName = "Add_333"} : (tensor<1x960x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc222) xten_nn.output %463 : tensor<1x960x12x20xbf16> loc(#loc222) } -> tensor<1x960x12x20xbf16> loc(#loc222) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc222) } -> tensor<1x960x12x20xbf16> loc(#loc222) %367 = xten_nn.subgraph (%arg5 = %366: tensor<1x960x12x20xbf16>) 
attributes { LayerName = "Clip_336", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "764", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_336", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "767", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { LayerName = "Clip_336", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "764", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Clip_336", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "767", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 6.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_336", OutputName = "Clip_336", max_fp = 6.000000e+00 : f32, max_int = 6 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x960x12x20xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc223) xten_nn.output %462 : tensor<1x960x12x20xbf16> loc(#loc223) } -> tensor<1x960x12x20xbf16> loc(#loc223) xten_nn.output 
%461 : tensor<1x960x12x20xbf16> loc(#loc223) } -> tensor<1x960x12x20xbf16> loc(#loc223) %368 = xten_nn.subgraph (%arg5 = %367: tensor<1x960x12x20xbf16>) attributes { LayerName = "Div_338", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "767", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Div_338", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "769", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { LayerName = "Div_338", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "767", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Div_338", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "769", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "MulAttributeBroadcastingBf16", Traits = { Elementwise = true, Unary = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.num_kernel_iters = 0 : ui16, config.scalar = 1.660160e-01 : bf16, config.scalar_position = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) %463 = tosa.mul %arg6, %462 { LayerName = "Div_338", OutputName = "Div_338", shift = 0 : i8} : (tensor<1x960x12x20xbf16>, tensor<1x1x1x1xbf16>) -> 
tensor<1x960x12x20xbf16> loc(#loc224) xten_nn.output %463 : tensor<1x960x12x20xbf16> loc(#loc224) } -> tensor<1x960x12x20xbf16> loc(#loc224) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc224) } -> tensor<1x960x12x20xbf16> loc(#loc224) %369 = xten_nn.subgraph (%arg5 = %365: tensor<1x960x12x20xbf16>, %arg6 = %368: tensor<1x960x12x20xbf16>) attributes { LayerName = "Mul_339_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1063", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "760", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_339", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "770", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x960x12x20xbf16>, %arg8 = %arg6: tensor<1x960x12x20xbf16>) attributes { LayerName = "Mul_339_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1063", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "760", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_339", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = 
"HCWN", L3Vectorization = "C:8", Name = "770", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_339", OutputName = "Mul_339", shift = 0 : i8} : (tensor<1x960x12x20xbf16>, tensor<1x960x12x20xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc225) xten_nn.output %462 : tensor<1x960x12x20xbf16> loc(#loc225) } -> tensor<1x960x12x20xbf16> loc(#loc225) xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc225) } -> tensor<1x960x12x20xbf16> loc(#loc225) %370 = xten_nn.subgraph (%arg5 = %369: tensor<1x960x12x20xbf16>) attributes { LayerName = "Mul_339_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1063", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_342_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "774", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 240]> : vector<4xindex> } ], Specializes = "Transpose4dAdf", With = { config.aie_arch = "aie2p", config.dim_0 = 12 : ui32, config.dim_1 = 120 : ui32, config.dim_2 = 20 : ui32, config.dim_3 = 8 : ui32, config.dtype = "bfloat16", config.perm = 6 : ui32 }} { %461 = tosa.reshape %arg5 {new_shape = array} : (tensor<1x960x12x20xbf16>) -> tensor<1x960x1x240xbf16> loc(#loc357) xten_nn.output %461 : tensor<1x960x1x240xbf16> loc(#loc357) } -> 
tensor<1x960x1x240xbf16> loc(#loc357) %371 = xten_nn.subgraph (%arg5 = %370: tensor<1x960x1x240xbf16>) attributes { LayerName = "GlobalAveragePool_342", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "770", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 240]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_342_Duplicated#1", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "774", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x1x240xbf16>) attributes { LayerName = "GlobalAveragePool_342", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "770", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 240]> : vector<4xindex> } ], OutputName = "GlobalAveragePool_342_Duplicated#1", PadValue = 0.000000e+00 : bf16, Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "774", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> } ], Specializes = "ReduceMeanC8Bf16", Traits = { Reduce = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.full_channel = 960 : ui32, config.full_height = 1 : ui32, config.full_width = 240 : ui32, config.reduce_dim = "W" }} { %462 = xten_nn.reduce_mean %arg6 {axes = array, keepdims = 1 : i64} : (tensor<1x960x1x240xbf16>) -> tensor<1x960x1x1xbf16> loc(#loc226) xten_nn.output %462 : tensor<1x960x1x1xbf16> loc(#loc226) } -> tensor<1x960x1x1xbf16> loc(#loc226) 
xten_nn.output %461 : tensor<1x960x1x1xbf16> loc(#loc226) } -> tensor<1x960x1x1xbf16> loc(#loc226) %372 = xten_nn.subgraph (%arg5 = %371: tensor<1x960x1x1xbf16>, %arg6 = %36: tensor<128x960x1x1xbf16>, %arg7 = %35: tensor<128xbf16>) attributes { LayerName = "Conv_343", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "774", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> }, { Name = "770", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[128, 960, 1, 1]> : vector<4xindex> }, { Name = "aspp.aspp2.1.weight", UnknownDataFormat = true } ], OutputName = "Conv_343", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "775", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x960x1x1xbf16>, %arg9 = %arg6: tensor<128x960x1x1xbf16>, %arg10 = %arg7: tensor<128xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_343", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "774", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> }, { Name = "770", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[128, 960, 1, 1]> : vector<4xindex> }, { Name = "aspp.aspp2.1.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_343", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization 
= "C:8", Name = "775", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 1, 1]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<128x960x1x1xbf16>) -> tensor<128x1x1x960xbf16> loc(#loc227) %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x960x1x1xbf16>) -> tensor<1x1x1x960xbf16> loc(#loc227) %464 = tosa.conv2d %463, %462, %arg10 { PartOfLayerName = "Conv_343", PartOfOutputName = "Conv_343", dilation = array, pad = array, stride = array} : (tensor<1x1x1x960xbf16>, tensor<128x1x1x960xbf16>, tensor<128xbf16>) -> tensor<1x1x1x128xbf16> loc(#loc227) %465 = tosa.reshape %464 {new_shape = array} : (tensor<1x1x1x128xbf16>) -> tensor<1x128x1x1xbf16> loc(#loc227) xten_nn.output %465 : tensor<1x128x1x1xbf16> loc(#loc227) } -> tensor<1x128x1x1xbf16> loc(#loc227) xten_nn.output %461 : tensor<1x128x1x1xbf16> loc(#loc227) } -> tensor<1x128x1x1xbf16> loc(#loc227) %373 = xten_nn.subgraph (%arg5 = %372: tensor<1x128x1x1xbf16>) attributes { LayerName = "Sigmoid_344", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "775", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 1, 1]> : vector<4xindex> } ], OutputName = "Sigmoid_344", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results 
= [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "776", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 1, 1]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x128x1x1xbf16>) attributes { LayerName = "Sigmoid_344", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "775", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 1, 1]> : vector<4xindex> } ], OutputName = "Sigmoid_344", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "776", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 1, 1]> : vector<4xindex> } ], Specializes = "SigmoidTemplatedBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.ENABLE_FP16_AS_BF16 = 0 : ui8, config.aie_arch = "aie2p", config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.sigmoid %arg6 {LayerName = "Sigmoid_344", OutputName = "Sigmoid_344"} : (tensor<1x128x1x1xbf16>) -> tensor<1x128x1x1xbf16> loc(#loc228) xten_nn.output %462 : tensor<1x128x1x1xbf16> loc(#loc228) } -> tensor<1x128x1x1xbf16> loc(#loc228) xten_nn.output %461 : tensor<1x128x1x1xbf16> loc(#loc228) } -> tensor<1x128x1x1xbf16> loc(#loc228) %374 = xten_nn.subgraph (%arg5 = %373: tensor<1x128x1x1xbf16>) attributes { LayerName = "Mul_345_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "773", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 1, 1]> : vector<4xindex> } ], 
OutputName = "Mul_345_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "777", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> } ], Specializes = "TileAdf", With = { config.aie_arch = "aie2p", config.dtype = "bfloat16", config.i_dim_c = 128 : ui32, config.i_dim_h = 1 : ui32, config.i_dim_n = 1 : ui32, config.i_dim_w = 1 : ui32, config.rep_dim_c = 1 : ui32, config.rep_dim_h = 12 : ui32, config.rep_dim_w = 20 : ui32 }} { %461 = tosa.tile %arg5 {multiples = array} : (tensor<1x128x1x1xbf16>) -> tensor<1x128x12x20xbf16> loc(#loc229) xten_nn.output %461 : tensor<1x128x12x20xbf16> loc(#loc229) } -> tensor<1x128x12x20xbf16> loc(#loc229) %375 = xten_nn.subgraph (%arg5 = %369: tensor<1x960x12x20xbf16>, %arg6 = %34: tensor<128x960x1x1xbf16>, %arg7 = %33: tensor<128xbf16>, %arg8 = %374: tensor<1x128x12x20xbf16>) attributes { LayerName = "Conv_340", OfmShare = 3 : index, Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "770", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { Name = "1063", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[128, 960, 1, 1]> : vector<4xindex> }, { Name = "770", UnknownDataFormat = true }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1066", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_345_Duplicated#1", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "777", l3_extend_end = dense<0> : vector<4xindex>, 
l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg9 = %arg5: tensor<1x960x12x20xbf16>, %arg10 = %arg6: tensor<128x960x1x1xbf16>, %arg11 = %arg7: tensor<128xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_340", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "770", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> }, { Name = "1063", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[128, 960, 1, 1]> : vector<4xindex> }, { Name = "770", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_341", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "773", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true, NonNegativeOut = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 1 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 0.000000e+00 : bf16, config.lrelu_alpha_kernel = 0.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %463 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> 
: tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc359) %465 = tosa.reshape %arg10 {new_shape = array} : (tensor<128x960x1x1xbf16>) -> tensor<128x1x1x960xbf16> loc(#loc359) %466 = tosa.transpose %arg9, %464 : (tensor<1x960x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x960xbf16> loc(#loc359) %467 = tosa.conv2d %466, %465, %arg11 { PartOfLayerName = "Conv_340", PartOfOutputName = "Conv_340", dilation = array, pad = array, stride = array} : (tensor<1x12x20x960xbf16>, tensor<128x1x1x960xbf16>, tensor<128xbf16>) -> tensor<1x12x20x128xbf16> loc(#loc230) %468 = tosa.clamp %467 { LayerName = "Relu_341", OutputName = "Relu_341", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x12x20x128xbf16>) -> tensor<1x12x20x128xbf16> loc(#loc231) %469 = tosa.transpose %468, %463 : (tensor<1x12x20x128xbf16>, tensor<4xi32>) -> tensor<1x128x12x20xbf16> loc(#loc359) xten_nn.output %469 : tensor<1x128x12x20xbf16> loc(#loc231) } -> tensor<1x128x12x20xbf16> loc(#loc359) %462 = xten_nn.subgraph (%arg9 = %461: tensor<1x128x12x20xbf16>, %arg10 = %arg8: tensor<1x128x12x20xbf16>) attributes { LayerName = "Mul_345_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "773", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "1066", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_345_Duplicated#1", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "777", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = 
{ Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %463 = tosa.mul %arg9, %arg10 { LayerName = "Mul_345", OutputName = "Mul_345", shift = 0 : i8} : (tensor<1x128x12x20xbf16>, tensor<1x128x12x20xbf16>) -> tensor<1x128x12x20xbf16> loc(#loc229) xten_nn.output %463 : tensor<1x128x12x20xbf16> loc(#loc229) } -> tensor<1x128x12x20xbf16> loc(#loc229) xten_nn.output %462 : tensor<1x128x12x20xbf16> loc(#loc229) } -> tensor<1x128x12x20xbf16> loc(#loc358) %376 = xten_nn.subgraph (%arg5 = %375: tensor<1x128x12x20xbf16>) attributes { LayerName = "Split_349_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "777", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> } ], OutputName = "Split_349_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "781", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], Specializes = "SliceHCWC8Adf", With = { config.aie_arch = "aie2p", config.axis_letter = "C", config.dim_c = 128 : ui32, config.dim_h = 12 : ui32, config.dim_w = 20 : ui32, config.dtype = "bfloat16", config.end = 64 : ui32, config.start = 0 : ui32, config.step = 1 : ui32 }} { %461 = tosa.slice %arg5 { PartOfLayerName = "Split_349", PartOfOutputName = "Split_349", size = array, start = array} : (tensor<1x128x12x20xbf16>) -> tensor<1x64x12x20xbf16> loc(#loc232) xten_nn.output %461 : tensor<1x64x12x20xbf16> loc(#loc232) } -> tensor<1x64x12x20xbf16> loc(#loc232) %377 = xten_nn.subgraph (%arg5 = %375: tensor<1x128x12x20xbf16>) 
attributes { LayerName = "Split_349_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "777", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> } ], OutputName = "Split_349_Duplicated#1", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "781", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], Specializes = "SliceHCWC8Adf", With = { config.aie_arch = "aie2p", config.axis_letter = "C", config.dim_c = 128 : ui32, config.dim_h = 12 : ui32, config.dim_w = 20 : ui32, config.dtype = "bfloat16", config.end = 128 : ui32, config.start = 64 : ui32, config.step = 1 : ui32 }} { %461 = tosa.slice %arg5 { PartOfLayerName = "Split_349", PartOfOutputName = "Split_349", size = array, start = array} : (tensor<1x128x12x20xbf16>) -> tensor<1x64x12x20xbf16> loc(#loc232) xten_nn.output %461 : tensor<1x64x12x20xbf16> loc(#loc232) } -> tensor<1x64x12x20xbf16> loc(#loc232) %378 = xten_nn.subgraph (%arg5 = %377: tensor<1x64x12x20xbf16>, %arg6 = %arg4: tensor<1x64x12x20xbf16>) attributes { Axis = 1 : i32, LayerName = "Concat_350", Op = "Concat", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], 
OutputName = "Concat_350", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "PseudoOp", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "783", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> } ], current_data_format = "NCHW", data_format = "HCWN"} { %461 = tosa.concat %arg5, %arg6 { LayerName = "Concat_350", OutputName = "Concat_350", axis = 1 : i32} : (tensor<1x64x12x20xbf16>, tensor<1x64x12x20xbf16>) -> tensor<1x128x12x20xbf16> loc(#loc233) xten_nn.output %461 : tensor<1x128x12x20xbf16> loc(#loc233) } -> tensor<1x128x12x20xbf16> loc(#loc233) %379 = xten_nn.subgraph (%arg5 = %378: tensor<1x128x12x20xbf16>, %arg6 = %32: tensor<128x128x3x3xbf16>, %arg7 = %31: tensor<128xbf16>) attributes { LayerName = "Conv_351", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "783", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> }, { Name = "397", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[128, 128, 3, 3]> : vector<4xindex> }, { Name = "decoder.decode4.gru.ih.0.weight", UnknownDataFormat = true } ], OutputName = "Conv_351", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "784", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x128x12x20xbf16>, %arg9 = %arg6: tensor<128x128x3x3xbf16>, %arg10 = %arg7: tensor<128xbf16>) attributes { Dilations = array, HWPadding = [[1, 1], [1, 1]], LayerName = 
"Conv_351", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "783", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> }, { Name = "397", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[128, 128, 3, 3]> : vector<4xindex> }, { Name = "decoder.decode4.gru.ih.0.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_351", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "784", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 3 : ui8, config.ksize.width = 3 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = tosa.transpose %arg9, %463 : (tensor<128x128x3x3xbf16>, tensor<4xi32>) -> tensor<128x3x3x128xbf16> loc(#loc234) %465 = tosa.transpose %arg8, %463 : (tensor<1x128x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x128xbf16> loc(#loc234) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_351", PartOfOutputName = "Conv_351", 
dilation = array, pad = array, stride = array} : (tensor<1x12x20x128xbf16>, tensor<128x3x3x128xbf16>, tensor<128xbf16>) -> tensor<1x12x20x128xbf16> loc(#loc234) %467 = tosa.transpose %466, %462 : (tensor<1x12x20x128xbf16>, tensor<4xi32>) -> tensor<1x128x12x20xbf16> loc(#loc234) xten_nn.output %467 : tensor<1x128x12x20xbf16> loc(#loc234) } -> tensor<1x128x12x20xbf16> loc(#loc234) xten_nn.output %461 : tensor<1x128x12x20xbf16> loc(#loc234) } -> tensor<1x128x12x20xbf16> loc(#loc234) %380 = xten_nn.subgraph (%arg5 = %379: tensor<1x128x12x20xbf16>) attributes { LayerName = "Sigmoid_352", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "784", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> } ], OutputName = "Sigmoid_352", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "785", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x128x12x20xbf16>) attributes { LayerName = "Sigmoid_352", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "784", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> } ], OutputName = "Sigmoid_352", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "785", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> } ], Specializes = "SigmoidTemplatedBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { 
config.ENABLE_FP16_AS_BF16 = 0 : ui8, config.aie_arch = "aie2p", config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.sigmoid %arg6 {LayerName = "Sigmoid_352", OutputName = "Sigmoid_352"} : (tensor<1x128x12x20xbf16>) -> tensor<1x128x12x20xbf16> loc(#loc235) xten_nn.output %462 : tensor<1x128x12x20xbf16> loc(#loc235) } -> tensor<1x128x12x20xbf16> loc(#loc235) xten_nn.output %461 : tensor<1x128x12x20xbf16> loc(#loc235) } -> tensor<1x128x12x20xbf16> loc(#loc235) %381 = xten_nn.subgraph (%arg5 = %380: tensor<1x128x12x20xbf16>) attributes { LayerName = "Split_353_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "785", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> } ], OutputName = "Split_353_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "786", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], Specializes = "SliceHCWC8Adf", With = { config.aie_arch = "aie2p", config.axis_letter = "C", config.dim_c = 128 : ui32, config.dim_h = 12 : ui32, config.dim_w = 20 : ui32, config.dtype = "bfloat16", config.end = 64 : ui32, config.start = 0 : ui32, config.step = 1 : ui32 }} { %461 = tosa.slice %arg5 { PartOfLayerName = "Split_353", PartOfOutputName = "Split_353", size = array, start = array} : (tensor<1x128x12x20xbf16>) -> tensor<1x64x12x20xbf16> loc(#loc236) xten_nn.output %461 : tensor<1x64x12x20xbf16> loc(#loc236) } -> tensor<1x64x12x20xbf16> loc(#loc236) %382 = xten_nn.subgraph (%arg5 = %380: tensor<1x128x12x20xbf16>) attributes { 
LayerName = "Split_353_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "785", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> } ], OutputName = "Split_353_Duplicated#1", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "786", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], Specializes = "SliceHCWC8Adf", With = { config.aie_arch = "aie2p", config.axis_letter = "C", config.dim_c = 128 : ui32, config.dim_h = 12 : ui32, config.dim_w = 20 : ui32, config.dtype = "bfloat16", config.end = 128 : ui32, config.start = 64 : ui32, config.step = 1 : ui32 }} { %461 = tosa.slice %arg5 { PartOfLayerName = "Split_353", PartOfOutputName = "Split_353", size = array, start = array} : (tensor<1x128x12x20xbf16>) -> tensor<1x64x12x20xbf16> loc(#loc236) xten_nn.output %461 : tensor<1x64x12x20xbf16> loc(#loc236) } -> tensor<1x64x12x20xbf16> loc(#loc236) %383 = xten_nn.subgraph (%arg5 = %30: tensor<1x64x12x20xbf16>, %arg6 = %382: tensor<1x64x12x20xbf16>) attributes { LayerName = "Sub_359", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "890", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "787", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], OutputName = "Sub_359", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = 
"HCWN", L3Vectorization = "C:8", Name = "793", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x64x12x20xbf16>, %arg8 = %arg6: tensor<1x64x12x20xbf16>) attributes { LayerName = "Sub_359", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "890", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "787", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], OutputName = "Sub_359", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "793", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], Specializes = "SubBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.sub %arg7, %arg8 {LayerName = "Sub_359", OutputName = "Sub_359"} : (tensor<1x64x12x20xbf16>, tensor<1x64x12x20xbf16>) -> tensor<1x64x12x20xbf16> loc(#loc5) xten_nn.output %462 : tensor<1x64x12x20xbf16> loc(#loc5) } -> tensor<1x64x12x20xbf16> loc(#loc5) xten_nn.output %461 : tensor<1x64x12x20xbf16> loc(#loc5) } -> tensor<1x64x12x20xbf16> loc(#loc5) %384 = xten_nn.subgraph (%arg5 = %383: tensor<1x64x12x20xbf16>, %arg6 = %arg4: tensor<1x64x12x20xbf16>) attributes { LayerName = "Mul_360", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count 
= dense<[1, 64, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_360", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x64x12x20xbf16>, %arg8 = %arg6: tensor<1x64x12x20xbf16>) attributes { LayerName = "Mul_360", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_360", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_360", OutputName = "Mul_360", shift = 0 : i8} : (tensor<1x64x12x20xbf16>, tensor<1x64x12x20xbf16>) -> tensor<1x64x12x20xbf16> loc(#loc237) xten_nn.output %462 : tensor<1x64x12x20xbf16> loc(#loc237) } -> tensor<1x64x12x20xbf16> 
loc(#loc237) xten_nn.output %461 : tensor<1x64x12x20xbf16> loc(#loc237) } -> tensor<1x64x12x20xbf16> loc(#loc237) %385 = xten_nn.subgraph (%arg5 = %381: tensor<1x64x12x20xbf16>, %arg6 = %arg4: tensor<1x64x12x20xbf16>) attributes { LayerName = "Mul_354", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "786", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "785", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_354", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "788", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x64x12x20xbf16>, %arg8 = %arg6: tensor<1x64x12x20xbf16>) attributes { LayerName = "Mul_354", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "786", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "785", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_354", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "788", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : 
vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_354", OutputName = "Mul_354", shift = 0 : i8} : (tensor<1x64x12x20xbf16>, tensor<1x64x12x20xbf16>) -> tensor<1x64x12x20xbf16> loc(#loc238) xten_nn.output %462 : tensor<1x64x12x20xbf16> loc(#loc238) } -> tensor<1x64x12x20xbf16> loc(#loc238) xten_nn.output %461 : tensor<1x64x12x20xbf16> loc(#loc238) } -> tensor<1x64x12x20xbf16> loc(#loc238) %386 = xten_nn.subgraph (%arg5 = %377: tensor<1x64x12x20xbf16>, %arg6 = %385: tensor<1x64x12x20xbf16>) attributes { Axis = 1 : i32, LayerName = "Concat_355", Op = "Concat", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "782", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "788", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], OutputName = "Concat_355", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "PseudoOp", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "789", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> } ], current_data_format = "NCHW", data_format = "HCWN"} { %461 = tosa.concat %arg5, %arg6 { LayerName = "Concat_355", OutputName = "Concat_355", axis = 1 : i32} : (tensor<1x64x12x20xbf16>, tensor<1x64x12x20xbf16>) -> tensor<1x128x12x20xbf16> loc(#loc239) xten_nn.output %461 : 
tensor<1x128x12x20xbf16> loc(#loc239) } -> tensor<1x128x12x20xbf16> loc(#loc239) %387 = xten_nn.subgraph (%arg5 = %386: tensor<1x128x12x20xbf16>, %arg6 = %29: tensor<64x128x3x3xbf16>, %arg7 = %28: tensor<64xbf16>) attributes { LayerName = "Conv_356", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "789", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> }, { Name = "788", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[64, 128, 3, 3]> : vector<4xindex> }, { Name = "decoder.decode4.gru.hh.0.weight", UnknownDataFormat = true } ], OutputName = "Conv_356", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "790", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x128x12x20xbf16>, %arg9 = %arg6: tensor<64x128x3x3xbf16>, %arg10 = %arg7: tensor<64xbf16>) attributes { Dilations = array, HWPadding = [[1, 1], [1, 1]], LayerName = "Conv_356", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "789", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> }, { Name = "788", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[64, 128, 3, 3]> : vector<4xindex> }, { Name = "decoder.decode4.gru.hh.0.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_356", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", 
L3Vectorization = "C:8", Name = "790", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 3 : ui8, config.ksize.width = 3 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = tosa.transpose %arg9, %463 : (tensor<64x128x3x3xbf16>, tensor<4xi32>) -> tensor<64x3x3x128xbf16> loc(#loc240) %465 = tosa.transpose %arg8, %463 : (tensor<1x128x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x128xbf16> loc(#loc240) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_356", PartOfOutputName = "Conv_356", dilation = array, pad = array, stride = array} : (tensor<1x12x20x128xbf16>, tensor<64x3x3x128xbf16>, tensor<64xbf16>) -> tensor<1x12x20x64xbf16> loc(#loc240) %467 = tosa.transpose %466, %462 : (tensor<1x12x20x64xbf16>, tensor<4xi32>) -> tensor<1x64x12x20xbf16> loc(#loc240) xten_nn.output %467 : tensor<1x64x12x20xbf16> loc(#loc240) } -> tensor<1x64x12x20xbf16> loc(#loc240) xten_nn.output %461 : tensor<1x64x12x20xbf16> loc(#loc240) } -> tensor<1x64x12x20xbf16> loc(#loc240) %388 = xten_nn.subgraph (%arg5 = %387: tensor<1x64x12x20xbf16>) attributes { LayerName = "Tanh_357", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", 
Name = "790", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], OutputName = "Tanh_357", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "791", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x64x12x20xbf16>) attributes { LayerName = "Tanh_357", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "790", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], OutputName = "Tanh_357", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "791", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], Specializes = "TanhTemplatedBf16", Traits = { Elementwise = true, Unary = true }, With = { config.ENABLE_FP16_AS_BF16 = 0 : ui8, config.aie_arch = "aie2p", config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.tanh %arg6 {LayerName = "Tanh_357", OutputName = "Tanh_357"} : (tensor<1x64x12x20xbf16>) -> tensor<1x64x12x20xbf16> loc(#loc241) xten_nn.output %462 : tensor<1x64x12x20xbf16> loc(#loc241) } -> tensor<1x64x12x20xbf16> loc(#loc241) xten_nn.output %461 : tensor<1x64x12x20xbf16> loc(#loc241) } -> tensor<1x64x12x20xbf16> loc(#loc241) %389 = xten_nn.subgraph (%arg5 = %382: tensor<1x64x12x20xbf16>, %arg6 = %388: tensor<1x64x12x20xbf16>) attributes { LayerName = "Mul_361", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", 
L3Vectorization = "C:8", Name = "787", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "791", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_361", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "795", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x64x12x20xbf16>, %arg8 = %arg6: tensor<1x64x12x20xbf16>) attributes { LayerName = "Mul_361", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "787", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "791", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], OutputName = "Mul_361", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "795", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_361", OutputName = "Mul_361", shift = 0 : i8} : 
(tensor<1x64x12x20xbf16>, tensor<1x64x12x20xbf16>) -> tensor<1x64x12x20xbf16> loc(#loc242) xten_nn.output %462 : tensor<1x64x12x20xbf16> loc(#loc242) } -> tensor<1x64x12x20xbf16> loc(#loc242) xten_nn.output %461 : tensor<1x64x12x20xbf16> loc(#loc242) } -> tensor<1x64x12x20xbf16> loc(#loc242) %390 = xten_nn.subgraph (%arg5 = %384: tensor<1x64x12x20xbf16>, %arg6 = %389: tensor<1x64x12x20xbf16>) attributes { LayerName = "Add_362", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "794", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "793", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], OutputName = "Add_362", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "796", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x64x12x20xbf16>, %arg8 = %arg6: tensor<1x64x12x20xbf16>) attributes { LayerName = "Add_362", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "794", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "793", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], OutputName = "Add_362", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", 
L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "796", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], Specializes = "AddBf16", Traits = { Binary = true, Elementwise = true }, With = { config.act = 0 : ui8, config.act_type = "LINEAR", config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.add %arg7, %arg8 {LayerName = "Add_362", OutputName = "Add_362"} : (tensor<1x64x12x20xbf16>, tensor<1x64x12x20xbf16>) -> tensor<1x64x12x20xbf16> loc(#loc243) xten_nn.output %462 : tensor<1x64x12x20xbf16> loc(#loc243) } -> tensor<1x64x12x20xbf16> loc(#loc243) xten_nn.output %461 : tensor<1x64x12x20xbf16> loc(#loc243) } -> tensor<1x64x12x20xbf16> loc(#loc243) %391 = xten_nn.subgraph (%arg5 = %376: tensor<1x64x12x20xbf16>, %arg6 = %390: tensor<1x64x12x20xbf16>) attributes { Axis = 1 : i32, LayerName = "Concat_363", Op = "Concat", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "781", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "777", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> } ], OutputName = "Concat_363", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "PseudoOp", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "797", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> } ], current_data_format = "NCHW", data_format = "HCWN"} { %461 = tosa.concat %arg5, 
%arg6 { LayerName = "Concat_363", OutputName = "Concat_363", axis = 1 : i32} : (tensor<1x64x12x20xbf16>, tensor<1x64x12x20xbf16>) -> tensor<1x128x12x20xbf16> loc(#loc244) xten_nn.output %461 : tensor<1x128x12x20xbf16> loc(#loc244) } -> tensor<1x128x12x20xbf16> loc(#loc244) %392 = xten_nn.subgraph (%arg5 = %391: tensor<1x128x12x20xbf16>) attributes { LayerName = "Resize_365", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "797", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> } ], OutputName = "Resize_365", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "802", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 24, 40]> : vector<4xindex> } ], Specializes = "ResizeAdf", With = { config.co_trans_mode = 1 : ui32, config.dim_0 = 1 : ui32, config.dim_1 = 128 : ui32, config.dim_2 = 12 : ui32, config.dim_3 = 20 : ui32, config.dtype = "bfloat16", config.mode = 1 : ui32, config.nearest_mode = 0 : ui32, config.output_H = 24 : ui32, config.output_W = 40 : ui32 }} { %461 = xten_nn.resize %arg5 { LayerName = "Resize_365", OutputName = "Resize_365", coordinate_transformation_mode = 1 : i64, mode = 1 : i64, nearest_mode = 0 : i64, scales = array} : (tensor<1x128x12x20xbf16>) -> tensor<1x128x24x40xbf16> loc(#loc245) xten_nn.output %461 : tensor<1x128x24x40xbf16> loc(#loc245) } -> tensor<1x128x24x40xbf16> loc(#loc245) %393 = xten_nn.subgraph (%arg5 = %392: tensor<1x128x24x40xbf16>) attributes { LayerName = "Slice_371", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "802", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 24, 40]> : 
vector<4xindex> } ], OutputName = "Slice_371", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "812", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 23, 40]> : vector<4xindex> } ], Specializes = "SliceHCWC8Adf", With = { config.aie_arch = "aie2p", config.axis_letter = "H", config.dim_c = 128 : ui32, config.dim_h = 24 : ui32, config.dim_w = 40 : ui32, config.dtype = "bfloat16", config.end = 23 : ui32, config.start = 0 : ui32, config.step = 1 : ui32 }} { %461 = tosa.slice %arg5 { LayerName = "Slice_371", OutputName = "Slice_371", size = array, start = array} : (tensor<1x128x24x40xbf16>) -> tensor<1x128x23x40xbf16> loc(#loc246) xten_nn.output %461 : tensor<1x128x23x40xbf16> loc(#loc246) } -> tensor<1x128x23x40xbf16> loc(#loc246) %394 = xten_nn.subgraph (%arg5 = %393: tensor<1x128x23x40xbf16>, %arg6 = %220: tensor<1x40x23x40xbf16>, %arg7 = %169: tensor<1x3x23x40xbf16>) attributes { Axis = 1 : i32, LayerName = "Concat_372", Op = "Concat", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "812", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 128, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "802", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "496", Port = "data_io.ifm3", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 3, 23, 40]> : vector<4xindex> } ], OutputName = 
"Concat_372", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "PseudoOp", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "813", Port = "data_io.ofm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 171, 23, 40]> : vector<4xindex> } ], current_data_format = "NCHW", data_format = "HCWN"} { %461 = tosa.concat %arg5, %arg6, %arg7 { LayerName = "Concat_372", OutputName = "Concat_372", axis = 1 : i32} : (tensor<1x128x23x40xbf16>, tensor<1x40x23x40xbf16>, tensor<1x3x23x40xbf16>) -> tensor<1x171x23x40xbf16> loc(#loc247) xten_nn.output %461 : tensor<1x171x23x40xbf16> loc(#loc247) } -> tensor<1x171x23x40xbf16> loc(#loc247) %395 = xten_nn.subgraph (%arg5 = %394: tensor<1x171x23x40xbf16>, %arg6 = %27: tensor<80x171x3x3xbf16>, %arg7 = %26: tensor<80xbf16>) attributes { LayerName = "Conv_373", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "813", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 171, 23, 40]> : vector<4xindex> }, { Name = "812", UnknownDataFormat = true, l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[80, 171, 3, 3]> : vector<4xindex> }, { Name = "813", UnknownDataFormat = true } ], OutputName = "Relu_374", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "816", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x171x23x40xbf16>, %arg9 = %arg6: tensor<80x171x3x3xbf16>, %arg10 = %arg7: tensor<80xbf16>) attributes { Dilations = array, HWPadding = [[1, 1], [1, 
1]], LayerName = "Conv_373", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "813", Port = "data_io.ifm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 171, 23, 40]> : vector<4xindex> }, { Name = "812", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[80, 171, 3, 3]> : vector<4xindex> }, { Name = "813", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_374", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "816", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true, NonNegativeOut = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 1 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 3 : ui8, config.ksize.width = 3 : ui8, config.lrelu_alpha = 0.000000e+00 : bf16, config.lrelu_alpha_kernel = 0.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = tosa.transpose %arg9, %463 : (tensor<80x171x3x3xbf16>, tensor<4xi32>) -> tensor<80x3x3x171xbf16> loc(#loc360) %465 = tosa.transpose %arg8, %463 : (tensor<1x171x23x40xbf16>, tensor<4xi32>) -> tensor<1x23x40x171xbf16> loc(#loc360) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_373", 
PartOfOutputName = "Conv_373", dilation = array, pad = array, stride = array} : (tensor<1x23x40x171xbf16>, tensor<80x3x3x171xbf16>, tensor<80xbf16>) -> tensor<1x23x40x80xbf16> loc(#loc248) %467 = tosa.clamp %466 { LayerName = "Relu_374", OutputName = "Relu_374", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x23x40x80xbf16>) -> tensor<1x23x40x80xbf16> loc(#loc249) %468 = tosa.transpose %467, %462 : (tensor<1x23x40x80xbf16>, tensor<4xi32>) -> tensor<1x80x23x40xbf16> loc(#loc360) xten_nn.output %468 : tensor<1x80x23x40xbf16> loc(#loc249) } -> tensor<1x80x23x40xbf16> loc(#loc360) xten_nn.output %461 : tensor<1x80x23x40xbf16> loc(#loc360) } -> tensor<1x80x23x40xbf16> loc(#loc360) %396 = xten_nn.subgraph (%arg5 = %395: tensor<1x80x23x40xbf16>) attributes { LayerName = "Split_375_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "816", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> } ], OutputName = "Split_375_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "817", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], Specializes = "SliceHCWC8Adf", With = { config.aie_arch = "aie2p", config.axis_letter = "C", config.dim_c = 80 : ui32, config.dim_h = 23 : ui32, config.dim_w = 40 : ui32, config.dtype = "bfloat16", config.end = 40 : ui32, config.start = 0 : ui32, config.step = 1 : ui32 }} { %461 = tosa.slice %arg5 { PartOfLayerName = "Split_375", PartOfOutputName = "Split_375", size = array, start = array} : (tensor<1x80x23x40xbf16>) -> 
tensor<1x40x23x40xbf16> loc(#loc250) xten_nn.output %461 : tensor<1x40x23x40xbf16> loc(#loc250) } -> tensor<1x40x23x40xbf16> loc(#loc250) %397 = xten_nn.subgraph (%arg5 = %395: tensor<1x80x23x40xbf16>) attributes { LayerName = "Split_375_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "816", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> } ], OutputName = "Split_375_Duplicated#1", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "817", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], Specializes = "SliceHCWC8Adf", With = { config.aie_arch = "aie2p", config.axis_letter = "C", config.dim_c = 80 : ui32, config.dim_h = 23 : ui32, config.dim_w = 40 : ui32, config.dtype = "bfloat16", config.end = 80 : ui32, config.start = 40 : ui32, config.step = 1 : ui32 }} { %461 = tosa.slice %arg5 { PartOfLayerName = "Split_375", PartOfOutputName = "Split_375", size = array, start = array} : (tensor<1x80x23x40xbf16>) -> tensor<1x40x23x40xbf16> loc(#loc250) xten_nn.output %461 : tensor<1x40x23x40xbf16> loc(#loc250) } -> tensor<1x40x23x40xbf16> loc(#loc250) %398 = xten_nn.subgraph (%arg5 = %397: tensor<1x40x23x40xbf16>, %arg6 = %arg3: tensor<1x40x23x40xbf16>) attributes { Axis = 1 : i32, LayerName = "Concat_376", Op = "Concat", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", 
L3Vectorization = "C:8", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], OutputName = "Concat_376", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "PseudoOp", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "819", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> } ], current_data_format = "NCHW", data_format = "HCWN"} { %461 = tosa.concat %arg5, %arg6 { LayerName = "Concat_376", OutputName = "Concat_376", axis = 1 : i32} : (tensor<1x40x23x40xbf16>, tensor<1x40x23x40xbf16>) -> tensor<1x80x23x40xbf16> loc(#loc251) xten_nn.output %461 : tensor<1x80x23x40xbf16> loc(#loc251) } -> tensor<1x80x23x40xbf16> loc(#loc251) %399 = xten_nn.subgraph (%arg5 = %398: tensor<1x80x23x40xbf16>, %arg6 = %25: tensor<80x80x3x3xbf16>, %arg7 = %24: tensor<80xbf16>) attributes { LayerName = "Conv_377", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "819", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> }, { Name = "396", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[80, 80, 3, 3]> : vector<4xindex> }, { Name = "decoder.decode3.gru.ih.0.weight", UnknownDataFormat = true } ], OutputName = "Conv_377", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "820", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 
= %arg5: tensor<1x80x23x40xbf16>, %arg9 = %arg6: tensor<80x80x3x3xbf16>, %arg10 = %arg7: tensor<80xbf16>) attributes { Dilations = array, HWPadding = [[1, 1], [1, 1]], LayerName = "Conv_377", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "819", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> }, { Name = "396", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[80, 80, 3, 3]> : vector<4xindex> }, { Name = "decoder.decode3.gru.ih.0.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_377", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "820", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 3 : ui8, config.ksize.width = 3 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = tosa.transpose %arg9, %463 : (tensor<80x80x3x3xbf16>, tensor<4xi32>) -> tensor<80x3x3x80xbf16> loc(#loc252) %465 = tosa.transpose %arg8, %463 : 
(tensor<1x80x23x40xbf16>, tensor<4xi32>) -> tensor<1x23x40x80xbf16> loc(#loc252) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_377", PartOfOutputName = "Conv_377", dilation = array, pad = array, stride = array} : (tensor<1x23x40x80xbf16>, tensor<80x3x3x80xbf16>, tensor<80xbf16>) -> tensor<1x23x40x80xbf16> loc(#loc252) %467 = tosa.transpose %466, %462 : (tensor<1x23x40x80xbf16>, tensor<4xi32>) -> tensor<1x80x23x40xbf16> loc(#loc252) xten_nn.output %467 : tensor<1x80x23x40xbf16> loc(#loc252) } -> tensor<1x80x23x40xbf16> loc(#loc252) xten_nn.output %461 : tensor<1x80x23x40xbf16> loc(#loc252) } -> tensor<1x80x23x40xbf16> loc(#loc252) %400 = xten_nn.subgraph (%arg5 = %399: tensor<1x80x23x40xbf16>) attributes { LayerName = "Sigmoid_378", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "820", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> } ], OutputName = "Sigmoid_378", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "821", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x80x23x40xbf16>) attributes { LayerName = "Sigmoid_378", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "820", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> } ], OutputName = "Sigmoid_378", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "821", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 
23, 40]> : vector<4xindex> } ], Specializes = "SigmoidTemplatedBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.ENABLE_FP16_AS_BF16 = 0 : ui8, config.aie_arch = "aie2p", config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.sigmoid %arg6 {LayerName = "Sigmoid_378", OutputName = "Sigmoid_378"} : (tensor<1x80x23x40xbf16>) -> tensor<1x80x23x40xbf16> loc(#loc253) xten_nn.output %462 : tensor<1x80x23x40xbf16> loc(#loc253) } -> tensor<1x80x23x40xbf16> loc(#loc253) xten_nn.output %461 : tensor<1x80x23x40xbf16> loc(#loc253) } -> tensor<1x80x23x40xbf16> loc(#loc253) %401 = xten_nn.subgraph (%arg5 = %400: tensor<1x80x23x40xbf16>) attributes { LayerName = "Split_379_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "821", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> } ], OutputName = "Split_379_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "822", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], Specializes = "SliceHCWC8Adf", With = { config.aie_arch = "aie2p", config.axis_letter = "C", config.dim_c = 80 : ui32, config.dim_h = 23 : ui32, config.dim_w = 40 : ui32, config.dtype = "bfloat16", config.end = 40 : ui32, config.start = 0 : ui32, config.step = 1 : ui32 }} { %461 = tosa.slice %arg5 { PartOfLayerName = "Split_379", PartOfOutputName = "Split_379", size = array, start = array} : (tensor<1x80x23x40xbf16>) -> tensor<1x40x23x40xbf16> loc(#loc254) xten_nn.output %461 : tensor<1x40x23x40xbf16> 
loc(#loc254) } -> tensor<1x40x23x40xbf16> loc(#loc254) %402 = xten_nn.subgraph (%arg5 = %400: tensor<1x80x23x40xbf16>) attributes { LayerName = "Split_379_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "821", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> } ], OutputName = "Split_379_Duplicated#1", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "822", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], Specializes = "SliceHCWC8Adf", With = { config.aie_arch = "aie2p", config.axis_letter = "C", config.dim_c = 80 : ui32, config.dim_h = 23 : ui32, config.dim_w = 40 : ui32, config.dtype = "bfloat16", config.end = 80 : ui32, config.start = 40 : ui32, config.step = 1 : ui32 }} { %461 = tosa.slice %arg5 { PartOfLayerName = "Split_379", PartOfOutputName = "Split_379", size = array, start = array} : (tensor<1x80x23x40xbf16>) -> tensor<1x40x23x40xbf16> loc(#loc254) xten_nn.output %461 : tensor<1x40x23x40xbf16> loc(#loc254) } -> tensor<1x40x23x40xbf16> loc(#loc254) %403 = xten_nn.subgraph (%arg5 = %23: tensor<1x40x23x40xbf16>, %arg6 = %402: tensor<1x40x23x40xbf16>) attributes { LayerName = "Sub_385", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "890", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "823", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], OutputName = "Sub_385", Overlay = 
"4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "829", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x40x23x40xbf16>, %arg8 = %arg6: tensor<1x40x23x40xbf16>) attributes { LayerName = "Sub_385", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "890", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "823", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], OutputName = "Sub_385", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "829", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], Specializes = "SubBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.sub %arg7, %arg8 {LayerName = "Sub_385", OutputName = "Sub_385"} : (tensor<1x40x23x40xbf16>, tensor<1x40x23x40xbf16>) -> tensor<1x40x23x40xbf16> loc(#loc4) xten_nn.output %462 : tensor<1x40x23x40xbf16> loc(#loc4) } -> tensor<1x40x23x40xbf16> loc(#loc4) xten_nn.output %461 : tensor<1x40x23x40xbf16> loc(#loc4) } -> tensor<1x40x23x40xbf16> loc(#loc4) %404 = xten_nn.subgraph (%arg5 = %403: tensor<1x40x23x40xbf16>, %arg6 = %arg3: tensor<1x40x23x40xbf16>) attributes { LayerName = "Mul_386", Operands = [ { 
CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], OutputName = "Mul_386", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x40x23x40xbf16>, %arg8 = %arg6: tensor<1x40x23x40xbf16>) attributes { LayerName = "Mul_386", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], OutputName = "Mul_386", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_386", OutputName = "Mul_386", shift = 0 : i8} : (tensor<1x40x23x40xbf16>, 
tensor<1x40x23x40xbf16>) -> tensor<1x40x23x40xbf16> loc(#loc255) xten_nn.output %462 : tensor<1x40x23x40xbf16> loc(#loc255) } -> tensor<1x40x23x40xbf16> loc(#loc255) xten_nn.output %461 : tensor<1x40x23x40xbf16> loc(#loc255) } -> tensor<1x40x23x40xbf16> loc(#loc255) %405 = xten_nn.subgraph (%arg5 = %401: tensor<1x40x23x40xbf16>, %arg6 = %arg3: tensor<1x40x23x40xbf16>) attributes { LayerName = "Mul_380", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "822", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "821", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], OutputName = "Mul_380", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "824", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x40x23x40xbf16>, %arg8 = %arg6: tensor<1x40x23x40xbf16>) attributes { LayerName = "Mul_380", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "822", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "821", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], OutputName = "Mul_380", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", 
L3Vectorization = "C:8", Name = "824", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_380", OutputName = "Mul_380", shift = 0 : i8} : (tensor<1x40x23x40xbf16>, tensor<1x40x23x40xbf16>) -> tensor<1x40x23x40xbf16> loc(#loc256) xten_nn.output %462 : tensor<1x40x23x40xbf16> loc(#loc256) } -> tensor<1x40x23x40xbf16> loc(#loc256) xten_nn.output %461 : tensor<1x40x23x40xbf16> loc(#loc256) } -> tensor<1x40x23x40xbf16> loc(#loc256) %406 = xten_nn.subgraph (%arg5 = %397: tensor<1x40x23x40xbf16>, %arg6 = %405: tensor<1x40x23x40xbf16>) attributes { Axis = 1 : i32, LayerName = "Concat_381", Op = "Concat", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "818", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "824", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], OutputName = "Concat_381", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "PseudoOp", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "825", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> } ], current_data_format = "NCHW", data_format = "HCWN"} { %461 = tosa.concat %arg5, %arg6 { LayerName = "Concat_381", OutputName = 
"Concat_381", axis = 1 : i32} : (tensor<1x40x23x40xbf16>, tensor<1x40x23x40xbf16>) -> tensor<1x80x23x40xbf16> loc(#loc257) xten_nn.output %461 : tensor<1x80x23x40xbf16> loc(#loc257) } -> tensor<1x80x23x40xbf16> loc(#loc257) %407 = xten_nn.subgraph (%arg5 = %406: tensor<1x80x23x40xbf16>, %arg6 = %22: tensor<40x80x3x3xbf16>, %arg7 = %21: tensor<40xbf16>) attributes { LayerName = "Conv_382", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "825", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> }, { Name = "824", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[40, 80, 3, 3]> : vector<4xindex> }, { Name = "decoder.decode3.gru.hh.0.weight", UnknownDataFormat = true } ], OutputName = "Conv_382", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "826", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x80x23x40xbf16>, %arg9 = %arg6: tensor<40x80x3x3xbf16>, %arg10 = %arg7: tensor<40xbf16>) attributes { Dilations = array, HWPadding = [[1, 1], [1, 1]], LayerName = "Conv_382", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "825", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> }, { Name = "824", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[40, 80, 3, 3]> : vector<4xindex> }, { Name = "decoder.decode3.gru.hh.0.weight", Port = "data_io.wts", SubPort = "bias", 
UnknownDataFormat = true } ], OutputName = "Conv_382", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "826", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 3 : ui8, config.ksize.width = 3 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = tosa.transpose %arg9, %463 : (tensor<40x80x3x3xbf16>, tensor<4xi32>) -> tensor<40x3x3x80xbf16> loc(#loc258) %465 = tosa.transpose %arg8, %463 : (tensor<1x80x23x40xbf16>, tensor<4xi32>) -> tensor<1x23x40x80xbf16> loc(#loc258) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_382", PartOfOutputName = "Conv_382", dilation = array, pad = array, stride = array} : (tensor<1x23x40x80xbf16>, tensor<40x3x3x80xbf16>, tensor<40xbf16>) -> tensor<1x23x40x40xbf16> loc(#loc258) %467 = tosa.transpose %466, %462 : (tensor<1x23x40x40xbf16>, tensor<4xi32>) -> tensor<1x40x23x40xbf16> loc(#loc258) xten_nn.output %467 : tensor<1x40x23x40xbf16> loc(#loc258) } -> tensor<1x40x23x40xbf16> loc(#loc258) xten_nn.output %461 : tensor<1x40x23x40xbf16> loc(#loc258) } -> tensor<1x40x23x40xbf16> loc(#loc258) %408 = xten_nn.subgraph (%arg5 = %407: 
tensor<1x40x23x40xbf16>) attributes { LayerName = "Tanh_383", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "826", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], OutputName = "Tanh_383", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "827", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x40x23x40xbf16>) attributes { LayerName = "Tanh_383", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "826", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], OutputName = "Tanh_383", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "827", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], Specializes = "TanhTemplatedBf16", Traits = { Elementwise = true, Unary = true }, With = { config.ENABLE_FP16_AS_BF16 = 0 : ui8, config.aie_arch = "aie2p", config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.tanh %arg6 {LayerName = "Tanh_383", OutputName = "Tanh_383"} : (tensor<1x40x23x40xbf16>) -> tensor<1x40x23x40xbf16> loc(#loc259) xten_nn.output %462 : tensor<1x40x23x40xbf16> loc(#loc259) } -> tensor<1x40x23x40xbf16> loc(#loc259) xten_nn.output %461 : tensor<1x40x23x40xbf16> loc(#loc259) } -> tensor<1x40x23x40xbf16> loc(#loc259) %409 = xten_nn.subgraph (%arg5 = %402: 
tensor<1x40x23x40xbf16>, %arg6 = %408: tensor<1x40x23x40xbf16>) attributes { LayerName = "Mul_387", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "823", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "827", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], OutputName = "Mul_387", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "831", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x40x23x40xbf16>, %arg8 = %arg6: tensor<1x40x23x40xbf16>) attributes { LayerName = "Mul_387", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "823", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "827", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], OutputName = "Mul_387", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "831", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", 
config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_387", OutputName = "Mul_387", shift = 0 : i8} : (tensor<1x40x23x40xbf16>, tensor<1x40x23x40xbf16>) -> tensor<1x40x23x40xbf16> loc(#loc260) xten_nn.output %462 : tensor<1x40x23x40xbf16> loc(#loc260) } -> tensor<1x40x23x40xbf16> loc(#loc260) xten_nn.output %461 : tensor<1x40x23x40xbf16> loc(#loc260) } -> tensor<1x40x23x40xbf16> loc(#loc260) %410 = xten_nn.subgraph (%arg5 = %404: tensor<1x40x23x40xbf16>, %arg6 = %409: tensor<1x40x23x40xbf16>) attributes { LayerName = "Add_388", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "830", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "829", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], OutputName = "Add_388", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "832", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x40x23x40xbf16>, %arg8 = %arg6: tensor<1x40x23x40xbf16>) attributes { LayerName = "Add_388", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "830", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "829", Port = "data_io.ifm2", l3_extend_end = dense<0> : 
vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], OutputName = "Add_388", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "832", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], Specializes = "AddBf16", Traits = { Binary = true, Elementwise = true }, With = { config.act = 0 : ui8, config.act_type = "LINEAR", config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.add %arg7, %arg8 {LayerName = "Add_388", OutputName = "Add_388"} : (tensor<1x40x23x40xbf16>, tensor<1x40x23x40xbf16>) -> tensor<1x40x23x40xbf16> loc(#loc261) xten_nn.output %462 : tensor<1x40x23x40xbf16> loc(#loc261) } -> tensor<1x40x23x40xbf16> loc(#loc261) xten_nn.output %461 : tensor<1x40x23x40xbf16> loc(#loc261) } -> tensor<1x40x23x40xbf16> loc(#loc261) %411 = xten_nn.subgraph (%arg5 = %396: tensor<1x40x23x40xbf16>, %arg6 = %410: tensor<1x40x23x40xbf16>) attributes { Axis = 1 : i32, LayerName = "Concat_389", Op = "Concat", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "817", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "816", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> } ], OutputName = "Concat_389", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "PseudoOp", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "833", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = 
dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> } ], current_data_format = "NCHW", data_format = "HCWN"} { %461 = tosa.concat %arg5, %arg6 { LayerName = "Concat_389", OutputName = "Concat_389", axis = 1 : i32} : (tensor<1x40x23x40xbf16>, tensor<1x40x23x40xbf16>) -> tensor<1x80x23x40xbf16> loc(#loc262) xten_nn.output %461 : tensor<1x80x23x40xbf16> loc(#loc262) } -> tensor<1x80x23x40xbf16> loc(#loc262) %412 = xten_nn.subgraph (%arg5 = %411: tensor<1x80x23x40xbf16>) attributes { LayerName = "Resize_391", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "833", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> } ], OutputName = "Resize_391", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "838", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 46, 80]> : vector<4xindex> } ], Specializes = "ResizeAdf", With = { config.co_trans_mode = 1 : ui32, config.dim_0 = 1 : ui32, config.dim_1 = 80 : ui32, config.dim_2 = 23 : ui32, config.dim_3 = 40 : ui32, config.dtype = "bfloat16", config.mode = 1 : ui32, config.nearest_mode = 0 : ui32, config.output_H = 46 : ui32, config.output_W = 80 : ui32 }} { %461 = xten_nn.resize %arg5 { LayerName = "Resize_391", OutputName = "Resize_391", coordinate_transformation_mode = 1 : i64, mode = 1 : i64, nearest_mode = 0 : i64, scales = array} : (tensor<1x80x23x40xbf16>) -> tensor<1x80x46x80xbf16> loc(#loc263) xten_nn.output %461 : tensor<1x80x46x80xbf16> loc(#loc263) } -> tensor<1x80x46x80xbf16> loc(#loc263) %413 = xten_nn.subgraph (%arg5 = %412: tensor<1x80x46x80xbf16>) attributes { LayerName = "Slice_397", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "838", 
Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 46, 80]> : vector<4xindex> } ], OutputName = "Slice_397", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "848", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 45, 80]> : vector<4xindex> } ], Specializes = "SliceHCWC8Adf", With = { config.aie_arch = "aie2p", config.axis_letter = "H", config.dim_c = 80 : ui32, config.dim_h = 46 : ui32, config.dim_w = 80 : ui32, config.dtype = "bfloat16", config.end = 45 : ui32, config.start = 0 : ui32, config.step = 1 : ui32 }} { %461 = tosa.slice %arg5 { LayerName = "Slice_397", OutputName = "Slice_397", size = array, start = array} : (tensor<1x80x46x80xbf16>) -> tensor<1x80x45x80xbf16> loc(#loc264) xten_nn.output %461 : tensor<1x80x45x80xbf16> loc(#loc264) } -> tensor<1x80x45x80xbf16> loc(#loc264) %414 = xten_nn.subgraph (%arg5 = %413: tensor<1x80x45x80xbf16>, %arg6 = %184: tensor<1x24x45x80xbf16>, %arg7 = %168: tensor<1x3x45x80xbf16>) attributes { Axis = 1 : i32, LayerName = "Concat_398", Op = "Concat", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "848", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 80, 45, 80]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "838", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 24, 45, 80]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "434", Port = "data_io.ifm3", l3_extend_end = dense<[0, 5, 
0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 3, 45, 80]> : vector<4xindex> } ], OutputName = "Concat_398", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "PseudoOp", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "849", Port = "data_io.ofm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 107, 45, 80]> : vector<4xindex> } ], current_data_format = "NCHW", data_format = "HCWN"} { %461 = tosa.concat %arg5, %arg6, %arg7 { LayerName = "Concat_398", OutputName = "Concat_398", axis = 1 : i32} : (tensor<1x80x45x80xbf16>, tensor<1x24x45x80xbf16>, tensor<1x3x45x80xbf16>) -> tensor<1x107x45x80xbf16> loc(#loc265) xten_nn.output %461 : tensor<1x107x45x80xbf16> loc(#loc265) } -> tensor<1x107x45x80xbf16> loc(#loc265) %415 = xten_nn.subgraph (%arg5 = %414: tensor<1x107x45x80xbf16>, %arg6 = %20: tensor<40x107x3x3xbf16>, %arg7 = %19: tensor<40xbf16>) attributes { LayerName = "Conv_399", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "849", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 107, 45, 80]> : vector<4xindex> }, { Name = "848", UnknownDataFormat = true, l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[40, 107, 3, 3]> : vector<4xindex> }, { Name = "849", UnknownDataFormat = true } ], OutputName = "Relu_400", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "852", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: 
tensor<1x107x45x80xbf16>, %arg9 = %arg6: tensor<40x107x3x3xbf16>, %arg10 = %arg7: tensor<40xbf16>) attributes { Dilations = array, HWPadding = [[1, 1], [1, 1]], LayerName = "Conv_399", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "849", Port = "data_io.ifm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 107, 45, 80]> : vector<4xindex> }, { Name = "848", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[40, 107, 3, 3]> : vector<4xindex> }, { Name = "849", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_400", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "852", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true, NonNegativeOut = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 1 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 3 : ui8, config.ksize.width = 3 : ui8, config.lrelu_alpha = 0.000000e+00 : bf16, config.lrelu_alpha_kernel = 0.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = tosa.transpose %arg9, %463 : (tensor<40x107x3x3xbf16>, tensor<4xi32>) -> tensor<40x3x3x107xbf16> loc(#loc361) %465 = tosa.transpose %arg8, %463 : 
(tensor<1x107x45x80xbf16>, tensor<4xi32>) -> tensor<1x45x80x107xbf16> loc(#loc361) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_399", PartOfOutputName = "Conv_399", dilation = array, pad = array, stride = array} : (tensor<1x45x80x107xbf16>, tensor<40x3x3x107xbf16>, tensor<40xbf16>) -> tensor<1x45x80x40xbf16> loc(#loc266) %467 = tosa.clamp %466 { LayerName = "Relu_400", OutputName = "Relu_400", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x45x80x40xbf16>) -> tensor<1x45x80x40xbf16> loc(#loc267) %468 = tosa.transpose %467, %462 : (tensor<1x45x80x40xbf16>, tensor<4xi32>) -> tensor<1x40x45x80xbf16> loc(#loc361) xten_nn.output %468 : tensor<1x40x45x80xbf16> loc(#loc267) } -> tensor<1x40x45x80xbf16> loc(#loc361) xten_nn.output %461 : tensor<1x40x45x80xbf16> loc(#loc361) } -> tensor<1x40x45x80xbf16> loc(#loc361) %416 = xten_nn.subgraph (%arg5 = %415: tensor<1x40x45x80xbf16>) attributes { LayerName = "Split_401_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "852", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> } ], OutputName = "Split_401_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "853", Port = "data_io.ofm", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], Specializes = "SliceHCWC8Adf", With = { config.aie_arch = "aie2p", config.axis_letter = "C", config.dim_c = 40 : ui32, config.dim_h = 45 : ui32, config.dim_w = 80 : ui32, config.dtype = "bfloat16", config.end = 20 : ui32, config.start = 0 : ui32, config.step = 1 : ui32 }} { 
%461 = tosa.slice %arg5 { PartOfLayerName = "Split_401", PartOfOutputName = "Split_401", size = array, start = array} : (tensor<1x40x45x80xbf16>) -> tensor<1x20x45x80xbf16> loc(#loc268) xten_nn.output %461 : tensor<1x20x45x80xbf16> loc(#loc268) } -> tensor<1x20x45x80xbf16> loc(#loc268) %417 = xten_nn.subgraph (%arg5 = %415: tensor<1x40x45x80xbf16>) attributes { LayerName = "Split_401_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "852", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> } ], OutputName = "Split_401_Duplicated#1", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "853", Port = "data_io.ofm", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], Specializes = "SliceHCWC8Adf", With = { config.aie_arch = "aie2p", config.axis_letter = "C", config.dim_c = 40 : ui32, config.dim_h = 45 : ui32, config.dim_w = 80 : ui32, config.dtype = "bfloat16", config.end = 40 : ui32, config.start = 20 : ui32, config.step = 1 : ui32 }} { %461 = tosa.slice %arg5 { PartOfLayerName = "Split_401", PartOfOutputName = "Split_401", size = array, start = array} : (tensor<1x40x45x80xbf16>) -> tensor<1x20x45x80xbf16> loc(#loc268) xten_nn.output %461 : tensor<1x20x45x80xbf16> loc(#loc268) } -> tensor<1x20x45x80xbf16> loc(#loc268) %418 = xten_nn.subgraph (%arg5 = %417: tensor<1x20x45x80xbf16>, %arg6 = %arg2: tensor<1x20x45x80xbf16>) attributes { LayerName = "Concat_402", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ifm1", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, 
l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ifm2", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], OutputName = "Concat_402", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "855", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> } ], Specializes = "ConcatC8Adf", With = { config.aie_arch = "aie2p", config.dtype = "bfloat16", config.in1_dim_c = 24 : ui32, config.in1_dim_h = 45 : ui32, config.in1_dim_w = 80 : ui32, config.in2_dim_c = 24 : ui32, config.in2_dim_h = 45 : ui32, config.in2_dim_w = 80 : ui32, config.num_eff_concat_input0_size = 20 : ui32, config.num_eff_concat_input0_start = 0 : ui32, config.num_eff_concat_input1_size = 20 : ui32, config.num_eff_concat_input1_start = 0 : ui32 }} { %461 = tosa.concat %arg5, %arg6 { LayerName = "Concat_402", OutputName = "Concat_402", axis = 1 : i32} : (tensor<1x20x45x80xbf16>, tensor<1x20x45x80xbf16>) -> tensor<1x40x45x80xbf16> loc(#loc269) xten_nn.output %461 : tensor<1x40x45x80xbf16> loc(#loc269) } -> tensor<1x40x45x80xbf16> loc(#loc269) %419 = xten_nn.subgraph (%arg5 = %418: tensor<1x40x45x80xbf16>, %arg6 = %18: tensor<40x40x3x3xbf16>, %arg7 = %17: tensor<40xbf16>) attributes { LayerName = "Conv_403", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "855", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> }, { Name = "395", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, 
l3_tile_count = dense<[40, 40, 3, 3]> : vector<4xindex> }, { Name = "decoder.decode2.gru.ih.0.weight", UnknownDataFormat = true } ], OutputName = "Conv_403", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "856", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x40x45x80xbf16>, %arg9 = %arg6: tensor<40x40x3x3xbf16>, %arg10 = %arg7: tensor<40xbf16>) attributes { Dilations = array, HWPadding = [[1, 1], [1, 1]], LayerName = "Conv_403", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "855", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> }, { Name = "395", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[40, 40, 3, 3]> : vector<4xindex> }, { Name = "decoder.decode2.gru.ih.0.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_403", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "856", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", 
config.dtype_wts = "bfloat16", config.ksize.height = 3 : ui8, config.ksize.width = 3 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = tosa.transpose %arg9, %463 : (tensor<40x40x3x3xbf16>, tensor<4xi32>) -> tensor<40x3x3x40xbf16> loc(#loc270) %465 = tosa.transpose %arg8, %463 : (tensor<1x40x45x80xbf16>, tensor<4xi32>) -> tensor<1x45x80x40xbf16> loc(#loc270) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_403", PartOfOutputName = "Conv_403", dilation = array, pad = array, stride = array} : (tensor<1x45x80x40xbf16>, tensor<40x3x3x40xbf16>, tensor<40xbf16>) -> tensor<1x45x80x40xbf16> loc(#loc270) %467 = tosa.transpose %466, %462 : (tensor<1x45x80x40xbf16>, tensor<4xi32>) -> tensor<1x40x45x80xbf16> loc(#loc270) xten_nn.output %467 : tensor<1x40x45x80xbf16> loc(#loc270) } -> tensor<1x40x45x80xbf16> loc(#loc270) xten_nn.output %461 : tensor<1x40x45x80xbf16> loc(#loc270) } -> tensor<1x40x45x80xbf16> loc(#loc270) %420 = xten_nn.subgraph (%arg5 = %419: tensor<1x40x45x80xbf16>) attributes { LayerName = "Sigmoid_404", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "856", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> } ], OutputName = "Sigmoid_404", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "857", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = 
"flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x40x45x80xbf16>) attributes { LayerName = "Sigmoid_404", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "856", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> } ], OutputName = "Sigmoid_404", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "857", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> } ], Specializes = "SigmoidTemplatedBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.ENABLE_FP16_AS_BF16 = 0 : ui8, config.aie_arch = "aie2p", config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.sigmoid %arg6 {LayerName = "Sigmoid_404", OutputName = "Sigmoid_404"} : (tensor<1x40x45x80xbf16>) -> tensor<1x40x45x80xbf16> loc(#loc271) xten_nn.output %462 : tensor<1x40x45x80xbf16> loc(#loc271) } -> tensor<1x40x45x80xbf16> loc(#loc271) xten_nn.output %461 : tensor<1x40x45x80xbf16> loc(#loc271) } -> tensor<1x40x45x80xbf16> loc(#loc271) %421 = xten_nn.subgraph (%arg5 = %420: tensor<1x40x45x80xbf16>) attributes { LayerName = "Split_405_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "857", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> } ], OutputName = "Split_405_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "858", Port = "data_io.ofm", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, 
l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], Specializes = "SliceHCWC8Adf", With = { config.aie_arch = "aie2p", config.axis_letter = "C", config.dim_c = 40 : ui32, config.dim_h = 45 : ui32, config.dim_w = 80 : ui32, config.dtype = "bfloat16", config.end = 20 : ui32, config.start = 0 : ui32, config.step = 1 : ui32 }} { %461 = tosa.slice %arg5 { PartOfLayerName = "Split_405", PartOfOutputName = "Split_405", size = array, start = array} : (tensor<1x40x45x80xbf16>) -> tensor<1x20x45x80xbf16> loc(#loc272) xten_nn.output %461 : tensor<1x20x45x80xbf16> loc(#loc272) } -> tensor<1x20x45x80xbf16> loc(#loc272) %422 = xten_nn.subgraph (%arg5 = %420: tensor<1x40x45x80xbf16>) attributes { LayerName = "Split_405_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "857", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> } ], OutputName = "Split_405_Duplicated#1", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "858", Port = "data_io.ofm", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], Specializes = "SliceHCWC8Adf", With = { config.aie_arch = "aie2p", config.axis_letter = "C", config.dim_c = 40 : ui32, config.dim_h = 45 : ui32, config.dim_w = 80 : ui32, config.dtype = "bfloat16", config.end = 40 : ui32, config.start = 20 : ui32, config.step = 1 : ui32 }} { %461 = tosa.slice %arg5 { PartOfLayerName = "Split_405", PartOfOutputName = "Split_405", size = array, start = array} : (tensor<1x40x45x80xbf16>) -> tensor<1x20x45x80xbf16> loc(#loc272) xten_nn.output %461 : tensor<1x20x45x80xbf16> 
loc(#loc272) } -> tensor<1x20x45x80xbf16> loc(#loc272) %423 = xten_nn.subgraph (%arg5 = %16: tensor<1x20x45x80xbf16>, %arg6 = %422: tensor<1x20x45x80xbf16>) attributes { LayerName = "Sub_411", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "890", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "859", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], OutputName = "Sub_411", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "865", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x20x45x80xbf16>, %arg8 = %arg6: tensor<1x20x45x80xbf16>) attributes { LayerName = "Sub_411", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "890", Port = "data_io.ifm1", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "859", Port = "data_io.ifm2", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], OutputName = "Sub_411", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "865", Port = "data_io.ofm", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], Specializes = "SubBf16", 
Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.sub %arg7, %arg8 {LayerName = "Sub_411", OutputName = "Sub_411"} : (tensor<1x20x45x80xbf16>, tensor<1x20x45x80xbf16>) -> tensor<1x20x45x80xbf16> loc(#loc3) xten_nn.output %462 : tensor<1x20x45x80xbf16> loc(#loc3) } -> tensor<1x20x45x80xbf16> loc(#loc3) xten_nn.output %461 : tensor<1x20x45x80xbf16> loc(#loc3) } -> tensor<1x20x45x80xbf16> loc(#loc3) %424 = xten_nn.subgraph (%arg5 = %423: tensor<1x20x45x80xbf16>, %arg6 = %arg2: tensor<1x20x45x80xbf16>) attributes { LayerName = "Mul_412", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], OutputName = "Mul_412", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x20x45x80xbf16>, %arg8 = %arg6: tensor<1x20x45x80xbf16>) attributes { LayerName = "Mul_412", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ifm1", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", 
Port = "data_io.ifm2", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], OutputName = "Mul_412", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ofm", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_412", OutputName = "Mul_412", shift = 0 : i8} : (tensor<1x20x45x80xbf16>, tensor<1x20x45x80xbf16>) -> tensor<1x20x45x80xbf16> loc(#loc273) xten_nn.output %462 : tensor<1x20x45x80xbf16> loc(#loc273) } -> tensor<1x20x45x80xbf16> loc(#loc273) xten_nn.output %461 : tensor<1x20x45x80xbf16> loc(#loc273) } -> tensor<1x20x45x80xbf16> loc(#loc273) %425 = xten_nn.subgraph (%arg5 = %421: tensor<1x20x45x80xbf16>, %arg6 = %arg2: tensor<1x20x45x80xbf16>) attributes { LayerName = "Mul_406", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "858", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "857", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], OutputName = "Mul_406", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "860", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", 
layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x20x45x80xbf16>, %arg8 = %arg6: tensor<1x20x45x80xbf16>) attributes { LayerName = "Mul_406", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "858", Port = "data_io.ifm1", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "857", Port = "data_io.ifm2", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], OutputName = "Mul_406", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "860", Port = "data_io.ofm", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_406", OutputName = "Mul_406", shift = 0 : i8} : (tensor<1x20x45x80xbf16>, tensor<1x20x45x80xbf16>) -> tensor<1x20x45x80xbf16> loc(#loc274) xten_nn.output %462 : tensor<1x20x45x80xbf16> loc(#loc274) } -> tensor<1x20x45x80xbf16> loc(#loc274) xten_nn.output %461 : tensor<1x20x45x80xbf16> loc(#loc274) } -> tensor<1x20x45x80xbf16> loc(#loc274) %426 = xten_nn.subgraph (%arg5 = %417: tensor<1x20x45x80xbf16>, %arg6 = %425: tensor<1x20x45x80xbf16>) attributes { LayerName = "Concat_407", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "854", Port = "data_io.ifm1", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : 
vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "860", Port = "data_io.ifm2", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], OutputName = "Concat_407", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "861", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> } ], Specializes = "ConcatC8Adf", With = { config.aie_arch = "aie2p", config.dtype = "bfloat16", config.in1_dim_c = 24 : ui32, config.in1_dim_h = 45 : ui32, config.in1_dim_w = 80 : ui32, config.in2_dim_c = 24 : ui32, config.in2_dim_h = 45 : ui32, config.in2_dim_w = 80 : ui32, config.num_eff_concat_input0_size = 20 : ui32, config.num_eff_concat_input0_start = 0 : ui32, config.num_eff_concat_input1_size = 20 : ui32, config.num_eff_concat_input1_start = 0 : ui32 }} { %461 = tosa.concat %arg5, %arg6 { LayerName = "Concat_407", OutputName = "Concat_407", axis = 1 : i32} : (tensor<1x20x45x80xbf16>, tensor<1x20x45x80xbf16>) -> tensor<1x40x45x80xbf16> loc(#loc275) xten_nn.output %461 : tensor<1x40x45x80xbf16> loc(#loc275) } -> tensor<1x40x45x80xbf16> loc(#loc275) %427 = xten_nn.subgraph (%arg5 = %426: tensor<1x40x45x80xbf16>, %arg6 = %15: tensor<20x40x3x3xbf16>, %arg7 = %14: tensor<20xbf16>) attributes { LayerName = "Conv_408", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "861", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> }, { Name = "860", UnknownDataFormat = true, l3_extend_end = dense<[4, 0, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[20, 40, 3, 3]> : vector<4xindex> }, { Name = 
"decoder.decode2.gru.hh.0.weight", UnknownDataFormat = true } ], OutputName = "Conv_408", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "862", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x40x45x80xbf16>, %arg9 = %arg6: tensor<20x40x3x3xbf16>, %arg10 = %arg7: tensor<20xbf16>) attributes { Dilations = array, HWPadding = [[1, 1], [1, 1]], LayerName = "Conv_408", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "861", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> }, { Name = "860", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<[4, 0, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[20, 40, 3, 3]> : vector<4xindex> }, { Name = "decoder.decode2.gru.hh.0.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_408", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "862", Port = "data_io.ofm", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", 
config.ksize.height = 3 : ui8, config.ksize.width = 3 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = tosa.transpose %arg9, %463 : (tensor<20x40x3x3xbf16>, tensor<4xi32>) -> tensor<20x3x3x40xbf16> loc(#loc276) %465 = tosa.transpose %arg8, %463 : (tensor<1x40x45x80xbf16>, tensor<4xi32>) -> tensor<1x45x80x40xbf16> loc(#loc276) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_408", PartOfOutputName = "Conv_408", dilation = array, pad = array, stride = array} : (tensor<1x45x80x40xbf16>, tensor<20x3x3x40xbf16>, tensor<20xbf16>) -> tensor<1x45x80x20xbf16> loc(#loc276) %467 = tosa.transpose %466, %462 : (tensor<1x45x80x20xbf16>, tensor<4xi32>) -> tensor<1x20x45x80xbf16> loc(#loc276) xten_nn.output %467 : tensor<1x20x45x80xbf16> loc(#loc276) } -> tensor<1x20x45x80xbf16> loc(#loc276) xten_nn.output %461 : tensor<1x20x45x80xbf16> loc(#loc276) } -> tensor<1x20x45x80xbf16> loc(#loc276) %428 = xten_nn.subgraph (%arg5 = %427: tensor<1x20x45x80xbf16>) attributes { LayerName = "Tanh_409", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "862", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], OutputName = "Tanh_409", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "863", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = 
xten_nn.subgraph (%arg6 = %arg5: tensor<1x20x45x80xbf16>) attributes { LayerName = "Tanh_409", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "862", Port = "data_io.ifm", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], OutputName = "Tanh_409", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "863", Port = "data_io.ofm", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], Specializes = "TanhTemplatedBf16", Traits = { Elementwise = true, Unary = true }, With = { config.ENABLE_FP16_AS_BF16 = 0 : ui8, config.aie_arch = "aie2p", config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.tanh %arg6 {LayerName = "Tanh_409", OutputName = "Tanh_409"} : (tensor<1x20x45x80xbf16>) -> tensor<1x20x45x80xbf16> loc(#loc277) xten_nn.output %462 : tensor<1x20x45x80xbf16> loc(#loc277) } -> tensor<1x20x45x80xbf16> loc(#loc277) xten_nn.output %461 : tensor<1x20x45x80xbf16> loc(#loc277) } -> tensor<1x20x45x80xbf16> loc(#loc277) %429 = xten_nn.subgraph (%arg5 = %422: tensor<1x20x45x80xbf16>, %arg6 = %428: tensor<1x20x45x80xbf16>) attributes { LayerName = "Mul_413", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "859", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "863", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], OutputName = "Mul_413", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", 
L3Vectorization = "C:8", Name = "867", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x20x45x80xbf16>, %arg8 = %arg6: tensor<1x20x45x80xbf16>) attributes { LayerName = "Mul_413", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "859", Port = "data_io.ifm1", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "863", Port = "data_io.ifm2", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], OutputName = "Mul_413", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "867", Port = "data_io.ofm", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_413", OutputName = "Mul_413", shift = 0 : i8} : (tensor<1x20x45x80xbf16>, tensor<1x20x45x80xbf16>) -> tensor<1x20x45x80xbf16> loc(#loc278) xten_nn.output %462 : tensor<1x20x45x80xbf16> loc(#loc278) } -> tensor<1x20x45x80xbf16> loc(#loc278) xten_nn.output %461 : tensor<1x20x45x80xbf16> loc(#loc278) } -> tensor<1x20x45x80xbf16> loc(#loc278) %430 = xten_nn.subgraph (%arg5 = %424: tensor<1x20x45x80xbf16>, %arg6 = %429: tensor<1x20x45x80xbf16>) attributes { LayerName = "Add_414", Operands = [ { CurrentDataFormat = "NCHW", 
L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "866", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "865", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], OutputName = "Add_414", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "868", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x20x45x80xbf16>, %arg8 = %arg6: tensor<1x20x45x80xbf16>) attributes { LayerName = "Add_414", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "866", Port = "data_io.ifm1", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "865", Port = "data_io.ifm2", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], OutputName = "Add_414", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "868", Port = "data_io.ofm", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], Specializes = "AddBf16", Traits = { Binary = true, Elementwise = true }, With = { config.act = 0 : ui8, config.act_type = "LINEAR", config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", 
config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.add %arg7, %arg8 {LayerName = "Add_414", OutputName = "Add_414"} : (tensor<1x20x45x80xbf16>, tensor<1x20x45x80xbf16>) -> tensor<1x20x45x80xbf16> loc(#loc279) xten_nn.output %462 : tensor<1x20x45x80xbf16> loc(#loc279) } -> tensor<1x20x45x80xbf16> loc(#loc279) xten_nn.output %461 : tensor<1x20x45x80xbf16> loc(#loc279) } -> tensor<1x20x45x80xbf16> loc(#loc279) %431 = xten_nn.subgraph (%arg5 = %416: tensor<1x20x45x80xbf16>, %arg6 = %430: tensor<1x20x45x80xbf16>) attributes { LayerName = "Concat_415", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "853", Port = "data_io.ifm1", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "852", Port = "data_io.ifm2", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> } ], OutputName = "Concat_415", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "869", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> } ], Specializes = "ConcatC8Adf", With = { config.aie_arch = "aie2p", config.dtype = "bfloat16", config.in1_dim_c = 24 : ui32, config.in1_dim_h = 45 : ui32, config.in1_dim_w = 80 : ui32, config.in2_dim_c = 24 : ui32, config.in2_dim_h = 45 : ui32, config.in2_dim_w = 80 : ui32, config.num_eff_concat_input0_size = 20 : ui32, config.num_eff_concat_input0_start = 0 : ui32, config.num_eff_concat_input1_size = 20 : ui32, config.num_eff_concat_input1_start = 0 : ui32 }} { 
%461 = tosa.concat %arg5, %arg6 { LayerName = "Concat_415", OutputName = "Concat_415", axis = 1 : i32} : (tensor<1x20x45x80xbf16>, tensor<1x20x45x80xbf16>) -> tensor<1x40x45x80xbf16> loc(#loc280) xten_nn.output %461 : tensor<1x40x45x80xbf16> loc(#loc280) } -> tensor<1x40x45x80xbf16> loc(#loc280) %432 = xten_nn.subgraph (%arg5 = %431: tensor<1x40x45x80xbf16>) attributes { LayerName = "Resize_417", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "869", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> } ], OutputName = "Resize_417", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "874", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 90, 160]> : vector<4xindex> } ], Specializes = "ResizeAdf", With = { config.co_trans_mode = 1 : ui32, config.dim_0 = 1 : ui32, config.dim_1 = 40 : ui32, config.dim_2 = 45 : ui32, config.dim_3 = 80 : ui32, config.dtype = "bfloat16", config.mode = 1 : ui32, config.nearest_mode = 0 : ui32, config.output_H = 90 : ui32, config.output_W = 160 : ui32 }} { %461 = xten_nn.resize %arg5 { LayerName = "Resize_417", OutputName = "Resize_417", coordinate_transformation_mode = 1 : i64, mode = 1 : i64, nearest_mode = 0 : i64, scales = array} : (tensor<1x40x45x80xbf16>) -> tensor<1x40x90x160xbf16> loc(#loc281) xten_nn.output %461 : tensor<1x40x90x160xbf16> loc(#loc281) } -> tensor<1x40x90x160xbf16> loc(#loc281) %433 = xten_nn.subgraph (%arg5 = %432: tensor<1x40x90x160xbf16>, %arg6 = %178: tensor<1x16x90x160xbf16>, %arg7 = %167: tensor<1x3x90x160xbf16>) attributes { Axis = 1 : i32, LayerName = "Concat_418", Op = "Concat", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "874", Port = "data_io.ifm1", l3_extend_end = 
dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 40, 90, 160]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "869", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "417", Port = "data_io.ifm3", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 3, 90, 160]> : vector<4xindex> } ], OutputName = "Concat_418", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "PseudoOp", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "875", Port = "data_io.ofm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 59, 90, 160]> : vector<4xindex> } ], current_data_format = "NCHW", data_format = "HCWN"} { %461 = tosa.concat %arg5, %arg6, %arg7 { LayerName = "Concat_418", OutputName = "Concat_418", axis = 1 : i32} : (tensor<1x40x90x160xbf16>, tensor<1x16x90x160xbf16>, tensor<1x3x90x160xbf16>) -> tensor<1x59x90x160xbf16> loc(#loc282) xten_nn.output %461 : tensor<1x59x90x160xbf16> loc(#loc282) } -> tensor<1x59x90x160xbf16> loc(#loc282) %434 = xten_nn.subgraph (%arg5 = %433: tensor<1x59x90x160xbf16>, %arg6 = %13: tensor<32x59x3x3xbf16>, %arg7 = %12: tensor<32xbf16>) attributes { LayerName = "Conv_419", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "875", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 59, 90, 160]> : vector<4xindex> }, { Name = "874", UnknownDataFormat = true, l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[32, 59, 3, 3]> : 
vector<4xindex> }, { Name = "875", UnknownDataFormat = true } ], OutputName = "Relu_420", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "878", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x59x90x160xbf16>, %arg9 = %arg6: tensor<32x59x3x3xbf16>, %arg10 = %arg7: tensor<32xbf16>) attributes { Dilations = array, HWPadding = [[1, 1], [1, 1]], LayerName = "Conv_419", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "875", Port = "data_io.ifm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 59, 90, 160]> : vector<4xindex> }, { Name = "874", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[32, 59, 3, 3]> : vector<4xindex> }, { Name = "875", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_420", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "878", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true, NonNegativeOut = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 1 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 3 : 
ui8, config.ksize.width = 3 : ui8, config.lrelu_alpha = 0.000000e+00 : bf16, config.lrelu_alpha_kernel = 0.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = tosa.transpose %arg9, %463 : (tensor<32x59x3x3xbf16>, tensor<4xi32>) -> tensor<32x3x3x59xbf16> loc(#loc362) %465 = tosa.transpose %arg8, %463 : (tensor<1x59x90x160xbf16>, tensor<4xi32>) -> tensor<1x90x160x59xbf16> loc(#loc362) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_419", PartOfOutputName = "Conv_419", dilation = array, pad = array, stride = array} : (tensor<1x90x160x59xbf16>, tensor<32x3x3x59xbf16>, tensor<32xbf16>) -> tensor<1x90x160x32xbf16> loc(#loc283) %467 = tosa.clamp %466 { LayerName = "Relu_420", OutputName = "Relu_420", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x90x160x32xbf16>) -> tensor<1x90x160x32xbf16> loc(#loc284) %468 = tosa.transpose %467, %462 : (tensor<1x90x160x32xbf16>, tensor<4xi32>) -> tensor<1x32x90x160xbf16> loc(#loc362) xten_nn.output %468 : tensor<1x32x90x160xbf16> loc(#loc284) } -> tensor<1x32x90x160xbf16> loc(#loc362) xten_nn.output %461 : tensor<1x32x90x160xbf16> loc(#loc362) } -> tensor<1x32x90x160xbf16> loc(#loc362) %435 = xten_nn.subgraph (%arg5 = %434: tensor<1x32x90x160xbf16>) attributes { LayerName = "Split_421_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "878", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> } ], OutputName = "Split_421_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { 
CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "879", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], Specializes = "SliceHCWC8Adf", With = { config.aie_arch = "aie2p", config.axis_letter = "C", config.dim_c = 32 : ui32, config.dim_h = 90 : ui32, config.dim_w = 160 : ui32, config.dtype = "bfloat16", config.end = 16 : ui32, config.start = 0 : ui32, config.step = 1 : ui32 }} { %461 = tosa.slice %arg5 { PartOfLayerName = "Split_421", PartOfOutputName = "Split_421", size = array, start = array} : (tensor<1x32x90x160xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc285) xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc285) } -> tensor<1x16x90x160xbf16> loc(#loc285) %436 = xten_nn.subgraph (%arg5 = %434: tensor<1x32x90x160xbf16>) attributes { LayerName = "Split_421_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "878", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> } ], OutputName = "Split_421_Duplicated#1", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "879", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], Specializes = "SliceHCWC8Adf", With = { config.aie_arch = "aie2p", config.axis_letter = "C", config.dim_c = 32 : ui32, config.dim_h = 90 : ui32, config.dim_w = 160 : ui32, config.dtype = "bfloat16", config.end = 32 : ui32, config.start = 16 : ui32, config.step = 1 : ui32 }} { %461 = tosa.slice %arg5 { PartOfLayerName = "Split_421", PartOfOutputName = 
"Split_421", size = array, start = array} : (tensor<1x32x90x160xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc285) xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc285) } -> tensor<1x16x90x160xbf16> loc(#loc285) %437 = xten_nn.subgraph (%arg5 = %436: tensor<1x16x90x160xbf16>, %arg6 = %arg1: tensor<1x16x90x160xbf16>) attributes { Axis = 1 : i32, LayerName = "Concat_422", Op = "Concat", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Concat_422", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "PseudoOp", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "881", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> } ], current_data_format = "NCHW", data_format = "HCWN"} { %461 = tosa.concat %arg5, %arg6 { LayerName = "Concat_422", OutputName = "Concat_422", axis = 1 : i32} : (tensor<1x16x90x160xbf16>, tensor<1x16x90x160xbf16>) -> tensor<1x32x90x160xbf16> loc(#loc286) xten_nn.output %461 : tensor<1x32x90x160xbf16> loc(#loc286) } -> tensor<1x32x90x160xbf16> loc(#loc286) %438 = xten_nn.subgraph (%arg5 = %437: tensor<1x32x90x160xbf16>, %arg6 = %11: tensor<32x32x3x3xbf16>, %arg7 = %10: tensor<32xbf16>) attributes { LayerName = "Conv_423", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "881", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 
90, 160]> : vector<4xindex> }, { Name = "394", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[32, 32, 3, 3]> : vector<4xindex> }, { Name = "decoder.decode1.gru.ih.0.weight", UnknownDataFormat = true } ], OutputName = "Conv_423", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "882", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x32x90x160xbf16>, %arg9 = %arg6: tensor<32x32x3x3xbf16>, %arg10 = %arg7: tensor<32xbf16>) attributes { Dilations = array, HWPadding = [[1, 1], [1, 1]], LayerName = "Conv_423", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "881", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> }, { Name = "394", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[32, 32, 3, 3]> : vector<4xindex> }, { Name = "decoder.decode1.gru.ih.0.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_423", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "882", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", 
config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 3 : ui8, config.ksize.width = 3 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = tosa.transpose %arg9, %463 : (tensor<32x32x3x3xbf16>, tensor<4xi32>) -> tensor<32x3x3x32xbf16> loc(#loc287) %465 = tosa.transpose %arg8, %463 : (tensor<1x32x90x160xbf16>, tensor<4xi32>) -> tensor<1x90x160x32xbf16> loc(#loc287) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_423", PartOfOutputName = "Conv_423", dilation = array, pad = array, stride = array} : (tensor<1x90x160x32xbf16>, tensor<32x3x3x32xbf16>, tensor<32xbf16>) -> tensor<1x90x160x32xbf16> loc(#loc287) %467 = tosa.transpose %466, %462 : (tensor<1x90x160x32xbf16>, tensor<4xi32>) -> tensor<1x32x90x160xbf16> loc(#loc287) xten_nn.output %467 : tensor<1x32x90x160xbf16> loc(#loc287) } -> tensor<1x32x90x160xbf16> loc(#loc287) xten_nn.output %461 : tensor<1x32x90x160xbf16> loc(#loc287) } -> tensor<1x32x90x160xbf16> loc(#loc287) %439 = xten_nn.subgraph (%arg5 = %438: tensor<1x32x90x160xbf16>) attributes { LayerName = "Sigmoid_424", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "882", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> } ], OutputName = "Sigmoid_424", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "883", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 90, 160]> : 
vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x32x90x160xbf16>) attributes { LayerName = "Sigmoid_424", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "882", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> } ], OutputName = "Sigmoid_424", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "883", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> } ], Specializes = "SigmoidTemplatedBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.ENABLE_FP16_AS_BF16 = 0 : ui8, config.aie_arch = "aie2p", config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.sigmoid %arg6 {LayerName = "Sigmoid_424", OutputName = "Sigmoid_424"} : (tensor<1x32x90x160xbf16>) -> tensor<1x32x90x160xbf16> loc(#loc288) xten_nn.output %462 : tensor<1x32x90x160xbf16> loc(#loc288) } -> tensor<1x32x90x160xbf16> loc(#loc288) xten_nn.output %461 : tensor<1x32x90x160xbf16> loc(#loc288) } -> tensor<1x32x90x160xbf16> loc(#loc288) %440 = xten_nn.subgraph (%arg5 = %439: tensor<1x32x90x160xbf16>) attributes { LayerName = "Split_425_Duplicated#0", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "883", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> } ], OutputName = "Split_425_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", 
L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "884", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], Specializes = "SliceHCWC8Adf", With = { config.aie_arch = "aie2p", config.axis_letter = "C", config.dim_c = 32 : ui32, config.dim_h = 90 : ui32, config.dim_w = 160 : ui32, config.dtype = "bfloat16", config.end = 16 : ui32, config.start = 0 : ui32, config.step = 1 : ui32 }} { %461 = tosa.slice %arg5 { PartOfLayerName = "Split_425", PartOfOutputName = "Split_425", size = array, start = array} : (tensor<1x32x90x160xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc289) xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc289) } -> tensor<1x16x90x160xbf16> loc(#loc289) %441 = xten_nn.subgraph (%arg5 = %439: tensor<1x32x90x160xbf16>) attributes { LayerName = "Split_425_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "883", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> } ], OutputName = "Split_425_Duplicated#1", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "884", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], Specializes = "SliceHCWC8Adf", With = { config.aie_arch = "aie2p", config.axis_letter = "C", config.dim_c = 32 : ui32, config.dim_h = 90 : ui32, config.dim_w = 160 : ui32, config.dtype = "bfloat16", config.end = 32 : ui32, config.start = 16 : ui32, config.step = 1 : ui32 }} { %461 = tosa.slice %arg5 { PartOfLayerName = "Split_425", PartOfOutputName = "Split_425", size = array, start = 
array} : (tensor<1x32x90x160xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc289) xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc289) } -> tensor<1x16x90x160xbf16> loc(#loc289) %442 = xten_nn.subgraph (%arg5 = %9: tensor<1x16x90x160xbf16>, %arg6 = %441: tensor<1x16x90x160xbf16>) attributes { LayerName = "Sub_431", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "890", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "885", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Sub_431", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "891", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x16x90x160xbf16>, %arg8 = %arg6: tensor<1x16x90x160xbf16>) attributes { LayerName = "Sub_431", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "890", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "885", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Sub_431", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "891", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = 
dense<[1, 16, 90, 160]> : vector<4xindex> } ], Specializes = "SubBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.sub %arg7, %arg8 {LayerName = "Sub_431", OutputName = "Sub_431"} : (tensor<1x16x90x160xbf16>, tensor<1x16x90x160xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc2) xten_nn.output %462 : tensor<1x16x90x160xbf16> loc(#loc2) } -> tensor<1x16x90x160xbf16> loc(#loc2) xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc2) } -> tensor<1x16x90x160xbf16> loc(#loc2) %443 = xten_nn.subgraph (%arg5 = %442: tensor<1x16x90x160xbf16>, %arg6 = %arg1: tensor<1x16x90x160xbf16>) attributes { LayerName = "Mul_432", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Mul_432", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x16x90x160xbf16>, %arg8 = %arg6: tensor<1x16x90x160xbf16>) attributes { LayerName = "Mul_432", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, 
L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Mul_432", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_432", OutputName = "Mul_432", shift = 0 : i8} : (tensor<1x16x90x160xbf16>, tensor<1x16x90x160xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc290) xten_nn.output %462 : tensor<1x16x90x160xbf16> loc(#loc290) } -> tensor<1x16x90x160xbf16> loc(#loc290) xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc290) } -> tensor<1x16x90x160xbf16> loc(#loc290) %444 = xten_nn.subgraph (%arg5 = %440: tensor<1x16x90x160xbf16>, %arg6 = %arg1: tensor<1x16x90x160xbf16>) attributes { LayerName = "Mul_426", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "884", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "883", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Mul_426", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "886", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = 
"double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x16x90x160xbf16>, %arg8 = %arg6: tensor<1x16x90x160xbf16>) attributes { LayerName = "Mul_426", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "884", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "883", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Mul_426", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "886", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_426", OutputName = "Mul_426", shift = 0 : i8} : (tensor<1x16x90x160xbf16>, tensor<1x16x90x160xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc291) xten_nn.output %462 : tensor<1x16x90x160xbf16> loc(#loc291) } -> tensor<1x16x90x160xbf16> loc(#loc291) xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc291) } -> tensor<1x16x90x160xbf16> loc(#loc291) %445 = xten_nn.subgraph (%arg5 = %436: tensor<1x16x90x160xbf16>, %arg6 = %444: tensor<1x16x90x160xbf16>) attributes { Axis = 1 : i32, LayerName = "Concat_427", Op = "Concat", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "880", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : 
vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "886", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Concat_427", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "PseudoOp", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "887", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> } ], current_data_format = "NCHW", data_format = "HCWN"} { %461 = tosa.concat %arg5, %arg6 { LayerName = "Concat_427", OutputName = "Concat_427", axis = 1 : i32} : (tensor<1x16x90x160xbf16>, tensor<1x16x90x160xbf16>) -> tensor<1x32x90x160xbf16> loc(#loc292) xten_nn.output %461 : tensor<1x32x90x160xbf16> loc(#loc292) } -> tensor<1x32x90x160xbf16> loc(#loc292) %446 = xten_nn.subgraph (%arg5 = %445: tensor<1x32x90x160xbf16>, %arg6 = %8: tensor<16x32x3x3xbf16>, %arg7 = %7: tensor<16xbf16>) attributes { LayerName = "Conv_428", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "887", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> }, { Name = "886", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[16, 32, 3, 3]> : vector<4xindex> }, { Name = "decoder.decode1.gru.hh.0.weight", UnknownDataFormat = true } ], OutputName = "Conv_428", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "888", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = 
{feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x32x90x160xbf16>, %arg9 = %arg6: tensor<16x32x3x3xbf16>, %arg10 = %arg7: tensor<16xbf16>) attributes { Dilations = array, HWPadding = [[1, 1], [1, 1]], LayerName = "Conv_428", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "887", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> }, { Name = "886", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[16, 32, 3, 3]> : vector<4xindex> }, { Name = "decoder.decode1.gru.hh.0.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_428", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "888", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 3 : ui8, config.ksize.width = 3 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = tosa.transpose %arg9, %463 : (tensor<16x32x3x3xbf16>, 
tensor<4xi32>) -> tensor<16x3x3x32xbf16> loc(#loc293) %465 = tosa.transpose %arg8, %463 : (tensor<1x32x90x160xbf16>, tensor<4xi32>) -> tensor<1x90x160x32xbf16> loc(#loc293) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_428", PartOfOutputName = "Conv_428", dilation = array, pad = array, stride = array} : (tensor<1x90x160x32xbf16>, tensor<16x3x3x32xbf16>, tensor<16xbf16>) -> tensor<1x90x160x16xbf16> loc(#loc293) %467 = tosa.transpose %466, %462 : (tensor<1x90x160x16xbf16>, tensor<4xi32>) -> tensor<1x16x90x160xbf16> loc(#loc293) xten_nn.output %467 : tensor<1x16x90x160xbf16> loc(#loc293) } -> tensor<1x16x90x160xbf16> loc(#loc293) xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc293) } -> tensor<1x16x90x160xbf16> loc(#loc293) %447 = xten_nn.subgraph (%arg5 = %446: tensor<1x16x90x160xbf16>) attributes { LayerName = "Tanh_429", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "888", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Tanh_429", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "889", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "single", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x16x90x160xbf16>) attributes { LayerName = "Tanh_429", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "888", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Tanh_429", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "889", 
Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], Specializes = "TanhTemplatedBf16", Traits = { Elementwise = true, Unary = true }, With = { config.ENABLE_FP16_AS_BF16 = 0 : ui8, config.aie_arch = "aie2p", config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.tanh %arg6 {LayerName = "Tanh_429", OutputName = "Tanh_429"} : (tensor<1x16x90x160xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc294) xten_nn.output %462 : tensor<1x16x90x160xbf16> loc(#loc294) } -> tensor<1x16x90x160xbf16> loc(#loc294) xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc294) } -> tensor<1x16x90x160xbf16> loc(#loc294) %448 = xten_nn.subgraph (%arg5 = %441: tensor<1x16x90x160xbf16>, %arg6 = %447: tensor<1x16x90x160xbf16>) attributes { LayerName = "Mul_433", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "885", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "889", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Mul_433", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "893", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x16x90x160xbf16>, %arg8 = %arg6: tensor<1x16x90x160xbf16>) attributes { LayerName = "Mul_433", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name 
= "885", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "889", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Mul_433", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "893", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], Specializes = "MulBf16", Traits = { Binary = true, Elementwise = true }, With = { config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.mul %arg7, %arg8 { LayerName = "Mul_433", OutputName = "Mul_433", shift = 0 : i8} : (tensor<1x16x90x160xbf16>, tensor<1x16x90x160xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc295) xten_nn.output %462 : tensor<1x16x90x160xbf16> loc(#loc295) } -> tensor<1x16x90x160xbf16> loc(#loc295) xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc295) } -> tensor<1x16x90x160xbf16> loc(#loc295) %449 = xten_nn.subgraph (%arg5 = %443: tensor<1x16x90x160xbf16>, %arg6 = %448: tensor<1x16x90x160xbf16>) attributes { LayerName = "Add_434", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "892", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "891", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Add_434", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = 
"C:8", Name = "894", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x16x90x160xbf16>, %arg8 = %arg6: tensor<1x16x90x160xbf16>) attributes { LayerName = "Add_434", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "892", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "891", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Add_434", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "894", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], Specializes = "AddBf16", Traits = { Binary = true, Elementwise = true }, With = { config.act = 0 : ui8, config.act_type = "LINEAR", config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.add %arg7, %arg8 {LayerName = "Add_434", OutputName = "Add_434"} : (tensor<1x16x90x160xbf16>, tensor<1x16x90x160xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc296) xten_nn.output %462 : tensor<1x16x90x160xbf16> loc(#loc296) } -> tensor<1x16x90x160xbf16> loc(#loc296) xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc296) } -> tensor<1x16x90x160xbf16> loc(#loc296) %450 = xten_nn.subgraph (%arg5 = %435: tensor<1x16x90x160xbf16>, %arg6 = %449: tensor<1x16x90x160xbf16>) attributes { Axis = 1 : i32, LayerName = "Concat_435", Op = "Concat", Operands = [ { 
CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "879", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "878", Port = "data_io.ifm2", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> } ], OutputName = "Concat_435", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "PseudoOp", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "895", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> } ], current_data_format = "NCHW", data_format = "HCWN"} { %461 = tosa.concat %arg5, %arg6 { LayerName = "Concat_435", OutputName = "Concat_435", axis = 1 : i32} : (tensor<1x16x90x160xbf16>, tensor<1x16x90x160xbf16>) -> tensor<1x32x90x160xbf16> loc(#loc297) xten_nn.output %461 : tensor<1x32x90x160xbf16> loc(#loc297) } -> tensor<1x32x90x160xbf16> loc(#loc297) %451 = xten_nn.subgraph (%arg5 = %450: tensor<1x32x90x160xbf16>) attributes { LayerName = "Resize_437", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "895", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> } ], OutputName = "Resize_437", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "900", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 180, 320]> : vector<4xindex> } ], Specializes = "ResizeAdf", With = { 
config.co_trans_mode = 1 : ui32, config.dim_0 = 1 : ui32, config.dim_1 = 32 : ui32, config.dim_2 = 90 : ui32, config.dim_3 = 160 : ui32, config.dtype = "bfloat16", config.mode = 1 : ui32, config.nearest_mode = 0 : ui32, config.output_H = 180 : ui32, config.output_W = 320 : ui32 }} { %461 = xten_nn.resize %arg5 { LayerName = "Resize_437", OutputName = "Resize_437", coordinate_transformation_mode = 1 : i64, mode = 1 : i64, nearest_mode = 0 : i64, scales = array} : (tensor<1x32x90x160xbf16>) -> tensor<1x32x180x320xbf16> loc(#loc298) xten_nn.output %461 : tensor<1x32x180x320xbf16> loc(#loc298) } -> tensor<1x32x180x320xbf16> loc(#loc298) %452 = xten_nn.subgraph (%arg5 = %451: tensor<1x32x180x320xbf16>, %arg6 = %166: tensor<1x3x180x320xbf16>) attributes { Axis = 1 : i32, LayerName = "Concat_438", Op = "Concat", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "900", Port = "data_io.ifm1", l3_extend_end = dense<0> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 32, 180, 320]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "895", Port = "data_io.ifm2", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], OutputName = "Concat_438", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "PseudoOp", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "901", Port = "data_io.ofm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 35, 180, 320]> : vector<4xindex> } ], current_data_format = "NCHW", data_format = "HCWN"} { %461 = tosa.concat %arg5, %arg6 { LayerName = "Concat_438", OutputName = "Concat_438", axis = 1 : i32} : (tensor<1x32x180x320xbf16>, tensor<1x3x180x320xbf16>) -> 
tensor<1x35x180x320xbf16> loc(#loc299) xten_nn.output %461 : tensor<1x35x180x320xbf16> loc(#loc299) } -> tensor<1x35x180x320xbf16> loc(#loc299) %453 = xten_nn.subgraph (%arg5 = %452: tensor<1x35x180x320xbf16>, %arg6 = %6: tensor<16x35x3x3xbf16>, %arg7 = %5: tensor<16xbf16>) attributes { LayerName = "Conv_439", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "901", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 35, 180, 320]> : vector<4xindex> }, { Name = "900", UnknownDataFormat = true, l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[16, 35, 3, 3]> : vector<4xindex> }, { Name = "901", UnknownDataFormat = true } ], OutputName = "Relu_440", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "904", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 180, 320]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x35x180x320xbf16>, %arg9 = %arg6: tensor<16x35x3x3xbf16>, %arg10 = %arg7: tensor<16xbf16>) attributes { Dilations = array, HWPadding = [[1, 1], [1, 1]], LayerName = "Conv_439", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "901", Port = "data_io.ifm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 35, 180, 320]> : vector<4xindex> }, { Name = "900", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[16, 35, 3, 3]> : vector<4xindex> }, { Name = "901", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_440", Reason = "MllibKernel", Results = [ { 
CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "904", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 180, 320]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true, NonNegativeOut = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 1 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 3 : ui8, config.ksize.width = 3 : ui8, config.lrelu_alpha = 0.000000e+00 : bf16, config.lrelu_alpha_kernel = 0.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = tosa.transpose %arg9, %463 : (tensor<16x35x3x3xbf16>, tensor<4xi32>) -> tensor<16x3x3x35xbf16> loc(#loc363) %465 = tosa.transpose %arg8, %463 : (tensor<1x35x180x320xbf16>, tensor<4xi32>) -> tensor<1x180x320x35xbf16> loc(#loc363) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_439", PartOfOutputName = "Conv_439", dilation = array, pad = array, stride = array} : (tensor<1x180x320x35xbf16>, tensor<16x3x3x35xbf16>, tensor<16xbf16>) -> tensor<1x180x320x16xbf16> loc(#loc300) %467 = tosa.clamp %466 { LayerName = "Relu_440", OutputName = "Relu_440", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x180x320x16xbf16>) -> tensor<1x180x320x16xbf16> loc(#loc301) %468 = tosa.transpose %467, %462 : (tensor<1x180x320x16xbf16>, tensor<4xi32>) -> tensor<1x16x180x320xbf16> loc(#loc363) xten_nn.output %468 : tensor<1x16x180x320xbf16> loc(#loc301) 
} -> tensor<1x16x180x320xbf16> loc(#loc363) xten_nn.output %461 : tensor<1x16x180x320xbf16> loc(#loc363) } -> tensor<1x16x180x320xbf16> loc(#loc363) %454 = xten_nn.subgraph (%arg5 = %453: tensor<1x16x180x320xbf16>, %arg6 = %4: tensor<16x16x3x3xbf16>, %arg7 = %3: tensor<16xbf16>) attributes { LayerName = "Conv_441", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "904", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 180, 320]> : vector<4xindex> }, { Name = "1078", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[16, 16, 3, 3]> : vector<4xindex> }, { Name = "1082", UnknownDataFormat = true } ], OutputName = "Relu_442", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "907", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 180, 320]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x16x180x320xbf16>, %arg9 = %arg6: tensor<16x16x3x3xbf16>, %arg10 = %arg7: tensor<16xbf16>) attributes { Dilations = array, HWPadding = [[1, 1], [1, 1]], LayerName = "Conv_441", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "904", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 180, 320]> : vector<4xindex> }, { Name = "1078", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[16, 16, 3, 3]> : vector<4xindex> }, { Name = "1082", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Relu_442", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = 
"HCWN", L3Vectorization = "C:8", Name = "907", Port = "data_io.ofm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 180, 320]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true, NonNegativeOut = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 1 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 3 : ui8, config.ksize.width = 3 : ui8, config.lrelu_alpha = 0.000000e+00 : bf16, config.lrelu_alpha_kernel = 0.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %464 = tosa.transpose %arg9, %463 : (tensor<16x16x3x3xbf16>, tensor<4xi32>) -> tensor<16x3x3x16xbf16> loc(#loc364) %465 = tosa.transpose %arg8, %463 : (tensor<1x16x180x320xbf16>, tensor<4xi32>) -> tensor<1x180x320x16xbf16> loc(#loc364) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_441", PartOfOutputName = "Conv_441", dilation = array, pad = array, stride = array} : (tensor<1x180x320x16xbf16>, tensor<16x3x3x16xbf16>, tensor<16xbf16>) -> tensor<1x180x320x16xbf16> loc(#loc302) %467 = tosa.clamp %466 { LayerName = "Relu_442", OutputName = "Relu_442", max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x180x320x16xbf16>) -> tensor<1x180x320x16xbf16> loc(#loc303) %468 = tosa.transpose %467, %462 : (tensor<1x180x320x16xbf16>, tensor<4xi32>) -> tensor<1x16x180x320xbf16> loc(#loc364) xten_nn.output %468 : tensor<1x16x180x320xbf16> loc(#loc303) } -> tensor<1x16x180x320xbf16> 
loc(#loc364) xten_nn.output %461 : tensor<1x16x180x320xbf16> loc(#loc364) } -> tensor<1x16x180x320xbf16> loc(#loc364) %455 = xten_nn.subgraph (%arg5 = %454: tensor<1x16x180x320xbf16>, %arg6 = %2: tensor<4x16x1x1xbf16>, %arg7 = %1: tensor<4xbf16>) attributes { LayerName = "Conv_443", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "907", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 180, 320]> : vector<4xindex> }, { Name = "1081", UnknownDataFormat = true, l3_extend_end = dense<[4, 0, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[4, 16, 1, 1]> : vector<4xindex> }, { Name = "project_mat.conv.weight", UnknownDataFormat = true } ], OutputName = "Conv_443", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "908", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 4, 180, 320]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x16x180x320xbf16>, %arg9 = %arg6: tensor<4x16x1x1xbf16>, %arg10 = %arg7: tensor<4xbf16>) attributes { Dilations = array, HWPadding = [[0, 0], [0, 0]], LayerName = "Conv_443", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "907", Port = "data_io.ifm", l3_extend_end = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 16, 180, 320]> : vector<4xindex> }, { Name = "1081", Port = "data_io.wts", SubPort = "wts_data", UnknownDataFormat = true, l3_extend_end = dense<[4, 0, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[4, 16, 1, 1]> : vector<4xindex> }, { Name = "project_mat.conv.weight", Port = "data_io.wts", SubPort = "bias", UnknownDataFormat = true } ], OutputName = "Conv_443", Reason = "MllibKernel", Results = [ { 
CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "908", Port = "data_io.ofm", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 4, 180, 320]> : vector<4xindex> } ], Specializes = "Conv2DBf16", Traits = { AllowDMAOptimization = true }, With = { config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, config.act = 0 : ui8, config.act_type = "RELU", config.aie_arch = "aie2p", config.batch_size = 1 : ui8, config.compiler = "chess", config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], config.dtype_ifm = "bfloat16", config.dtype_ofm = "bfloat16", config.dtype_wts = "bfloat16", config.ksize.height = 1 : ui8, config.ksize.width = 1 : ui8, config.lrelu_alpha = 1.000000e+00 : bf16, config.lrelu_alpha_kernel = 1.000000e+00 : bf16, config.stride_h = 1 : ui8, config.stride_w = 1 : ui8 }} { %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc304) %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<4x16x1x1xbf16>) -> tensor<4x1x1x16xbf16> loc(#loc304) %465 = tosa.transpose %arg8, %463 : (tensor<1x16x180x320xbf16>, tensor<4xi32>) -> tensor<1x180x320x16xbf16> loc(#loc304) %466 = tosa.conv2d %465, %464, %arg10 { PartOfLayerName = "Conv_443", PartOfOutputName = "Conv_443", dilation = array, pad = array, stride = array} : (tensor<1x180x320x16xbf16>, tensor<4x1x1x16xbf16>, tensor<4xbf16>) -> tensor<1x180x320x4xbf16> loc(#loc304) %467 = tosa.transpose %466, %462 : (tensor<1x180x320x4xbf16>, tensor<4xi32>) -> tensor<1x4x180x320xbf16> loc(#loc304) xten_nn.output %467 : tensor<1x4x180x320xbf16> loc(#loc304) } -> tensor<1x4x180x320xbf16> loc(#loc304) xten_nn.output %461 : tensor<1x4x180x320xbf16> loc(#loc304) } -> tensor<1x4x180x320xbf16> loc(#loc304) %456 = xten_nn.subgraph (%arg5 = %455: tensor<1x4x180x320xbf16>) attributes { LayerName = "Split_444_Duplicated#0", Operands = 
[ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "908", Port = "data_io.ifm", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 4, 180, 320]> : vector<4xindex> } ], OutputName = "Split_444_Duplicated#0", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "909", Port = "data_io.ofm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], Specializes = "SliceHCWC8Adf", With = { config.aie_arch = "aie2p", config.axis_letter = "C", config.dim_c = 8 : ui32, config.dim_h = 180 : ui32, config.dim_w = 320 : ui32, config.dtype = "bfloat16", config.end = 3 : ui32, config.start = 0 : ui32, config.step = 1 : ui32 }} { %461 = tosa.slice %arg5 { PartOfLayerName = "Split_444", PartOfOutputName = "Split_444", size = array, start = array} : (tensor<1x4x180x320xbf16>) -> tensor<1x3x180x320xbf16> loc(#loc305) xten_nn.output %461 : tensor<1x3x180x320xbf16> loc(#loc305) } -> tensor<1x3x180x320xbf16> loc(#loc305) %457 = xten_nn.subgraph (%arg5 = %456: tensor<1x3x180x320xbf16>, %arg6 = %166: tensor<1x3x180x320xbf16>) attributes { LayerName = "Add_445", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "909", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "908", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], OutputName = "Add_445_Duplicated#1", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", 
L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "911", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x3x180x320xbf16>, %arg8 = %arg6: tensor<1x3x180x320xbf16>) attributes { LayerName = "Add_445", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "909", Port = "data_io.ifm1", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> }, { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "908", Port = "data_io.ifm2", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], OutputName = "Add_445_Duplicated#1", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "911", Port = "data_io.ofm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], Specializes = "AddBf16", Traits = { Binary = true, Elementwise = true }, With = { config.act = 0 : ui8, config.act_type = "LINEAR", config.aie_arch = "aie2p", config.compiler = "chess", config.dtype = "bfloat16", config.num_kernel_iters = 0 : ui16 }} { %462 = tosa.add %arg7, %arg8 {LayerName = "Add_445", OutputName = "Add_445"} : (tensor<1x3x180x320xbf16>, tensor<1x3x180x320xbf16>) -> tensor<1x3x180x320xbf16> loc(#loc11) xten_nn.output %462 : tensor<1x3x180x320xbf16> loc(#loc11) } -> tensor<1x3x180x320xbf16> loc(#loc11) xten_nn.output %461 : tensor<1x3x180x320xbf16> loc(#loc11) } -> tensor<1x3x180x320xbf16> loc(#loc11) %458 = xten_nn.subgraph (%arg5 = %457: tensor<1x3x180x320xbf16>) attributes { LayerName = "Clip_446", Operands = 
[ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "911", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], OutputName = "Clip_446", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "916", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x3x180x320xbf16>) attributes { LayerName = "Clip_446", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "911", Port = "data_io.ifm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], OutputName = "Clip_446", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "916", Port = "data_io.ofm", l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 1.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_446", OutputName = "Clip_446", max_fp = 1.000000e+00 : f32, max_int = 1 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x3x180x320xbf16>) -> tensor<1x3x180x320xbf16> loc(#loc306) xten_nn.output %462 : tensor<1x3x180x320xbf16> loc(#loc306) } -> tensor<1x3x180x320xbf16> loc(#loc306) xten_nn.output %461 
: tensor<1x3x180x320xbf16> loc(#loc306) } -> tensor<1x3x180x320xbf16> loc(#loc306) %459 = xten_nn.subgraph (%arg5 = %455: tensor<1x4x180x320xbf16>) attributes { LayerName = "Split_444_Duplicated#1", Operands = [ { CurrentDataFormat = "NCHW", External = false, L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "908", Port = "data_io.ifm", l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 4, 180, 320]> : vector<4xindex> } ], OutputName = "Split_444_Duplicated#1", Overlay = "1x1_1x1_unspecifiedConnectivity", Reason = "TemplatedGraph", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "909", Port = "data_io.ofm", l3_extend_end = dense<[0, 7, 0, 0]> : vector<4xindex>, l3_extend_start = dense<0> : vector<4xindex>, l3_tile_count = dense<[1, 1, 180, 320]> : vector<4xindex> } ], Specializes = "SliceHCWC8Adf", With = { config.aie_arch = "aie2p", config.axis_letter = "C", config.dim_c = 8 : ui32, config.dim_h = 180 : ui32, config.dim_w = 320 : ui32, config.dtype = "bfloat16", config.end = 4 : ui32, config.start = 3 : ui32, config.step = 1 : ui32 }} { %461 = tosa.slice %arg5 { PartOfLayerName = "Split_444", PartOfOutputName = "Split_444", size = array, start = array} : (tensor<1x4x180x320xbf16>) -> tensor<1x1x180x320xbf16> loc(#loc305) xten_nn.output %461 : tensor<1x1x180x320xbf16> loc(#loc305) } -> tensor<1x1x180x320xbf16> loc(#loc305) %460 = xten_nn.subgraph (%arg5 = %459: tensor<1x1x180x320xbf16>) attributes { LayerName = "Clip_447", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "910", l3_extend_end = dense<[0, 7, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 1, 180, 320]> : vector<4xindex> } ], OutputName = "Clip_447", Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", Reason = "InCoreChain", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", 
Name = "921", l3_extend_end = dense<[0, 7, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 1, 180, 320]> : vector<4xindex> } ], memory_configuration = { L1 = {layout = "strict"}, L2 = {feature_maps_buffering = "double", layout = "flexible"} }} { %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x1x180x320xbf16>) attributes { LayerName = "Clip_447", Operands = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "910", Port = "data_io.ifm", l3_extend_end = dense<[0, 7, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 1, 180, 320]> : vector<4xindex> } ], OutputName = "Clip_447", Reason = "MllibKernel", Results = [ { CurrentDataFormat = "NCHW", L3DataFormat = "HCWN", L3Vectorization = "C:8", Name = "921", Port = "data_io.ofm", l3_extend_end = dense<[0, 7, 0, 0]> : vector<4xindex>, l3_tile_count = dense<[1, 1, 180, 320]> : vector<4xindex> } ], Specializes = "ClipBf16", Traits = { Elementwise = true, NonNegativeOut = true, Unary = true }, With = { config.aie_arch = "aie2p", config.clamp_max = 1.000000e+00 : bf16, config.clamp_min = 0.000000e+00 : bf16, config.compiler = "chess", config.ifm_shift = 0 : si8, config.num_kernel_iters = 0 : ui16, config.ofm_shift = 0 : si8 }} { %462 = tosa.clamp %arg6 { LayerName = "Clip_447", OutputName = "Clip_447", max_fp = 1.000000e+00 : f32, max_int = 1 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x1x180x320xbf16>) -> tensor<1x1x180x320xbf16> loc(#loc307) xten_nn.output %462 : tensor<1x1x180x320xbf16> loc(#loc307) } -> tensor<1x1x180x320xbf16> loc(#loc307) xten_nn.output %461 : tensor<1x1x180x320xbf16> loc(#loc307) } -> tensor<1x1x180x320xbf16> loc(#loc307) return %449, %430, %410, %390, %458, %460 : tensor<1x16x90x160xbf16>, tensor<1x20x45x80xbf16>, tensor<1x40x23x40xbf16>, tensor<1x64x12x20xbf16>, tensor<1x3x180x320xbf16>, tensor<1x1x180x320xbf16> loc(#loc319) } loc(#loc319) func.func @forward(%arg0: tensor<1x180x320x4xui8> {onnx.name = "src"} loc(unknown), %arg1: 
tensor<1x90x160x16xbf16> {onnx.name = "r1i"} loc(unknown), %arg2: tensor<1x45x80x20xbf16> {onnx.name = "r2i"} loc(unknown), %arg3: tensor<1x23x40x40xbf16> {onnx.name = "r3i"} loc(unknown), %arg4: tensor<1x12x20x64xbf16> {onnx.name = "r4i"} loc(unknown)) -> (tensor<1x180x320x3xbf16> {onnx.name = "fgr"}, tensor<1x180x320x1xbf16> {onnx.name = "pha"}, tensor<1x90x160x16xbf16> {onnx.name = "r1o"}, tensor<1x45x80x20xbf16> {onnx.name = "r2o"}, tensor<1x23x40x40xbf16> {onnx.name = "r3o"}, tensor<1x12x20x64xbf16> {onnx.name = "r4o"}) { %0 = xten_nn.subgraph (%arg5 = %arg1: tensor<1x90x160x16xbf16>) attributes {Message = "onnx.Transpose in function edge.", Reason = "CpuBecause"} { %12 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc309) %13 = tosa.transpose %arg5, %12 : (tensor<1x90x160x16xbf16>, tensor<4xi32>) -> tensor<1x16x90x160xbf16> loc(#loc309) xten_nn.output %13 : tensor<1x16x90x160xbf16> loc(#loc309) } -> tensor<1x16x90x160xbf16> loc(#loc309) %1 = xten_nn.subgraph (%arg5 = %arg2: tensor<1x45x80x20xbf16>) attributes {Message = "onnx.Transpose in function edge.", Reason = "CpuBecause"} { %12 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc310) %13 = tosa.transpose %arg5, %12 : (tensor<1x45x80x20xbf16>, tensor<4xi32>) -> tensor<1x20x45x80xbf16> loc(#loc310) xten_nn.output %13 : tensor<1x20x45x80xbf16> loc(#loc310) } -> tensor<1x20x45x80xbf16> loc(#loc310) %2 = xten_nn.subgraph (%arg5 = %arg3: tensor<1x23x40x40xbf16>) attributes {Message = "onnx.Transpose in function edge.", Reason = "CpuBecause"} { %12 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc311) %13 = tosa.transpose %arg5, %12 : (tensor<1x23x40x40xbf16>, tensor<4xi32>) -> tensor<1x40x23x40xbf16> loc(#loc311) xten_nn.output %13 : tensor<1x40x23x40xbf16> loc(#loc311) } -> tensor<1x40x23x40xbf16> loc(#loc311) %3 = tosa.cast %arg0 {LayerName = "Cast_0", OutputName = "Cast_0"} 
: (tensor<1x180x320x4xui8>) -> tensor<1x180x320x4xbf16> loc(#loc308) %4 = xten_nn.subgraph (%arg5 = %arg4: tensor<1x12x20x64xbf16>) attributes {Message = "onnx.Transpose in function edge.", Reason = "CpuBecause"} { %12 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc312) %13 = tosa.transpose %arg5, %12 : (tensor<1x12x20x64xbf16>, tensor<4xi32>) -> tensor<1x64x12x20xbf16> loc(#loc312) xten_nn.output %13 : tensor<1x64x12x20xbf16> loc(#loc312) } -> tensor<1x64x12x20xbf16> loc(#loc312) %5:6 = call @forward_outlined_part_0(%3, %0, %1, %2, %4) : (tensor<1x180x320x4xbf16>, tensor<1x16x90x160xbf16>, tensor<1x20x45x80xbf16>, tensor<1x40x23x40xbf16>, tensor<1x64x12x20xbf16>) -> (tensor<1x16x90x160xbf16>, tensor<1x20x45x80xbf16>, tensor<1x40x23x40xbf16>, tensor<1x64x12x20xbf16>, tensor<1x3x180x320xbf16>, tensor<1x1x180x320xbf16>) loc(#loc319) %6 = xten_nn.subgraph (%arg5 = %5#4: tensor<1x3x180x320xbf16>) attributes {Message = "onnx.Transpose in function edge.", Reason = "CpuBecause"} { %12 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc313) %13 = tosa.transpose %arg5, %12 : (tensor<1x3x180x320xbf16>, tensor<4xi32>) -> tensor<1x180x320x3xbf16> loc(#loc313) xten_nn.output %13 : tensor<1x180x320x3xbf16> loc(#loc313) } -> tensor<1x180x320x3xbf16> loc(#loc313) %7 = xten_nn.subgraph (%arg5 = %5#3: tensor<1x64x12x20xbf16>) attributes {Message = "onnx.Transpose in function edge.", Reason = "CpuBecause"} { %12 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc314) %13 = tosa.transpose %arg5, %12 : (tensor<1x64x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x64xbf16> loc(#loc314) xten_nn.output %13 : tensor<1x12x20x64xbf16> loc(#loc314) } -> tensor<1x12x20x64xbf16> loc(#loc314) %8 = xten_nn.subgraph (%arg5 = %5#2: tensor<1x40x23x40xbf16>) attributes {Message = "onnx.Transpose in function edge.", Reason = "CpuBecause"} { %12 = "tosa.const"() <{value = 
dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc315) %13 = tosa.transpose %arg5, %12 : (tensor<1x40x23x40xbf16>, tensor<4xi32>) -> tensor<1x23x40x40xbf16> loc(#loc315) xten_nn.output %13 : tensor<1x23x40x40xbf16> loc(#loc315) } -> tensor<1x23x40x40xbf16> loc(#loc315) %9 = xten_nn.subgraph (%arg5 = %5#1: tensor<1x20x45x80xbf16>) attributes {Message = "onnx.Transpose in function edge.", Reason = "CpuBecause"} { %12 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc316) %13 = tosa.transpose %arg5, %12 : (tensor<1x20x45x80xbf16>, tensor<4xi32>) -> tensor<1x45x80x20xbf16> loc(#loc316) xten_nn.output %13 : tensor<1x45x80x20xbf16> loc(#loc316) } -> tensor<1x45x80x20xbf16> loc(#loc316) %10 = xten_nn.subgraph (%arg5 = %5#0: tensor<1x16x90x160xbf16>) attributes {Message = "onnx.Transpose in function edge.", Reason = "CpuBecause"} { %12 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc317) %13 = tosa.transpose %arg5, %12 : (tensor<1x16x90x160xbf16>, tensor<4xi32>) -> tensor<1x90x160x16xbf16> loc(#loc317) xten_nn.output %13 : tensor<1x90x160x16xbf16> loc(#loc317) } -> tensor<1x90x160x16xbf16> loc(#loc317) %11 = xten_nn.subgraph (%arg5 = %5#5: tensor<1x1x180x320xbf16>) attributes {Message = "onnx.Transpose in function edge.", Reason = "CpuBecause"} { %12 = tosa.reshape %arg5 {new_shape = array} : (tensor<1x1x180x320xbf16>) -> tensor<1x180x320x1xbf16> loc(#loc318) xten_nn.output %12 : tensor<1x180x320x1xbf16> loc(#loc318) } -> tensor<1x180x320x1xbf16> loc(#loc318) return %6, %11, %10, %9, %8, %7 : tensor<1x180x320x3xbf16>, tensor<1x180x320x1xbf16>, tensor<1x90x160x16xbf16>, tensor<1x45x80x20xbf16>, tensor<1x23x40x40xbf16>, tensor<1x12x20x64xbf16> loc(#loc) } loc(#loc) } loc(#loc) #loc1 = loc("Div_2") #loc2 = loc("Sub_431") #loc3 = loc("Sub_411") #loc4 = loc("Sub_385") #loc5 = loc("Sub_359") #loc6 = loc("Div_16") #loc7 = loc("Sub_14") #loc8 = loc("Initializer_398") #loc9 
= loc("Slice_7") #loc10 = loc("CompilerGeneratedLoc") #loc11 = loc("Add_445") #loc12 = loc("AveragePool_346") #loc13 = loc("AveragePool_347") #loc14 = loc("AveragePool_348") #loc15 = loc("Conv_17") #loc16 = loc("Add_19") #loc17 = loc("Clip_22") #loc18 = loc("Div_24") #loc19 = loc("Mul_25") #loc20 = loc("Conv_26") #loc21 = loc("Relu_27") #loc22 = loc("Conv_28") #loc23 = loc("Add_29") #loc24 = loc("Conv_30") #loc25 = loc("Relu_31") #loc26 = loc("Conv_32") #loc27 = loc("Relu_33") #loc28 = loc("Conv_34") #loc29 = loc("Conv_35") #loc30 = loc("Relu_36") #loc31 = loc("Conv_37") #loc32 = loc("Relu_38") #loc33 = loc("Conv_39") #loc34 = loc("Add_40") #loc35 = loc("Conv_41") #loc36 = loc("Relu_42") #loc37 = loc("Conv_43") #loc38 = loc("Relu_44") #loc39 = loc("GlobalAveragePool_45") #loc40 = loc("Conv_46") #loc41 = loc("Relu_47") #loc42 = loc("Conv_48") #loc43 = loc("Add_50") #loc44 = loc("Clip_53") #loc45 = loc("Div_55") #loc46 = loc("Mul_56") #loc47 = loc("Conv_57") #loc48 = loc("Conv_58") #loc49 = loc("Relu_59") #loc50 = loc("Conv_60") #loc51 = loc("Relu_61") #loc52 = loc("GlobalAveragePool_62") #loc53 = loc("Conv_63") #loc54 = loc("Relu_64") #loc55 = loc("Conv_65") #loc56 = loc("Add_67") #loc57 = loc("Clip_70") #loc58 = loc("Div_72") #loc59 = loc("Mul_73") #loc60 = loc("Conv_74") #loc61 = loc("Add_75") #loc62 = loc("Conv_76") #loc63 = loc("Relu_77") #loc64 = loc("Conv_78") #loc65 = loc("Relu_79") #loc66 = loc("GlobalAveragePool_80") #loc67 = loc("Conv_81") #loc68 = loc("Relu_82") #loc69 = loc("Conv_83") #loc70 = loc("Add_85") #loc71 = loc("Clip_88") #loc72 = loc("Div_90") #loc73 = loc("Mul_91") #loc74 = loc("Conv_92") #loc75 = loc("Add_93") #loc76 = loc("Conv_94") #loc77 = loc("Add_96") #loc78 = loc("Clip_99") #loc79 = loc("Div_101") #loc80 = loc("Mul_102") #loc81 = loc("Conv_103") #loc82 = loc("Add_105") #loc83 = loc("Clip_108") #loc84 = loc("Div_110") #loc85 = loc("Mul_111") #loc86 = loc("Conv_112") #loc87 = loc("Conv_113") #loc88 = loc("Add_115") #loc89 = 
loc("Clip_118") #loc90 = loc("Div_120") #loc91 = loc("Mul_121") #loc92 = loc("Conv_122") #loc93 = loc("Add_124") #loc94 = loc("Clip_127") #loc95 = loc("Div_129") #loc96 = loc("Mul_130") #loc97 = loc("Conv_131") #loc98 = loc("Add_132") #loc99 = loc("Conv_133") #loc100 = loc("Add_135") #loc101 = loc("Clip_138") #loc102 = loc("Div_140") #loc103 = loc("Mul_141") #loc104 = loc("Conv_142") #loc105 = loc("Add_144") #loc106 = loc("Clip_147") #loc107 = loc("Div_149") #loc108 = loc("Mul_150") #loc109 = loc("Conv_151") #loc110 = loc("Add_152") #loc111 = loc("Conv_153") #loc112 = loc("Add_155") #loc113 = loc("Clip_158") #loc114 = loc("Div_160") #loc115 = loc("Mul_161") #loc116 = loc("Conv_162") #loc117 = loc("Add_164") #loc118 = loc("Clip_167") #loc119 = loc("Div_169") #loc120 = loc("Mul_170") #loc121 = loc("Conv_171") #loc122 = loc("Add_172") #loc123 = loc("Conv_173") #loc124 = loc("Add_175") #loc125 = loc("Clip_178") #loc126 = loc("Div_180") #loc127 = loc("Mul_181") #loc128 = loc("Conv_182") #loc129 = loc("Add_184") #loc130 = loc("Clip_187") #loc131 = loc("Div_189") #loc132 = loc("Mul_190") #loc133 = loc("GlobalAveragePool_191") #loc134 = loc("Conv_192") #loc135 = loc("Relu_193") #loc136 = loc("Conv_194") #loc137 = loc("Add_196") #loc138 = loc("Clip_199") #loc139 = loc("Div_201") #loc140 = loc("Mul_202") #loc141 = loc("Conv_203") #loc142 = loc("Conv_204") #loc143 = loc("Add_206") #loc144 = loc("Clip_209") #loc145 = loc("Div_211") #loc146 = loc("Mul_212") #loc147 = loc("Conv_213") #loc148 = loc("Add_215") #loc149 = loc("Clip_218") #loc150 = loc("Div_220") #loc151 = loc("Mul_221") #loc152 = loc("GlobalAveragePool_222") #loc153 = loc("Conv_223") #loc154 = loc("Relu_224") #loc155 = loc("Conv_225") #loc156 = loc("Add_227") #loc157 = loc("Clip_230") #loc158 = loc("Div_232") #loc159 = loc("Mul_233") #loc160 = loc("Conv_234") #loc161 = loc("Add_235") #loc162 = loc("Conv_236") #loc163 = loc("Add_238") #loc164 = loc("Clip_241") #loc165 = loc("Div_243") #loc166 = loc("Mul_244") #loc167 
= loc("Conv_245") #loc168 = loc("Add_247") #loc169 = loc("Clip_250") #loc170 = loc("Div_252") #loc171 = loc("Mul_253") #loc172 = loc("GlobalAveragePool_254") #loc173 = loc("Conv_255") #loc174 = loc("Relu_256") #loc175 = loc("Conv_257") #loc176 = loc("Add_259") #loc177 = loc("Clip_262") #loc178 = loc("Div_264") #loc179 = loc("Mul_265") #loc180 = loc("Conv_266") #loc181 = loc("Conv_267") #loc182 = loc("Add_269") #loc183 = loc("Clip_272") #loc184 = loc("Div_274") #loc185 = loc("Mul_275") #loc186 = loc("Conv_276") #loc187 = loc("Add_278") #loc188 = loc("Clip_281") #loc189 = loc("Div_283") #loc190 = loc("Mul_284") #loc191 = loc("GlobalAveragePool_285") #loc192 = loc("Conv_286") #loc193 = loc("Relu_287") #loc194 = loc("Conv_288") #loc195 = loc("Add_290") #loc196 = loc("Clip_293") #loc197 = loc("Div_295") #loc198 = loc("Mul_296") #loc199 = loc("Conv_297") #loc200 = loc("Add_298") #loc201 = loc("Conv_299") #loc202 = loc("Add_301") #loc203 = loc("Clip_304") #loc204 = loc("Div_306") #loc205 = loc("Mul_307") #loc206 = loc("Conv_308") #loc207 = loc("Add_310") #loc208 = loc("Clip_313") #loc209 = loc("Div_315") #loc210 = loc("Mul_316") #loc211 = loc("GlobalAveragePool_317") #loc212 = loc("Conv_318") #loc213 = loc("Relu_319") #loc214 = loc("Conv_320") #loc215 = loc("Add_322") #loc216 = loc("Clip_325") #loc217 = loc("Div_327") #loc218 = loc("Mul_328") #loc219 = loc("Conv_329") #loc220 = loc("Add_330") #loc221 = loc("Conv_331") #loc222 = loc("Add_333") #loc223 = loc("Clip_336") #loc224 = loc("Div_338") #loc225 = loc("Mul_339") #loc226 = loc("GlobalAveragePool_342") #loc227 = loc("Conv_343") #loc228 = loc("Sigmoid_344") #loc229 = loc("Mul_345") #loc230 = loc("Conv_340") #loc231 = loc("Relu_341") #loc232 = loc("Split_349") #loc233 = loc("Concat_350") #loc234 = loc("Conv_351") #loc235 = loc("Sigmoid_352") #loc236 = loc("Split_353") #loc237 = loc("Mul_360") #loc238 = loc("Mul_354") #loc239 = loc("Concat_355") #loc240 = loc("Conv_356") #loc241 = loc("Tanh_357") #loc242 = loc("Mul_361") 
#loc243 = loc("Add_362") #loc244 = loc("Concat_363") #loc245 = loc("Resize_365") #loc246 = loc("Slice_371") #loc247 = loc("Concat_372") #loc248 = loc("Conv_373") #loc249 = loc("Relu_374") #loc250 = loc("Split_375") #loc251 = loc("Concat_376") #loc252 = loc("Conv_377") #loc253 = loc("Sigmoid_378") #loc254 = loc("Split_379") #loc255 = loc("Mul_386") #loc256 = loc("Mul_380") #loc257 = loc("Concat_381") #loc258 = loc("Conv_382") #loc259 = loc("Tanh_383") #loc260 = loc("Mul_387") #loc261 = loc("Add_388") #loc262 = loc("Concat_389") #loc263 = loc("Resize_391") #loc264 = loc("Slice_397") #loc265 = loc("Concat_398") #loc266 = loc("Conv_399") #loc267 = loc("Relu_400") #loc268 = loc("Split_401") #loc269 = loc("Concat_402") #loc270 = loc("Conv_403") #loc271 = loc("Sigmoid_404") #loc272 = loc("Split_405") #loc273 = loc("Mul_412") #loc274 = loc("Mul_406") #loc275 = loc("Concat_407") #loc276 = loc("Conv_408") #loc277 = loc("Tanh_409") #loc278 = loc("Mul_413") #loc279 = loc("Add_414") #loc280 = loc("Concat_415") #loc281 = loc("Resize_417") #loc282 = loc("Concat_418") #loc283 = loc("Conv_419") #loc284 = loc("Relu_420") #loc285 = loc("Split_421") #loc286 = loc("Concat_422") #loc287 = loc("Conv_423") #loc288 = loc("Sigmoid_424") #loc289 = loc("Split_425") #loc290 = loc("Mul_432") #loc291 = loc("Mul_426") #loc292 = loc("Concat_427") #loc293 = loc("Conv_428") #loc294 = loc("Tanh_429") #loc295 = loc("Mul_433") #loc296 = loc("Add_434") #loc297 = loc("Concat_435") #loc298 = loc("Resize_437") #loc299 = loc("Concat_438") #loc300 = loc("Conv_439") #loc301 = loc("Relu_440") #loc302 = loc("Conv_441") #loc303 = loc("Relu_442") #loc304 = loc("Conv_443") #loc305 = loc("Split_444") #loc306 = loc("Clip_446") #loc307 = loc("Clip_447") #loc313 = loc("Transpose_452") #loc314 = loc("Transpose_451") #loc315 = loc("Transpose_450") #loc316 = loc("Transpose_449") #loc317 = loc("Transpose_448") #loc318 = loc("Transpose_453") #loc319 = loc(fused[#loc1, #loc2, #loc3, #loc4, #loc5, #loc6, #loc7, #loc8, #loc9, 
#loc10, #loc11, #loc12, #loc13, #loc14, #loc15, #loc16, #loc17, #loc18, #loc19, #loc20, #loc21, #loc22, #loc23, #loc24, #loc25, #loc26, #loc27, #loc28, #loc29, #loc30, #loc31, #loc32, #loc33, #loc34, #loc35, #loc36, #loc37, #loc38, #loc39, #loc40, #loc41, #loc42, #loc43, #loc44, #loc45, #loc46, #loc47, #loc48, #loc49, #loc50, #loc51, #loc52, #loc53, #loc54, #loc55, #loc56, #loc57, #loc58, #loc59, #loc60, #loc61, #loc62, #loc63, #loc64, #loc65, #loc66, #loc67, #loc68, #loc69, #loc70, #loc71, #loc72, #loc73, #loc74, #loc75, #loc76, #loc77, #loc78, #loc79, #loc80, #loc81, #loc82, #loc83, #loc84, #loc85, #loc86, #loc87, #loc88, #loc89, #loc90, #loc91, #loc92, #loc93, #loc94, #loc95, #loc96, #loc97, #loc98, #loc99, #loc100, #loc101, #loc102, #loc103, #loc104, #loc105, #loc106, #loc107, #loc108, #loc109, #loc110, #loc111, #loc112, #loc113, #loc114, #loc115, #loc116, #loc117, #loc118, #loc119, #loc120, #loc121, #loc122, #loc123, #loc124, #loc125, #loc126, #loc127, #loc128, #loc129, #loc130, #loc131, #loc132, #loc133, #loc134, #loc135, #loc136, #loc137, #loc138, #loc139, #loc140, #loc141, #loc142, #loc143, #loc144, #loc145, #loc146, #loc147, #loc148, #loc149, #loc150, #loc151, #loc152, #loc153, #loc154, #loc155, #loc156, #loc157, #loc158, #loc159, #loc160, #loc161, #loc162, #loc163, #loc164, #loc165, #loc166, #loc167, #loc168, #loc169, #loc170, #loc171, #loc172, #loc173, #loc174, #loc175, #loc176, #loc177, #loc178, #loc179, #loc180, #loc181, #loc182, #loc183, #loc184, #loc185, #loc186, #loc187, #loc188, #loc189, #loc190, #loc191, #loc192, #loc193, #loc194, #loc195, #loc196, #loc197, #loc198, #loc199, #loc200, #loc201, #loc202, #loc203, #loc204, #loc205, #loc206, #loc207, #loc208, #loc209, #loc210, #loc211, #loc212, #loc213, #loc214, #loc215, #loc216, #loc217, #loc218, #loc219, #loc220, #loc221, #loc222, #loc223, #loc224, #loc225, #loc226, #loc227, #loc228, #loc229, #loc230, #loc231, #loc232, #loc233, #loc234, #loc235, #loc236, #loc237, #loc238, #loc239, #loc240, #loc241, 
#loc242, #loc243, #loc244, #loc245, #loc246, #loc247, #loc248, #loc249, #loc250, #loc251, #loc252, #loc253, #loc254, #loc255, #loc256, #loc257, #loc258, #loc259, #loc260, #loc261, #loc262, #loc263, #loc264, #loc265, #loc266, #loc267, #loc268, #loc269, #loc270, #loc271, #loc272, #loc273, #loc274, #loc275, #loc276, #loc277, #loc278, #loc279, #loc280, #loc281, #loc282, #loc283, #loc284, #loc285, #loc286, #loc287, #loc288, #loc289, #loc290, #loc291, #loc292, #loc293, #loc294, #loc295, #loc296, #loc297, #loc298, #loc299, #loc300, #loc301, #loc302, #loc303, #loc304, #loc305, #loc306, #loc307]) #loc320 = loc(fused[#loc7, #loc8]) #loc321 = loc(fused[#loc11, #loc9, #loc12]) #loc322 = loc(fused[#loc9, #loc12, #loc11]) #loc323 = loc(fused[#loc20, #loc21]) #loc324 = loc(fused[#loc22, #loc23]) #loc325 = loc(fused[#loc24, #loc25]) #loc326 = loc(fused[#loc26, #loc27]) #loc327 = loc(fused[#loc29, #loc30]) #loc328 = loc(fused[#loc31, #loc32]) #loc329 = loc(fused[#loc33, #loc34]) #loc330 = loc(fused[#loc35, #loc36]) #loc331 = loc(fused[#loc37, #loc38]) #loc332 = loc(fused[#loc40, #loc41]) #loc333 = loc(fused[#loc48, #loc49]) #loc334 = loc(fused[#loc50, #loc51]) #loc335 = loc(fused[#loc53, #loc54]) #loc336 = loc(fused[#loc60, #loc61]) #loc337 = loc(fused[#loc62, #loc63]) #loc338 = loc(fused[#loc64, #loc65]) #loc339 = loc(fused[#loc67, #loc68]) #loc340 = loc(fused[#loc74, #loc75]) #loc341 = loc(fused[#loc97, #loc98]) #loc342 = loc(fused[#loc109, #loc110]) #loc343 = loc(fused[#loc121, #loc122]) #loc344 = loc(fused[#loc132, #loc133]) #loc345 = loc(fused[#loc134, #loc135]) #loc346 = loc(fused[#loc151, #loc152]) #loc347 = loc(fused[#loc153, #loc154]) #loc348 = loc(fused[#loc160, #loc161]) #loc349 = loc(fused[#loc171, #loc172]) #loc350 = loc(fused[#loc173, #loc174]) #loc351 = loc(fused[#loc190, #loc191]) #loc352 = loc(fused[#loc192, #loc193]) #loc353 = loc(fused[#loc199, #loc200]) #loc354 = loc(fused[#loc210, #loc211]) #loc355 = loc(fused[#loc212, #loc213]) #loc356 = loc(fused[#loc219, 
#loc220]) #loc357 = loc(fused[#loc225, #loc226]) #loc358 = loc(fused[#loc230, #loc231, #loc229]) #loc359 = loc(fused[#loc230, #loc231]) #loc360 = loc(fused[#loc248, #loc249]) #loc361 = loc(fused[#loc266, #loc267]) #loc362 = loc(fused[#loc283, #loc284]) #loc363 = loc(fused[#loc300, #loc301]) #loc364 = loc(fused[#loc302, #loc303])